cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

dev.c (53274B)


      1/*
      2  FUSE: Filesystem in Userspace
      3  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
      4
      5  This program can be distributed under the terms of the GNU GPL.
      6  See the file COPYING.
      7*/
      8
      9#include "fuse_i.h"
     10
     11#include <linux/init.h>
     12#include <linux/module.h>
     13#include <linux/poll.h>
     14#include <linux/sched/signal.h>
     15#include <linux/uio.h>
     16#include <linux/miscdevice.h>
     17#include <linux/pagemap.h>
     18#include <linux/file.h>
     19#include <linux/slab.h>
     20#include <linux/pipe_fs_i.h>
     21#include <linux/swap.h>
     22#include <linux/splice.h>
     23#include <linux/sched.h>
     24
     25MODULE_ALIAS_MISCDEV(FUSE_MINOR);
     26MODULE_ALIAS("devname:fuse");
     27
      28/* Ordinary requests have even IDs, while interrupt IDs are odd */
     29#define FUSE_INT_REQ_BIT (1ULL << 0)
     30#define FUSE_REQ_ID_STEP (1ULL << 1)
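
        /*
         * Note (added commentary): fuse_get_unique() below advances fiq->reqctr
         * by FUSE_REQ_ID_STEP, so ordinary request IDs are 2, 4, 6, ...  The
         * interrupt message for a request reuses its ID with the low bit set
         * (unique | FUSE_INT_REQ_BIT), and fuse_dev_do_write() masks that bit
         * off again (unique & ~FUSE_INT_REQ_BIT) before looking the request up.
         */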
     31
     32static struct kmem_cache *fuse_req_cachep;
     33
     34static struct fuse_dev *fuse_get_dev(struct file *file)
     35{
     36	/*
      37	 * Lockless access is OK, because file->private_data is set
     38	 * once during mount and is valid until the file is released.
     39	 */
     40	return READ_ONCE(file->private_data);
     41}
     42
     43static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
     44{
     45	INIT_LIST_HEAD(&req->list);
     46	INIT_LIST_HEAD(&req->intr_entry);
     47	init_waitqueue_head(&req->waitq);
     48	refcount_set(&req->count, 1);
     49	__set_bit(FR_PENDING, &req->flags);
     50	req->fm = fm;
     51}
     52
     53static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags)
     54{
     55	struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags);
     56	if (req)
     57		fuse_request_init(fm, req);
     58
     59	return req;
     60}
     61
     62static void fuse_request_free(struct fuse_req *req)
     63{
     64	kmem_cache_free(fuse_req_cachep, req);
     65}
     66
     67static void __fuse_get_request(struct fuse_req *req)
     68{
     69	refcount_inc(&req->count);
     70}
     71
     72/* Must be called with > 1 refcount */
     73static void __fuse_put_request(struct fuse_req *req)
     74{
     75	refcount_dec(&req->count);
     76}
     77
     78void fuse_set_initialized(struct fuse_conn *fc)
     79{
     80	/* Make sure stores before this are seen on another CPU */
     81	smp_wmb();
     82	fc->initialized = 1;
     83}
     84
     85static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
     86{
     87	return !fc->initialized || (for_background && fc->blocked);
     88}
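
        /*
         * Note (added commentary): allocation is throttled in two cases, both
         * visible above: until the INIT reply has arrived (fc->initialized is
         * set by fuse_set_initialized()), and, for background requests only,
         * while fc->blocked is set because fc->num_background has reached
         * fc->max_background (see fuse_request_queue_background() and
         * fuse_request_end()).
         */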
     89
     90static void fuse_drop_waiting(struct fuse_conn *fc)
     91{
     92	/*
      93	 * lockless check of fc->connected is okay, because atomic_dec_and_test()
     94	 * provides a memory barrier matched with the one in fuse_wait_aborted()
     95	 * to ensure no wake-up is missed.
     96	 */
     97	if (atomic_dec_and_test(&fc->num_waiting) &&
     98	    !READ_ONCE(fc->connected)) {
     99		/* wake up aborters */
    100		wake_up_all(&fc->blocked_waitq);
    101	}
    102}
    103
    104static void fuse_put_request(struct fuse_req *req);
    105
    106static struct fuse_req *fuse_get_req(struct fuse_mount *fm, bool for_background)
    107{
    108	struct fuse_conn *fc = fm->fc;
    109	struct fuse_req *req;
    110	int err;
    111	atomic_inc(&fc->num_waiting);
    112
    113	if (fuse_block_alloc(fc, for_background)) {
    114		err = -EINTR;
    115		if (wait_event_killable_exclusive(fc->blocked_waitq,
    116				!fuse_block_alloc(fc, for_background)))
    117			goto out;
    118	}
    119	/* Matches smp_wmb() in fuse_set_initialized() */
    120	smp_rmb();
    121
    122	err = -ENOTCONN;
    123	if (!fc->connected)
    124		goto out;
    125
    126	err = -ECONNREFUSED;
    127	if (fc->conn_error)
    128		goto out;
    129
    130	req = fuse_request_alloc(fm, GFP_KERNEL);
    131	err = -ENOMEM;
    132	if (!req) {
    133		if (for_background)
    134			wake_up(&fc->blocked_waitq);
    135		goto out;
    136	}
    137
    138	req->in.h.uid = from_kuid(fc->user_ns, current_fsuid());
    139	req->in.h.gid = from_kgid(fc->user_ns, current_fsgid());
    140	req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
    141
    142	__set_bit(FR_WAITING, &req->flags);
    143	if (for_background)
    144		__set_bit(FR_BACKGROUND, &req->flags);
    145
    146	if (unlikely(req->in.h.uid == ((uid_t)-1) ||
    147		     req->in.h.gid == ((gid_t)-1))) {
    148		fuse_put_request(req);
    149		return ERR_PTR(-EOVERFLOW);
    150	}
    151	return req;
    152
    153 out:
    154	fuse_drop_waiting(fc);
    155	return ERR_PTR(err);
    156}
    157
    158static void fuse_put_request(struct fuse_req *req)
    159{
    160	struct fuse_conn *fc = req->fm->fc;
    161
    162	if (refcount_dec_and_test(&req->count)) {
    163		if (test_bit(FR_BACKGROUND, &req->flags)) {
    164			/*
    165			 * We get here in the unlikely case that a background
    166			 * request was allocated but not sent
    167			 */
    168			spin_lock(&fc->bg_lock);
    169			if (!fc->blocked)
    170				wake_up(&fc->blocked_waitq);
    171			spin_unlock(&fc->bg_lock);
    172		}
    173
    174		if (test_bit(FR_WAITING, &req->flags)) {
    175			__clear_bit(FR_WAITING, &req->flags);
    176			fuse_drop_waiting(fc);
    177		}
    178
    179		fuse_request_free(req);
    180	}
    181}
    182
    183unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args)
    184{
    185	unsigned nbytes = 0;
    186	unsigned i;
    187
    188	for (i = 0; i < numargs; i++)
    189		nbytes += args[i].size;
    190
    191	return nbytes;
    192}
    193EXPORT_SYMBOL_GPL(fuse_len_args);
    194
    195u64 fuse_get_unique(struct fuse_iqueue *fiq)
    196{
    197	fiq->reqctr += FUSE_REQ_ID_STEP;
    198	return fiq->reqctr;
    199}
    200EXPORT_SYMBOL_GPL(fuse_get_unique);
    201
    202static unsigned int fuse_req_hash(u64 unique)
    203{
    204	return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS);
    205}
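
        /*
         * Note (added commentary): this hash picks one of the fpq->processing[]
         * buckets on which a request is parked between fuse_dev_do_read() and
         * its reply; request_find() later walks the same bucket, keyed by the
         * unique ID taken from the reply header.
         */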
    206
    207/**
    208 * A new request is available, wake fiq->waitq
    209 */
    210static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq)
    211__releases(fiq->lock)
    212{
    213	wake_up(&fiq->waitq);
    214	kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
    215	spin_unlock(&fiq->lock);
    216}
    217
    218const struct fuse_iqueue_ops fuse_dev_fiq_ops = {
    219	.wake_forget_and_unlock		= fuse_dev_wake_and_unlock,
    220	.wake_interrupt_and_unlock	= fuse_dev_wake_and_unlock,
    221	.wake_pending_and_unlock	= fuse_dev_wake_and_unlock,
    222};
    223EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops);
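
        /*
         * Note (added commentary): all three wake callbacks above point at the
         * same helper, which wakes readers of the classic /dev/fuse character
         * device.  A transport that bypasses the character device (virtiofs,
         * for example) is expected to install its own fuse_iqueue_ops so that
         * it is notified instead of fiq->waitq being woken.
         */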
    224
    225static void queue_request_and_unlock(struct fuse_iqueue *fiq,
    226				     struct fuse_req *req)
    227__releases(fiq->lock)
    228{
    229	req->in.h.len = sizeof(struct fuse_in_header) +
    230		fuse_len_args(req->args->in_numargs,
    231			      (struct fuse_arg *) req->args->in_args);
    232	list_add_tail(&req->list, &fiq->pending);
    233	fiq->ops->wake_pending_and_unlock(fiq);
    234}
    235
    236void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
    237		       u64 nodeid, u64 nlookup)
    238{
    239	struct fuse_iqueue *fiq = &fc->iq;
    240
    241	forget->forget_one.nodeid = nodeid;
    242	forget->forget_one.nlookup = nlookup;
    243
    244	spin_lock(&fiq->lock);
    245	if (fiq->connected) {
    246		fiq->forget_list_tail->next = forget;
    247		fiq->forget_list_tail = forget;
    248		fiq->ops->wake_forget_and_unlock(fiq);
    249	} else {
    250		kfree(forget);
    251		spin_unlock(&fiq->lock);
    252	}
    253}
    254
    255static void flush_bg_queue(struct fuse_conn *fc)
    256{
    257	struct fuse_iqueue *fiq = &fc->iq;
    258
    259	while (fc->active_background < fc->max_background &&
    260	       !list_empty(&fc->bg_queue)) {
    261		struct fuse_req *req;
    262
    263		req = list_first_entry(&fc->bg_queue, struct fuse_req, list);
    264		list_del(&req->list);
    265		fc->active_background++;
    266		spin_lock(&fiq->lock);
    267		req->in.h.unique = fuse_get_unique(fiq);
    268		queue_request_and_unlock(fiq, req);
    269	}
    270}
    271
    272/*
    273 * This function is called when a request is finished.  Either a reply
    274 * has arrived or it was aborted (and not yet sent) or some error
    275 * occurred during communication with userspace, or the device file
    276 * was closed.  The requester thread is woken up (if still waiting),
    277 * the 'end' callback is called if given, else the reference to the
    278 * request is released
    279 */
    280void fuse_request_end(struct fuse_req *req)
    281{
    282	struct fuse_mount *fm = req->fm;
    283	struct fuse_conn *fc = fm->fc;
    284	struct fuse_iqueue *fiq = &fc->iq;
    285
    286	if (test_and_set_bit(FR_FINISHED, &req->flags))
    287		goto put_request;
    288
    289	/*
    290	 * test_and_set_bit() implies smp_mb() between bit
    291	 * changing and below FR_INTERRUPTED check. Pairs with
    292	 * smp_mb() from queue_interrupt().
    293	 */
    294	if (test_bit(FR_INTERRUPTED, &req->flags)) {
    295		spin_lock(&fiq->lock);
    296		list_del_init(&req->intr_entry);
    297		spin_unlock(&fiq->lock);
    298	}
    299	WARN_ON(test_bit(FR_PENDING, &req->flags));
    300	WARN_ON(test_bit(FR_SENT, &req->flags));
    301	if (test_bit(FR_BACKGROUND, &req->flags)) {
    302		spin_lock(&fc->bg_lock);
    303		clear_bit(FR_BACKGROUND, &req->flags);
    304		if (fc->num_background == fc->max_background) {
    305			fc->blocked = 0;
    306			wake_up(&fc->blocked_waitq);
    307		} else if (!fc->blocked) {
    308			/*
    309			 * Wake up next waiter, if any.  It's okay to use
    310			 * waitqueue_active(), as we've already synced up
    311			 * fc->blocked with waiters with the wake_up() call
    312			 * above.
    313			 */
    314			if (waitqueue_active(&fc->blocked_waitq))
    315				wake_up(&fc->blocked_waitq);
    316		}
    317
    318		fc->num_background--;
    319		fc->active_background--;
    320		flush_bg_queue(fc);
    321		spin_unlock(&fc->bg_lock);
    322	} else {
    323		/* Wake up waiter sleeping in request_wait_answer() */
    324		wake_up(&req->waitq);
    325	}
    326
    327	if (test_bit(FR_ASYNC, &req->flags))
    328		req->args->end(fm, req->args, req->out.h.error);
    329put_request:
    330	fuse_put_request(req);
    331}
    332EXPORT_SYMBOL_GPL(fuse_request_end);
    333
    334static int queue_interrupt(struct fuse_req *req)
    335{
    336	struct fuse_iqueue *fiq = &req->fm->fc->iq;
    337
    338	spin_lock(&fiq->lock);
     339	/* Check that an interrupt has actually been requested for this req */
    340	if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) {
    341		spin_unlock(&fiq->lock);
    342		return -EINVAL;
    343	}
    344
    345	if (list_empty(&req->intr_entry)) {
    346		list_add_tail(&req->intr_entry, &fiq->interrupts);
    347		/*
    348		 * Pairs with smp_mb() implied by test_and_set_bit()
    349		 * from fuse_request_end().
    350		 */
    351		smp_mb();
    352		if (test_bit(FR_FINISHED, &req->flags)) {
    353			list_del_init(&req->intr_entry);
    354			spin_unlock(&fiq->lock);
    355			return 0;
    356		}
    357		fiq->ops->wake_interrupt_and_unlock(fiq);
    358	} else {
    359		spin_unlock(&fiq->lock);
    360	}
    361	return 0;
    362}
    363
    364static void request_wait_answer(struct fuse_req *req)
    365{
    366	struct fuse_conn *fc = req->fm->fc;
    367	struct fuse_iqueue *fiq = &fc->iq;
    368	int err;
    369
    370	if (!fc->no_interrupt) {
    371		/* Any signal may interrupt this */
    372		err = wait_event_interruptible(req->waitq,
    373					test_bit(FR_FINISHED, &req->flags));
    374		if (!err)
    375			return;
    376
    377		set_bit(FR_INTERRUPTED, &req->flags);
    378		/* matches barrier in fuse_dev_do_read() */
    379		smp_mb__after_atomic();
    380		if (test_bit(FR_SENT, &req->flags))
    381			queue_interrupt(req);
    382	}
    383
    384	if (!test_bit(FR_FORCE, &req->flags)) {
    385		/* Only fatal signals may interrupt this */
    386		err = wait_event_killable(req->waitq,
    387					test_bit(FR_FINISHED, &req->flags));
    388		if (!err)
    389			return;
    390
    391		spin_lock(&fiq->lock);
    392		/* Request is not yet in userspace, bail out */
    393		if (test_bit(FR_PENDING, &req->flags)) {
    394			list_del(&req->list);
    395			spin_unlock(&fiq->lock);
    396			__fuse_put_request(req);
    397			req->out.h.error = -EINTR;
    398			return;
    399		}
    400		spin_unlock(&fiq->lock);
    401	}
    402
    403	/*
    404	 * Either request is already in userspace, or it was forced.
    405	 * Wait it out.
    406	 */
    407	wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags));
    408}
    409
    410static void __fuse_request_send(struct fuse_req *req)
    411{
    412	struct fuse_iqueue *fiq = &req->fm->fc->iq;
    413
    414	BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
    415	spin_lock(&fiq->lock);
    416	if (!fiq->connected) {
    417		spin_unlock(&fiq->lock);
    418		req->out.h.error = -ENOTCONN;
    419	} else {
    420		req->in.h.unique = fuse_get_unique(fiq);
    421		/* acquire extra reference, since request is still needed
    422		   after fuse_request_end() */
    423		__fuse_get_request(req);
    424		queue_request_and_unlock(fiq, req);
    425
    426		request_wait_answer(req);
    427		/* Pairs with smp_wmb() in fuse_request_end() */
    428		smp_rmb();
    429	}
    430}
    431
    432static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
    433{
    434	if (fc->minor < 4 && args->opcode == FUSE_STATFS)
    435		args->out_args[0].size = FUSE_COMPAT_STATFS_SIZE;
    436
    437	if (fc->minor < 9) {
    438		switch (args->opcode) {
    439		case FUSE_LOOKUP:
    440		case FUSE_CREATE:
    441		case FUSE_MKNOD:
    442		case FUSE_MKDIR:
    443		case FUSE_SYMLINK:
    444		case FUSE_LINK:
    445			args->out_args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
    446			break;
    447		case FUSE_GETATTR:
    448		case FUSE_SETATTR:
    449			args->out_args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
    450			break;
    451		}
    452	}
    453	if (fc->minor < 12) {
    454		switch (args->opcode) {
    455		case FUSE_CREATE:
    456			args->in_args[0].size = sizeof(struct fuse_open_in);
    457			break;
    458		case FUSE_MKNOD:
    459			args->in_args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE;
    460			break;
    461		}
    462	}
    463}
    464
    465static void fuse_force_creds(struct fuse_req *req)
    466{
    467	struct fuse_conn *fc = req->fm->fc;
    468
    469	req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
    470	req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
    471	req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
    472}
    473
    474static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args)
    475{
    476	req->in.h.opcode = args->opcode;
    477	req->in.h.nodeid = args->nodeid;
    478	req->args = args;
    479	if (args->end)
    480		__set_bit(FR_ASYNC, &req->flags);
    481}
    482
    483ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args)
    484{
    485	struct fuse_conn *fc = fm->fc;
    486	struct fuse_req *req;
    487	ssize_t ret;
    488
    489	if (args->force) {
    490		atomic_inc(&fc->num_waiting);
    491		req = fuse_request_alloc(fm, GFP_KERNEL | __GFP_NOFAIL);
    492
    493		if (!args->nocreds)
    494			fuse_force_creds(req);
    495
    496		__set_bit(FR_WAITING, &req->flags);
    497		__set_bit(FR_FORCE, &req->flags);
    498	} else {
    499		WARN_ON(args->nocreds);
    500		req = fuse_get_req(fm, false);
    501		if (IS_ERR(req))
    502			return PTR_ERR(req);
    503	}
    504
    505	/* Needs to be done after fuse_get_req() so that fc->minor is valid */
    506	fuse_adjust_compat(fc, args);
    507	fuse_args_to_req(req, args);
    508
    509	if (!args->noreply)
    510		__set_bit(FR_ISREPLY, &req->flags);
    511	__fuse_request_send(req);
    512	ret = req->out.h.error;
    513	if (!ret && args->out_argvar) {
    514		BUG_ON(args->out_numargs == 0);
    515		ret = args->out_args[args->out_numargs - 1].size;
    516	}
    517	fuse_put_request(req);
    518
    519	return ret;
    520}
    521
    522static bool fuse_request_queue_background(struct fuse_req *req)
    523{
    524	struct fuse_mount *fm = req->fm;
    525	struct fuse_conn *fc = fm->fc;
    526	bool queued = false;
    527
    528	WARN_ON(!test_bit(FR_BACKGROUND, &req->flags));
    529	if (!test_bit(FR_WAITING, &req->flags)) {
    530		__set_bit(FR_WAITING, &req->flags);
    531		atomic_inc(&fc->num_waiting);
    532	}
    533	__set_bit(FR_ISREPLY, &req->flags);
    534	spin_lock(&fc->bg_lock);
    535	if (likely(fc->connected)) {
    536		fc->num_background++;
    537		if (fc->num_background == fc->max_background)
    538			fc->blocked = 1;
    539		list_add_tail(&req->list, &fc->bg_queue);
    540		flush_bg_queue(fc);
    541		queued = true;
    542	}
    543	spin_unlock(&fc->bg_lock);
    544
    545	return queued;
    546}
    547
    548int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args,
    549			    gfp_t gfp_flags)
    550{
    551	struct fuse_req *req;
    552
    553	if (args->force) {
    554		WARN_ON(!args->nocreds);
    555		req = fuse_request_alloc(fm, gfp_flags);
    556		if (!req)
    557			return -ENOMEM;
    558		__set_bit(FR_BACKGROUND, &req->flags);
    559	} else {
    560		WARN_ON(args->nocreds);
    561		req = fuse_get_req(fm, true);
    562		if (IS_ERR(req))
    563			return PTR_ERR(req);
    564	}
    565
    566	fuse_args_to_req(req, args);
    567
    568	if (!fuse_request_queue_background(req)) {
    569		fuse_put_request(req);
    570		return -ENOTCONN;
    571	}
    572
    573	return 0;
    574}
    575EXPORT_SYMBOL_GPL(fuse_simple_background);
    576
    577static int fuse_simple_notify_reply(struct fuse_mount *fm,
    578				    struct fuse_args *args, u64 unique)
    579{
    580	struct fuse_req *req;
    581	struct fuse_iqueue *fiq = &fm->fc->iq;
    582	int err = 0;
    583
    584	req = fuse_get_req(fm, false);
    585	if (IS_ERR(req))
    586		return PTR_ERR(req);
    587
    588	__clear_bit(FR_ISREPLY, &req->flags);
    589	req->in.h.unique = unique;
    590
    591	fuse_args_to_req(req, args);
    592
    593	spin_lock(&fiq->lock);
    594	if (fiq->connected) {
    595		queue_request_and_unlock(fiq, req);
    596	} else {
    597		err = -ENODEV;
    598		spin_unlock(&fiq->lock);
    599		fuse_put_request(req);
    600	}
    601
    602	return err;
    603}
    604
    605/*
    606 * Lock the request.  Up to the next unlock_request() there mustn't be
    607 * anything that could cause a page-fault.  If the request was already
     608 * aborted, bail out.
    609 */
    610static int lock_request(struct fuse_req *req)
    611{
    612	int err = 0;
    613	if (req) {
    614		spin_lock(&req->waitq.lock);
    615		if (test_bit(FR_ABORTED, &req->flags))
    616			err = -ENOENT;
    617		else
    618			set_bit(FR_LOCKED, &req->flags);
    619		spin_unlock(&req->waitq.lock);
    620	}
    621	return err;
    622}
    623
    624/*
    625 * Unlock request.  If it was aborted while locked, caller is responsible
    626 * for unlocking and ending the request.
    627 */
    628static int unlock_request(struct fuse_req *req)
    629{
    630	int err = 0;
    631	if (req) {
    632		spin_lock(&req->waitq.lock);
    633		if (test_bit(FR_ABORTED, &req->flags))
    634			err = -ENOENT;
    635		else
    636			clear_bit(FR_LOCKED, &req->flags);
    637		spin_unlock(&req->waitq.lock);
    638	}
    639	return err;
    640}
    641
    642struct fuse_copy_state {
    643	int write;
    644	struct fuse_req *req;
    645	struct iov_iter *iter;
    646	struct pipe_buffer *pipebufs;
    647	struct pipe_buffer *currbuf;
    648	struct pipe_inode_info *pipe;
    649	unsigned long nr_segs;
    650	struct page *pg;
    651	unsigned len;
    652	unsigned offset;
    653	unsigned move_pages:1;
    654};
    655
    656static void fuse_copy_init(struct fuse_copy_state *cs, int write,
    657			   struct iov_iter *iter)
    658{
    659	memset(cs, 0, sizeof(*cs));
    660	cs->write = write;
    661	cs->iter = iter;
    662}
    663
    664/* Unmap and put previous page of userspace buffer */
    665static void fuse_copy_finish(struct fuse_copy_state *cs)
    666{
    667	if (cs->currbuf) {
    668		struct pipe_buffer *buf = cs->currbuf;
    669
    670		if (cs->write)
    671			buf->len = PAGE_SIZE - cs->len;
    672		cs->currbuf = NULL;
    673	} else if (cs->pg) {
    674		if (cs->write) {
    675			flush_dcache_page(cs->pg);
    676			set_page_dirty_lock(cs->pg);
    677		}
    678		put_page(cs->pg);
    679	}
    680	cs->pg = NULL;
    681}
    682
    683/*
     684 * Get another page's worth of userspace buffer, map it to kernel
     685 * address space, and lock the request
    686 */
    687static int fuse_copy_fill(struct fuse_copy_state *cs)
    688{
    689	struct page *page;
    690	int err;
    691
    692	err = unlock_request(cs->req);
    693	if (err)
    694		return err;
    695
    696	fuse_copy_finish(cs);
    697	if (cs->pipebufs) {
    698		struct pipe_buffer *buf = cs->pipebufs;
    699
    700		if (!cs->write) {
    701			err = pipe_buf_confirm(cs->pipe, buf);
    702			if (err)
    703				return err;
    704
    705			BUG_ON(!cs->nr_segs);
    706			cs->currbuf = buf;
    707			cs->pg = buf->page;
    708			cs->offset = buf->offset;
    709			cs->len = buf->len;
    710			cs->pipebufs++;
    711			cs->nr_segs--;
    712		} else {
    713			if (cs->nr_segs >= cs->pipe->max_usage)
    714				return -EIO;
    715
    716			page = alloc_page(GFP_HIGHUSER);
    717			if (!page)
    718				return -ENOMEM;
    719
    720			buf->page = page;
    721			buf->offset = 0;
    722			buf->len = 0;
    723
    724			cs->currbuf = buf;
    725			cs->pg = page;
    726			cs->offset = 0;
    727			cs->len = PAGE_SIZE;
    728			cs->pipebufs++;
    729			cs->nr_segs++;
    730		}
    731	} else {
    732		size_t off;
    733		err = iov_iter_get_pages(cs->iter, &page, PAGE_SIZE, 1, &off);
    734		if (err < 0)
    735			return err;
    736		BUG_ON(!err);
    737		cs->len = err;
    738		cs->offset = off;
    739		cs->pg = page;
    740		iov_iter_advance(cs->iter, err);
    741	}
    742
    743	return lock_request(cs->req);
    744}
    745
    746/* Do as much copy to/from userspace buffer as we can */
    747static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
    748{
    749	unsigned ncpy = min(*size, cs->len);
    750	if (val) {
    751		void *pgaddr = kmap_local_page(cs->pg);
    752		void *buf = pgaddr + cs->offset;
    753
    754		if (cs->write)
    755			memcpy(buf, *val, ncpy);
    756		else
    757			memcpy(*val, buf, ncpy);
    758
    759		kunmap_local(pgaddr);
    760		*val += ncpy;
    761	}
    762	*size -= ncpy;
    763	cs->len -= ncpy;
    764	cs->offset += ncpy;
    765	return ncpy;
    766}
    767
    768static int fuse_check_page(struct page *page)
    769{
    770	if (page_mapcount(page) ||
    771	    page->mapping != NULL ||
    772	    (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
    773	     ~(1 << PG_locked |
    774	       1 << PG_referenced |
    775	       1 << PG_uptodate |
    776	       1 << PG_lru |
    777	       1 << PG_active |
    778	       1 << PG_workingset |
    779	       1 << PG_reclaim |
    780	       1 << PG_waiters))) {
    781		dump_page(page, "fuse: trying to steal weird page");
    782		return 1;
    783	}
    784	return 0;
    785}
    786
    787static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
    788{
    789	int err;
    790	struct page *oldpage = *pagep;
    791	struct page *newpage;
    792	struct pipe_buffer *buf = cs->pipebufs;
    793
    794	get_page(oldpage);
    795	err = unlock_request(cs->req);
    796	if (err)
    797		goto out_put_old;
    798
    799	fuse_copy_finish(cs);
    800
    801	err = pipe_buf_confirm(cs->pipe, buf);
    802	if (err)
    803		goto out_put_old;
    804
    805	BUG_ON(!cs->nr_segs);
    806	cs->currbuf = buf;
    807	cs->len = buf->len;
    808	cs->pipebufs++;
    809	cs->nr_segs--;
    810
    811	if (cs->len != PAGE_SIZE)
    812		goto out_fallback;
    813
    814	if (!pipe_buf_try_steal(cs->pipe, buf))
    815		goto out_fallback;
    816
    817	newpage = buf->page;
    818
    819	if (!PageUptodate(newpage))
    820		SetPageUptodate(newpage);
    821
    822	ClearPageMappedToDisk(newpage);
    823
    824	if (fuse_check_page(newpage) != 0)
    825		goto out_fallback_unlock;
    826
    827	/*
    828	 * This is a new and locked page, it shouldn't be mapped or
    829	 * have any special flags on it
    830	 */
    831	if (WARN_ON(page_mapped(oldpage)))
    832		goto out_fallback_unlock;
    833	if (WARN_ON(page_has_private(oldpage)))
    834		goto out_fallback_unlock;
    835	if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
    836		goto out_fallback_unlock;
    837	if (WARN_ON(PageMlocked(oldpage)))
    838		goto out_fallback_unlock;
    839
    840	replace_page_cache_page(oldpage, newpage);
    841
    842	get_page(newpage);
    843
    844	if (!(buf->flags & PIPE_BUF_FLAG_LRU))
    845		lru_cache_add(newpage);
    846
    847	/*
    848	 * Release while we have extra ref on stolen page.  Otherwise
    849	 * anon_pipe_buf_release() might think the page can be reused.
    850	 */
    851	pipe_buf_release(cs->pipe, buf);
    852
    853	err = 0;
    854	spin_lock(&cs->req->waitq.lock);
    855	if (test_bit(FR_ABORTED, &cs->req->flags))
    856		err = -ENOENT;
    857	else
    858		*pagep = newpage;
    859	spin_unlock(&cs->req->waitq.lock);
    860
    861	if (err) {
    862		unlock_page(newpage);
    863		put_page(newpage);
    864		goto out_put_old;
    865	}
    866
    867	unlock_page(oldpage);
    868	/* Drop ref for ap->pages[] array */
    869	put_page(oldpage);
    870	cs->len = 0;
    871
    872	err = 0;
    873out_put_old:
    874	/* Drop ref obtained in this function */
    875	put_page(oldpage);
    876	return err;
    877
    878out_fallback_unlock:
    879	unlock_page(newpage);
    880out_fallback:
    881	cs->pg = buf->page;
    882	cs->offset = buf->offset;
    883
    884	err = lock_request(cs->req);
    885	if (!err)
    886		err = 1;
    887
    888	goto out_put_old;
    889}
    890
    891static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
    892			 unsigned offset, unsigned count)
    893{
    894	struct pipe_buffer *buf;
    895	int err;
    896
    897	if (cs->nr_segs >= cs->pipe->max_usage)
    898		return -EIO;
    899
    900	get_page(page);
    901	err = unlock_request(cs->req);
    902	if (err) {
    903		put_page(page);
    904		return err;
    905	}
    906
    907	fuse_copy_finish(cs);
    908
    909	buf = cs->pipebufs;
    910	buf->page = page;
    911	buf->offset = offset;
    912	buf->len = count;
    913
    914	cs->pipebufs++;
    915	cs->nr_segs++;
    916	cs->len = 0;
    917
    918	return 0;
    919}
    920
    921/*
    922 * Copy a page in the request to/from the userspace buffer.  Must be
    923 * done atomically
    924 */
    925static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
    926			  unsigned offset, unsigned count, int zeroing)
    927{
    928	int err;
    929	struct page *page = *pagep;
    930
    931	if (page && zeroing && count < PAGE_SIZE)
    932		clear_highpage(page);
    933
    934	while (count) {
    935		if (cs->write && cs->pipebufs && page) {
    936			/*
    937			 * Can't control lifetime of pipe buffers, so always
    938			 * copy user pages.
    939			 */
    940			if (cs->req->args->user_pages) {
    941				err = fuse_copy_fill(cs);
    942				if (err)
    943					return err;
    944			} else {
    945				return fuse_ref_page(cs, page, offset, count);
    946			}
    947		} else if (!cs->len) {
    948			if (cs->move_pages && page &&
    949			    offset == 0 && count == PAGE_SIZE) {
    950				err = fuse_try_move_page(cs, pagep);
    951				if (err <= 0)
    952					return err;
    953			} else {
    954				err = fuse_copy_fill(cs);
    955				if (err)
    956					return err;
    957			}
    958		}
    959		if (page) {
    960			void *mapaddr = kmap_local_page(page);
    961			void *buf = mapaddr + offset;
    962			offset += fuse_copy_do(cs, &buf, &count);
    963			kunmap_local(mapaddr);
    964		} else
    965			offset += fuse_copy_do(cs, NULL, &count);
    966	}
    967	if (page && !cs->write)
    968		flush_dcache_page(page);
    969	return 0;
    970}
    971
    972/* Copy pages in the request to/from userspace buffer */
    973static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
    974			   int zeroing)
    975{
    976	unsigned i;
    977	struct fuse_req *req = cs->req;
    978	struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
    979
    980
    981	for (i = 0; i < ap->num_pages && (nbytes || zeroing); i++) {
    982		int err;
    983		unsigned int offset = ap->descs[i].offset;
    984		unsigned int count = min(nbytes, ap->descs[i].length);
    985
    986		err = fuse_copy_page(cs, &ap->pages[i], offset, count, zeroing);
    987		if (err)
    988			return err;
    989
    990		nbytes -= count;
    991	}
    992	return 0;
    993}
    994
    995/* Copy a single argument in the request to/from userspace buffer */
    996static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
    997{
    998	while (size) {
    999		if (!cs->len) {
   1000			int err = fuse_copy_fill(cs);
   1001			if (err)
   1002				return err;
   1003		}
   1004		fuse_copy_do(cs, &val, &size);
   1005	}
   1006	return 0;
   1007}
   1008
   1009/* Copy request arguments to/from userspace buffer */
   1010static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
   1011			  unsigned argpages, struct fuse_arg *args,
   1012			  int zeroing)
   1013{
   1014	int err = 0;
   1015	unsigned i;
   1016
   1017	for (i = 0; !err && i < numargs; i++)  {
   1018		struct fuse_arg *arg = &args[i];
   1019		if (i == numargs - 1 && argpages)
   1020			err = fuse_copy_pages(cs, arg->size, zeroing);
   1021		else
   1022			err = fuse_copy_one(cs, arg->value, arg->size);
   1023	}
   1024	return err;
   1025}
   1026
   1027static int forget_pending(struct fuse_iqueue *fiq)
   1028{
   1029	return fiq->forget_list_head.next != NULL;
   1030}
   1031
   1032static int request_pending(struct fuse_iqueue *fiq)
   1033{
   1034	return !list_empty(&fiq->pending) || !list_empty(&fiq->interrupts) ||
   1035		forget_pending(fiq);
   1036}
   1037
   1038/*
   1039 * Transfer an interrupt request to userspace
   1040 *
   1041 * Unlike other requests this is assembled on demand, without a need
   1042 * to allocate a separate fuse_req structure.
   1043 *
   1044 * Called with fiq->lock held, releases it
   1045 */
   1046static int fuse_read_interrupt(struct fuse_iqueue *fiq,
   1047			       struct fuse_copy_state *cs,
   1048			       size_t nbytes, struct fuse_req *req)
   1049__releases(fiq->lock)
   1050{
   1051	struct fuse_in_header ih;
   1052	struct fuse_interrupt_in arg;
   1053	unsigned reqsize = sizeof(ih) + sizeof(arg);
   1054	int err;
   1055
   1056	list_del_init(&req->intr_entry);
   1057	memset(&ih, 0, sizeof(ih));
   1058	memset(&arg, 0, sizeof(arg));
   1059	ih.len = reqsize;
   1060	ih.opcode = FUSE_INTERRUPT;
   1061	ih.unique = (req->in.h.unique | FUSE_INT_REQ_BIT);
   1062	arg.unique = req->in.h.unique;
   1063
   1064	spin_unlock(&fiq->lock);
   1065	if (nbytes < reqsize)
   1066		return -EINVAL;
   1067
   1068	err = fuse_copy_one(cs, &ih, sizeof(ih));
   1069	if (!err)
   1070		err = fuse_copy_one(cs, &arg, sizeof(arg));
   1071	fuse_copy_finish(cs);
   1072
   1073	return err ? err : reqsize;
   1074}
   1075
   1076struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
   1077					     unsigned int max,
   1078					     unsigned int *countp)
   1079{
   1080	struct fuse_forget_link *head = fiq->forget_list_head.next;
   1081	struct fuse_forget_link **newhead = &head;
   1082	unsigned count;
   1083
   1084	for (count = 0; *newhead != NULL && count < max; count++)
   1085		newhead = &(*newhead)->next;
   1086
   1087	fiq->forget_list_head.next = *newhead;
   1088	*newhead = NULL;
   1089	if (fiq->forget_list_head.next == NULL)
   1090		fiq->forget_list_tail = &fiq->forget_list_head;
   1091
   1092	if (countp != NULL)
   1093		*countp = count;
   1094
   1095	return head;
   1096}
   1097EXPORT_SYMBOL(fuse_dequeue_forget);
   1098
   1099static int fuse_read_single_forget(struct fuse_iqueue *fiq,
   1100				   struct fuse_copy_state *cs,
   1101				   size_t nbytes)
   1102__releases(fiq->lock)
   1103{
   1104	int err;
   1105	struct fuse_forget_link *forget = fuse_dequeue_forget(fiq, 1, NULL);
   1106	struct fuse_forget_in arg = {
   1107		.nlookup = forget->forget_one.nlookup,
   1108	};
   1109	struct fuse_in_header ih = {
   1110		.opcode = FUSE_FORGET,
   1111		.nodeid = forget->forget_one.nodeid,
   1112		.unique = fuse_get_unique(fiq),
   1113		.len = sizeof(ih) + sizeof(arg),
   1114	};
   1115
   1116	spin_unlock(&fiq->lock);
   1117	kfree(forget);
   1118	if (nbytes < ih.len)
   1119		return -EINVAL;
   1120
   1121	err = fuse_copy_one(cs, &ih, sizeof(ih));
   1122	if (!err)
   1123		err = fuse_copy_one(cs, &arg, sizeof(arg));
   1124	fuse_copy_finish(cs);
   1125
   1126	if (err)
   1127		return err;
   1128
   1129	return ih.len;
   1130}
   1131
   1132static int fuse_read_batch_forget(struct fuse_iqueue *fiq,
   1133				   struct fuse_copy_state *cs, size_t nbytes)
   1134__releases(fiq->lock)
   1135{
   1136	int err;
   1137	unsigned max_forgets;
   1138	unsigned count;
   1139	struct fuse_forget_link *head;
   1140	struct fuse_batch_forget_in arg = { .count = 0 };
   1141	struct fuse_in_header ih = {
   1142		.opcode = FUSE_BATCH_FORGET,
   1143		.unique = fuse_get_unique(fiq),
   1144		.len = sizeof(ih) + sizeof(arg),
   1145	};
   1146
   1147	if (nbytes < ih.len) {
   1148		spin_unlock(&fiq->lock);
   1149		return -EINVAL;
   1150	}
   1151
   1152	max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
   1153	head = fuse_dequeue_forget(fiq, max_forgets, &count);
   1154	spin_unlock(&fiq->lock);
   1155
   1156	arg.count = count;
   1157	ih.len += count * sizeof(struct fuse_forget_one);
   1158	err = fuse_copy_one(cs, &ih, sizeof(ih));
   1159	if (!err)
   1160		err = fuse_copy_one(cs, &arg, sizeof(arg));
   1161
   1162	while (head) {
   1163		struct fuse_forget_link *forget = head;
   1164
   1165		if (!err) {
   1166			err = fuse_copy_one(cs, &forget->forget_one,
   1167					    sizeof(forget->forget_one));
   1168		}
   1169		head = forget->next;
   1170		kfree(forget);
   1171	}
   1172
   1173	fuse_copy_finish(cs);
   1174
   1175	if (err)
   1176		return err;
   1177
   1178	return ih.len;
   1179}
   1180
   1181static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq,
   1182			    struct fuse_copy_state *cs,
   1183			    size_t nbytes)
   1184__releases(fiq->lock)
   1185{
   1186	if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL)
   1187		return fuse_read_single_forget(fiq, cs, nbytes);
   1188	else
   1189		return fuse_read_batch_forget(fiq, cs, nbytes);
   1190}
   1191
   1192/*
   1193 * Read a single request into the userspace filesystem's buffer.  This
   1194 * function waits until a request is available, then removes it from
   1195 * the pending list and copies request data to userspace buffer.  If
   1196 * no reply is needed (FORGET) or request has been aborted or there
   1197 * was an error during the copying then it's finished by calling
   1198 * fuse_request_end().  Otherwise add it to the processing list, and set
   1199 * the 'sent' flag.
   1200 */
   1201static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
   1202				struct fuse_copy_state *cs, size_t nbytes)
   1203{
   1204	ssize_t err;
   1205	struct fuse_conn *fc = fud->fc;
   1206	struct fuse_iqueue *fiq = &fc->iq;
   1207	struct fuse_pqueue *fpq = &fud->pq;
   1208	struct fuse_req *req;
   1209	struct fuse_args *args;
   1210	unsigned reqsize;
   1211	unsigned int hash;
   1212
   1213	/*
   1214	 * Require sane minimum read buffer - that has capacity for fixed part
   1215	 * of any request header + negotiated max_write room for data.
   1216	 *
   1217	 * Historically libfuse reserves 4K for fixed header room, but e.g.
   1218	 * GlusterFS reserves only 80 bytes
   1219	 *
   1220	 *	= `sizeof(fuse_in_header) + sizeof(fuse_write_in)`
   1221	 *
   1222	 * which is the absolute minimum any sane filesystem should be using
   1223	 * for header room.
   1224	 */
   1225	if (nbytes < max_t(size_t, FUSE_MIN_READ_BUFFER,
   1226			   sizeof(struct fuse_in_header) +
   1227			   sizeof(struct fuse_write_in) +
   1228			   fc->max_write))
   1229		return -EINVAL;
   1230
   1231 restart:
   1232	for (;;) {
   1233		spin_lock(&fiq->lock);
   1234		if (!fiq->connected || request_pending(fiq))
   1235			break;
   1236		spin_unlock(&fiq->lock);
   1237
   1238		if (file->f_flags & O_NONBLOCK)
   1239			return -EAGAIN;
   1240		err = wait_event_interruptible_exclusive(fiq->waitq,
   1241				!fiq->connected || request_pending(fiq));
   1242		if (err)
   1243			return err;
   1244	}
   1245
   1246	if (!fiq->connected) {
   1247		err = fc->aborted ? -ECONNABORTED : -ENODEV;
   1248		goto err_unlock;
   1249	}
   1250
   1251	if (!list_empty(&fiq->interrupts)) {
   1252		req = list_entry(fiq->interrupts.next, struct fuse_req,
   1253				 intr_entry);
   1254		return fuse_read_interrupt(fiq, cs, nbytes, req);
   1255	}
   1256
   1257	if (forget_pending(fiq)) {
   1258		if (list_empty(&fiq->pending) || fiq->forget_batch-- > 0)
   1259			return fuse_read_forget(fc, fiq, cs, nbytes);
   1260
   1261		if (fiq->forget_batch <= -8)
   1262			fiq->forget_batch = 16;
   1263	}
   1264
   1265	req = list_entry(fiq->pending.next, struct fuse_req, list);
   1266	clear_bit(FR_PENDING, &req->flags);
   1267	list_del_init(&req->list);
   1268	spin_unlock(&fiq->lock);
   1269
   1270	args = req->args;
   1271	reqsize = req->in.h.len;
   1272
   1273	/* If request is too large, reply with an error and restart the read */
   1274	if (nbytes < reqsize) {
   1275		req->out.h.error = -EIO;
    1276		/* SETXATTR is special, since its data may legitimately be too large */
   1277		if (args->opcode == FUSE_SETXATTR)
   1278			req->out.h.error = -E2BIG;
   1279		fuse_request_end(req);
   1280		goto restart;
   1281	}
   1282	spin_lock(&fpq->lock);
   1283	/*
    1284	 * Must not put the request on the fpq->io queue after fpq has been shut
    1285	 * down by fuse_abort_conn()
   1286	 */
   1287	if (!fpq->connected) {
   1288		req->out.h.error = err = -ECONNABORTED;
   1289		goto out_end;
   1290
   1291	}
   1292	list_add(&req->list, &fpq->io);
   1293	spin_unlock(&fpq->lock);
   1294	cs->req = req;
   1295	err = fuse_copy_one(cs, &req->in.h, sizeof(req->in.h));
   1296	if (!err)
   1297		err = fuse_copy_args(cs, args->in_numargs, args->in_pages,
   1298				     (struct fuse_arg *) args->in_args, 0);
   1299	fuse_copy_finish(cs);
   1300	spin_lock(&fpq->lock);
   1301	clear_bit(FR_LOCKED, &req->flags);
   1302	if (!fpq->connected) {
   1303		err = fc->aborted ? -ECONNABORTED : -ENODEV;
   1304		goto out_end;
   1305	}
   1306	if (err) {
   1307		req->out.h.error = -EIO;
   1308		goto out_end;
   1309	}
   1310	if (!test_bit(FR_ISREPLY, &req->flags)) {
   1311		err = reqsize;
   1312		goto out_end;
   1313	}
   1314	hash = fuse_req_hash(req->in.h.unique);
   1315	list_move_tail(&req->list, &fpq->processing[hash]);
   1316	__fuse_get_request(req);
   1317	set_bit(FR_SENT, &req->flags);
   1318	spin_unlock(&fpq->lock);
   1319	/* matches barrier in request_wait_answer() */
   1320	smp_mb__after_atomic();
   1321	if (test_bit(FR_INTERRUPTED, &req->flags))
   1322		queue_interrupt(req);
   1323	fuse_put_request(req);
   1324
   1325	return reqsize;
   1326
   1327out_end:
   1328	if (!test_bit(FR_PRIVATE, &req->flags))
   1329		list_del_init(&req->list);
   1330	spin_unlock(&fpq->lock);
   1331	fuse_request_end(req);
   1332	return err;
   1333
   1334 err_unlock:
   1335	spin_unlock(&fiq->lock);
   1336	return err;
   1337}
   1338
   1339static int fuse_dev_open(struct inode *inode, struct file *file)
   1340{
   1341	/*
   1342	 * The fuse device's file's private_data is used to hold
   1343	 * the fuse_conn(ection) when it is mounted, and is used to
   1344	 * keep track of whether the file has been mounted already.
   1345	 */
   1346	file->private_data = NULL;
   1347	return 0;
   1348}
   1349
   1350static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
   1351{
   1352	struct fuse_copy_state cs;
   1353	struct file *file = iocb->ki_filp;
   1354	struct fuse_dev *fud = fuse_get_dev(file);
   1355
   1356	if (!fud)
   1357		return -EPERM;
   1358
   1359	if (!iter_is_iovec(to))
   1360		return -EINVAL;
   1361
   1362	fuse_copy_init(&cs, 1, to);
   1363
   1364	return fuse_dev_do_read(fud, file, &cs, iov_iter_count(to));
   1365}
   1366
   1367static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
   1368				    struct pipe_inode_info *pipe,
   1369				    size_t len, unsigned int flags)
   1370{
   1371	int total, ret;
   1372	int page_nr = 0;
   1373	struct pipe_buffer *bufs;
   1374	struct fuse_copy_state cs;
   1375	struct fuse_dev *fud = fuse_get_dev(in);
   1376
   1377	if (!fud)
   1378		return -EPERM;
   1379
   1380	bufs = kvmalloc_array(pipe->max_usage, sizeof(struct pipe_buffer),
   1381			      GFP_KERNEL);
   1382	if (!bufs)
   1383		return -ENOMEM;
   1384
   1385	fuse_copy_init(&cs, 1, NULL);
   1386	cs.pipebufs = bufs;
   1387	cs.pipe = pipe;
   1388	ret = fuse_dev_do_read(fud, in, &cs, len);
   1389	if (ret < 0)
   1390		goto out;
   1391
   1392	if (pipe_occupancy(pipe->head, pipe->tail) + cs.nr_segs > pipe->max_usage) {
   1393		ret = -EIO;
   1394		goto out;
   1395	}
   1396
   1397	for (ret = total = 0; page_nr < cs.nr_segs; total += ret) {
   1398		/*
   1399		 * Need to be careful about this.  Having buf->ops in module
   1400		 * code can Oops if the buffer persists after module unload.
   1401		 */
   1402		bufs[page_nr].ops = &nosteal_pipe_buf_ops;
   1403		bufs[page_nr].flags = 0;
   1404		ret = add_to_pipe(pipe, &bufs[page_nr++]);
   1405		if (unlikely(ret < 0))
   1406			break;
   1407	}
   1408	if (total)
   1409		ret = total;
   1410out:
   1411	for (; page_nr < cs.nr_segs; page_nr++)
   1412		put_page(bufs[page_nr].page);
   1413
   1414	kvfree(bufs);
   1415	return ret;
   1416}
   1417
   1418static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
   1419			    struct fuse_copy_state *cs)
   1420{
   1421	struct fuse_notify_poll_wakeup_out outarg;
   1422	int err = -EINVAL;
   1423
   1424	if (size != sizeof(outarg))
   1425		goto err;
   1426
   1427	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
   1428	if (err)
   1429		goto err;
   1430
   1431	fuse_copy_finish(cs);
   1432	return fuse_notify_poll_wakeup(fc, &outarg);
   1433
   1434err:
   1435	fuse_copy_finish(cs);
   1436	return err;
   1437}
   1438
   1439static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
   1440				   struct fuse_copy_state *cs)
   1441{
   1442	struct fuse_notify_inval_inode_out outarg;
   1443	int err = -EINVAL;
   1444
   1445	if (size != sizeof(outarg))
   1446		goto err;
   1447
   1448	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
   1449	if (err)
   1450		goto err;
   1451	fuse_copy_finish(cs);
   1452
   1453	down_read(&fc->killsb);
   1454	err = fuse_reverse_inval_inode(fc, outarg.ino,
   1455				       outarg.off, outarg.len);
   1456	up_read(&fc->killsb);
   1457	return err;
   1458
   1459err:
   1460	fuse_copy_finish(cs);
   1461	return err;
   1462}
   1463
   1464static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
   1465				   struct fuse_copy_state *cs)
   1466{
   1467	struct fuse_notify_inval_entry_out outarg;
   1468	int err = -ENOMEM;
   1469	char *buf;
   1470	struct qstr name;
   1471
   1472	buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
   1473	if (!buf)
   1474		goto err;
   1475
   1476	err = -EINVAL;
   1477	if (size < sizeof(outarg))
   1478		goto err;
   1479
   1480	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
   1481	if (err)
   1482		goto err;
   1483
   1484	err = -ENAMETOOLONG;
   1485	if (outarg.namelen > FUSE_NAME_MAX)
   1486		goto err;
   1487
   1488	err = -EINVAL;
   1489	if (size != sizeof(outarg) + outarg.namelen + 1)
   1490		goto err;
   1491
   1492	name.name = buf;
   1493	name.len = outarg.namelen;
   1494	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
   1495	if (err)
   1496		goto err;
   1497	fuse_copy_finish(cs);
   1498	buf[outarg.namelen] = 0;
   1499
   1500	down_read(&fc->killsb);
   1501	err = fuse_reverse_inval_entry(fc, outarg.parent, 0, &name);
   1502	up_read(&fc->killsb);
   1503	kfree(buf);
   1504	return err;
   1505
   1506err:
   1507	kfree(buf);
   1508	fuse_copy_finish(cs);
   1509	return err;
   1510}
   1511
   1512static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
   1513			      struct fuse_copy_state *cs)
   1514{
   1515	struct fuse_notify_delete_out outarg;
   1516	int err = -ENOMEM;
   1517	char *buf;
   1518	struct qstr name;
   1519
   1520	buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
   1521	if (!buf)
   1522		goto err;
   1523
   1524	err = -EINVAL;
   1525	if (size < sizeof(outarg))
   1526		goto err;
   1527
   1528	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
   1529	if (err)
   1530		goto err;
   1531
   1532	err = -ENAMETOOLONG;
   1533	if (outarg.namelen > FUSE_NAME_MAX)
   1534		goto err;
   1535
   1536	err = -EINVAL;
   1537	if (size != sizeof(outarg) + outarg.namelen + 1)
   1538		goto err;
   1539
   1540	name.name = buf;
   1541	name.len = outarg.namelen;
   1542	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
   1543	if (err)
   1544		goto err;
   1545	fuse_copy_finish(cs);
   1546	buf[outarg.namelen] = 0;
   1547
   1548	down_read(&fc->killsb);
   1549	err = fuse_reverse_inval_entry(fc, outarg.parent, outarg.child, &name);
   1550	up_read(&fc->killsb);
   1551	kfree(buf);
   1552	return err;
   1553
   1554err:
   1555	kfree(buf);
   1556	fuse_copy_finish(cs);
   1557	return err;
   1558}
   1559
   1560static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
   1561			     struct fuse_copy_state *cs)
   1562{
   1563	struct fuse_notify_store_out outarg;
   1564	struct inode *inode;
   1565	struct address_space *mapping;
   1566	u64 nodeid;
   1567	int err;
   1568	pgoff_t index;
   1569	unsigned int offset;
   1570	unsigned int num;
   1571	loff_t file_size;
   1572	loff_t end;
   1573
   1574	err = -EINVAL;
   1575	if (size < sizeof(outarg))
   1576		goto out_finish;
   1577
   1578	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
   1579	if (err)
   1580		goto out_finish;
   1581
   1582	err = -EINVAL;
   1583	if (size - sizeof(outarg) != outarg.size)
   1584		goto out_finish;
   1585
   1586	nodeid = outarg.nodeid;
   1587
   1588	down_read(&fc->killsb);
   1589
   1590	err = -ENOENT;
   1591	inode = fuse_ilookup(fc, nodeid,  NULL);
   1592	if (!inode)
   1593		goto out_up_killsb;
   1594
   1595	mapping = inode->i_mapping;
   1596	index = outarg.offset >> PAGE_SHIFT;
   1597	offset = outarg.offset & ~PAGE_MASK;
   1598	file_size = i_size_read(inode);
   1599	end = outarg.offset + outarg.size;
   1600	if (end > file_size) {
   1601		file_size = end;
   1602		fuse_write_update_attr(inode, file_size, outarg.size);
   1603	}
   1604
   1605	num = outarg.size;
   1606	while (num) {
   1607		struct page *page;
   1608		unsigned int this_num;
   1609
   1610		err = -ENOMEM;
   1611		page = find_or_create_page(mapping, index,
   1612					   mapping_gfp_mask(mapping));
   1613		if (!page)
   1614			goto out_iput;
   1615
   1616		this_num = min_t(unsigned, num, PAGE_SIZE - offset);
   1617		err = fuse_copy_page(cs, &page, offset, this_num, 0);
   1618		if (!err && offset == 0 &&
   1619		    (this_num == PAGE_SIZE || file_size == end))
   1620			SetPageUptodate(page);
   1621		unlock_page(page);
   1622		put_page(page);
   1623
   1624		if (err)
   1625			goto out_iput;
   1626
   1627		num -= this_num;
   1628		offset = 0;
   1629		index++;
   1630	}
   1631
   1632	err = 0;
   1633
   1634out_iput:
   1635	iput(inode);
   1636out_up_killsb:
   1637	up_read(&fc->killsb);
   1638out_finish:
   1639	fuse_copy_finish(cs);
   1640	return err;
   1641}
   1642
   1643struct fuse_retrieve_args {
   1644	struct fuse_args_pages ap;
   1645	struct fuse_notify_retrieve_in inarg;
   1646};
   1647
   1648static void fuse_retrieve_end(struct fuse_mount *fm, struct fuse_args *args,
   1649			      int error)
   1650{
   1651	struct fuse_retrieve_args *ra =
   1652		container_of(args, typeof(*ra), ap.args);
   1653
   1654	release_pages(ra->ap.pages, ra->ap.num_pages);
   1655	kfree(ra);
   1656}
   1657
   1658static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
   1659			 struct fuse_notify_retrieve_out *outarg)
   1660{
   1661	int err;
   1662	struct address_space *mapping = inode->i_mapping;
   1663	pgoff_t index;
   1664	loff_t file_size;
   1665	unsigned int num;
   1666	unsigned int offset;
   1667	size_t total_len = 0;
   1668	unsigned int num_pages;
   1669	struct fuse_conn *fc = fm->fc;
   1670	struct fuse_retrieve_args *ra;
   1671	size_t args_size = sizeof(*ra);
   1672	struct fuse_args_pages *ap;
   1673	struct fuse_args *args;
   1674
   1675	offset = outarg->offset & ~PAGE_MASK;
   1676	file_size = i_size_read(inode);
   1677
   1678	num = min(outarg->size, fc->max_write);
   1679	if (outarg->offset > file_size)
   1680		num = 0;
   1681	else if (outarg->offset + num > file_size)
   1682		num = file_size - outarg->offset;
   1683
   1684	num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
   1685	num_pages = min(num_pages, fc->max_pages);
   1686
   1687	args_size += num_pages * (sizeof(ap->pages[0]) + sizeof(ap->descs[0]));
   1688
   1689	ra = kzalloc(args_size, GFP_KERNEL);
   1690	if (!ra)
   1691		return -ENOMEM;
   1692
   1693	ap = &ra->ap;
   1694	ap->pages = (void *) (ra + 1);
   1695	ap->descs = (void *) (ap->pages + num_pages);
   1696
   1697	args = &ap->args;
   1698	args->nodeid = outarg->nodeid;
   1699	args->opcode = FUSE_NOTIFY_REPLY;
   1700	args->in_numargs = 2;
   1701	args->in_pages = true;
   1702	args->end = fuse_retrieve_end;
   1703
   1704	index = outarg->offset >> PAGE_SHIFT;
   1705
   1706	while (num && ap->num_pages < num_pages) {
   1707		struct page *page;
   1708		unsigned int this_num;
   1709
   1710		page = find_get_page(mapping, index);
   1711		if (!page)
   1712			break;
   1713
   1714		this_num = min_t(unsigned, num, PAGE_SIZE - offset);
   1715		ap->pages[ap->num_pages] = page;
   1716		ap->descs[ap->num_pages].offset = offset;
   1717		ap->descs[ap->num_pages].length = this_num;
   1718		ap->num_pages++;
   1719
   1720		offset = 0;
   1721		num -= this_num;
   1722		total_len += this_num;
   1723		index++;
   1724	}
   1725	ra->inarg.offset = outarg->offset;
   1726	ra->inarg.size = total_len;
   1727	args->in_args[0].size = sizeof(ra->inarg);
   1728	args->in_args[0].value = &ra->inarg;
   1729	args->in_args[1].size = total_len;
   1730
   1731	err = fuse_simple_notify_reply(fm, args, outarg->notify_unique);
   1732	if (err)
   1733		fuse_retrieve_end(fm, args, err);
   1734
   1735	return err;
   1736}
   1737
   1738static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
   1739				struct fuse_copy_state *cs)
   1740{
   1741	struct fuse_notify_retrieve_out outarg;
   1742	struct fuse_mount *fm;
   1743	struct inode *inode;
   1744	u64 nodeid;
   1745	int err;
   1746
   1747	err = -EINVAL;
   1748	if (size != sizeof(outarg))
   1749		goto copy_finish;
   1750
   1751	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
   1752	if (err)
   1753		goto copy_finish;
   1754
   1755	fuse_copy_finish(cs);
   1756
   1757	down_read(&fc->killsb);
   1758	err = -ENOENT;
   1759	nodeid = outarg.nodeid;
   1760
   1761	inode = fuse_ilookup(fc, nodeid, &fm);
   1762	if (inode) {
   1763		err = fuse_retrieve(fm, inode, &outarg);
   1764		iput(inode);
   1765	}
   1766	up_read(&fc->killsb);
   1767
   1768	return err;
   1769
   1770copy_finish:
   1771	fuse_copy_finish(cs);
   1772	return err;
   1773}
   1774
   1775static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
   1776		       unsigned int size, struct fuse_copy_state *cs)
   1777{
   1778	/* Don't try to move pages (yet) */
   1779	cs->move_pages = 0;
   1780
   1781	switch (code) {
   1782	case FUSE_NOTIFY_POLL:
   1783		return fuse_notify_poll(fc, size, cs);
   1784
   1785	case FUSE_NOTIFY_INVAL_INODE:
   1786		return fuse_notify_inval_inode(fc, size, cs);
   1787
   1788	case FUSE_NOTIFY_INVAL_ENTRY:
   1789		return fuse_notify_inval_entry(fc, size, cs);
   1790
   1791	case FUSE_NOTIFY_STORE:
   1792		return fuse_notify_store(fc, size, cs);
   1793
   1794	case FUSE_NOTIFY_RETRIEVE:
   1795		return fuse_notify_retrieve(fc, size, cs);
   1796
   1797	case FUSE_NOTIFY_DELETE:
   1798		return fuse_notify_delete(fc, size, cs);
   1799
   1800	default:
   1801		fuse_copy_finish(cs);
   1802		return -EINVAL;
   1803	}
   1804}
   1805
   1806/* Look up request on processing list by unique ID */
   1807static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique)
   1808{
   1809	unsigned int hash = fuse_req_hash(unique);
   1810	struct fuse_req *req;
   1811
   1812	list_for_each_entry(req, &fpq->processing[hash], list) {
   1813		if (req->in.h.unique == unique)
   1814			return req;
   1815	}
   1816	return NULL;
   1817}
   1818
   1819static int copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args,
   1820			 unsigned nbytes)
   1821{
   1822	unsigned reqsize = sizeof(struct fuse_out_header);
   1823
   1824	reqsize += fuse_len_args(args->out_numargs, args->out_args);
   1825
   1826	if (reqsize < nbytes || (reqsize > nbytes && !args->out_argvar))
   1827		return -EINVAL;
   1828	else if (reqsize > nbytes) {
   1829		struct fuse_arg *lastarg = &args->out_args[args->out_numargs-1];
   1830		unsigned diffsize = reqsize - nbytes;
   1831
   1832		if (diffsize > lastarg->size)
   1833			return -EINVAL;
   1834		lastarg->size -= diffsize;
   1835	}
   1836	return fuse_copy_args(cs, args->out_numargs, args->out_pages,
   1837			      args->out_args, args->page_zeroing);
   1838}
   1839
   1840/*
   1841 * Write a single reply to a request.  First the header is copied from
   1842 * the write buffer.  The request is then searched on the processing
   1843 * list by the unique ID found in the header.  If found, then remove
   1844 * it from the list and copy the rest of the buffer to the request.
   1845 * The request is finished by calling fuse_request_end().
   1846 */
   1847static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
   1848				 struct fuse_copy_state *cs, size_t nbytes)
   1849{
   1850	int err;
   1851	struct fuse_conn *fc = fud->fc;
   1852	struct fuse_pqueue *fpq = &fud->pq;
   1853	struct fuse_req *req;
   1854	struct fuse_out_header oh;
   1855
   1856	err = -EINVAL;
   1857	if (nbytes < sizeof(struct fuse_out_header))
   1858		goto out;
   1859
   1860	err = fuse_copy_one(cs, &oh, sizeof(oh));
   1861	if (err)
   1862		goto copy_finish;
   1863
   1864	err = -EINVAL;
   1865	if (oh.len != nbytes)
   1866		goto copy_finish;
   1867
   1868	/*
    1869	 * A zero oh.unique indicates an unsolicited notification message,
    1870	 * and the error field carries the notification code.
   1871	 */
   1872	if (!oh.unique) {
   1873		err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
   1874		goto out;
   1875	}
   1876
   1877	err = -EINVAL;
   1878	if (oh.error <= -512 || oh.error > 0)
   1879		goto copy_finish;
   1880
   1881	spin_lock(&fpq->lock);
   1882	req = NULL;
   1883	if (fpq->connected)
   1884		req = request_find(fpq, oh.unique & ~FUSE_INT_REQ_BIT);
   1885
   1886	err = -ENOENT;
   1887	if (!req) {
   1888		spin_unlock(&fpq->lock);
   1889		goto copy_finish;
   1890	}
   1891
   1892	/* Is it an interrupt reply ID? */
   1893	if (oh.unique & FUSE_INT_REQ_BIT) {
   1894		__fuse_get_request(req);
   1895		spin_unlock(&fpq->lock);
   1896
   1897		err = 0;
   1898		if (nbytes != sizeof(struct fuse_out_header))
   1899			err = -EINVAL;
   1900		else if (oh.error == -ENOSYS)
   1901			fc->no_interrupt = 1;
   1902		else if (oh.error == -EAGAIN)
   1903			err = queue_interrupt(req);
   1904
   1905		fuse_put_request(req);
   1906
   1907		goto copy_finish;
   1908	}
   1909
   1910	clear_bit(FR_SENT, &req->flags);
   1911	list_move(&req->list, &fpq->io);
   1912	req->out.h = oh;
   1913	set_bit(FR_LOCKED, &req->flags);
   1914	spin_unlock(&fpq->lock);
   1915	cs->req = req;
   1916	if (!req->args->page_replace)
   1917		cs->move_pages = 0;
   1918
   1919	if (oh.error)
   1920		err = nbytes != sizeof(oh) ? -EINVAL : 0;
   1921	else
   1922		err = copy_out_args(cs, req->args, nbytes);
   1923	fuse_copy_finish(cs);
   1924
   1925	spin_lock(&fpq->lock);
   1926	clear_bit(FR_LOCKED, &req->flags);
   1927	if (!fpq->connected)
   1928		err = -ENOENT;
   1929	else if (err)
   1930		req->out.h.error = -EIO;
   1931	if (!test_bit(FR_PRIVATE, &req->flags))
   1932		list_del_init(&req->list);
   1933	spin_unlock(&fpq->lock);
   1934
   1935	fuse_request_end(req);
   1936out:
   1937	return err ? err : nbytes;
   1938
   1939copy_finish:
   1940	fuse_copy_finish(cs);
   1941	goto out;
   1942}
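
/*
 * Illustrative sketch (not part of dev.c): the shape of the reply that
 * fuse_dev_do_write() parses, as a userspace daemon would emit it for a
 * GETATTR request.  "devfd" and "req_unique" (the unique ID taken from
 * the request's fuse_in_header) are assumed to come from the daemon's
 * read loop; the whole reply goes in one write, and oh.len must equal
 * the number of bytes written.  The helper name is hypothetical.
 */
#include <linux/fuse.h>
#include <stdint.h>
#include <sys/uio.h>

static ssize_t reply_getattr(int devfd, uint64_t req_unique,
			     const struct fuse_attr_out *attr)
{
	struct fuse_out_header oh = {
		.len    = sizeof(oh) + sizeof(*attr),
		.error  = 0,		/* 0, or a negative errno greater than -512 */
		.unique = req_unique,	/* routes the reply via request_find() */
	};
	struct iovec iov[2] = {
		{ .iov_base = &oh,          .iov_len = sizeof(oh)    },
		{ .iov_base = (void *)attr, .iov_len = sizeof(*attr) },
	};

	return writev(devfd, iov, 2);
}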
   1943
   1944static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
   1945{
   1946	struct fuse_copy_state cs;
   1947	struct fuse_dev *fud = fuse_get_dev(iocb->ki_filp);
   1948
   1949	if (!fud)
   1950		return -EPERM;
   1951
   1952	if (!iter_is_iovec(from))
   1953		return -EINVAL;
   1954
   1955	fuse_copy_init(&cs, 0, from);
   1956
   1957	return fuse_dev_do_write(fud, &cs, iov_iter_count(from));
   1958}
   1959
   1960static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
   1961				     struct file *out, loff_t *ppos,
   1962				     size_t len, unsigned int flags)
   1963{
   1964	unsigned int head, tail, mask, count;
   1965	unsigned nbuf;
   1966	unsigned idx;
   1967	struct pipe_buffer *bufs;
   1968	struct fuse_copy_state cs;
   1969	struct fuse_dev *fud;
   1970	size_t rem;
   1971	ssize_t ret;
   1972
   1973	fud = fuse_get_dev(out);
   1974	if (!fud)
   1975		return -EPERM;
   1976
   1977	pipe_lock(pipe);
   1978
   1979	head = pipe->head;
   1980	tail = pipe->tail;
   1981	mask = pipe->ring_size - 1;
   1982	count = head - tail;
   1983
   1984	bufs = kvmalloc_array(count, sizeof(struct pipe_buffer), GFP_KERNEL);
   1985	if (!bufs) {
   1986		pipe_unlock(pipe);
   1987		return -ENOMEM;
   1988	}
   1989
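	/*
	 * First pass: make sure the pipe currently holds at least @len
	 * bytes, summing buffer lengths from tail towards head.
	 */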
   1990	nbuf = 0;
   1991	rem = 0;
   1992	for (idx = tail; idx != head && rem < len; idx++)
   1993		rem += pipe->bufs[idx & mask].len;
   1994
   1995	ret = -EINVAL;
   1996	if (rem < len)
   1997		goto out_free;
   1998
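	/*
	 * Second pass: move the first @len bytes into the private bufs[]
	 * array.  Buffers consumed completely are stolen from the pipe
	 * (ops cleared, tail advanced); a partially consumed buffer gets
	 * an extra reference and is split.
	 */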
   1999	rem = len;
   2000	while (rem) {
   2001		struct pipe_buffer *ibuf;
   2002		struct pipe_buffer *obuf;
   2003
   2004		if (WARN_ON(nbuf >= count || tail == head))
   2005			goto out_free;
   2006
   2007		ibuf = &pipe->bufs[tail & mask];
   2008		obuf = &bufs[nbuf];
   2009
   2010		if (rem >= ibuf->len) {
   2011			*obuf = *ibuf;
   2012			ibuf->ops = NULL;
   2013			tail++;
   2014			pipe->tail = tail;
   2015		} else {
   2016			if (!pipe_buf_get(pipe, ibuf))
   2017				goto out_free;
   2018
   2019			*obuf = *ibuf;
   2020			obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
   2021			obuf->len = rem;
   2022			ibuf->offset += obuf->len;
   2023			ibuf->len -= obuf->len;
   2024		}
   2025		nbuf++;
   2026		rem -= obuf->len;
   2027	}
   2028	pipe_unlock(pipe);
   2029
   2030	fuse_copy_init(&cs, 0, NULL);
   2031	cs.pipebufs = bufs;
   2032	cs.nr_segs = nbuf;
   2033	cs.pipe = pipe;
   2034
   2035	if (flags & SPLICE_F_MOVE)
   2036		cs.move_pages = 1;
   2037
   2038	ret = fuse_dev_do_write(fud, &cs, len);
   2039
   2040	pipe_lock(pipe);
   2041out_free:
   2042	for (idx = 0; idx < nbuf; idx++) {
   2043		struct pipe_buffer *buf = &bufs[idx];
   2044
   2045		if (buf->ops)
   2046			pipe_buf_release(pipe, buf);
   2047	}
   2048	pipe_unlock(pipe);
   2049
   2050	kvfree(bufs);
   2051	return ret;
   2052}
   2053
   2054static __poll_t fuse_dev_poll(struct file *file, poll_table *wait)
   2055{
   2056	__poll_t mask = EPOLLOUT | EPOLLWRNORM;
   2057	struct fuse_iqueue *fiq;
   2058	struct fuse_dev *fud = fuse_get_dev(file);
   2059
   2060	if (!fud)
   2061		return EPOLLERR;
   2062
   2063	fiq = &fud->fc->iq;
   2064	poll_wait(file, &fiq->waitq, wait);
   2065
   2066	spin_lock(&fiq->lock);
   2067	if (!fiq->connected)
   2068		mask = EPOLLERR;
   2069	else if (request_pending(fiq))
   2070		mask |= EPOLLIN | EPOLLRDNORM;
   2071	spin_unlock(&fiq->lock);
   2072
   2073	return mask;
   2074}
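
/*
 * Illustrative sketch (not part of dev.c): how a daemon's event loop can
 * rely on the poll semantics above -- the device is writable while
 * connected and becomes readable once a request, forget or interrupt is
 * pending.  "devfd" is an assumed /dev/fuse descriptor; the helper name
 * is hypothetical.
 */
#include <poll.h>

static int wait_for_request(int devfd)
{
	struct pollfd pfd = { .fd = devfd, .events = POLLIN };

	/* Blocks until fuse_dev_poll() reports EPOLLIN (or an error). */
	return poll(&pfd, 1, -1);
}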
   2075
   2076/* Abort all requests on the given list (pending or processing) */
   2077static void end_requests(struct list_head *head)
   2078{
   2079	while (!list_empty(head)) {
   2080		struct fuse_req *req;
   2081		req = list_entry(head->next, struct fuse_req, list);
   2082		req->out.h.error = -ECONNABORTED;
   2083		clear_bit(FR_SENT, &req->flags);
   2084		list_del_init(&req->list);
   2085		fuse_request_end(req);
   2086	}
   2087}
   2088
   2089static void end_polls(struct fuse_conn *fc)
   2090{
   2091	struct rb_node *p;
   2092
   2093	p = rb_first(&fc->polled_files);
   2094
   2095	while (p) {
   2096		struct fuse_file *ff;
   2097		ff = rb_entry(p, struct fuse_file, polled_node);
   2098		wake_up_interruptible_all(&ff->poll_wait);
   2099
   2100		p = rb_next(p);
   2101	}
   2102}
   2103
   2104/*
   2105 * Abort all requests.
   2106 *
   2107 * Emergency exit in case of a malicious or accidental deadlock, or just a hung
   2108 * filesystem.
   2109 *
   2110 * The same effect is usually achievable through killing the filesystem daemon
   2111 * and all users of the filesystem.  The exception is the combination of an
   2112 * asynchronous request and the tricky deadlock (see
   2113 * Documentation/filesystems/fuse.rst).
   2114 *
   2115 * Aborting requests under I/O goes as follows: 1: Separate out the unlocked
   2116 * requests; they are finished off immediately.  Locked requests will be
   2117 * finished after unlock; see unlock_request(). 2: Finish off the unlocked
   2118 * requests.  It is possible that some request will finish before we can; this
   2119 * is OK, because in that case the request is removed from the list before we
   2120 * touch it.
   2121 */
   2122void fuse_abort_conn(struct fuse_conn *fc)
   2123{
   2124	struct fuse_iqueue *fiq = &fc->iq;
   2125
   2126	spin_lock(&fc->lock);
   2127	if (fc->connected) {
   2128		struct fuse_dev *fud;
   2129		struct fuse_req *req, *next;
   2130		LIST_HEAD(to_end);
   2131		unsigned int i;
   2132
   2133		/* Background queuing checks fc->connected under bg_lock */
   2134		spin_lock(&fc->bg_lock);
   2135		fc->connected = 0;
   2136		spin_unlock(&fc->bg_lock);
   2137
   2138		fuse_set_initialized(fc);
   2139		list_for_each_entry(fud, &fc->devices, entry) {
   2140			struct fuse_pqueue *fpq = &fud->pq;
   2141
   2142			spin_lock(&fpq->lock);
   2143			fpq->connected = 0;
   2144			list_for_each_entry_safe(req, next, &fpq->io, list) {
   2145				req->out.h.error = -ECONNABORTED;
   2146				spin_lock(&req->waitq.lock);
   2147				set_bit(FR_ABORTED, &req->flags);
   2148				if (!test_bit(FR_LOCKED, &req->flags)) {
   2149					set_bit(FR_PRIVATE, &req->flags);
   2150					__fuse_get_request(req);
   2151					list_move(&req->list, &to_end);
   2152				}
   2153				spin_unlock(&req->waitq.lock);
   2154			}
   2155			for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
   2156				list_splice_tail_init(&fpq->processing[i],
   2157						      &to_end);
   2158			spin_unlock(&fpq->lock);
   2159		}
   2160		spin_lock(&fc->bg_lock);
   2161		fc->blocked = 0;
   2162		fc->max_background = UINT_MAX;
   2163		flush_bg_queue(fc);
   2164		spin_unlock(&fc->bg_lock);
   2165
   2166		spin_lock(&fiq->lock);
   2167		fiq->connected = 0;
   2168		list_for_each_entry(req, &fiq->pending, list)
   2169			clear_bit(FR_PENDING, &req->flags);
   2170		list_splice_tail_init(&fiq->pending, &to_end);
   2171		while (forget_pending(fiq))
   2172			kfree(fuse_dequeue_forget(fiq, 1, NULL));
   2173		wake_up_all(&fiq->waitq);
   2174		spin_unlock(&fiq->lock);
   2175		kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
   2176		end_polls(fc);
   2177		wake_up_all(&fc->blocked_waitq);
   2178		spin_unlock(&fc->lock);
   2179
   2180		end_requests(&to_end);
   2181	} else {
   2182		spin_unlock(&fc->lock);
   2183	}
   2184}
   2185EXPORT_SYMBOL_GPL(fuse_abort_conn);
   2186
   2187void fuse_wait_aborted(struct fuse_conn *fc)
   2188{
   2189	/* matches implicit memory barrier in fuse_drop_waiting() */
   2190	smp_mb();
   2191	wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0);
   2192}
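
/*
 * Illustrative sketch (not part of dev.c): the teardown pattern the two
 * helpers above are meant for, as used elsewhere in fuse (e.g. on the
 * fuse_conn_destroy() path): abort every request, then wait until all
 * waiters counted in fc->num_waiting have drained.  The function name
 * is hypothetical.
 */
static void abort_and_drain(struct fuse_conn *fc)
{
	fuse_abort_conn(fc);	/* fail pending and in-flight requests */
	fuse_wait_aborted(fc);	/* sleep until num_waiting reaches zero */
}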
   2193
   2194int fuse_dev_release(struct inode *inode, struct file *file)
   2195{
   2196	struct fuse_dev *fud = fuse_get_dev(file);
   2197
   2198	if (fud) {
   2199		struct fuse_conn *fc = fud->fc;
   2200		struct fuse_pqueue *fpq = &fud->pq;
   2201		LIST_HEAD(to_end);
   2202		unsigned int i;
   2203
   2204		spin_lock(&fpq->lock);
   2205		WARN_ON(!list_empty(&fpq->io));
   2206		for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
   2207			list_splice_init(&fpq->processing[i], &to_end);
   2208		spin_unlock(&fpq->lock);
   2209
   2210		end_requests(&to_end);
   2211
   2212		/* Are we the last open device? */
   2213		if (atomic_dec_and_test(&fc->dev_count)) {
   2214			WARN_ON(fc->iq.fasync != NULL);
   2215			fuse_abort_conn(fc);
   2216		}
   2217		fuse_dev_free(fud);
   2218	}
   2219	return 0;
   2220}
   2221EXPORT_SYMBOL_GPL(fuse_dev_release);
   2222
   2223static int fuse_dev_fasync(int fd, struct file *file, int on)
   2224{
   2225	struct fuse_dev *fud = fuse_get_dev(file);
   2226
   2227	if (!fud)
   2228		return -EPERM;
   2229
   2230	/* No locking - fasync_helper does its own locking */
   2231	return fasync_helper(fd, file, on, &fud->fc->iq.fasync);
   2232}
   2233
   2234static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
   2235{
   2236	struct fuse_dev *fud;
   2237
   2238	if (new->private_data)
   2239		return -EINVAL;
   2240
   2241	fud = fuse_dev_alloc_install(fc);
   2242	if (!fud)
   2243		return -ENOMEM;
   2244
   2245	new->private_data = fud;
   2246	atomic_inc(&fc->dev_count);
   2247
   2248	return 0;
   2249}
   2250
   2251static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
   2252			   unsigned long arg)
   2253{
   2254	int res;
   2255	int oldfd;
   2256	struct fuse_dev *fud = NULL;
   2257
   2258	switch (cmd) {
   2259	case FUSE_DEV_IOC_CLONE:
   2260		res = -EFAULT;
   2261		if (!get_user(oldfd, (__u32 __user *)arg)) {
   2262			struct file *old = fget(oldfd);
   2263
   2264			res = -EINVAL;
   2265			if (old) {
   2266				/*
   2267				 * Check against file->f_op because CUSE
   2268				 * uses the same ioctl handler.
   2269				 */
   2270				if (old->f_op == file->f_op &&
   2271				    old->f_cred->user_ns == file->f_cred->user_ns)
   2272					fud = fuse_get_dev(old);
   2273
   2274				if (fud) {
   2275					mutex_lock(&fuse_mutex);
   2276					res = fuse_device_clone(fud->fc, file);
   2277					mutex_unlock(&fuse_mutex);
   2278				}
   2279				fput(old);
   2280			}
   2281		}
   2282		break;
   2283	default:
   2284		res = -ENOTTY;
   2285		break;
   2286	}
   2287	return res;
   2288}
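
/*
 * Illustrative sketch (not part of dev.c): how a multi-threaded daemon
 * would use FUSE_DEV_IOC_CLONE from userspace.  "session_fd" is assumed
 * to be the already-mounted /dev/fuse descriptor; the clone gets its own
 * struct fuse_dev (and processing queue), so worker threads do not
 * contend on a single queue.  The helper name is hypothetical.
 */
#include <linux/fuse.h>
#include <fcntl.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <unistd.h>

static int clone_fuse_fd(int session_fd)
{
	uint32_t oldfd = session_fd;
	int clonefd = open("/dev/fuse", O_RDWR | O_CLOEXEC);

	if (clonefd < 0)
		return -1;
	/* Associates clonefd with the same fuse_conn as session_fd. */
	if (ioctl(clonefd, FUSE_DEV_IOC_CLONE, &oldfd) < 0) {
		close(clonefd);
		return -1;
	}
	return clonefd;
}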
   2289
   2290const struct file_operations fuse_dev_operations = {
   2291	.owner		= THIS_MODULE,
   2292	.open		= fuse_dev_open,
   2293	.llseek		= no_llseek,
   2294	.read_iter	= fuse_dev_read,
   2295	.splice_read	= fuse_dev_splice_read,
   2296	.write_iter	= fuse_dev_write,
   2297	.splice_write	= fuse_dev_splice_write,
   2298	.poll		= fuse_dev_poll,
   2299	.release	= fuse_dev_release,
   2300	.fasync		= fuse_dev_fasync,
   2301	.unlocked_ioctl = fuse_dev_ioctl,
   2302	.compat_ioctl   = compat_ptr_ioctl,
   2303};
   2304EXPORT_SYMBOL_GPL(fuse_dev_operations);
   2305
   2306static struct miscdevice fuse_miscdevice = {
   2307	.minor = FUSE_MINOR,
   2308	.name  = "fuse",
   2309	.fops = &fuse_dev_operations,
   2310};
   2311
   2312int __init fuse_dev_init(void)
   2313{
   2314	int err = -ENOMEM;
   2315	fuse_req_cachep = kmem_cache_create("fuse_request",
   2316					    sizeof(struct fuse_req),
   2317					    0, 0, NULL);
   2318	if (!fuse_req_cachep)
   2319		goto out;
   2320
   2321	err = misc_register(&fuse_miscdevice);
   2322	if (err)
   2323		goto out_cache_clean;
   2324
   2325	return 0;
   2326
   2327 out_cache_clean:
   2328	kmem_cache_destroy(fuse_req_cachep);
   2329 out:
   2330	return err;
   2331}
   2332
   2333void fuse_dev_cleanup(void)
   2334{
   2335	misc_deregister(&fuse_miscdevice);
   2336	kmem_cache_destroy(fuse_req_cachep);
   2337}