cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

virtio_fs.c (38245B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * virtio-fs: Virtio Filesystem
      4 * Copyright (C) 2018 Red Hat, Inc.
      5 */
      6
      7#include <linux/fs.h>
      8#include <linux/dax.h>
      9#include <linux/pci.h>
     10#include <linux/pfn_t.h>
     11#include <linux/memremap.h>
     12#include <linux/module.h>
     13#include <linux/virtio.h>
     14#include <linux/virtio_fs.h>
     15#include <linux/delay.h>
     16#include <linux/fs_context.h>
     17#include <linux/fs_parser.h>
     18#include <linux/highmem.h>
     19#include <linux/uio.h>
     20#include "fuse_i.h"
     21
     22/* Used to help calculate the FUSE connection's max_pages limit for a request's
      23 * size. Parts of the struct fuse_req are sliced into scatter-gather lists in
     24 * addition to the pages used, so this can help account for that overhead.
     25 */
     26#define FUSE_HEADER_OVERHEAD    4
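/*
 * Worked example (hypothetical virtqueue size, not from the original file):
 * with a 128-descriptor request virtqueue, virtio_fs_get_tree() below caps
 * the connection at min(fc->max_pages_limit, 128 - FUSE_HEADER_OVERHEAD),
 * i.e. at most 124 pages per request, keeping descriptors free for the FUSE
 * in/out headers and the packed argument buffers.
 */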
     27
     28/* List of virtio-fs device instances and a lock for the list. Also provides
     29 * mutual exclusion in device removal and mounting path
     30 */
     31static DEFINE_MUTEX(virtio_fs_mutex);
     32static LIST_HEAD(virtio_fs_instances);
     33
     34enum {
     35	VQ_HIPRIO,
     36	VQ_REQUEST
     37};
     38
     39#define VQ_NAME_LEN	24
     40
     41/* Per-virtqueue state */
     42struct virtio_fs_vq {
     43	spinlock_t lock;
     44	struct virtqueue *vq;     /* protected by ->lock */
     45	struct work_struct done_work;
     46	struct list_head queued_reqs;
     47	struct list_head end_reqs;	/* End these requests */
     48	struct delayed_work dispatch_work;
     49	struct fuse_dev *fud;
     50	bool connected;
     51	long in_flight;
     52	struct completion in_flight_zero; /* No inflight requests */
     53	char name[VQ_NAME_LEN];
     54} ____cacheline_aligned_in_smp;
     55
     56/* A virtio-fs device instance */
     57struct virtio_fs {
     58	struct kref refcount;
     59	struct list_head list;    /* on virtio_fs_instances */
     60	char *tag;
     61	struct virtio_fs_vq *vqs;
     62	unsigned int nvqs;               /* number of virtqueues */
     63	unsigned int num_request_queues; /* number of request queues */
     64	struct dax_device *dax_dev;
     65
     66	/* DAX memory window where file contents are mapped */
     67	void *window_kaddr;
     68	phys_addr_t window_phys_addr;
     69	size_t window_len;
     70};
     71
     72struct virtio_fs_forget_req {
     73	struct fuse_in_header ih;
     74	struct fuse_forget_in arg;
     75};
     76
     77struct virtio_fs_forget {
     78	/* This request can be temporarily queued on a virtqueue */
     79	struct list_head list;
     80	struct virtio_fs_forget_req req;
     81};
     82
     83struct virtio_fs_req_work {
     84	struct fuse_req *req;
     85	struct virtio_fs_vq *fsvq;
     86	struct work_struct done_work;
     87};
     88
     89static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
     90				 struct fuse_req *req, bool in_flight);
     91
     92static const struct constant_table dax_param_enums[] = {
     93	{"always",	FUSE_DAX_ALWAYS },
     94	{"never",	FUSE_DAX_NEVER },
     95	{"inode",	FUSE_DAX_INODE_USER },
     96	{}
     97};
     98
     99enum {
    100	OPT_DAX,
    101	OPT_DAX_ENUM,
    102};
    103
    104static const struct fs_parameter_spec virtio_fs_parameters[] = {
    105	fsparam_flag("dax", OPT_DAX),
    106	fsparam_enum("dax", OPT_DAX_ENUM, dax_param_enums),
    107	{}
    108};
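/*
 * Usage sketch (assuming a device exporting the tag "myfs"): the tag is the
 * mount source and the options above select the DAX mode, e.g.
 *
 *   mount -t virtiofs myfs /mnt
 *   mount -t virtiofs -o dax=always myfs /mnt
 *
 * A bare "dax" option is treated as dax=always (OPT_DAX above).
 */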
    109
    110static int virtio_fs_parse_param(struct fs_context *fsc,
    111				 struct fs_parameter *param)
    112{
    113	struct fs_parse_result result;
    114	struct fuse_fs_context *ctx = fsc->fs_private;
    115	int opt;
    116
    117	opt = fs_parse(fsc, virtio_fs_parameters, param, &result);
    118	if (opt < 0)
    119		return opt;
    120
    121	switch (opt) {
    122	case OPT_DAX:
    123		ctx->dax_mode = FUSE_DAX_ALWAYS;
    124		break;
    125	case OPT_DAX_ENUM:
    126		ctx->dax_mode = result.uint_32;
    127		break;
    128	default:
    129		return -EINVAL;
    130	}
    131
    132	return 0;
    133}
    134
    135static void virtio_fs_free_fsc(struct fs_context *fsc)
    136{
    137	struct fuse_fs_context *ctx = fsc->fs_private;
    138
    139	kfree(ctx);
    140}
    141
    142static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
    143{
    144	struct virtio_fs *fs = vq->vdev->priv;
    145
    146	return &fs->vqs[vq->index];
    147}
    148
    149/* Should be called with fsvq->lock held. */
    150static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
    151{
    152	fsvq->in_flight++;
    153}
    154
    155/* Should be called with fsvq->lock held. */
    156static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
    157{
    158	WARN_ON(fsvq->in_flight <= 0);
    159	fsvq->in_flight--;
    160	if (!fsvq->in_flight)
    161		complete(&fsvq->in_flight_zero);
    162}
    163
    164static void release_virtio_fs_obj(struct kref *ref)
    165{
    166	struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);
    167
    168	kfree(vfs->vqs);
    169	kfree(vfs);
    170}
    171
     172/* Make sure virtio_fs_mutex is held */
    173static void virtio_fs_put(struct virtio_fs *fs)
    174{
    175	kref_put(&fs->refcount, release_virtio_fs_obj);
    176}
    177
    178static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
    179{
    180	struct virtio_fs *vfs = fiq->priv;
    181
    182	mutex_lock(&virtio_fs_mutex);
    183	virtio_fs_put(vfs);
    184	mutex_unlock(&virtio_fs_mutex);
    185}
    186
    187static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
    188{
    189	WARN_ON(fsvq->in_flight < 0);
    190
     191	/* Wait for in-flight requests to finish. */
    192	spin_lock(&fsvq->lock);
    193	if (fsvq->in_flight) {
    194		/* We are holding virtio_fs_mutex. There should not be any
    195		 * waiters waiting for completion.
    196		 */
    197		reinit_completion(&fsvq->in_flight_zero);
    198		spin_unlock(&fsvq->lock);
    199		wait_for_completion(&fsvq->in_flight_zero);
    200	} else {
    201		spin_unlock(&fsvq->lock);
    202	}
    203
    204	flush_work(&fsvq->done_work);
    205	flush_delayed_work(&fsvq->dispatch_work);
    206}
    207
    208static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs)
    209{
    210	struct virtio_fs_vq *fsvq;
    211	int i;
    212
    213	for (i = 0; i < fs->nvqs; i++) {
    214		fsvq = &fs->vqs[i];
    215		virtio_fs_drain_queue(fsvq);
    216	}
    217}
    218
    219static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
    220{
    221	/* Provides mutual exclusion between ->remove and ->kill_sb
     222	 * paths. We don't want both of these draining queues at the
    223	 * same time. Current completion logic reinits completion
    224	 * and that means there should not be any other thread
    225	 * doing reinit or waiting for completion already.
    226	 */
    227	mutex_lock(&virtio_fs_mutex);
    228	virtio_fs_drain_all_queues_locked(fs);
    229	mutex_unlock(&virtio_fs_mutex);
    230}
    231
    232static void virtio_fs_start_all_queues(struct virtio_fs *fs)
    233{
    234	struct virtio_fs_vq *fsvq;
    235	int i;
    236
    237	for (i = 0; i < fs->nvqs; i++) {
    238		fsvq = &fs->vqs[i];
    239		spin_lock(&fsvq->lock);
    240		fsvq->connected = true;
    241		spin_unlock(&fsvq->lock);
    242	}
    243}
    244
     245/* Add a new instance to the list or return -EEXIST if tag name exists */
    246static int virtio_fs_add_instance(struct virtio_fs *fs)
    247{
    248	struct virtio_fs *fs2;
    249	bool duplicate = false;
    250
    251	mutex_lock(&virtio_fs_mutex);
    252
    253	list_for_each_entry(fs2, &virtio_fs_instances, list) {
    254		if (strcmp(fs->tag, fs2->tag) == 0)
    255			duplicate = true;
    256	}
    257
    258	if (!duplicate)
    259		list_add_tail(&fs->list, &virtio_fs_instances);
    260
    261	mutex_unlock(&virtio_fs_mutex);
    262
    263	if (duplicate)
    264		return -EEXIST;
    265	return 0;
    266}
    267
    268/* Return the virtio_fs with a given tag, or NULL */
    269static struct virtio_fs *virtio_fs_find_instance(const char *tag)
    270{
    271	struct virtio_fs *fs;
    272
    273	mutex_lock(&virtio_fs_mutex);
    274
    275	list_for_each_entry(fs, &virtio_fs_instances, list) {
    276		if (strcmp(fs->tag, tag) == 0) {
    277			kref_get(&fs->refcount);
    278			goto found;
    279		}
    280	}
    281
    282	fs = NULL; /* not found */
    283
    284found:
    285	mutex_unlock(&virtio_fs_mutex);
    286
    287	return fs;
    288}
    289
    290static void virtio_fs_free_devs(struct virtio_fs *fs)
    291{
    292	unsigned int i;
    293
    294	for (i = 0; i < fs->nvqs; i++) {
    295		struct virtio_fs_vq *fsvq = &fs->vqs[i];
    296
    297		if (!fsvq->fud)
    298			continue;
    299
    300		fuse_dev_free(fsvq->fud);
    301		fsvq->fud = NULL;
    302	}
    303}
    304
     305/* Read the filesystem tag from virtio config into fs->tag (devm-allocated, freed with the device). */
    306static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
    307{
    308	char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
    309	char *end;
    310	size_t len;
    311
    312	virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
    313			   &tag_buf, sizeof(tag_buf));
    314	end = memchr(tag_buf, '\0', sizeof(tag_buf));
    315	if (end == tag_buf)
    316		return -EINVAL; /* empty tag */
    317	if (!end)
    318		end = &tag_buf[sizeof(tag_buf)];
    319
    320	len = end - tag_buf;
    321	fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
    322	if (!fs->tag)
    323		return -ENOMEM;
    324	memcpy(fs->tag, tag_buf, len);
    325	fs->tag[len] = '\0';
    326	return 0;
    327}
    328
    329/* Work function for hiprio completion */
    330static void virtio_fs_hiprio_done_work(struct work_struct *work)
    331{
    332	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
    333						 done_work);
    334	struct virtqueue *vq = fsvq->vq;
    335
    336	/* Free completed FUSE_FORGET requests */
    337	spin_lock(&fsvq->lock);
    338	do {
    339		unsigned int len;
    340		void *req;
    341
    342		virtqueue_disable_cb(vq);
    343
    344		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
    345			kfree(req);
    346			dec_in_flight_req(fsvq);
    347		}
    348	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
    349	spin_unlock(&fsvq->lock);
    350}
    351
    352static void virtio_fs_request_dispatch_work(struct work_struct *work)
    353{
    354	struct fuse_req *req;
    355	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
    356						 dispatch_work.work);
    357	int ret;
    358
    359	pr_debug("virtio-fs: worker %s called.\n", __func__);
    360	while (1) {
    361		spin_lock(&fsvq->lock);
    362		req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
    363					       list);
    364		if (!req) {
    365			spin_unlock(&fsvq->lock);
    366			break;
    367		}
    368
    369		list_del_init(&req->list);
    370		spin_unlock(&fsvq->lock);
    371		fuse_request_end(req);
    372	}
    373
    374	/* Dispatch pending requests */
    375	while (1) {
    376		spin_lock(&fsvq->lock);
    377		req = list_first_entry_or_null(&fsvq->queued_reqs,
    378					       struct fuse_req, list);
    379		if (!req) {
    380			spin_unlock(&fsvq->lock);
    381			return;
    382		}
    383		list_del_init(&req->list);
    384		spin_unlock(&fsvq->lock);
    385
    386		ret = virtio_fs_enqueue_req(fsvq, req, true);
    387		if (ret < 0) {
    388			if (ret == -ENOMEM || ret == -ENOSPC) {
    389				spin_lock(&fsvq->lock);
    390				list_add_tail(&req->list, &fsvq->queued_reqs);
    391				schedule_delayed_work(&fsvq->dispatch_work,
    392						      msecs_to_jiffies(1));
    393				spin_unlock(&fsvq->lock);
    394				return;
    395			}
    396			req->out.h.error = ret;
    397			spin_lock(&fsvq->lock);
    398			dec_in_flight_req(fsvq);
    399			spin_unlock(&fsvq->lock);
    400			pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
    401			       ret);
    402			fuse_request_end(req);
    403		}
    404	}
    405}
    406
    407/*
    408 * Returns 1 if queue is full and sender should wait a bit before sending
    409 * next request, 0 otherwise.
    410 */
    411static int send_forget_request(struct virtio_fs_vq *fsvq,
    412			       struct virtio_fs_forget *forget,
    413			       bool in_flight)
    414{
    415	struct scatterlist sg;
    416	struct virtqueue *vq;
    417	int ret = 0;
    418	bool notify;
    419	struct virtio_fs_forget_req *req = &forget->req;
    420
    421	spin_lock(&fsvq->lock);
    422	if (!fsvq->connected) {
    423		if (in_flight)
    424			dec_in_flight_req(fsvq);
    425		kfree(forget);
    426		goto out;
    427	}
    428
    429	sg_init_one(&sg, req, sizeof(*req));
    430	vq = fsvq->vq;
    431	dev_dbg(&vq->vdev->dev, "%s\n", __func__);
    432
    433	ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC);
    434	if (ret < 0) {
    435		if (ret == -ENOMEM || ret == -ENOSPC) {
    436			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
    437				 ret);
    438			list_add_tail(&forget->list, &fsvq->queued_reqs);
    439			schedule_delayed_work(&fsvq->dispatch_work,
    440					      msecs_to_jiffies(1));
    441			if (!in_flight)
    442				inc_in_flight_req(fsvq);
    443			/* Queue is full */
    444			ret = 1;
    445		} else {
    446			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
    447				 ret);
    448			kfree(forget);
    449			if (in_flight)
    450				dec_in_flight_req(fsvq);
    451		}
    452		goto out;
    453	}
    454
    455	if (!in_flight)
    456		inc_in_flight_req(fsvq);
    457	notify = virtqueue_kick_prepare(vq);
    458	spin_unlock(&fsvq->lock);
    459
    460	if (notify)
    461		virtqueue_notify(vq);
    462	return ret;
    463out:
    464	spin_unlock(&fsvq->lock);
    465	return ret;
    466}
    467
    468static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
    469{
    470	struct virtio_fs_forget *forget;
    471	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
    472						 dispatch_work.work);
    473	pr_debug("virtio-fs: worker %s called.\n", __func__);
    474	while (1) {
    475		spin_lock(&fsvq->lock);
    476		forget = list_first_entry_or_null(&fsvq->queued_reqs,
    477					struct virtio_fs_forget, list);
    478		if (!forget) {
    479			spin_unlock(&fsvq->lock);
    480			return;
    481		}
    482
    483		list_del(&forget->list);
    484		spin_unlock(&fsvq->lock);
    485		if (send_forget_request(fsvq, forget, true))
    486			return;
    487	}
    488}
    489
    490/* Allocate and copy args into req->argbuf */
    491static int copy_args_to_argbuf(struct fuse_req *req)
    492{
    493	struct fuse_args *args = req->args;
    494	unsigned int offset = 0;
    495	unsigned int num_in;
    496	unsigned int num_out;
    497	unsigned int len;
    498	unsigned int i;
    499
    500	num_in = args->in_numargs - args->in_pages;
    501	num_out = args->out_numargs - args->out_pages;
    502	len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
    503	      fuse_len_args(num_out, args->out_args);
    504
    505	req->argbuf = kmalloc(len, GFP_ATOMIC);
    506	if (!req->argbuf)
    507		return -ENOMEM;
    508
    509	for (i = 0; i < num_in; i++) {
    510		memcpy(req->argbuf + offset,
    511		       args->in_args[i].value,
    512		       args->in_args[i].size);
    513		offset += args->in_args[i].size;
    514	}
    515
    516	return 0;
    517}
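/*
 * Layout sketch of req->argbuf as allocated above: the FUSE in-args are
 * packed at the front (copied here), and room for the out-args (the reply)
 * is reserved right behind them:
 *
 *   [in_args[0] ... in_args[num_in-1]][out_args[0] ... out_args[num_out-1]]
 *
 * virtio_fs_enqueue_req() maps the first part as device-readable and the
 * second part (at req->argbuf + argbuf_used) as device-writable, and
 * copy_args_from_argbuf() below copies the reply back out of that second
 * part.
 */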
    518
    519/* Copy args out of and free req->argbuf */
    520static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
    521{
    522	unsigned int remaining;
    523	unsigned int offset;
    524	unsigned int num_in;
    525	unsigned int num_out;
    526	unsigned int i;
    527
    528	remaining = req->out.h.len - sizeof(req->out.h);
    529	num_in = args->in_numargs - args->in_pages;
    530	num_out = args->out_numargs - args->out_pages;
    531	offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);
    532
    533	for (i = 0; i < num_out; i++) {
    534		unsigned int argsize = args->out_args[i].size;
    535
    536		if (args->out_argvar &&
    537		    i == args->out_numargs - 1 &&
    538		    argsize > remaining) {
    539			argsize = remaining;
    540		}
    541
    542		memcpy(args->out_args[i].value, req->argbuf + offset, argsize);
    543		offset += argsize;
    544
    545		if (i != args->out_numargs - 1)
    546			remaining -= argsize;
    547	}
    548
    549	/* Store the actual size of the variable-length arg */
    550	if (args->out_argvar)
    551		args->out_args[args->out_numargs - 1].size = remaining;
    552
    553	kfree(req->argbuf);
    554	req->argbuf = NULL;
    555}
    556
    557/* Work function for request completion */
    558static void virtio_fs_request_complete(struct fuse_req *req,
    559				       struct virtio_fs_vq *fsvq)
    560{
    561	struct fuse_pqueue *fpq = &fsvq->fud->pq;
    562	struct fuse_args *args;
    563	struct fuse_args_pages *ap;
    564	unsigned int len, i, thislen;
    565	struct page *page;
    566
    567	/*
    568	 * TODO verify that server properly follows FUSE protocol
    569	 * (oh.uniq, oh.len)
    570	 */
    571	args = req->args;
    572	copy_args_from_argbuf(args, req);
    573
    574	if (args->out_pages && args->page_zeroing) {
    575		len = args->out_args[args->out_numargs - 1].size;
    576		ap = container_of(args, typeof(*ap), args);
    577		for (i = 0; i < ap->num_pages; i++) {
    578			thislen = ap->descs[i].length;
    579			if (len < thislen) {
    580				WARN_ON(ap->descs[i].offset);
    581				page = ap->pages[i];
    582				zero_user_segment(page, len, thislen);
    583				len = 0;
    584			} else {
    585				len -= thislen;
    586			}
    587		}
    588	}
    589
    590	spin_lock(&fpq->lock);
    591	clear_bit(FR_SENT, &req->flags);
    592	spin_unlock(&fpq->lock);
    593
    594	fuse_request_end(req);
    595	spin_lock(&fsvq->lock);
    596	dec_in_flight_req(fsvq);
    597	spin_unlock(&fsvq->lock);
    598}
    599
    600static void virtio_fs_complete_req_work(struct work_struct *work)
    601{
    602	struct virtio_fs_req_work *w =
    603		container_of(work, typeof(*w), done_work);
    604
    605	virtio_fs_request_complete(w->req, w->fsvq);
    606	kfree(w);
    607}
    608
    609static void virtio_fs_requests_done_work(struct work_struct *work)
    610{
    611	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
    612						 done_work);
    613	struct fuse_pqueue *fpq = &fsvq->fud->pq;
    614	struct virtqueue *vq = fsvq->vq;
    615	struct fuse_req *req;
    616	struct fuse_req *next;
    617	unsigned int len;
    618	LIST_HEAD(reqs);
    619
    620	/* Collect completed requests off the virtqueue */
    621	spin_lock(&fsvq->lock);
    622	do {
    623		virtqueue_disable_cb(vq);
    624
    625		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
    626			spin_lock(&fpq->lock);
    627			list_move_tail(&req->list, &reqs);
    628			spin_unlock(&fpq->lock);
    629		}
    630	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
    631	spin_unlock(&fsvq->lock);
    632
    633	/* End requests */
    634	list_for_each_entry_safe(req, next, &reqs, list) {
    635		list_del_init(&req->list);
    636
    637		/* blocking async request completes in a worker context */
    638		if (req->args->may_block) {
    639			struct virtio_fs_req_work *w;
    640
    641			w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL);
    642			INIT_WORK(&w->done_work, virtio_fs_complete_req_work);
    643			w->fsvq = fsvq;
    644			w->req = req;
    645			schedule_work(&w->done_work);
    646		} else {
    647			virtio_fs_request_complete(req, fsvq);
    648		}
    649	}
    650}
    651
    652/* Virtqueue interrupt handler */
    653static void virtio_fs_vq_done(struct virtqueue *vq)
    654{
    655	struct virtio_fs_vq *fsvq = vq_to_fsvq(vq);
    656
    657	dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);
    658
    659	schedule_work(&fsvq->done_work);
    660}
    661
    662static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name,
    663			      int vq_type)
    664{
    665	strscpy(fsvq->name, name, VQ_NAME_LEN);
    666	spin_lock_init(&fsvq->lock);
    667	INIT_LIST_HEAD(&fsvq->queued_reqs);
    668	INIT_LIST_HEAD(&fsvq->end_reqs);
    669	init_completion(&fsvq->in_flight_zero);
    670
    671	if (vq_type == VQ_REQUEST) {
    672		INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work);
    673		INIT_DELAYED_WORK(&fsvq->dispatch_work,
    674				  virtio_fs_request_dispatch_work);
    675	} else {
    676		INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work);
    677		INIT_DELAYED_WORK(&fsvq->dispatch_work,
    678				  virtio_fs_hiprio_dispatch_work);
    679	}
    680}
    681
    682/* Initialize virtqueues */
    683static int virtio_fs_setup_vqs(struct virtio_device *vdev,
    684			       struct virtio_fs *fs)
    685{
    686	struct virtqueue **vqs;
    687	vq_callback_t **callbacks;
    688	const char **names;
    689	unsigned int i;
    690	int ret = 0;
    691
    692	virtio_cread_le(vdev, struct virtio_fs_config, num_request_queues,
    693			&fs->num_request_queues);
    694	if (fs->num_request_queues == 0)
    695		return -EINVAL;
    696
    697	fs->nvqs = VQ_REQUEST + fs->num_request_queues;
    698	fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
    699	if (!fs->vqs)
    700		return -ENOMEM;
    701
    702	vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL);
    703	callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]),
    704					GFP_KERNEL);
    705	names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL);
    706	if (!vqs || !callbacks || !names) {
    707		ret = -ENOMEM;
    708		goto out;
    709	}
    710
    711	/* Initialize the hiprio/forget request virtqueue */
    712	callbacks[VQ_HIPRIO] = virtio_fs_vq_done;
    713	virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO);
    714	names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
    715
    716	/* Initialize the requests virtqueues */
    717	for (i = VQ_REQUEST; i < fs->nvqs; i++) {
    718		char vq_name[VQ_NAME_LEN];
    719
    720		snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST);
    721		virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST);
    722		callbacks[i] = virtio_fs_vq_done;
    723		names[i] = fs->vqs[i].name;
    724	}
    725
    726	ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL);
    727	if (ret < 0)
    728		goto out;
    729
    730	for (i = 0; i < fs->nvqs; i++)
    731		fs->vqs[i].vq = vqs[i];
    732
    733	virtio_fs_start_all_queues(fs);
    734out:
    735	kfree(names);
    736	kfree(callbacks);
    737	kfree(vqs);
    738	if (ret)
    739		kfree(fs->vqs);
    740	return ret;
    741}
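/*
 * Resulting virtqueue layout (one hiprio queue plus num_request_queues
 * request queues, matching the enum above):
 *
 *   fs->vqs[0]            "hiprio"       FUSE_FORGET requests
 *   fs->vqs[1..nvqs-1]    "requests.N"   all other FUSE requests
 *
 * Note that the current submission path always uses fs->vqs[VQ_REQUEST]
 * (see the "TODO multiqueue" in virtio_fs_wake_pending_and_unlock()).
 */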
    742
    743/* Free virtqueues (device must already be reset) */
    744static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
    745				  struct virtio_fs *fs)
    746{
    747	vdev->config->del_vqs(vdev);
    748}
    749
    750/* Map a window offset to a page frame number.  The window offset will have
    751 * been produced by .iomap_begin(), which maps a file offset to a window
    752 * offset.
    753 */
    754static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
    755				    long nr_pages, enum dax_access_mode mode,
    756				    void **kaddr, pfn_t *pfn)
    757{
    758	struct virtio_fs *fs = dax_get_private(dax_dev);
    759	phys_addr_t offset = PFN_PHYS(pgoff);
    760	size_t max_nr_pages = fs->window_len/PAGE_SIZE - pgoff;
    761
    762	if (kaddr)
    763		*kaddr = fs->window_kaddr + offset;
    764	if (pfn)
    765		*pfn = phys_to_pfn_t(fs->window_phys_addr + offset,
    766					PFN_DEV | PFN_MAP);
    767	return nr_pages > max_nr_pages ? max_nr_pages : nr_pages;
    768}
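/*
 * Worked example (assuming 4 KiB pages): for pgoff 16, offset is
 * 16 << PAGE_SHIFT = 64 KiB into the DAX window, so *kaddr points 64 KiB
 * past window_kaddr. If only 10 pages remain before window_len is reached,
 * a request for nr_pages = 32 returns 10.
 */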
    769
    770static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
    771				     pgoff_t pgoff, size_t nr_pages)
    772{
    773	long rc;
    774	void *kaddr;
    775
    776	rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr,
    777			       NULL);
    778	if (rc < 0)
    779		return rc;
    780	memset(kaddr, 0, nr_pages << PAGE_SHIFT);
    781	dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
    782	return 0;
    783}
    784
    785static const struct dax_operations virtio_fs_dax_ops = {
    786	.direct_access = virtio_fs_direct_access,
    787	.zero_page_range = virtio_fs_zero_page_range,
    788};
    789
    790static void virtio_fs_cleanup_dax(void *data)
    791{
    792	struct dax_device *dax_dev = data;
    793
    794	kill_dax(dax_dev);
    795	put_dax(dax_dev);
    796}
    797
    798static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs)
    799{
    800	struct virtio_shm_region cache_reg;
    801	struct dev_pagemap *pgmap;
    802	bool have_cache;
    803
    804	if (!IS_ENABLED(CONFIG_FUSE_DAX))
    805		return 0;
    806
    807	/* Get cache region */
    808	have_cache = virtio_get_shm_region(vdev, &cache_reg,
    809					   (u8)VIRTIO_FS_SHMCAP_ID_CACHE);
    810	if (!have_cache) {
    811		dev_notice(&vdev->dev, "%s: No cache capability\n", __func__);
    812		return 0;
    813	}
    814
    815	if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len,
    816				     dev_name(&vdev->dev))) {
    817		dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n",
    818			 cache_reg.addr, cache_reg.len);
    819		return -EBUSY;
    820	}
    821
    822	dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len,
    823		   cache_reg.addr);
    824
    825	pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL);
    826	if (!pgmap)
    827		return -ENOMEM;
    828
    829	pgmap->type = MEMORY_DEVICE_FS_DAX;
    830
    831	/* Ideally we would directly use the PCI BAR resource but
    832	 * devm_memremap_pages() wants its own copy in pgmap.  So
     833	 * initialize a struct range from scratch (only the start
    834	 * and end fields will be used).
    835	 */
    836	pgmap->range = (struct range) {
    837		.start = (phys_addr_t) cache_reg.addr,
    838		.end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1,
    839	};
    840	pgmap->nr_range = 1;
    841
    842	fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap);
    843	if (IS_ERR(fs->window_kaddr))
    844		return PTR_ERR(fs->window_kaddr);
    845
    846	fs->window_phys_addr = (phys_addr_t) cache_reg.addr;
    847	fs->window_len = (phys_addr_t) cache_reg.len;
    848
    849	dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n",
    850		__func__, fs->window_kaddr, cache_reg.addr, cache_reg.len);
    851
    852	fs->dax_dev = alloc_dax(fs, &virtio_fs_dax_ops);
    853	if (IS_ERR(fs->dax_dev))
    854		return PTR_ERR(fs->dax_dev);
    855
    856	return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax,
    857					fs->dax_dev);
    858}
    859
    860static int virtio_fs_probe(struct virtio_device *vdev)
    861{
    862	struct virtio_fs *fs;
    863	int ret;
    864
    865	fs = kzalloc(sizeof(*fs), GFP_KERNEL);
    866	if (!fs)
    867		return -ENOMEM;
    868	kref_init(&fs->refcount);
    869	vdev->priv = fs;
    870
    871	ret = virtio_fs_read_tag(vdev, fs);
    872	if (ret < 0)
    873		goto out;
    874
    875	ret = virtio_fs_setup_vqs(vdev, fs);
    876	if (ret < 0)
    877		goto out;
    878
    879	/* TODO vq affinity */
    880
    881	ret = virtio_fs_setup_dax(vdev, fs);
    882	if (ret < 0)
    883		goto out_vqs;
    884
    885	/* Bring the device online in case the filesystem is mounted and
    886	 * requests need to be sent before we return.
    887	 */
    888	virtio_device_ready(vdev);
    889
    890	ret = virtio_fs_add_instance(fs);
    891	if (ret < 0)
    892		goto out_vqs;
    893
    894	return 0;
    895
    896out_vqs:
    897	virtio_reset_device(vdev);
    898	virtio_fs_cleanup_vqs(vdev, fs);
    899	kfree(fs->vqs);
    900
    901out:
    902	vdev->priv = NULL;
    903	kfree(fs);
    904	return ret;
    905}
    906
    907static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
    908{
    909	struct virtio_fs_vq *fsvq;
    910	int i;
    911
    912	for (i = 0; i < fs->nvqs; i++) {
    913		fsvq = &fs->vqs[i];
    914		spin_lock(&fsvq->lock);
    915		fsvq->connected = false;
    916		spin_unlock(&fsvq->lock);
    917	}
    918}
    919
    920static void virtio_fs_remove(struct virtio_device *vdev)
    921{
    922	struct virtio_fs *fs = vdev->priv;
    923
    924	mutex_lock(&virtio_fs_mutex);
     925	/* This device is going away. No one should get a new reference */
    926	list_del_init(&fs->list);
    927	virtio_fs_stop_all_queues(fs);
    928	virtio_fs_drain_all_queues_locked(fs);
    929	virtio_reset_device(vdev);
    930	virtio_fs_cleanup_vqs(vdev, fs);
    931
    932	vdev->priv = NULL;
    933	/* Put device reference on virtio_fs object */
    934	virtio_fs_put(fs);
    935	mutex_unlock(&virtio_fs_mutex);
    936}
    937
    938#ifdef CONFIG_PM_SLEEP
    939static int virtio_fs_freeze(struct virtio_device *vdev)
    940{
    941	/* TODO need to save state here */
    942	pr_warn("virtio-fs: suspend/resume not yet supported\n");
    943	return -EOPNOTSUPP;
    944}
    945
    946static int virtio_fs_restore(struct virtio_device *vdev)
    947{
    948	 /* TODO need to restore state here */
    949	return 0;
    950}
    951#endif /* CONFIG_PM_SLEEP */
    952
    953static const struct virtio_device_id id_table[] = {
    954	{ VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
    955	{},
    956};
    957
    958static const unsigned int feature_table[] = {};
    959
    960static struct virtio_driver virtio_fs_driver = {
    961	.driver.name		= KBUILD_MODNAME,
    962	.driver.owner		= THIS_MODULE,
    963	.id_table		= id_table,
    964	.feature_table		= feature_table,
    965	.feature_table_size	= ARRAY_SIZE(feature_table),
    966	.probe			= virtio_fs_probe,
    967	.remove			= virtio_fs_remove,
    968#ifdef CONFIG_PM_SLEEP
    969	.freeze			= virtio_fs_freeze,
    970	.restore		= virtio_fs_restore,
    971#endif
    972};
    973
    974static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq)
    975__releases(fiq->lock)
    976{
    977	struct fuse_forget_link *link;
    978	struct virtio_fs_forget *forget;
    979	struct virtio_fs_forget_req *req;
    980	struct virtio_fs *fs;
    981	struct virtio_fs_vq *fsvq;
    982	u64 unique;
    983
    984	link = fuse_dequeue_forget(fiq, 1, NULL);
    985	unique = fuse_get_unique(fiq);
    986
    987	fs = fiq->priv;
    988	fsvq = &fs->vqs[VQ_HIPRIO];
    989	spin_unlock(&fiq->lock);
    990
    991	/* Allocate a buffer for the request */
    992	forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);
    993	req = &forget->req;
    994
    995	req->ih = (struct fuse_in_header){
    996		.opcode = FUSE_FORGET,
    997		.nodeid = link->forget_one.nodeid,
    998		.unique = unique,
    999		.len = sizeof(*req),
   1000	};
   1001	req->arg = (struct fuse_forget_in){
   1002		.nlookup = link->forget_one.nlookup,
   1003	};
   1004
   1005	send_forget_request(fsvq, forget, false);
   1006	kfree(link);
   1007}
   1008
   1009static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq)
   1010__releases(fiq->lock)
   1011{
   1012	/*
   1013	 * TODO interrupts.
   1014	 *
    1015	 * Normal fs operations on a local filesystem aren't interruptible.
   1016	 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
   1017	 * with shared lock between host and guest.
   1018	 */
   1019	spin_unlock(&fiq->lock);
   1020}
   1021
   1022/* Count number of scatter-gather elements required */
   1023static unsigned int sg_count_fuse_pages(struct fuse_page_desc *page_descs,
   1024				       unsigned int num_pages,
   1025				       unsigned int total_len)
   1026{
   1027	unsigned int i;
   1028	unsigned int this_len;
   1029
   1030	for (i = 0; i < num_pages && total_len; i++) {
   1031		this_len =  min(page_descs[i].length, total_len);
   1032		total_len -= this_len;
   1033	}
   1034
   1035	return i;
   1036}
   1037
   1038/* Return the number of scatter-gather list elements required */
   1039static unsigned int sg_count_fuse_req(struct fuse_req *req)
   1040{
   1041	struct fuse_args *args = req->args;
   1042	struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
   1043	unsigned int size, total_sgs = 1 /* fuse_in_header */;
   1044
   1045	if (args->in_numargs - args->in_pages)
   1046		total_sgs += 1;
   1047
   1048	if (args->in_pages) {
   1049		size = args->in_args[args->in_numargs - 1].size;
   1050		total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
   1051						 size);
   1052	}
   1053
   1054	if (!test_bit(FR_ISREPLY, &req->flags))
   1055		return total_sgs;
   1056
   1057	total_sgs += 1 /* fuse_out_header */;
   1058
   1059	if (args->out_numargs - args->out_pages)
   1060		total_sgs += 1;
   1061
   1062	if (args->out_pages) {
   1063		size = args->out_args[args->out_numargs - 1].size;
   1064		total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
   1065						 size);
   1066	}
   1067
   1068	return total_sgs;
   1069}
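/*
 * Example counts (hypothetical requests): a simple request with a reply and
 * no page payload (e.g. GETATTR) needs 4 elements: fuse_in_header, packed
 * in-args, fuse_out_header and packed out-args. A READ reply carried in N
 * pages needs 3 + N elements, since the out-args are the pages themselves.
 * This is why virtio_fs_enqueue_req() notes that "requests need at least 4
 * elements" for its stack-allocated scatterlists.
 */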
   1070
   1071/* Add pages to scatter-gather list and return number of elements used */
   1072static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
   1073				       struct page **pages,
   1074				       struct fuse_page_desc *page_descs,
   1075				       unsigned int num_pages,
   1076				       unsigned int total_len)
   1077{
   1078	unsigned int i;
   1079	unsigned int this_len;
   1080
   1081	for (i = 0; i < num_pages && total_len; i++) {
   1082		sg_init_table(&sg[i], 1);
   1083		this_len =  min(page_descs[i].length, total_len);
   1084		sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
   1085		total_len -= this_len;
   1086	}
   1087
   1088	return i;
   1089}
   1090
   1091/* Add args to scatter-gather list and return number of elements used */
   1092static unsigned int sg_init_fuse_args(struct scatterlist *sg,
   1093				      struct fuse_req *req,
   1094				      struct fuse_arg *args,
   1095				      unsigned int numargs,
   1096				      bool argpages,
   1097				      void *argbuf,
   1098				      unsigned int *len_used)
   1099{
   1100	struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
   1101	unsigned int total_sgs = 0;
   1102	unsigned int len;
   1103
   1104	len = fuse_len_args(numargs - argpages, args);
   1105	if (len)
   1106		sg_init_one(&sg[total_sgs++], argbuf, len);
   1107
   1108	if (argpages)
   1109		total_sgs += sg_init_fuse_pages(&sg[total_sgs],
   1110						ap->pages, ap->descs,
   1111						ap->num_pages,
   1112						args[numargs - 1].size);
   1113
   1114	if (len_used)
   1115		*len_used = len;
   1116
   1117	return total_sgs;
   1118}
   1119
   1120/* Add a request to a virtqueue and kick the device */
   1121static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
   1122				 struct fuse_req *req, bool in_flight)
   1123{
   1124	/* requests need at least 4 elements */
   1125	struct scatterlist *stack_sgs[6];
   1126	struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
   1127	struct scatterlist **sgs = stack_sgs;
   1128	struct scatterlist *sg = stack_sg;
   1129	struct virtqueue *vq;
   1130	struct fuse_args *args = req->args;
   1131	unsigned int argbuf_used = 0;
   1132	unsigned int out_sgs = 0;
   1133	unsigned int in_sgs = 0;
   1134	unsigned int total_sgs;
   1135	unsigned int i;
   1136	int ret;
   1137	bool notify;
   1138	struct fuse_pqueue *fpq;
   1139
   1140	/* Does the sglist fit on the stack? */
   1141	total_sgs = sg_count_fuse_req(req);
   1142	if (total_sgs > ARRAY_SIZE(stack_sgs)) {
   1143		sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
   1144		sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
   1145		if (!sgs || !sg) {
   1146			ret = -ENOMEM;
   1147			goto out;
   1148		}
   1149	}
   1150
   1151	/* Use a bounce buffer since stack args cannot be mapped */
   1152	ret = copy_args_to_argbuf(req);
   1153	if (ret < 0)
   1154		goto out;
   1155
   1156	/* Request elements */
   1157	sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
   1158	out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
   1159				     (struct fuse_arg *)args->in_args,
   1160				     args->in_numargs, args->in_pages,
   1161				     req->argbuf, &argbuf_used);
   1162
   1163	/* Reply elements */
   1164	if (test_bit(FR_ISREPLY, &req->flags)) {
   1165		sg_init_one(&sg[out_sgs + in_sgs++],
   1166			    &req->out.h, sizeof(req->out.h));
   1167		in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
   1168					    args->out_args, args->out_numargs,
   1169					    args->out_pages,
   1170					    req->argbuf + argbuf_used, NULL);
   1171	}
   1172
   1173	WARN_ON(out_sgs + in_sgs != total_sgs);
   1174
   1175	for (i = 0; i < total_sgs; i++)
   1176		sgs[i] = &sg[i];
   1177
   1178	spin_lock(&fsvq->lock);
   1179
   1180	if (!fsvq->connected) {
   1181		spin_unlock(&fsvq->lock);
   1182		ret = -ENOTCONN;
   1183		goto out;
   1184	}
   1185
   1186	vq = fsvq->vq;
   1187	ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
   1188	if (ret < 0) {
   1189		spin_unlock(&fsvq->lock);
   1190		goto out;
   1191	}
   1192
   1193	/* Request successfully sent. */
   1194	fpq = &fsvq->fud->pq;
   1195	spin_lock(&fpq->lock);
   1196	list_add_tail(&req->list, fpq->processing);
   1197	spin_unlock(&fpq->lock);
   1198	set_bit(FR_SENT, &req->flags);
   1199	/* matches barrier in request_wait_answer() */
   1200	smp_mb__after_atomic();
   1201
   1202	if (!in_flight)
   1203		inc_in_flight_req(fsvq);
   1204	notify = virtqueue_kick_prepare(vq);
   1205
   1206	spin_unlock(&fsvq->lock);
   1207
   1208	if (notify)
   1209		virtqueue_notify(vq);
   1210
   1211out:
   1212	if (ret < 0 && req->argbuf) {
   1213		kfree(req->argbuf);
   1214		req->argbuf = NULL;
   1215	}
   1216	if (sgs != stack_sgs) {
   1217		kfree(sgs);
   1218		kfree(sg);
   1219	}
   1220
   1221	return ret;
   1222}
   1223
   1224static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
   1225__releases(fiq->lock)
   1226{
   1227	unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */
   1228	struct virtio_fs *fs;
   1229	struct fuse_req *req;
   1230	struct virtio_fs_vq *fsvq;
   1231	int ret;
   1232
   1233	WARN_ON(list_empty(&fiq->pending));
   1234	req = list_last_entry(&fiq->pending, struct fuse_req, list);
   1235	clear_bit(FR_PENDING, &req->flags);
   1236	list_del_init(&req->list);
   1237	WARN_ON(!list_empty(&fiq->pending));
   1238	spin_unlock(&fiq->lock);
   1239
   1240	fs = fiq->priv;
   1241
   1242	pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
   1243		  __func__, req->in.h.opcode, req->in.h.unique,
   1244		 req->in.h.nodeid, req->in.h.len,
   1245		 fuse_len_args(req->args->out_numargs, req->args->out_args));
   1246
   1247	fsvq = &fs->vqs[queue_id];
   1248	ret = virtio_fs_enqueue_req(fsvq, req, false);
   1249	if (ret < 0) {
   1250		if (ret == -ENOMEM || ret == -ENOSPC) {
   1251			/*
   1252			 * Virtqueue full. Retry submission from worker
   1253			 * context as we might be holding fc->bg_lock.
   1254			 */
   1255			spin_lock(&fsvq->lock);
   1256			list_add_tail(&req->list, &fsvq->queued_reqs);
   1257			inc_in_flight_req(fsvq);
   1258			schedule_delayed_work(&fsvq->dispatch_work,
   1259						msecs_to_jiffies(1));
   1260			spin_unlock(&fsvq->lock);
   1261			return;
   1262		}
   1263		req->out.h.error = ret;
   1264		pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);
   1265
   1266		/* Can't end request in submission context. Use a worker */
   1267		spin_lock(&fsvq->lock);
   1268		list_add_tail(&req->list, &fsvq->end_reqs);
   1269		schedule_delayed_work(&fsvq->dispatch_work, 0);
   1270		spin_unlock(&fsvq->lock);
   1271		return;
   1272	}
   1273}
   1274
   1275static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
   1276	.wake_forget_and_unlock		= virtio_fs_wake_forget_and_unlock,
   1277	.wake_interrupt_and_unlock	= virtio_fs_wake_interrupt_and_unlock,
   1278	.wake_pending_and_unlock	= virtio_fs_wake_pending_and_unlock,
   1279	.release			= virtio_fs_fiq_release,
   1280};
   1281
   1282static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx)
   1283{
   1284	ctx->rootmode = S_IFDIR;
   1285	ctx->default_permissions = 1;
   1286	ctx->allow_other = 1;
   1287	ctx->max_read = UINT_MAX;
   1288	ctx->blksize = 512;
   1289	ctx->destroy = true;
   1290	ctx->no_control = true;
   1291	ctx->no_force_umount = true;
   1292}
   1293
   1294static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
   1295{
   1296	struct fuse_mount *fm = get_fuse_mount_super(sb);
   1297	struct fuse_conn *fc = fm->fc;
   1298	struct virtio_fs *fs = fc->iq.priv;
   1299	struct fuse_fs_context *ctx = fsc->fs_private;
   1300	unsigned int i;
   1301	int err;
   1302
   1303	virtio_fs_ctx_set_defaults(ctx);
   1304	mutex_lock(&virtio_fs_mutex);
   1305
   1306	/* After holding mutex, make sure virtiofs device is still there.
    1307	 * Though we are holding a reference to it, driver ->remove might
   1308	 * still have cleaned up virtual queues. In that case bail out.
   1309	 */
   1310	err = -EINVAL;
   1311	if (list_empty(&fs->list)) {
   1312		pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
   1313		goto err;
   1314	}
   1315
   1316	err = -ENOMEM;
    1317	/* Allocate a fuse_dev for each queue (hiprio and request queues) */
   1318	for (i = 0; i < fs->nvqs; i++) {
   1319		struct virtio_fs_vq *fsvq = &fs->vqs[i];
   1320
   1321		fsvq->fud = fuse_dev_alloc();
   1322		if (!fsvq->fud)
   1323			goto err_free_fuse_devs;
   1324	}
   1325
   1326	/* virtiofs allocates and installs its own fuse devices */
   1327	ctx->fudptr = NULL;
   1328	if (ctx->dax_mode != FUSE_DAX_NEVER) {
   1329		if (ctx->dax_mode == FUSE_DAX_ALWAYS && !fs->dax_dev) {
   1330			err = -EINVAL;
   1331			pr_err("virtio-fs: dax can't be enabled as filesystem"
   1332			       " device does not support it.\n");
   1333			goto err_free_fuse_devs;
   1334		}
   1335		ctx->dax_dev = fs->dax_dev;
   1336	}
   1337	err = fuse_fill_super_common(sb, ctx);
   1338	if (err < 0)
   1339		goto err_free_fuse_devs;
   1340
   1341	for (i = 0; i < fs->nvqs; i++) {
   1342		struct virtio_fs_vq *fsvq = &fs->vqs[i];
   1343
   1344		fuse_dev_install(fsvq->fud, fc);
   1345	}
   1346
   1347	/* Previous unmount will stop all queues. Start these again */
   1348	virtio_fs_start_all_queues(fs);
   1349	fuse_send_init(fm);
   1350	mutex_unlock(&virtio_fs_mutex);
   1351	return 0;
   1352
   1353err_free_fuse_devs:
   1354	virtio_fs_free_devs(fs);
   1355err:
   1356	mutex_unlock(&virtio_fs_mutex);
   1357	return err;
   1358}
   1359
   1360static void virtio_fs_conn_destroy(struct fuse_mount *fm)
   1361{
   1362	struct fuse_conn *fc = fm->fc;
   1363	struct virtio_fs *vfs = fc->iq.priv;
   1364	struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO];
   1365
   1366	/* Stop dax worker. Soon evict_inodes() will be called which
   1367	 * will free all memory ranges belonging to all inodes.
   1368	 */
   1369	if (IS_ENABLED(CONFIG_FUSE_DAX))
   1370		fuse_dax_cancel_work(fc);
   1371
   1372	/* Stop forget queue. Soon destroy will be sent */
   1373	spin_lock(&fsvq->lock);
   1374	fsvq->connected = false;
   1375	spin_unlock(&fsvq->lock);
   1376	virtio_fs_drain_all_queues(vfs);
   1377
   1378	fuse_conn_destroy(fm);
   1379
   1380	/* fuse_conn_destroy() must have sent destroy. Stop all queues
   1381	 * and drain one more time and free fuse devices. Freeing fuse
   1382	 * devices will drop their reference on fuse_conn and that in
   1383	 * turn will drop its reference on virtio_fs object.
   1384	 */
   1385	virtio_fs_stop_all_queues(vfs);
   1386	virtio_fs_drain_all_queues(vfs);
   1387	virtio_fs_free_devs(vfs);
   1388}
   1389
   1390static void virtio_kill_sb(struct super_block *sb)
   1391{
   1392	struct fuse_mount *fm = get_fuse_mount_super(sb);
   1393	bool last;
   1394
   1395	/* If mount failed, we can still be called without any fc */
   1396	if (sb->s_root) {
   1397		last = fuse_mount_remove(fm);
   1398		if (last)
   1399			virtio_fs_conn_destroy(fm);
   1400	}
   1401	kill_anon_super(sb);
   1402	fuse_mount_destroy(fm);
   1403}
   1404
   1405static int virtio_fs_test_super(struct super_block *sb,
   1406				struct fs_context *fsc)
   1407{
   1408	struct fuse_mount *fsc_fm = fsc->s_fs_info;
   1409	struct fuse_mount *sb_fm = get_fuse_mount_super(sb);
   1410
   1411	return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv;
   1412}
   1413
   1414static int virtio_fs_get_tree(struct fs_context *fsc)
   1415{
   1416	struct virtio_fs *fs;
   1417	struct super_block *sb;
   1418	struct fuse_conn *fc = NULL;
   1419	struct fuse_mount *fm;
   1420	unsigned int virtqueue_size;
   1421	int err = -EIO;
   1422
   1423	/* This gets a reference on virtio_fs object. This ptr gets installed
    1424	 * in fc->iq.priv. Once fuse_conn is going away, it calls ->put()
   1425	 * to drop the reference to this object.
   1426	 */
   1427	fs = virtio_fs_find_instance(fsc->source);
   1428	if (!fs) {
   1429		pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
   1430		return -EINVAL;
   1431	}
   1432
   1433	virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq);
   1434	if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD))
   1435		goto out_err;
   1436
   1437	err = -ENOMEM;
   1438	fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
   1439	if (!fc)
   1440		goto out_err;
   1441
   1442	fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
   1443	if (!fm)
   1444		goto out_err;
   1445
   1446	fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs);
   1447	fc->release = fuse_free_conn;
   1448	fc->delete_stale = true;
   1449	fc->auto_submounts = true;
   1450	fc->sync_fs = true;
   1451
   1452	/* Tell FUSE to split requests that exceed the virtqueue's size */
   1453	fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit,
   1454				    virtqueue_size - FUSE_HEADER_OVERHEAD);
   1455
   1456	fsc->s_fs_info = fm;
   1457	sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc);
   1458	if (fsc->s_fs_info)
   1459		fuse_mount_destroy(fm);
   1460	if (IS_ERR(sb))
   1461		return PTR_ERR(sb);
   1462
   1463	if (!sb->s_root) {
   1464		err = virtio_fs_fill_super(sb, fsc);
   1465		if (err) {
   1466			deactivate_locked_super(sb);
   1467			return err;
   1468		}
   1469
   1470		sb->s_flags |= SB_ACTIVE;
   1471	}
   1472
   1473	WARN_ON(fsc->root);
   1474	fsc->root = dget(sb->s_root);
   1475	return 0;
   1476
   1477out_err:
   1478	kfree(fc);
   1479	mutex_lock(&virtio_fs_mutex);
   1480	virtio_fs_put(fs);
   1481	mutex_unlock(&virtio_fs_mutex);
   1482	return err;
   1483}
   1484
   1485static const struct fs_context_operations virtio_fs_context_ops = {
   1486	.free		= virtio_fs_free_fsc,
   1487	.parse_param	= virtio_fs_parse_param,
   1488	.get_tree	= virtio_fs_get_tree,
   1489};
   1490
   1491static int virtio_fs_init_fs_context(struct fs_context *fsc)
   1492{
   1493	struct fuse_fs_context *ctx;
   1494
   1495	if (fsc->purpose == FS_CONTEXT_FOR_SUBMOUNT)
   1496		return fuse_init_fs_context_submount(fsc);
   1497
   1498	ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
   1499	if (!ctx)
   1500		return -ENOMEM;
   1501	fsc->fs_private = ctx;
   1502	fsc->ops = &virtio_fs_context_ops;
   1503	return 0;
   1504}
   1505
   1506static struct file_system_type virtio_fs_type = {
   1507	.owner		= THIS_MODULE,
   1508	.name		= "virtiofs",
   1509	.init_fs_context = virtio_fs_init_fs_context,
   1510	.kill_sb	= virtio_kill_sb,
   1511};
   1512
   1513static int __init virtio_fs_init(void)
   1514{
   1515	int ret;
   1516
   1517	ret = register_virtio_driver(&virtio_fs_driver);
   1518	if (ret < 0)
   1519		return ret;
   1520
   1521	ret = register_filesystem(&virtio_fs_type);
   1522	if (ret < 0) {
   1523		unregister_virtio_driver(&virtio_fs_driver);
   1524		return ret;
   1525	}
   1526
   1527	return 0;
   1528}
   1529module_init(virtio_fs_init);
   1530
   1531static void __exit virtio_fs_exit(void)
   1532{
   1533	unregister_filesystem(&virtio_fs_type);
   1534	unregister_virtio_driver(&virtio_fs_driver);
   1535}
   1536module_exit(virtio_fs_exit);
   1537
   1538MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
   1539MODULE_DESCRIPTION("Virtio Filesystem");
   1540MODULE_LICENSE("GPL");
   1541MODULE_ALIAS_FS(KBUILD_MODNAME);
   1542MODULE_DEVICE_TABLE(virtio, id_table);