cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

xenbus.c (30074B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*  Xenbus code for blkif backend
      3    Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
      4    Copyright (C) 2005 XenSource Ltd
      5
      6
      7*/
      8
      9#define pr_fmt(fmt) "xen-blkback: " fmt
     10
     11#include <linux/module.h>
     12#include <linux/kthread.h>
     13#include <linux/pagemap.h>
     14#include <xen/events.h>
     15#include <xen/grant_table.h>
     16#include "common.h"
     17
     18/* The maximum length of the 'ring-ref%u' key name on XenBus. */
     19#define RINGREF_NAME_LEN (20)
     20
     21struct backend_info {
     22	struct xenbus_device	*dev;
     23	struct xen_blkif	*blkif;
     24	struct xenbus_watch	backend_watch;
     25	unsigned		major;
     26	unsigned		minor;
     27	char			*mode;
     28};
     29
     30static struct kmem_cache *xen_blkif_cachep;
     31static void connect(struct backend_info *);
     32static int connect_ring(struct backend_info *);
     33static void backend_changed(struct xenbus_watch *, const char *,
     34			    const char *);
     35static void xen_blkif_free(struct xen_blkif *blkif);
     36static void xen_vbd_free(struct xen_vbd *vbd);
     37
     38struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
     39{
     40	return be->dev;
     41}
     42
     43/*
     44 * The last request could free the device from softirq context and
     45 * xen_blkif_free() can sleep.
     46 */
     47static void xen_blkif_deferred_free(struct work_struct *work)
     48{
     49	struct xen_blkif *blkif;
     50
     51	blkif = container_of(work, struct xen_blkif, free_work);
     52	xen_blkif_free(blkif);
     53}
     54
     55static int blkback_name(struct xen_blkif *blkif, char *buf)
     56{
     57	char *devpath, *devname;
     58	struct xenbus_device *dev = blkif->be->dev;
     59
     60	devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
     61	if (IS_ERR(devpath))
     62		return PTR_ERR(devpath);
     63
     64	devname = strstr(devpath, "/dev/");
     65	if (devname != NULL)
     66		devname += strlen("/dev/");
     67	else
     68		devname  = devpath;
     69
     70	snprintf(buf, TASK_COMM_LEN, "%d.%s", blkif->domid, devname);
     71	kfree(devpath);
     72
     73	return 0;
     74}
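
For example, for a backend serving domain 5 whose hotplug script wrote dev = "/dev/xvda", this yields the name "5.xvda"; xen_update_blkif_status() below then uses it to name the per-ring kernel threads "5.xvda-0", "5.xvda-1", and so on (the domain ID and device name here are purely illustrative).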
     75
     76static void xen_update_blkif_status(struct xen_blkif *blkif)
     77{
     78	int err;
     79	char name[TASK_COMM_LEN];
     80	struct xen_blkif_ring *ring;
     81	int i;
     82
     83	/* Not ready to connect? */
     84	if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev)
     85		return;
     86
     87	/* Already connected? */
     88	if (blkif->be->dev->state == XenbusStateConnected)
     89		return;
     90
     91	/* Attempt to connect: exit if we fail to. */
     92	connect(blkif->be);
     93	if (blkif->be->dev->state != XenbusStateConnected)
     94		return;
     95
     96	err = blkback_name(blkif, name);
     97	if (err) {
     98		xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
     99		return;
    100	}
    101
    102	err = sync_blockdev(blkif->vbd.bdev);
    103	if (err) {
    104		xenbus_dev_error(blkif->be->dev, err, "block flush");
    105		return;
    106	}
    107	invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
    108
    109	for (i = 0; i < blkif->nr_rings; i++) {
    110		ring = &blkif->rings[i];
    111		ring->xenblkd = kthread_run(xen_blkif_schedule, ring, "%s-%d", name, i);
    112		if (IS_ERR(ring->xenblkd)) {
    113			err = PTR_ERR(ring->xenblkd);
    114			ring->xenblkd = NULL;
    115			xenbus_dev_fatal(blkif->be->dev, err,
    116					"start %s-%d xenblkd", name, i);
    117			goto out;
    118		}
    119	}
    120	return;
    121
    122out:
    123	while (--i >= 0) {
    124		ring = &blkif->rings[i];
    125		kthread_stop(ring->xenblkd);
    126	}
    127	return;
    128}
    129
    130static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
    131{
    132	unsigned int r;
    133
    134	blkif->rings = kcalloc(blkif->nr_rings, sizeof(struct xen_blkif_ring),
    135			       GFP_KERNEL);
    136	if (!blkif->rings)
    137		return -ENOMEM;
    138
    139	for (r = 0; r < blkif->nr_rings; r++) {
    140		struct xen_blkif_ring *ring = &blkif->rings[r];
    141
    142		spin_lock_init(&ring->blk_ring_lock);
    143		init_waitqueue_head(&ring->wq);
    144		INIT_LIST_HEAD(&ring->pending_free);
    145		INIT_LIST_HEAD(&ring->persistent_purge_list);
    146		INIT_WORK(&ring->persistent_purge_work, xen_blkbk_unmap_purged_grants);
    147		gnttab_page_cache_init(&ring->free_pages);
    148
    149		spin_lock_init(&ring->pending_free_lock);
    150		init_waitqueue_head(&ring->pending_free_wq);
    151		init_waitqueue_head(&ring->shutdown_wq);
    152		ring->blkif = blkif;
    153		ring->st_print = jiffies;
    154		ring->active = true;
    155	}
    156
    157	return 0;
    158}
    159
    160static struct xen_blkif *xen_blkif_alloc(domid_t domid)
    161{
    162	struct xen_blkif *blkif;
    163
    164	BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
    165
    166	blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL);
    167	if (!blkif)
    168		return ERR_PTR(-ENOMEM);
    169
    170	blkif->domid = domid;
    171	atomic_set(&blkif->refcnt, 1);
    172	init_completion(&blkif->drain_complete);
    173
    174	/*
    175	 * Because freeing back to the cache may be deferred, it is not
    176	 * safe to unload the module (and hence destroy the cache) until
    177	 * this has completed. To prevent premature unloading, take an
    178	 * extra module reference here and release only when the object
    179	 * has been freed back to the cache.
    180	 */
    181	__module_get(THIS_MODULE);
    182	INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
    183
    184	return blkif;
    185}
    186
    187static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
    188			 unsigned int nr_grefs, unsigned int evtchn)
    189{
    190	int err;
    191	struct xen_blkif *blkif = ring->blkif;
    192	const struct blkif_common_sring *sring_common;
    193	RING_IDX rsp_prod, req_prod;
    194	unsigned int size;
    195
    196	/* Already connected? */
    197	if (ring->irq)
    198		return 0;
    199
    200	err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs,
    201				     &ring->blk_ring);
    202	if (err < 0)
    203		return err;
    204
    205	sring_common = (struct blkif_common_sring *)ring->blk_ring;
    206	rsp_prod = READ_ONCE(sring_common->rsp_prod);
    207	req_prod = READ_ONCE(sring_common->req_prod);
    208
    209	switch (blkif->blk_protocol) {
    210	case BLKIF_PROTOCOL_NATIVE:
    211	{
    212		struct blkif_sring *sring_native =
    213			(struct blkif_sring *)ring->blk_ring;
    214
    215		BACK_RING_ATTACH(&ring->blk_rings.native, sring_native,
    216				 rsp_prod, XEN_PAGE_SIZE * nr_grefs);
    217		size = __RING_SIZE(sring_native, XEN_PAGE_SIZE * nr_grefs);
    218		break;
    219	}
    220	case BLKIF_PROTOCOL_X86_32:
    221	{
    222		struct blkif_x86_32_sring *sring_x86_32 =
    223			(struct blkif_x86_32_sring *)ring->blk_ring;
    224
    225		BACK_RING_ATTACH(&ring->blk_rings.x86_32, sring_x86_32,
    226				 rsp_prod, XEN_PAGE_SIZE * nr_grefs);
    227		size = __RING_SIZE(sring_x86_32, XEN_PAGE_SIZE * nr_grefs);
    228		break;
    229	}
    230	case BLKIF_PROTOCOL_X86_64:
    231	{
    232		struct blkif_x86_64_sring *sring_x86_64 =
    233			(struct blkif_x86_64_sring *)ring->blk_ring;
    234
    235		BACK_RING_ATTACH(&ring->blk_rings.x86_64, sring_x86_64,
    236				 rsp_prod, XEN_PAGE_SIZE * nr_grefs);
    237		size = __RING_SIZE(sring_x86_64, XEN_PAGE_SIZE * nr_grefs);
    238		break;
    239	}
    240	default:
    241		BUG();
    242	}
    243
    244	err = -EIO;
    245	if (req_prod - rsp_prod > size)
    246		goto fail;
    247
    248	err = bind_interdomain_evtchn_to_irqhandler_lateeoi(blkif->be->dev,
    249			evtchn, xen_blkif_be_int, 0, "blkif-backend", ring);
    250	if (err < 0)
    251		goto fail;
    252	ring->irq = err;
    253
    254	return 0;
    255
    256fail:
    257	xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
    258	ring->blk_rings.common.sring = NULL;
    259	return err;
    260}
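
The req_prod/rsp_prod check above rejects a frontend that claims more outstanding requests than the ring can hold. Both indices are free-running unsigned counters, so their unsigned difference stays correct across wraparound. A minimal sketch of the same invariant with hypothetical values (not part of the original file):

/* Illustrative only: mirrors the overflow check in xen_blkif_map(). */
static bool ring_indices_sane(RING_IDX req_prod, RING_IDX rsp_prod,
			      unsigned int ring_size)
{
	/*
	 * Unsigned subtraction handles wraparound: req_prod = 2 and
	 * rsp_prod = 0xfffffffe still yield 4 outstanding requests.
	 */
	return req_prod - rsp_prod <= ring_size;
}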
    261
    262static int xen_blkif_disconnect(struct xen_blkif *blkif)
    263{
    264	struct pending_req *req, *n;
    265	unsigned int j, r;
    266	bool busy = false;
    267
    268	for (r = 0; r < blkif->nr_rings; r++) {
    269		struct xen_blkif_ring *ring = &blkif->rings[r];
    270		unsigned int i = 0;
    271
    272		if (!ring->active)
    273			continue;
    274
    275		if (ring->xenblkd) {
    276			kthread_stop(ring->xenblkd);
    277			ring->xenblkd = NULL;
    278			wake_up(&ring->shutdown_wq);
    279		}
    280
    281		/* The above kthread_stop() guarantees that at this point we
    282		 * don't have any discard_io or other_io requests. So, checking
    283		 * for inflight IO is enough.
    284		 */
    285		if (atomic_read(&ring->inflight) > 0) {
    286			busy = true;
    287			continue;
    288		}
    289
    290		if (ring->irq) {
    291			unbind_from_irqhandler(ring->irq, ring);
    292			ring->irq = 0;
    293		}
    294
    295		if (ring->blk_rings.common.sring) {
    296			xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
    297			ring->blk_rings.common.sring = NULL;
    298		}
    299
    300		/* Remove all persistent grants and the cache of ballooned pages. */
    301		xen_blkbk_free_caches(ring);
    302
    303		/* Check that there is no request in use */
    304		list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
    305			list_del(&req->free_list);
    306
    307			for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
    308				kfree(req->segments[j]);
    309
    310			for (j = 0; j < MAX_INDIRECT_PAGES; j++)
    311				kfree(req->indirect_pages[j]);
    312
    313			kfree(req);
    314			i++;
    315		}
    316
    317		BUG_ON(atomic_read(&ring->persistent_gnt_in_use) != 0);
    318		BUG_ON(!list_empty(&ring->persistent_purge_list));
    319		BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
    320		BUG_ON(ring->free_pages.num_pages != 0);
    321		BUG_ON(ring->persistent_gnt_c != 0);
    322		WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
    323		ring->active = false;
    324	}
    325	if (busy)
    326		return -EBUSY;
    327
    328	blkif->nr_ring_pages = 0;
    329	/*
    330	 * blkif->rings was allocated in connect_ring, so we should free it
    331	 * here.
    332	 */
    333	kfree(blkif->rings);
    334	blkif->rings = NULL;
    335	blkif->nr_rings = 0;
    336
    337	return 0;
    338}
    339
    340static void xen_blkif_free(struct xen_blkif *blkif)
    341{
    342	WARN_ON(xen_blkif_disconnect(blkif));
    343	xen_vbd_free(&blkif->vbd);
    344	kfree(blkif->be->mode);
    345	kfree(blkif->be);
    346
    347	/* Make sure everything is drained before shutting down */
    348	kmem_cache_free(xen_blkif_cachep, blkif);
    349	module_put(THIS_MODULE);
    350}
    351
    352int __init xen_blkif_interface_init(void)
    353{
    354	xen_blkif_cachep = kmem_cache_create("blkif_cache",
    355					     sizeof(struct xen_blkif),
    356					     0, 0, NULL);
    357	if (!xen_blkif_cachep)
    358		return -ENOMEM;
    359
    360	return 0;
    361}
    362
    363void xen_blkif_interface_fini(void)
    364{
    365	kmem_cache_destroy(xen_blkif_cachep);
    366	xen_blkif_cachep = NULL;
    367}
    368
    369/*
    370 *  sysfs interface for VBD I/O requests
    371 */
    372
    373#define VBD_SHOW_ALLRING(name, format)					\
    374	static ssize_t show_##name(struct device *_dev,			\
    375				   struct device_attribute *attr,	\
    376				   char *buf)				\
    377	{								\
    378		struct xenbus_device *dev = to_xenbus_device(_dev);	\
    379		struct backend_info *be = dev_get_drvdata(&dev->dev);	\
    380		struct xen_blkif *blkif = be->blkif;			\
    381		unsigned int i;						\
    382		unsigned long long result = 0;				\
    383									\
    384		if (!blkif->rings)				\
    385			goto out;					\
    386									\
    387		for (i = 0; i < blkif->nr_rings; i++) {		\
    388			struct xen_blkif_ring *ring = &blkif->rings[i];	\
    389									\
    390			result += ring->st_##name;			\
    391		}							\
    392									\
    393out:									\
    394		return sprintf(buf, format, result);			\
    395	}								\
    396	static DEVICE_ATTR(name, 0444, show_##name, NULL)
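
For reference, the first invocation below, VBD_SHOW_ALLRING(oo_req, "%llu\n"), expands to roughly the following; this is a hand-expanded sketch for readability, not code taken from the file:

static ssize_t show_oo_req(struct device *_dev,
			   struct device_attribute *attr,
			   char *buf)
{
	struct xenbus_device *dev = to_xenbus_device(_dev);
	struct backend_info *be = dev_get_drvdata(&dev->dev);
	struct xen_blkif *blkif = be->blkif;
	unsigned int i;
	unsigned long long result = 0;

	if (!blkif->rings)
		goto out;

	for (i = 0; i < blkif->nr_rings; i++) {
		struct xen_blkif_ring *ring = &blkif->rings[i];

		result += ring->st_oo_req;
	}

out:
	return sprintf(buf, "%llu\n", result);
}
static DEVICE_ATTR(oo_req, 0444, show_oo_req, NULL);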
    397
    398VBD_SHOW_ALLRING(oo_req,  "%llu\n");
    399VBD_SHOW_ALLRING(rd_req,  "%llu\n");
    400VBD_SHOW_ALLRING(wr_req,  "%llu\n");
    401VBD_SHOW_ALLRING(f_req,  "%llu\n");
    402VBD_SHOW_ALLRING(ds_req,  "%llu\n");
    403VBD_SHOW_ALLRING(rd_sect, "%llu\n");
    404VBD_SHOW_ALLRING(wr_sect, "%llu\n");
    405
    406static struct attribute *xen_vbdstat_attrs[] = {
    407	&dev_attr_oo_req.attr,
    408	&dev_attr_rd_req.attr,
    409	&dev_attr_wr_req.attr,
    410	&dev_attr_f_req.attr,
    411	&dev_attr_ds_req.attr,
    412	&dev_attr_rd_sect.attr,
    413	&dev_attr_wr_sect.attr,
    414	NULL
    415};
    416
    417static const struct attribute_group xen_vbdstat_group = {
    418	.name = "statistics",
    419	.attrs = xen_vbdstat_attrs,
    420};
    421
    422#define VBD_SHOW(name, format, args...)					\
    423	static ssize_t show_##name(struct device *_dev,			\
    424				   struct device_attribute *attr,	\
    425				   char *buf)				\
    426	{								\
    427		struct xenbus_device *dev = to_xenbus_device(_dev);	\
    428		struct backend_info *be = dev_get_drvdata(&dev->dev);	\
    429									\
    430		return sprintf(buf, format, ##args);			\
    431	}								\
    432	static DEVICE_ATTR(name, 0444, show_##name, NULL)
    433
    434VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
    435VBD_SHOW(mode, "%s\n", be->mode);
    436
    437static int xenvbd_sysfs_addif(struct xenbus_device *dev)
    438{
    439	int error;
    440
    441	error = device_create_file(&dev->dev, &dev_attr_physical_device);
    442	if (error)
    443		goto fail1;
    444
    445	error = device_create_file(&dev->dev, &dev_attr_mode);
    446	if (error)
    447		goto fail2;
    448
    449	error = sysfs_create_group(&dev->dev.kobj, &xen_vbdstat_group);
    450	if (error)
    451		goto fail3;
    452
    453	return 0;
    454
    455fail3:	sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
    456fail2:	device_remove_file(&dev->dev, &dev_attr_mode);
    457fail1:	device_remove_file(&dev->dev, &dev_attr_physical_device);
    458	return error;
    459}
    460
    461static void xenvbd_sysfs_delif(struct xenbus_device *dev)
    462{
    463	sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
    464	device_remove_file(&dev->dev, &dev_attr_mode);
    465	device_remove_file(&dev->dev, &dev_attr_physical_device);
    466}
    467
    468static void xen_vbd_free(struct xen_vbd *vbd)
    469{
    470	if (vbd->bdev)
    471		blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE);
    472	vbd->bdev = NULL;
    473}
    474
    475/* Enable the persistent grants feature. */
    476static bool feature_persistent = true;
    477module_param(feature_persistent, bool, 0644);
    478MODULE_PARM_DESC(feature_persistent,
    479		"Enables the persistent grants feature");
    480
    481static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
    482			  unsigned major, unsigned minor, int readonly,
    483			  int cdrom)
    484{
    485	struct xen_vbd *vbd;
    486	struct block_device *bdev;
    487
    488	vbd = &blkif->vbd;
    489	vbd->handle   = handle;
    490	vbd->readonly = readonly;
    491	vbd->type     = 0;
    492
    493	vbd->pdevice  = MKDEV(major, minor);
    494
    495	bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ?
    496				 FMODE_READ : FMODE_WRITE, NULL);
    497
    498	if (IS_ERR(bdev)) {
    499		pr_warn("xen_vbd_create: device %08x could not be opened\n",
    500			vbd->pdevice);
    501		return -ENOENT;
    502	}
    503
    504	vbd->bdev = bdev;
    505	if (vbd->bdev->bd_disk == NULL) {
    506		pr_warn("xen_vbd_create: device %08x doesn't exist\n",
    507			vbd->pdevice);
    508		xen_vbd_free(vbd);
    509		return -ENOENT;
    510	}
    511	vbd->size = vbd_sz(vbd);
    512
    513	if (cdrom || disk_to_cdi(vbd->bdev->bd_disk))
    514		vbd->type |= VDISK_CDROM;
    515	if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
    516		vbd->type |= VDISK_REMOVABLE;
    517
    518	if (bdev_write_cache(bdev))
    519		vbd->flush_support = true;
    520	if (bdev_max_secure_erase_sectors(bdev))
    521		vbd->discard_secure = true;
    522
    523	vbd->feature_gnt_persistent = feature_persistent;
    524
    525	pr_debug("Successful creation of handle=%04x (dom=%u)\n",
    526		handle, blkif->domid);
    527	return 0;
    528}
    529
    530static int xen_blkbk_remove(struct xenbus_device *dev)
    531{
    532	struct backend_info *be = dev_get_drvdata(&dev->dev);
    533
    534	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);
    535
    536	if (be->major || be->minor)
    537		xenvbd_sysfs_delif(dev);
    538
    539	if (be->backend_watch.node) {
    540		unregister_xenbus_watch(&be->backend_watch);
    541		kfree(be->backend_watch.node);
    542		be->backend_watch.node = NULL;
    543	}
    544
    545	dev_set_drvdata(&dev->dev, NULL);
    546
    547	if (be->blkif) {
    548		xen_blkif_disconnect(be->blkif);
    549
    550		/* Put the reference we set in xen_blkif_alloc(). */
    551		xen_blkif_put(be->blkif);
    552	}
    553
    554	return 0;
    555}
    556
    557int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
    558			      struct backend_info *be, int state)
    559{
    560	struct xenbus_device *dev = be->dev;
    561	int err;
    562
    563	err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache",
    564			    "%d", state);
    565	if (err)
    566		dev_warn(&dev->dev, "writing feature-flush-cache (%d)", err);
    567
    568	return err;
    569}
    570
    571static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
    572{
    573	struct xenbus_device *dev = be->dev;
    574	struct xen_blkif *blkif = be->blkif;
    575	int err;
    576	int state = 0;
    577	struct block_device *bdev = be->blkif->vbd.bdev;
    578
    579	if (!xenbus_read_unsigned(dev->nodename, "discard-enable", 1))
    580		return;
    581
    582	if (bdev_max_discard_sectors(bdev)) {
    583		err = xenbus_printf(xbt, dev->nodename,
    584			"discard-granularity", "%u",
    585			bdev_discard_granularity(bdev));
    586		if (err) {
    587			dev_warn(&dev->dev, "writing discard-granularity (%d)", err);
    588			return;
    589		}
    590		err = xenbus_printf(xbt, dev->nodename,
    591			"discard-alignment", "%u",
    592			bdev_discard_alignment(bdev));
    593		if (err) {
    594			dev_warn(&dev->dev, "writing discard-alignment (%d)", err);
    595			return;
    596		}
    597		state = 1;
    598		/* Optional. */
    599		err = xenbus_printf(xbt, dev->nodename,
    600				    "discard-secure", "%d",
    601				    blkif->vbd.discard_secure);
    602		if (err) {
    603			dev_warn(&dev->dev, "writing discard-secure (%d)", err);
    604			return;
    605		}
    606	}
    607	err = xenbus_printf(xbt, dev->nodename, "feature-discard",
    608			    "%d", state);
    609	if (err)
    610		dev_warn(&dev->dev, "writing feature-discard (%d)", err);
    611}
    612
    613int xen_blkbk_barrier(struct xenbus_transaction xbt,
    614		      struct backend_info *be, int state)
    615{
    616	struct xenbus_device *dev = be->dev;
    617	int err;
    618
    619	err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
    620			    "%d", state);
    621	if (err)
    622		dev_warn(&dev->dev, "writing feature-barrier (%d)", err);
    623
    624	return err;
    625}
    626
    627/*
    628 * Entry point to this code when a new device is created.  Allocate the basic
    629 * structures, and watch the store waiting for the hotplug scripts to tell us
    630 * the device's physical major and minor numbers.  Switch to InitWait.
    631 */
    632static int xen_blkbk_probe(struct xenbus_device *dev,
    633			   const struct xenbus_device_id *id)
    634{
    635	int err;
    636	struct backend_info *be = kzalloc(sizeof(struct backend_info),
    637					  GFP_KERNEL);
    638
    639	/* match the pr_debug in xen_blkbk_remove */
    640	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);
    641
    642	if (!be) {
    643		xenbus_dev_fatal(dev, -ENOMEM,
    644				 "allocating backend structure");
    645		return -ENOMEM;
    646	}
    647	be->dev = dev;
    648	dev_set_drvdata(&dev->dev, be);
    649
    650	be->blkif = xen_blkif_alloc(dev->otherend_id);
    651	if (IS_ERR(be->blkif)) {
    652		err = PTR_ERR(be->blkif);
    653		be->blkif = NULL;
    654		xenbus_dev_fatal(dev, err, "creating block interface");
    655		goto fail;
    656	}
    657
    658	err = xenbus_printf(XBT_NIL, dev->nodename,
    659			    "feature-max-indirect-segments", "%u",
    660			    MAX_INDIRECT_SEGMENTS);
    661	if (err)
    662		dev_warn(&dev->dev,
    663			 "writing %s/feature-max-indirect-segments (%d)",
    664			 dev->nodename, err);
    665
    666	/* Multi-queue: advertise how many queues we support. */
    667	err = xenbus_printf(XBT_NIL, dev->nodename,
    668			    "multi-queue-max-queues", "%u", xenblk_max_queues);
    669	if (err)
    670		pr_warn("Error writing multi-queue-max-queues\n");
    671
    672	/* setup back pointer */
    673	be->blkif->be = be;
    674
    675	err = xenbus_watch_pathfmt(dev, &be->backend_watch, NULL,
    676				   backend_changed,
    677				   "%s/%s", dev->nodename, "physical-device");
    678	if (err)
    679		goto fail;
    680
    681	err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", "%u",
    682			    xen_blkif_max_ring_order);
    683	if (err)
    684		pr_warn("%s write out 'max-ring-page-order' failed\n", __func__);
    685
    686	err = xenbus_switch_state(dev, XenbusStateInitWait);
    687	if (err)
    688		goto fail;
    689
    690	return 0;
    691
    692fail:
    693	pr_warn("%s failed\n", __func__);
    694	xen_blkbk_remove(dev);
    695	return err;
    696}
    697
    698/*
    699 * Callback received when the hotplug scripts have placed the physical-device
    700 * node.  Read it and the mode node, and create a vbd.  If the frontend is
    701 * ready, connect.
    702 */
    703static void backend_changed(struct xenbus_watch *watch,
    704			    const char *path, const char *token)
    705{
    706	int err;
    707	unsigned major;
    708	unsigned minor;
    709	struct backend_info *be
    710		= container_of(watch, struct backend_info, backend_watch);
    711	struct xenbus_device *dev = be->dev;
    712	int cdrom = 0;
    713	unsigned long handle;
    714	char *device_type;
    715
    716	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);
    717
    718	err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
    719			   &major, &minor);
    720	if (XENBUS_EXIST_ERR(err)) {
    721		/*
    722		 * Since this watch will fire once immediately after it is
    723		 * registered, we expect this.  Ignore it, and wait for the
    724		 * hotplug scripts.
    725		 */
    726		return;
    727	}
    728	if (err != 2) {
    729		xenbus_dev_fatal(dev, err, "reading physical-device");
    730		return;
    731	}
    732
    733	if (be->major | be->minor) {
    734		if (be->major != major || be->minor != minor)
    735			pr_warn("changing physical device (from %x:%x to %x:%x) not supported.\n",
    736				be->major, be->minor, major, minor);
    737		return;
    738	}
    739
    740	be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
    741	if (IS_ERR(be->mode)) {
    742		err = PTR_ERR(be->mode);
    743		be->mode = NULL;
    744		xenbus_dev_fatal(dev, err, "reading mode");
    745		return;
    746	}
    747
    748	device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL);
    749	if (!IS_ERR(device_type)) {
    750		cdrom = strcmp(device_type, "cdrom") == 0;
    751		kfree(device_type);
    752	}
    753
    754	/* Front end dir is a number, which is used as the handle. */
    755	err = kstrtoul(strrchr(dev->otherend, '/') + 1, 0, &handle);
    756	if (err) {
    757		kfree(be->mode);
    758		be->mode = NULL;
    759		return;
    760	}
    761
    762	be->major = major;
    763	be->minor = minor;
    764
    765	err = xen_vbd_create(be->blkif, handle, major, minor,
    766			     !strchr(be->mode, 'w'), cdrom);
    767
    768	if (err)
    769		xenbus_dev_fatal(dev, err, "creating vbd structure");
    770	else {
    771		err = xenvbd_sysfs_addif(dev);
    772		if (err) {
    773			xen_vbd_free(&be->blkif->vbd);
    774			xenbus_dev_fatal(dev, err, "creating sysfs entries");
    775		}
    776	}
    777
    778	if (err) {
    779		kfree(be->mode);
    780		be->mode = NULL;
    781		be->major = 0;
    782		be->minor = 0;
    783	} else {
    784		/* We're potentially connected now */
    785		xen_update_blkif_status(be->blkif);
    786	}
    787}
    788
    789/*
    790 * Callback received when the frontend's state changes.
    791 */
    792static void frontend_changed(struct xenbus_device *dev,
    793			     enum xenbus_state frontend_state)
    794{
    795	struct backend_info *be = dev_get_drvdata(&dev->dev);
    796	int err;
    797
    798	pr_debug("%s %p %s\n", __func__, dev, xenbus_strstate(frontend_state));
    799
    800	switch (frontend_state) {
    801	case XenbusStateInitialising:
    802		if (dev->state == XenbusStateClosed) {
    803			pr_info("%s: prepare for reconnect\n", dev->nodename);
    804			xenbus_switch_state(dev, XenbusStateInitWait);
    805		}
    806		break;
    807
    808	case XenbusStateInitialised:
    809	case XenbusStateConnected:
    810		/*
    811		 * Ensure we connect even when two watches fire in
    812		 * close succession and we miss the intermediate value
    813		 * of frontend_state.
    814		 */
    815		if (dev->state == XenbusStateConnected)
    816			break;
    817
    818		/*
    819		 * Enforce precondition before potential leak point.
    820		 * xen_blkif_disconnect() is idempotent.
    821		 */
    822		err = xen_blkif_disconnect(be->blkif);
    823		if (err) {
    824			xenbus_dev_fatal(dev, err, "pending I/O");
    825			break;
    826		}
    827
    828		err = connect_ring(be);
    829		if (err) {
    830			/*
    831			 * Clean up so that memory resources can be used by
    832			 * other devices. connect_ring has already reported the error.
    833			 */
    834			xen_blkif_disconnect(be->blkif);
    835			break;
    836		}
    837		xen_update_blkif_status(be->blkif);
    838		break;
    839
    840	case XenbusStateClosing:
    841		xenbus_switch_state(dev, XenbusStateClosing);
    842		break;
    843
    844	case XenbusStateClosed:
    845		xen_blkif_disconnect(be->blkif);
    846		xenbus_switch_state(dev, XenbusStateClosed);
    847		if (xenbus_dev_is_online(dev))
    848			break;
    849		fallthrough;
    850		/* if not online */
    851	case XenbusStateUnknown:
    852		/* implies xen_blkif_disconnect() via xen_blkbk_remove() */
    853		device_unregister(&dev->dev);
    854		break;
    855
    856	default:
    857		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
    858				 frontend_state);
    859		break;
    860	}
    861}
    862
    863/* Once memory pressure is detected, squeeze free page pools for a while. */
    864static unsigned int buffer_squeeze_duration_ms = 10;
    865module_param_named(buffer_squeeze_duration_ms,
    866		buffer_squeeze_duration_ms, int, 0644);
    867MODULE_PARM_DESC(buffer_squeeze_duration_ms,
    868"Duration in ms to squeeze pages buffer when a memory pressure is detected");
    869
    870/*
    871 * Callback received when memory pressure is detected.
    872 */
    873static void reclaim_memory(struct xenbus_device *dev)
    874{
    875	struct backend_info *be = dev_get_drvdata(&dev->dev);
    876
    877	if (!be)
    878		return;
    879	be->blkif->buffer_squeeze_end = jiffies +
    880		msecs_to_jiffies(buffer_squeeze_duration_ms);
    881}
    882
    883/* ** Connection ** */
    884
    885/*
    886 * Write the physical details regarding the block device to the store, and
    887 * switch to Connected state.
    888 */
    889static void connect(struct backend_info *be)
    890{
    891	struct xenbus_transaction xbt;
    892	int err;
    893	struct xenbus_device *dev = be->dev;
    894
    895	pr_debug("%s %s\n", __func__, dev->otherend);
    896
    897	/* Supply the information about the device the frontend needs */
    898again:
    899	err = xenbus_transaction_start(&xbt);
    900	if (err) {
    901		xenbus_dev_fatal(dev, err, "starting transaction");
    902		return;
    903	}
    904
    905	/* It is OK if we cannot advertise these features. */
    906	xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support);
    907
    908	xen_blkbk_discard(xbt, be);
    909
    910	xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
    911
    912	err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u",
    913			be->blkif->vbd.feature_gnt_persistent);
    914	if (err) {
    915		xenbus_dev_fatal(dev, err, "writing %s/feature-persistent",
    916				 dev->nodename);
    917		goto abort;
    918	}
    919
    920	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
    921			    (unsigned long long)vbd_sz(&be->blkif->vbd));
    922	if (err) {
    923		xenbus_dev_fatal(dev, err, "writing %s/sectors",
    924				 dev->nodename);
    925		goto abort;
    926	}
    927
    928	/* FIXME: use a typename instead */
    929	err = xenbus_printf(xbt, dev->nodename, "info", "%u",
    930			    be->blkif->vbd.type |
    931			    (be->blkif->vbd.readonly ? VDISK_READONLY : 0));
    932	if (err) {
    933		xenbus_dev_fatal(dev, err, "writing %s/info",
    934				 dev->nodename);
    935		goto abort;
    936	}
    937	err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
    938			    (unsigned long)
    939			    bdev_logical_block_size(be->blkif->vbd.bdev));
    940	if (err) {
    941		xenbus_dev_fatal(dev, err, "writing %s/sector-size",
    942				 dev->nodename);
    943		goto abort;
    944	}
    945	err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u",
    946			    bdev_physical_block_size(be->blkif->vbd.bdev));
    947	if (err)
    948		xenbus_dev_error(dev, err, "writing %s/physical-sector-size",
    949				 dev->nodename);
    950
    951	err = xenbus_transaction_end(xbt, 0);
    952	if (err == -EAGAIN)
    953		goto again;
    954	if (err)
    955		xenbus_dev_fatal(dev, err, "ending transaction");
    956
    957	err = xenbus_switch_state(dev, XenbusStateConnected);
    958	if (err)
    959		xenbus_dev_fatal(dev, err, "%s: switching to Connected state",
    960				 dev->nodename);
    961
    962	return;
    963 abort:
    964	xenbus_transaction_end(xbt, 1);
    965}
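
Taken together with xen_blkbk_flush_diskcache(), xen_blkbk_discard() and xen_blkbk_barrier() above, a successful connect() leaves the backend's xenstore directory looking roughly like this sketch (values are hypothetical, for a 20 GiB writable disk with a write cache and no discard support):

	feature-flush-cache  = "1"
	feature-discard      = "0"
	feature-barrier      = "1"
	feature-persistent   = "1"
	sectors              = "41943040"
	info                 = "0"
	sector-size          = "512"
	physical-sector-size = "512"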
    966
    967/*
    968 * Each ring may span multiple pages, depending on "ring-page-order".
    969 */
    970static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
    971{
    972	unsigned int ring_ref[XENBUS_MAX_RING_GRANTS];
    973	struct pending_req *req, *n;
    974	int err, i, j;
    975	struct xen_blkif *blkif = ring->blkif;
    976	struct xenbus_device *dev = blkif->be->dev;
    977	unsigned int nr_grefs, evtchn;
    978
    979	err = xenbus_scanf(XBT_NIL, dir, "event-channel", "%u",
    980			  &evtchn);
    981	if (err != 1) {
    982		err = -EINVAL;
    983		xenbus_dev_fatal(dev, err, "reading %s/event-channel", dir);
    984		return err;
    985	}
    986
    987	nr_grefs = blkif->nr_ring_pages;
    988
    989	if (unlikely(!nr_grefs)) {
    990		WARN_ON(true);
    991		return -EINVAL;
    992	}
    993
    994	for (i = 0; i < nr_grefs; i++) {
    995		char ring_ref_name[RINGREF_NAME_LEN];
    996
    997		if (blkif->multi_ref)
    998			snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
    999		else {
   1000			WARN_ON(i != 0);
   1001			snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref");
   1002		}
   1003
   1004		err = xenbus_scanf(XBT_NIL, dir, ring_ref_name,
   1005				   "%u", &ring_ref[i]);
   1006
   1007		if (err != 1) {
   1008			err = -EINVAL;
   1009			xenbus_dev_fatal(dev, err, "reading %s/%s",
   1010					 dir, ring_ref_name);
   1011			return err;
   1012		}
   1013	}
   1014
   1015	err = -ENOMEM;
   1016	for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
   1017		req = kzalloc(sizeof(*req), GFP_KERNEL);
   1018		if (!req)
   1019			goto fail;
   1020		list_add_tail(&req->free_list, &ring->pending_free);
   1021		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
   1022			req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL);
   1023			if (!req->segments[j])
   1024				goto fail;
   1025		}
   1026		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
   1027			req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
   1028							 GFP_KERNEL);
   1029			if (!req->indirect_pages[j])
   1030				goto fail;
   1031		}
   1032	}
   1033
   1034	/* Map the shared frame, irq etc. */
   1035	err = xen_blkif_map(ring, ring_ref, nr_grefs, evtchn);
   1036	if (err) {
   1037		xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
   1038		goto fail;
   1039	}
   1040
   1041	return 0;
   1042
   1043fail:
   1044	list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
   1045		list_del(&req->free_list);
   1046		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
   1047			if (!req->segments[j])
   1048				break;
   1049			kfree(req->segments[j]);
   1050		}
   1051		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
   1052			if (!req->indirect_pages[j])
   1053				break;
   1054			kfree(req->indirect_pages[j]);
   1055		}
   1056		kfree(req);
   1057	}
   1058	return err;
   1059}
   1060
   1061static int connect_ring(struct backend_info *be)
   1062{
   1063	struct xenbus_device *dev = be->dev;
   1064	struct xen_blkif *blkif = be->blkif;
   1065	char protocol[64] = "";
   1066	int err, i;
   1067	char *xspath;
   1068	size_t xspathsize;
   1069	const size_t xenstore_path_ext_size = 11; /* sufficient for "/queue-NNN" */
   1070	unsigned int requested_num_queues = 0;
   1071	unsigned int ring_page_order;
   1072
   1073	pr_debug("%s %s\n", __func__, dev->otherend);
   1074
   1075	blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
   1076	err = xenbus_scanf(XBT_NIL, dev->otherend, "protocol",
   1077			   "%63s", protocol);
   1078	if (err <= 0)
   1079		strcpy(protocol, "unspecified, assuming default");
   1080	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
   1081		blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
   1082	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
   1083		blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
   1084	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
   1085		blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
   1086	else {
   1087		xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
   1088		return -ENOSYS;
   1089	}
   1090	if (blkif->vbd.feature_gnt_persistent)
   1091		blkif->vbd.feature_gnt_persistent =
   1092			xenbus_read_unsigned(dev->otherend,
   1093					"feature-persistent", 0);
   1094
   1095	blkif->vbd.overflow_max_grants = 0;
   1096
   1097	/*
   1098	 * Read the number of hardware queues from frontend.
   1099	 */
   1100	requested_num_queues = xenbus_read_unsigned(dev->otherend,
   1101						    "multi-queue-num-queues",
   1102						    1);
   1103	if (requested_num_queues > xenblk_max_queues
   1104	    || requested_num_queues == 0) {
   1105		/* Buggy or malicious guest. */
   1106		xenbus_dev_fatal(dev, err,
   1107				"guest requested %u queues, exceeding the maximum of %u.",
   1108				requested_num_queues, xenblk_max_queues);
   1109		return -ENOSYS;
   1110	}
   1111	blkif->nr_rings = requested_num_queues;
   1112	if (xen_blkif_alloc_rings(blkif))
   1113		return -ENOMEM;
   1114
   1115	pr_info("%s: using %d queues, protocol %d (%s) %s\n", dev->nodename,
   1116		 blkif->nr_rings, blkif->blk_protocol, protocol,
   1117		 blkif->vbd.feature_gnt_persistent ? "persistent grants" : "");
   1118
   1119	err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
   1120			   &ring_page_order);
   1121	if (err != 1) {
   1122		blkif->nr_ring_pages = 1;
   1123		blkif->multi_ref = false;
   1124	} else if (ring_page_order <= xen_blkif_max_ring_order) {
   1125		blkif->nr_ring_pages = 1 << ring_page_order;
   1126		blkif->multi_ref = true;
   1127	} else {
   1128		err = -EINVAL;
   1129		xenbus_dev_fatal(dev, err,
   1130				 "requested ring page order %d exceed max:%d",
   1131				 ring_page_order,
   1132				 xen_blkif_max_ring_order);
   1133		return err;
   1134	}
   1135
   1136	if (blkif->nr_rings == 1)
   1137		return read_per_ring_refs(&blkif->rings[0], dev->otherend);
   1138	else {
   1139		xspathsize = strlen(dev->otherend) + xenstore_path_ext_size;
   1140		xspath = kmalloc(xspathsize, GFP_KERNEL);
   1141		if (!xspath) {
   1142			xenbus_dev_fatal(dev, -ENOMEM, "reading ring references");
   1143			return -ENOMEM;
   1144		}
   1145
   1146		for (i = 0; i < blkif->nr_rings; i++) {
   1147			memset(xspath, 0, xspathsize);
   1148			snprintf(xspath, xspathsize, "%s/queue-%u", dev->otherend, i);
   1149			err = read_per_ring_refs(&blkif->rings[i], xspath);
   1150			if (err) {
   1151				kfree(xspath);
   1152				return err;
   1153			}
   1154		}
   1155		kfree(xspath);
   1156	}
   1157	return 0;
   1158}
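
For orientation, the frontend-side nodes consumed here and in read_per_ring_refs() look roughly as follows for a hypothetical two-queue frontend that did not negotiate multi-page rings (grant references and event-channel ports are example values):

	protocol               = "x86_64-abi"
	feature-persistent     = "1"
	multi-queue-num-queues = "2"
	queue-0/ring-ref       = "8"
	queue-0/event-channel  = "15"
	queue-1/ring-ref       = "9"
	queue-1/event-channel  = "16"

With a single queue the per-ring nodes live directly under the frontend directory, and when "ring-page-order" has been negotiated each queue instead publishes "ring-ref0" through "ring-ref<2^order - 1>".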
   1159
   1160static const struct xenbus_device_id xen_blkbk_ids[] = {
   1161	{ "vbd" },
   1162	{ "" }
   1163};
   1164
   1165static struct xenbus_driver xen_blkbk_driver = {
   1166	.ids  = xen_blkbk_ids,
   1167	.probe = xen_blkbk_probe,
   1168	.remove = xen_blkbk_remove,
   1169	.otherend_changed = frontend_changed,
   1170	.allow_rebind = true,
   1171	.reclaim_memory = reclaim_memory,
   1172};
   1173
   1174int xen_blkif_xenbus_init(void)
   1175{
   1176	return xenbus_register_backend(&xen_blkbk_driver);
   1177}
   1178
   1179void xen_blkif_xenbus_fini(void)
   1180{
   1181	xenbus_unregister_driver(&xen_blkbk_driver);
   1182}