cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

target_core_user.c (87933B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Copyright (C) 2013 Shaohua Li <shli@kernel.org>
      4 * Copyright (C) 2014 Red Hat, Inc.
      5 * Copyright (C) 2015 Arrikto, Inc.
      6 * Copyright (C) 2017 Chinamobile, Inc.
      7 */
      8
      9#include <linux/spinlock.h>
     10#include <linux/module.h>
     11#include <linux/kernel.h>
     12#include <linux/timer.h>
     13#include <linux/parser.h>
     14#include <linux/vmalloc.h>
     15#include <linux/uio_driver.h>
     16#include <linux/xarray.h>
     17#include <linux/stringify.h>
     18#include <linux/bitops.h>
     19#include <linux/highmem.h>
     20#include <linux/configfs.h>
     21#include <linux/mutex.h>
     22#include <linux/workqueue.h>
     23#include <linux/pagemap.h>
     24#include <net/genetlink.h>
     25#include <scsi/scsi_common.h>
     26#include <scsi/scsi_proto.h>
     27#include <target/target_core_base.h>
     28#include <target/target_core_fabric.h>
     29#include <target/target_core_backend.h>
     30
     31#include <linux/target_core_user.h>
     32
     33/**
     34 * DOC: Userspace I/O
     35 * Userspace I/O
     36 * -------------
     37 *
     38 * Define a shared-memory interface for LIO to pass SCSI commands and
     39 * data to userspace for processing. This allows backends that are
     40 * too complex for in-kernel support to be implemented in userspace.
     41 *
     42 * It uses the UIO framework to do a lot of the device-creation and
     43 * introspection work for us.
     44 *
     45 * See the .h file for how the ring is laid out. Note that while the
     46 * command ring is defined, the particulars of the data area are
     47 * not. Offset values in the command entry point to other locations
     48 * internal to the mmap-ed area. There is separate space outside the
     49 * command ring for data buffers. This leaves maximum flexibility for
     50 * moving buffer allocations, or even page flipping or other
     51 * allocation techniques, without altering the command ring layout.
     52 *
     53 * SECURITY:
     54 * The user process must be assumed to be malicious. There's no way to
     55 * prevent it breaking the command ring protocol if it wants, but in
     56 * order to prevent other issues we must only ever read *data* from
     57 * the shared memory area, not offsets or sizes. This applies to
     58 * command ring entries as well as the mailbox. Extra code needed for
     59 * this may have a 'UAM' comment.
     60 */
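/*
 * [Editor's note: illustrative userspace sketch, not part of this file.]
 * A minimal example of the other side of the protocol described above,
 * loosely following Documentation/target/tcmu-design.rst: the handler
 * mmap()s the UIO device, waits for the kernel's event, walks the command
 * ring from cmd_tail to cmd_head, and finally pokes the kernel with a
 * 4-byte write. Device discovery, the SCSI emulation itself, memory
 * barriers and most error handling are omitted; treat the names and
 * details as assumptions, not as the canonical handler implementation.
 */
#if 0	/* standalone userspace example; never compiled with this file */
#include <stdint.h>
#include <unistd.h>
#include <linux/target_core_user.h>

static void tcmu_example_handle_events(int uio_fd, void *map)
{
	struct tcmu_mailbox *mb = map;
	uint32_t poke = 0;

	/* Blocks until the kernel calls uio_event_notify() for this device. */
	if (read(uio_fd, &poke, sizeof(poke)) != sizeof(poke))
		return;

	/* A real handler would pair the kernel's release store of cmd_head
	 * with an acquire load here. */
	while (mb->cmd_tail != mb->cmd_head) {
		struct tcmu_cmd_entry *ent = (void *)
			((uint8_t *)mb + mb->cmdr_off + mb->cmd_tail);

		if (tcmu_hdr_get_op(ent->hdr.len_op) == TCMU_OP_CMD) {
			/*
			 * cdb_off and the iovecs are offsets relative to the
			 * start of the mmap'ed region (the mailbox), never
			 * raw pointers.
			 */
			uint8_t *cdb = (uint8_t *)mb + ent->req.cdb_off;

			(void)cdb;	/* ... emulate the SCSI command ... */
			ent->rsp.scsi_status = 0;	/* SAM_STAT_GOOD */
		}
		/* PAD (and TMR) entries are simply skipped by their length. */
		mb->cmd_tail = (mb->cmd_tail +
				tcmu_hdr_get_len(ent->hdr.len_op)) %
			       mb->cmdr_size;
	}

	/* Any 4-byte write tells the kernel to reap the completions. */
	(void)write(uio_fd, &poke, sizeof(poke));
}
#endif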
     61
     62#define TCMU_TIME_OUT (30 * MSEC_PER_SEC)
     63
     64/* For mailbox plus cmd ring, the default size is 8MB */
     65#define MB_CMDR_SIZE_DEF (8 * 1024 * 1024)
     66/* Offset of cmd ring is size of mailbox */
     67#define CMDR_OFF ((__u32)sizeof(struct tcmu_mailbox))
     68#define CMDR_SIZE_DEF (MB_CMDR_SIZE_DEF - CMDR_OFF)
     69
     70/*
     71 * For data area, the default block size is PAGE_SIZE and
     72 * the default total size is 256K * PAGE_SIZE.
     73 */
     74#define DATA_PAGES_PER_BLK_DEF 1
     75#define DATA_AREA_PAGES_DEF (256 * 1024)
     76
     77#define TCMU_MBS_TO_PAGES(_mbs) ((size_t)_mbs << (20 - PAGE_SHIFT))
     78#define TCMU_PAGES_TO_MBS(_pages) (_pages >> (20 - PAGE_SHIFT))
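/*
 * [Editor's note] Worked example, assuming the common PAGE_SHIFT of 12
 * (4K pages): 1 MB is 1 << (20 - 12) = 256 pages, so
 * TCMU_MBS_TO_PAGES(8) == 2048 and TCMU_PAGES_TO_MBS(256 * 1024) == 1024,
 * i.e. the default 256K-page data area below corresponds to 1 GB.
 */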
     79
     80/*
     81 * Default number of global data pages (512K * PAGE_SIZE)
     82 * above which the unmap thread is started.
     83 */
     84#define TCMU_GLOBAL_MAX_PAGES_DEF (512 * 1024)
     85
     86static u8 tcmu_kern_cmd_reply_supported;
     87static u8 tcmu_netlink_blocked;
     88
     89static struct device *tcmu_root_device;
     90
     91struct tcmu_hba {
     92	u32 host_id;
     93};
     94
     95#define TCMU_CONFIG_LEN 256
     96
     97static DEFINE_MUTEX(tcmu_nl_cmd_mutex);
     98static LIST_HEAD(tcmu_nl_cmd_list);
     99
    100struct tcmu_dev;
    101
    102struct tcmu_nl_cmd {
    103	/* wake up thread waiting for reply */
    104	struct completion complete;
    105	struct list_head nl_list;
    106	struct tcmu_dev *udev;
    107	int cmd;
    108	int status;
    109};
    110
    111struct tcmu_dev {
    112	struct list_head node;
    113	struct kref kref;
    114
    115	struct se_device se_dev;
    116	struct se_dev_plug se_plug;
    117
    118	char *name;
    119	struct se_hba *hba;
    120
    121#define TCMU_DEV_BIT_OPEN 0
    122#define TCMU_DEV_BIT_BROKEN 1
    123#define TCMU_DEV_BIT_BLOCKED 2
    124#define TCMU_DEV_BIT_TMR_NOTIFY 3
    125#define TCMU_DEV_BIT_PLUGGED 4
    126	unsigned long flags;
    127
    128	struct uio_info uio_info;
    129
    130	struct inode *inode;
    131
    132	uint64_t dev_size;
    133
    134	struct tcmu_mailbox *mb_addr;
    135	void *cmdr;
    136	u32 cmdr_size;
    137	u32 cmdr_last_cleaned;
    138	/* Offset of data area from start of mb */
    139	/* Must add data_off and mb_addr to get the address */
    140	size_t data_off;
    141	int data_area_mb;
    142	uint32_t max_blocks;
    143	size_t mmap_pages;
    144
    145	struct mutex cmdr_lock;
    146	struct list_head qfull_queue;
    147	struct list_head tmr_queue;
    148
    149	uint32_t dbi_max;
    150	uint32_t dbi_thresh;
    151	unsigned long *data_bitmap;
    152	struct xarray data_pages;
    153	uint32_t data_pages_per_blk;
    154	uint32_t data_blk_size;
    155
    156	struct xarray commands;
    157
    158	struct timer_list cmd_timer;
    159	unsigned int cmd_time_out;
    160	struct list_head inflight_queue;
    161
    162	struct timer_list qfull_timer;
    163	int qfull_time_out;
    164
    165	struct list_head timedout_entry;
    166
    167	struct tcmu_nl_cmd curr_nl_cmd;
    168
    169	char dev_config[TCMU_CONFIG_LEN];
    170
    171	int nl_reply_supported;
    172};
    173
    174#define TCMU_DEV(_se_dev) container_of(_se_dev, struct tcmu_dev, se_dev)
    175
    176struct tcmu_cmd {
    177	struct se_cmd *se_cmd;
    178	struct tcmu_dev *tcmu_dev;
    179	struct list_head queue_entry;
    180
    181	uint16_t cmd_id;
    182
    183	/* Can't use se_cmd when cleaning up expired cmds, because if
    184	   cmd has been completed then accessing se_cmd is off limits */
    185	uint32_t dbi_cnt;
    186	uint32_t dbi_bidi_cnt;
    187	uint32_t dbi_cur;
    188	uint32_t *dbi;
    189
    190	uint32_t data_len_bidi;
    191
    192	unsigned long deadline;
    193
    194#define TCMU_CMD_BIT_EXPIRED 0
    195#define TCMU_CMD_BIT_KEEP_BUF 1
    196	unsigned long flags;
    197};
    198
    199struct tcmu_tmr {
    200	struct list_head queue_entry;
    201
    202	uint8_t tmr_type;
    203	uint32_t tmr_cmd_cnt;
    204	int16_t tmr_cmd_ids[];
    205};
    206
    207/*
     208 * To avoid deadlock, the mutex lock order should always be:
    209 *
    210 * mutex_lock(&root_udev_mutex);
    211 * ...
    212 * mutex_lock(&tcmu_dev->cmdr_lock);
    213 * mutex_unlock(&tcmu_dev->cmdr_lock);
    214 * ...
    215 * mutex_unlock(&root_udev_mutex);
    216 */
    217static DEFINE_MUTEX(root_udev_mutex);
    218static LIST_HEAD(root_udev);
    219
    220static DEFINE_SPINLOCK(timed_out_udevs_lock);
    221static LIST_HEAD(timed_out_udevs);
    222
    223static struct kmem_cache *tcmu_cmd_cache;
    224
    225static atomic_t global_page_count = ATOMIC_INIT(0);
    226static struct delayed_work tcmu_unmap_work;
    227static int tcmu_global_max_pages = TCMU_GLOBAL_MAX_PAGES_DEF;
    228
    229static int tcmu_set_global_max_data_area(const char *str,
    230					 const struct kernel_param *kp)
    231{
    232	int ret, max_area_mb;
    233
    234	ret = kstrtoint(str, 10, &max_area_mb);
    235	if (ret)
    236		return -EINVAL;
    237
    238	if (max_area_mb <= 0) {
    239		pr_err("global_max_data_area must be larger than 0.\n");
    240		return -EINVAL;
    241	}
    242
    243	tcmu_global_max_pages = TCMU_MBS_TO_PAGES(max_area_mb);
    244	if (atomic_read(&global_page_count) > tcmu_global_max_pages)
    245		schedule_delayed_work(&tcmu_unmap_work, 0);
    246	else
    247		cancel_delayed_work_sync(&tcmu_unmap_work);
    248
    249	return 0;
    250}
    251
    252static int tcmu_get_global_max_data_area(char *buffer,
    253					 const struct kernel_param *kp)
    254{
    255	return sprintf(buffer, "%d\n", TCMU_PAGES_TO_MBS(tcmu_global_max_pages));
    256}
    257
    258static const struct kernel_param_ops tcmu_global_max_data_area_op = {
    259	.set = tcmu_set_global_max_data_area,
    260	.get = tcmu_get_global_max_data_area,
    261};
    262
    263module_param_cb(global_max_data_area_mb, &tcmu_global_max_data_area_op, NULL,
    264		S_IWUSR | S_IRUGO);
    265MODULE_PARM_DESC(global_max_data_area_mb,
    266		 "Max MBs allowed to be allocated to all the tcmu device's "
    267		 "data areas.");
    268
    269static int tcmu_get_block_netlink(char *buffer,
    270				  const struct kernel_param *kp)
    271{
    272	return sprintf(buffer, "%s\n", tcmu_netlink_blocked ?
    273		       "blocked" : "unblocked");
    274}
    275
    276static int tcmu_set_block_netlink(const char *str,
    277				  const struct kernel_param *kp)
    278{
    279	int ret;
    280	u8 val;
    281
    282	ret = kstrtou8(str, 0, &val);
    283	if (ret < 0)
    284		return ret;
    285
    286	if (val > 1) {
    287		pr_err("Invalid block netlink value %u\n", val);
    288		return -EINVAL;
    289	}
    290
    291	tcmu_netlink_blocked = val;
    292	return 0;
    293}
    294
    295static const struct kernel_param_ops tcmu_block_netlink_op = {
    296	.set = tcmu_set_block_netlink,
    297	.get = tcmu_get_block_netlink,
    298};
    299
    300module_param_cb(block_netlink, &tcmu_block_netlink_op, NULL, S_IWUSR | S_IRUGO);
    301MODULE_PARM_DESC(block_netlink, "Block new netlink commands.");
    302
    303static int tcmu_fail_netlink_cmd(struct tcmu_nl_cmd *nl_cmd)
    304{
    305	struct tcmu_dev *udev = nl_cmd->udev;
    306
    307	if (!tcmu_netlink_blocked) {
    308		pr_err("Could not reset device's netlink interface. Netlink is not blocked.\n");
    309		return -EBUSY;
    310	}
    311
    312	if (nl_cmd->cmd != TCMU_CMD_UNSPEC) {
    313		pr_debug("Aborting nl cmd %d on %s\n", nl_cmd->cmd, udev->name);
    314		nl_cmd->status = -EINTR;
    315		list_del(&nl_cmd->nl_list);
    316		complete(&nl_cmd->complete);
    317	}
    318	return 0;
    319}
    320
    321static int tcmu_set_reset_netlink(const char *str,
    322				  const struct kernel_param *kp)
    323{
    324	struct tcmu_nl_cmd *nl_cmd, *tmp_cmd;
    325	int ret;
    326	u8 val;
    327
    328	ret = kstrtou8(str, 0, &val);
    329	if (ret < 0)
    330		return ret;
    331
    332	if (val != 1) {
    333		pr_err("Invalid reset netlink value %u\n", val);
    334		return -EINVAL;
    335	}
    336
    337	mutex_lock(&tcmu_nl_cmd_mutex);
    338	list_for_each_entry_safe(nl_cmd, tmp_cmd, &tcmu_nl_cmd_list, nl_list) {
    339		ret = tcmu_fail_netlink_cmd(nl_cmd);
    340		if (ret)
    341			break;
    342	}
    343	mutex_unlock(&tcmu_nl_cmd_mutex);
    344
    345	return ret;
    346}
    347
    348static const struct kernel_param_ops tcmu_reset_netlink_op = {
    349	.set = tcmu_set_reset_netlink,
    350};
    351
    352module_param_cb(reset_netlink, &tcmu_reset_netlink_op, NULL, S_IWUSR);
    353MODULE_PARM_DESC(reset_netlink, "Reset netlink commands.");
    354
    355/* multicast group */
    356enum tcmu_multicast_groups {
    357	TCMU_MCGRP_CONFIG,
    358};
    359
    360static const struct genl_multicast_group tcmu_mcgrps[] = {
    361	[TCMU_MCGRP_CONFIG] = { .name = "config", },
    362};
    363
    364static struct nla_policy tcmu_attr_policy[TCMU_ATTR_MAX+1] = {
    365	[TCMU_ATTR_DEVICE]	= { .type = NLA_STRING },
    366	[TCMU_ATTR_MINOR]	= { .type = NLA_U32 },
    367	[TCMU_ATTR_CMD_STATUS]	= { .type = NLA_S32 },
    368	[TCMU_ATTR_DEVICE_ID]	= { .type = NLA_U32 },
    369	[TCMU_ATTR_SUPP_KERN_CMD_REPLY] = { .type = NLA_U8 },
    370};
    371
    372static int tcmu_genl_cmd_done(struct genl_info *info, int completed_cmd)
    373{
    374	struct tcmu_dev *udev = NULL;
    375	struct tcmu_nl_cmd *nl_cmd;
    376	int dev_id, rc, ret = 0;
    377
    378	if (!info->attrs[TCMU_ATTR_CMD_STATUS] ||
    379	    !info->attrs[TCMU_ATTR_DEVICE_ID]) {
    380		printk(KERN_ERR "TCMU_ATTR_CMD_STATUS or TCMU_ATTR_DEVICE_ID not set, doing nothing\n");
    381		return -EINVAL;
     382	}
    383
    384	dev_id = nla_get_u32(info->attrs[TCMU_ATTR_DEVICE_ID]);
    385	rc = nla_get_s32(info->attrs[TCMU_ATTR_CMD_STATUS]);
    386
    387	mutex_lock(&tcmu_nl_cmd_mutex);
    388	list_for_each_entry(nl_cmd, &tcmu_nl_cmd_list, nl_list) {
    389		if (nl_cmd->udev->se_dev.dev_index == dev_id) {
    390			udev = nl_cmd->udev;
    391			break;
    392		}
    393	}
    394
    395	if (!udev) {
    396		pr_err("tcmu nl cmd %u/%d completion could not find device with dev id %u.\n",
    397		       completed_cmd, rc, dev_id);
    398		ret = -ENODEV;
    399		goto unlock;
    400	}
    401	list_del(&nl_cmd->nl_list);
    402
    403	pr_debug("%s genl cmd done got id %d curr %d done %d rc %d stat %d\n",
    404		 udev->name, dev_id, nl_cmd->cmd, completed_cmd, rc,
    405		 nl_cmd->status);
    406
    407	if (nl_cmd->cmd != completed_cmd) {
    408		pr_err("Mismatched commands on %s (Expecting reply for %d. Current %d).\n",
    409		       udev->name, completed_cmd, nl_cmd->cmd);
    410		ret = -EINVAL;
    411		goto unlock;
    412	}
    413
    414	nl_cmd->status = rc;
    415	complete(&nl_cmd->complete);
    416unlock:
    417	mutex_unlock(&tcmu_nl_cmd_mutex);
    418	return ret;
    419}
    420
    421static int tcmu_genl_rm_dev_done(struct sk_buff *skb, struct genl_info *info)
    422{
    423	return tcmu_genl_cmd_done(info, TCMU_CMD_REMOVED_DEVICE);
    424}
    425
    426static int tcmu_genl_add_dev_done(struct sk_buff *skb, struct genl_info *info)
    427{
    428	return tcmu_genl_cmd_done(info, TCMU_CMD_ADDED_DEVICE);
    429}
    430
    431static int tcmu_genl_reconfig_dev_done(struct sk_buff *skb,
    432				       struct genl_info *info)
    433{
    434	return tcmu_genl_cmd_done(info, TCMU_CMD_RECONFIG_DEVICE);
    435}
    436
    437static int tcmu_genl_set_features(struct sk_buff *skb, struct genl_info *info)
    438{
    439	if (info->attrs[TCMU_ATTR_SUPP_KERN_CMD_REPLY]) {
     440		tcmu_kern_cmd_reply_supported =
    441			nla_get_u8(info->attrs[TCMU_ATTR_SUPP_KERN_CMD_REPLY]);
    442		printk(KERN_INFO "tcmu daemon: command reply support %u.\n",
    443		       tcmu_kern_cmd_reply_supported);
    444	}
    445
    446	return 0;
    447}
    448
    449static const struct genl_small_ops tcmu_genl_ops[] = {
    450	{
    451		.cmd	= TCMU_CMD_SET_FEATURES,
    452		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
    453		.flags	= GENL_ADMIN_PERM,
    454		.doit	= tcmu_genl_set_features,
    455	},
    456	{
    457		.cmd	= TCMU_CMD_ADDED_DEVICE_DONE,
    458		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
    459		.flags	= GENL_ADMIN_PERM,
    460		.doit	= tcmu_genl_add_dev_done,
    461	},
    462	{
    463		.cmd	= TCMU_CMD_REMOVED_DEVICE_DONE,
    464		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
    465		.flags	= GENL_ADMIN_PERM,
    466		.doit	= tcmu_genl_rm_dev_done,
    467	},
    468	{
    469		.cmd	= TCMU_CMD_RECONFIG_DEVICE_DONE,
    470		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
    471		.flags	= GENL_ADMIN_PERM,
    472		.doit	= tcmu_genl_reconfig_dev_done,
    473	},
    474};
    475
    476/* Our generic netlink family */
    477static struct genl_family tcmu_genl_family __ro_after_init = {
    478	.module = THIS_MODULE,
    479	.hdrsize = 0,
    480	.name = "TCM-USER",
    481	.version = 2,
    482	.maxattr = TCMU_ATTR_MAX,
    483	.policy = tcmu_attr_policy,
    484	.mcgrps = tcmu_mcgrps,
    485	.n_mcgrps = ARRAY_SIZE(tcmu_mcgrps),
    486	.netnsok = true,
    487	.small_ops = tcmu_genl_ops,
    488	.n_small_ops = ARRAY_SIZE(tcmu_genl_ops),
    489};
    490
    491#define tcmu_cmd_set_dbi_cur(cmd, index) ((cmd)->dbi_cur = (index))
    492#define tcmu_cmd_reset_dbi_cur(cmd) tcmu_cmd_set_dbi_cur(cmd, 0)
    493#define tcmu_cmd_set_dbi(cmd, index) ((cmd)->dbi[(cmd)->dbi_cur++] = (index))
    494#define tcmu_cmd_get_dbi(cmd) ((cmd)->dbi[(cmd)->dbi_cur++])
    495
    496static void tcmu_cmd_free_data(struct tcmu_cmd *tcmu_cmd, uint32_t len)
    497{
    498	struct tcmu_dev *udev = tcmu_cmd->tcmu_dev;
    499	uint32_t i;
    500
    501	for (i = 0; i < len; i++)
    502		clear_bit(tcmu_cmd->dbi[i], udev->data_bitmap);
    503}
    504
    505static inline int tcmu_get_empty_block(struct tcmu_dev *udev,
    506				       struct tcmu_cmd *tcmu_cmd,
    507				       int prev_dbi, int length, int *iov_cnt)
    508{
    509	XA_STATE(xas, &udev->data_pages, 0);
    510	struct page *page;
    511	int i, cnt, dbi, dpi;
    512	int page_cnt = DIV_ROUND_UP(length, PAGE_SIZE);
    513
    514	dbi = find_first_zero_bit(udev->data_bitmap, udev->dbi_thresh);
    515	if (dbi == udev->dbi_thresh)
    516		return -1;
    517
    518	dpi = dbi * udev->data_pages_per_blk;
    519	/* Count the number of already allocated pages */
    520	xas_set(&xas, dpi);
    521	rcu_read_lock();
    522	for (cnt = 0; xas_next(&xas) && cnt < page_cnt;)
    523		cnt++;
    524	rcu_read_unlock();
    525
    526	for (i = cnt; i < page_cnt; i++) {
    527		/* try to get new zeroed page from the mm */
    528		page = alloc_page(GFP_NOIO | __GFP_ZERO);
    529		if (!page)
    530			break;
    531
    532		if (xa_store(&udev->data_pages, dpi + i, page, GFP_NOIO)) {
    533			__free_page(page);
    534			break;
    535		}
    536	}
    537	if (atomic_add_return(i - cnt, &global_page_count) >
    538			      tcmu_global_max_pages)
    539		schedule_delayed_work(&tcmu_unmap_work, 0);
    540
    541	if (i && dbi > udev->dbi_max)
    542		udev->dbi_max = dbi;
    543
    544	set_bit(dbi, udev->data_bitmap);
    545	tcmu_cmd_set_dbi(tcmu_cmd, dbi);
    546
    547	if (dbi != prev_dbi + 1)
    548		*iov_cnt += 1;
    549
    550	return i == page_cnt ? dbi : -1;
    551}
    552
    553static int tcmu_get_empty_blocks(struct tcmu_dev *udev,
    554				 struct tcmu_cmd *tcmu_cmd, int length)
    555{
    556	/* start value of dbi + 1 must not be a valid dbi */
    557	int dbi = -2;
    558	int blk_data_len, iov_cnt = 0;
    559	uint32_t blk_size = udev->data_blk_size;
    560
    561	for (; length > 0; length -= blk_size) {
    562		blk_data_len = min_t(uint32_t, length, blk_size);
    563		dbi = tcmu_get_empty_block(udev, tcmu_cmd, dbi, blk_data_len,
    564					   &iov_cnt);
    565		if (dbi < 0)
    566			return -1;
    567	}
    568	return iov_cnt;
    569}
    570
    571static inline void tcmu_free_cmd(struct tcmu_cmd *tcmu_cmd)
    572{
    573	kfree(tcmu_cmd->dbi);
    574	kmem_cache_free(tcmu_cmd_cache, tcmu_cmd);
    575}
    576
    577static inline void tcmu_cmd_set_block_cnts(struct tcmu_cmd *cmd)
    578{
    579	int i, len;
    580	struct se_cmd *se_cmd = cmd->se_cmd;
    581	uint32_t blk_size = cmd->tcmu_dev->data_blk_size;
    582
    583	cmd->dbi_cnt = DIV_ROUND_UP(se_cmd->data_length, blk_size);
    584
    585	if (se_cmd->se_cmd_flags & SCF_BIDI) {
    586		BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents));
    587		for (i = 0, len = 0; i < se_cmd->t_bidi_data_nents; i++)
    588			len += se_cmd->t_bidi_data_sg[i].length;
    589		cmd->dbi_bidi_cnt = DIV_ROUND_UP(len, blk_size);
    590		cmd->dbi_cnt += cmd->dbi_bidi_cnt;
    591		cmd->data_len_bidi = len;
    592	}
    593}
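/*
 * [Editor's note] Worked example, assuming the default of one 4K page per
 * data block: a 10 KB write needs dbi_cnt = DIV_ROUND_UP(10240, 4096) = 3
 * blocks; if the command is also BIDI with an 8 KB Data-In buffer,
 * dbi_bidi_cnt = 2 and dbi_cnt grows to 5 in total.
 */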
    594
    595static int new_block_to_iov(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
    596			    struct iovec **iov, int prev_dbi, int len)
    597{
    598	/* Get the next dbi */
    599	int dbi = tcmu_cmd_get_dbi(cmd);
    600
    601	/* Do not add more than udev->data_blk_size to iov */
    602	len = min_t(int,  len, udev->data_blk_size);
    603
    604	/*
    605	 * The following code will gather and map the blocks to the same iovec
    606	 * when the blocks are all next to each other.
    607	 */
    608	if (dbi != prev_dbi + 1) {
    609		/* dbi is not next to previous dbi, so start new iov */
    610		if (prev_dbi >= 0)
    611			(*iov)++;
    612		/* write offset relative to mb_addr */
    613		(*iov)->iov_base = (void __user *)
    614				   (udev->data_off + dbi * udev->data_blk_size);
    615	}
    616	(*iov)->iov_len += len;
    617
    618	return dbi;
    619}
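/*
 * [Editor's note] Illustration of the coalescing above, assuming 4K data
 * blocks: a command handed blocks dbi = 4, 5, 6 and 9 ends up with two
 * iovecs, { .iov_base = data_off + 4 * 4096, .iov_len = 3 * 4096 } and
 * { .iov_base = data_off + 9 * 4096, .iov_len = 4096 } (the last entry may
 * be shorter for the tail of the transfer). Note that iov_base holds an
 * offset relative to the start of the mmap'ed region, not a pointer.
 */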
    620
    621static void tcmu_setup_iovs(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
    622			    struct iovec **iov, int data_length)
    623{
    624	/* start value of dbi + 1 must not be a valid dbi */
    625	int dbi = -2;
    626
    627	/* We prepare the IOVs for DMA_FROM_DEVICE transfer direction */
    628	for (; data_length > 0; data_length -= udev->data_blk_size)
    629		dbi = new_block_to_iov(udev, cmd, iov, dbi, data_length);
    630}
    631
    632static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd)
    633{
    634	struct se_device *se_dev = se_cmd->se_dev;
    635	struct tcmu_dev *udev = TCMU_DEV(se_dev);
    636	struct tcmu_cmd *tcmu_cmd;
    637
    638	tcmu_cmd = kmem_cache_zalloc(tcmu_cmd_cache, GFP_NOIO);
    639	if (!tcmu_cmd)
    640		return NULL;
    641
    642	INIT_LIST_HEAD(&tcmu_cmd->queue_entry);
    643	tcmu_cmd->se_cmd = se_cmd;
    644	tcmu_cmd->tcmu_dev = udev;
    645
    646	tcmu_cmd_set_block_cnts(tcmu_cmd);
    647	tcmu_cmd->dbi = kcalloc(tcmu_cmd->dbi_cnt, sizeof(uint32_t),
    648				GFP_NOIO);
    649	if (!tcmu_cmd->dbi) {
    650		kmem_cache_free(tcmu_cmd_cache, tcmu_cmd);
    651		return NULL;
    652	}
    653
    654	return tcmu_cmd;
    655}
    656
    657static inline void tcmu_flush_dcache_range(void *vaddr, size_t size)
    658{
    659	unsigned long offset = offset_in_page(vaddr);
    660	void *start = vaddr - offset;
    661
    662	size = round_up(size+offset, PAGE_SIZE);
    663
    664	while (size) {
    665		flush_dcache_page(vmalloc_to_page(start));
    666		start += PAGE_SIZE;
    667		size -= PAGE_SIZE;
    668	}
    669}
    670
    671/*
    672 * Some ring helper functions. We don't assume size is a power of 2 so
    673 * we can't use circ_buf.h.
    674 */
    675static inline size_t spc_used(size_t head, size_t tail, size_t size)
    676{
    677	int diff = head - tail;
    678
    679	if (diff >= 0)
    680		return diff;
    681	else
    682		return size + diff;
    683}
    684
    685static inline size_t spc_free(size_t head, size_t tail, size_t size)
    686{
    687	/* Keep 1 byte unused or we can't tell full from empty */
    688	return (size - spc_used(head, tail, size) - 1);
    689}
    690
    691static inline size_t head_to_end(size_t head, size_t size)
    692{
    693	return size - head;
    694}
    695
    696#define UPDATE_HEAD(head, used, size) smp_store_release(&head, ((head % size) + used) % size)
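/*
 * [Editor's note] Worked example of the helpers above: with a 1000-byte
 * ring, head = 100 and tail = 900, spc_used() = 200 and spc_free() = 799,
 * because one byte is always kept unused so that head == tail unambiguously
 * means "empty". UPDATE_HEAD() advances and wraps the index with a release
 * store, so the reader sees the entry contents before the new head value.
 */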
    697
    698#define TCMU_SG_TO_DATA_AREA 1
    699#define TCMU_DATA_AREA_TO_SG 2
    700
    701static inline void tcmu_copy_data(struct tcmu_dev *udev,
    702				  struct tcmu_cmd *tcmu_cmd, uint32_t direction,
    703				  struct scatterlist *sg, unsigned int sg_nents,
    704				  struct iovec **iov, size_t data_len)
    705{
    706	/* start value of dbi + 1 must not be a valid dbi */
    707	int dbi = -2;
    708	size_t page_remaining, cp_len;
    709	int page_cnt, page_inx, dpi;
    710	struct sg_mapping_iter sg_iter;
    711	unsigned int sg_flags;
    712	struct page *page;
    713	void *data_page_start, *data_addr;
    714
    715	if (direction == TCMU_SG_TO_DATA_AREA)
    716		sg_flags = SG_MITER_ATOMIC | SG_MITER_FROM_SG;
    717	else
    718		sg_flags = SG_MITER_ATOMIC | SG_MITER_TO_SG;
    719	sg_miter_start(&sg_iter, sg, sg_nents, sg_flags);
    720
    721	while (data_len) {
    722		if (direction == TCMU_SG_TO_DATA_AREA)
    723			dbi = new_block_to_iov(udev, tcmu_cmd, iov, dbi,
    724					       data_len);
    725		else
    726			dbi = tcmu_cmd_get_dbi(tcmu_cmd);
    727
    728		page_cnt = DIV_ROUND_UP(data_len, PAGE_SIZE);
    729		if (page_cnt > udev->data_pages_per_blk)
    730			page_cnt = udev->data_pages_per_blk;
    731
    732		dpi = dbi * udev->data_pages_per_blk;
    733		for (page_inx = 0; page_inx < page_cnt && data_len;
    734		     page_inx++, dpi++) {
    735			page = xa_load(&udev->data_pages, dpi);
    736
    737			if (direction == TCMU_DATA_AREA_TO_SG)
    738				flush_dcache_page(page);
    739			data_page_start = kmap_atomic(page);
    740			page_remaining = PAGE_SIZE;
    741
    742			while (page_remaining && data_len) {
    743				if (!sg_miter_next(&sg_iter)) {
    744					/* set length to 0 to abort outer loop */
    745					data_len = 0;
    746					pr_debug("%s: aborting data copy due to exhausted sg_list\n",
    747						 __func__);
    748					break;
    749				}
    750				cp_len = min3(sg_iter.length, page_remaining,
    751					      data_len);
    752
    753				data_addr = data_page_start +
    754					    PAGE_SIZE - page_remaining;
    755				if (direction == TCMU_SG_TO_DATA_AREA)
    756					memcpy(data_addr, sg_iter.addr, cp_len);
    757				else
    758					memcpy(sg_iter.addr, data_addr, cp_len);
    759
    760				data_len -= cp_len;
    761				page_remaining -= cp_len;
    762				sg_iter.consumed = cp_len;
    763			}
    764			sg_miter_stop(&sg_iter);
    765
    766			kunmap_atomic(data_page_start);
    767			if (direction == TCMU_SG_TO_DATA_AREA)
    768				flush_dcache_page(page);
    769		}
    770	}
    771}
    772
    773static void scatter_data_area(struct tcmu_dev *udev, struct tcmu_cmd *tcmu_cmd,
    774			      struct iovec **iov)
    775{
    776	struct se_cmd *se_cmd = tcmu_cmd->se_cmd;
    777
    778	tcmu_copy_data(udev, tcmu_cmd, TCMU_SG_TO_DATA_AREA, se_cmd->t_data_sg,
    779		       se_cmd->t_data_nents, iov, se_cmd->data_length);
    780}
    781
    782static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *tcmu_cmd,
    783			     bool bidi, uint32_t read_len)
    784{
    785	struct se_cmd *se_cmd = tcmu_cmd->se_cmd;
    786	struct scatterlist *data_sg;
    787	unsigned int data_nents;
    788
    789	if (!bidi) {
    790		data_sg = se_cmd->t_data_sg;
    791		data_nents = se_cmd->t_data_nents;
    792	} else {
    793		/*
     794		 * For the bidi case, the first (dbi_cnt - dbi_bidi_cnt) blocks
     795		 * hold the Data-Out buffer, so they must be skipped before
     796		 * gathering the Data-In buffer.
    797		 */
    798		tcmu_cmd_set_dbi_cur(tcmu_cmd,
    799				     tcmu_cmd->dbi_cnt - tcmu_cmd->dbi_bidi_cnt);
    800
    801		data_sg = se_cmd->t_bidi_data_sg;
    802		data_nents = se_cmd->t_bidi_data_nents;
    803	}
    804
    805	tcmu_copy_data(udev, tcmu_cmd, TCMU_DATA_AREA_TO_SG, data_sg,
    806		       data_nents, NULL, read_len);
    807}
    808
    809static inline size_t spc_bitmap_free(unsigned long *bitmap, uint32_t thresh)
    810{
    811	return thresh - bitmap_weight(bitmap, thresh);
    812}
    813
    814/*
    815 * We can't queue a command until we have space available on the cmd ring.
    816 *
    817 * Called with ring lock held.
    818 */
    819static bool is_ring_space_avail(struct tcmu_dev *udev, size_t cmd_size)
    820{
    821	struct tcmu_mailbox *mb = udev->mb_addr;
    822	size_t space, cmd_needed;
    823	u32 cmd_head;
    824
    825	tcmu_flush_dcache_range(mb, sizeof(*mb));
    826
    827	cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
    828
    829	/*
     830	 * If cmd end-of-ring space is too small then we need space for a PAD
     831	 * entry plus the original cmd - cmds are internally contiguous.
    832	 */
    833	if (head_to_end(cmd_head, udev->cmdr_size) >= cmd_size)
    834		cmd_needed = cmd_size;
    835	else
    836		cmd_needed = cmd_size + head_to_end(cmd_head, udev->cmdr_size);
    837
    838	space = spc_free(cmd_head, udev->cmdr_last_cleaned, udev->cmdr_size);
    839	if (space < cmd_needed) {
    840		pr_debug("no cmd space: %u %u %u\n", cmd_head,
    841		       udev->cmdr_last_cleaned, udev->cmdr_size);
    842		return false;
    843	}
    844	return true;
    845}
    846
    847/*
    848 * We have to allocate data buffers before we can queue a command.
    849 * Returns -1 on error (not enough space) or number of needed iovs on success
    850 *
    851 * Called with ring lock held.
    852 */
    853static int tcmu_alloc_data_space(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
    854				  int *iov_bidi_cnt)
    855{
    856	int space, iov_cnt = 0, ret = 0;
    857
    858	if (!cmd->dbi_cnt)
    859		goto wr_iov_cnts;
    860
     861	/* try to get the needed data blocks */
    862	space = spc_bitmap_free(udev->data_bitmap, udev->dbi_thresh);
    863	if (space < cmd->dbi_cnt) {
    864		unsigned long blocks_left =
    865				(udev->max_blocks - udev->dbi_thresh) + space;
    866
    867		if (blocks_left < cmd->dbi_cnt) {
    868			pr_debug("no data space: only %lu available, but ask for %u\n",
    869					blocks_left * udev->data_blk_size,
    870					cmd->dbi_cnt * udev->data_blk_size);
    871			return -1;
    872		}
    873
    874		udev->dbi_thresh += cmd->dbi_cnt;
    875		if (udev->dbi_thresh > udev->max_blocks)
    876			udev->dbi_thresh = udev->max_blocks;
    877	}
    878
    879	iov_cnt = tcmu_get_empty_blocks(udev, cmd, cmd->se_cmd->data_length);
    880	if (iov_cnt < 0)
    881		return -1;
    882
    883	if (cmd->dbi_bidi_cnt) {
    884		ret = tcmu_get_empty_blocks(udev, cmd, cmd->data_len_bidi);
    885		if (ret < 0)
    886			return -1;
    887	}
    888wr_iov_cnts:
    889	*iov_bidi_cnt = ret;
    890	return iov_cnt + ret;
    891}
    892
    893static inline size_t tcmu_cmd_get_base_cmd_size(size_t iov_cnt)
    894{
    895	return max(offsetof(struct tcmu_cmd_entry, req.iov[iov_cnt]),
    896			sizeof(struct tcmu_cmd_entry));
    897}
    898
    899static inline size_t tcmu_cmd_get_cmd_size(struct tcmu_cmd *tcmu_cmd,
    900					   size_t base_command_size)
    901{
    902	struct se_cmd *se_cmd = tcmu_cmd->se_cmd;
    903	size_t command_size;
    904
    905	command_size = base_command_size +
    906		round_up(scsi_command_size(se_cmd->t_task_cdb),
    907				TCMU_OP_ALIGN_SIZE);
    908
    909	WARN_ON(command_size & (TCMU_OP_ALIGN_SIZE-1));
    910
    911	return command_size;
    912}
    913
    914static void tcmu_setup_cmd_timer(struct tcmu_cmd *tcmu_cmd, unsigned int tmo,
    915				 struct timer_list *timer)
    916{
    917	if (!tmo)
    918		return;
    919
    920	tcmu_cmd->deadline = round_jiffies_up(jiffies + msecs_to_jiffies(tmo));
    921	if (!timer_pending(timer))
    922		mod_timer(timer, tcmu_cmd->deadline);
    923
    924	pr_debug("Timeout set up for cmd %p, dev = %s, tmo = %lu\n", tcmu_cmd,
    925		 tcmu_cmd->tcmu_dev->name, tmo / MSEC_PER_SEC);
    926}
    927
    928static int add_to_qfull_queue(struct tcmu_cmd *tcmu_cmd)
    929{
    930	struct tcmu_dev *udev = tcmu_cmd->tcmu_dev;
    931	unsigned int tmo;
    932
    933	/*
     934	 * For backwards compat: if qfull_time_out is unset (negative), fall
     935	 * back to cmd_time_out, or to the default time out if that is not set.
    936	 */
    937	if (!udev->qfull_time_out)
    938		return -ETIMEDOUT;
    939	else if (udev->qfull_time_out > 0)
    940		tmo = udev->qfull_time_out;
    941	else if (udev->cmd_time_out)
    942		tmo = udev->cmd_time_out;
    943	else
    944		tmo = TCMU_TIME_OUT;
    945
    946	tcmu_setup_cmd_timer(tcmu_cmd, tmo, &udev->qfull_timer);
    947
    948	list_add_tail(&tcmu_cmd->queue_entry, &udev->qfull_queue);
    949	pr_debug("adding cmd %p on dev %s to ring space wait queue\n",
    950		 tcmu_cmd, udev->name);
    951	return 0;
    952}
    953
    954static uint32_t ring_insert_padding(struct tcmu_dev *udev, size_t cmd_size)
    955{
    956	struct tcmu_cmd_entry_hdr *hdr;
    957	struct tcmu_mailbox *mb = udev->mb_addr;
    958	uint32_t cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
    959
    960	/* Insert a PAD if end-of-ring space is too small */
    961	if (head_to_end(cmd_head, udev->cmdr_size) < cmd_size) {
    962		size_t pad_size = head_to_end(cmd_head, udev->cmdr_size);
    963
    964		hdr = udev->cmdr + cmd_head;
    965		tcmu_hdr_set_op(&hdr->len_op, TCMU_OP_PAD);
    966		tcmu_hdr_set_len(&hdr->len_op, pad_size);
    967		hdr->cmd_id = 0; /* not used for PAD */
    968		hdr->kflags = 0;
    969		hdr->uflags = 0;
    970		tcmu_flush_dcache_range(hdr, sizeof(*hdr));
    971
    972		UPDATE_HEAD(mb->cmd_head, pad_size, udev->cmdr_size);
    973		tcmu_flush_dcache_range(mb, sizeof(*mb));
    974
    975		cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
    976		WARN_ON(cmd_head != 0);
    977	}
    978
    979	return cmd_head;
    980}
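/*
 * [Editor's note] Worked example of the padding above: if the next command
 * needs 176 bytes but cmd_head is only 48 bytes from the end of the ring,
 * is_ring_space_avail() requires 176 + 48 free bytes, and
 * ring_insert_padding() then emits a 48-byte PAD entry so the command
 * itself starts contiguously at offset 0.
 */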
    981
    982static void tcmu_unplug_device(struct se_dev_plug *se_plug)
    983{
    984	struct se_device *se_dev = se_plug->se_dev;
    985	struct tcmu_dev *udev = TCMU_DEV(se_dev);
    986
    987	clear_bit(TCMU_DEV_BIT_PLUGGED, &udev->flags);
    988	uio_event_notify(&udev->uio_info);
    989}
    990
    991static struct se_dev_plug *tcmu_plug_device(struct se_device *se_dev)
    992{
    993	struct tcmu_dev *udev = TCMU_DEV(se_dev);
    994
    995	if (!test_and_set_bit(TCMU_DEV_BIT_PLUGGED, &udev->flags))
    996		return &udev->se_plug;
    997
    998	return NULL;
    999}
   1000
   1001/**
   1002 * queue_cmd_ring - queue cmd to ring or internally
   1003 * @tcmu_cmd: cmd to queue
   1004 * @scsi_err: TCM error code if failure (-1) returned.
   1005 *
   1006 * Returns:
   1007 * -1 we cannot queue internally or to the ring.
   1008 *  0 success
   1009 *  1 internally queued to wait for ring memory to free.
   1010 */
   1011static int queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, sense_reason_t *scsi_err)
   1012{
   1013	struct tcmu_dev *udev = tcmu_cmd->tcmu_dev;
   1014	struct se_cmd *se_cmd = tcmu_cmd->se_cmd;
   1015	size_t base_command_size, command_size;
   1016	struct tcmu_mailbox *mb = udev->mb_addr;
   1017	struct tcmu_cmd_entry *entry;
   1018	struct iovec *iov;
   1019	int iov_cnt, iov_bidi_cnt;
   1020	uint32_t cmd_id, cmd_head;
   1021	uint64_t cdb_off;
   1022	uint32_t blk_size = udev->data_blk_size;
   1023	/* size of data buffer needed */
   1024	size_t data_length = (size_t)tcmu_cmd->dbi_cnt * blk_size;
   1025
   1026	*scsi_err = TCM_NO_SENSE;
   1027
   1028	if (test_bit(TCMU_DEV_BIT_BLOCKED, &udev->flags)) {
   1029		*scsi_err = TCM_LUN_BUSY;
   1030		return -1;
   1031	}
   1032
   1033	if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) {
   1034		*scsi_err = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
   1035		return -1;
   1036	}
   1037
   1038	if (!list_empty(&udev->qfull_queue))
   1039		goto queue;
   1040
   1041	if (data_length > (size_t)udev->max_blocks * blk_size) {
   1042		pr_warn("TCMU: Request of size %zu is too big for %zu data area\n",
   1043			data_length, (size_t)udev->max_blocks * blk_size);
   1044		*scsi_err = TCM_INVALID_CDB_FIELD;
   1045		return -1;
   1046	}
   1047
   1048	iov_cnt = tcmu_alloc_data_space(udev, tcmu_cmd, &iov_bidi_cnt);
   1049	if (iov_cnt < 0)
   1050		goto free_and_queue;
   1051
   1052	/*
   1053	 * Must be a certain minimum size for response sense info, but
   1054	 * also may be larger if the iov array is large.
   1055	 */
   1056	base_command_size = tcmu_cmd_get_base_cmd_size(iov_cnt);
   1057	command_size = tcmu_cmd_get_cmd_size(tcmu_cmd, base_command_size);
   1058
   1059	if (command_size > (udev->cmdr_size / 2)) {
   1060		pr_warn("TCMU: Request of size %zu is too big for %u cmd ring\n",
   1061			command_size, udev->cmdr_size);
   1062		tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cur);
   1063		*scsi_err = TCM_INVALID_CDB_FIELD;
   1064		return -1;
   1065	}
   1066
   1067	if (!is_ring_space_avail(udev, command_size))
   1068		/*
   1069		 * Don't leave commands partially setup because the unmap
   1070		 * thread might need the blocks to make forward progress.
   1071		 */
   1072		goto free_and_queue;
   1073
   1074	if (xa_alloc(&udev->commands, &cmd_id, tcmu_cmd, XA_LIMIT(1, 0xffff),
   1075		     GFP_NOWAIT) < 0) {
   1076		pr_err("tcmu: Could not allocate cmd id.\n");
   1077
   1078		tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cnt);
   1079		*scsi_err = TCM_OUT_OF_RESOURCES;
   1080		return -1;
   1081	}
   1082	tcmu_cmd->cmd_id = cmd_id;
   1083
   1084	pr_debug("allocated cmd id %u for cmd %p dev %s\n", tcmu_cmd->cmd_id,
   1085		 tcmu_cmd, udev->name);
   1086
   1087	cmd_head = ring_insert_padding(udev, command_size);
   1088
   1089	entry = udev->cmdr + cmd_head;
   1090	memset(entry, 0, command_size);
   1091	tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_CMD);
   1092
   1093	/* prepare iov list and copy data to data area if necessary */
   1094	tcmu_cmd_reset_dbi_cur(tcmu_cmd);
   1095	iov = &entry->req.iov[0];
   1096
   1097	if (se_cmd->data_direction == DMA_TO_DEVICE ||
   1098	    se_cmd->se_cmd_flags & SCF_BIDI)
   1099		scatter_data_area(udev, tcmu_cmd, &iov);
   1100	else
   1101		tcmu_setup_iovs(udev, tcmu_cmd, &iov, se_cmd->data_length);
   1102
   1103	entry->req.iov_cnt = iov_cnt - iov_bidi_cnt;
   1104
   1105	/* Handle BIDI commands */
   1106	if (se_cmd->se_cmd_flags & SCF_BIDI) {
   1107		iov++;
   1108		tcmu_setup_iovs(udev, tcmu_cmd, &iov, tcmu_cmd->data_len_bidi);
   1109		entry->req.iov_bidi_cnt = iov_bidi_cnt;
   1110	}
   1111
   1112	tcmu_setup_cmd_timer(tcmu_cmd, udev->cmd_time_out, &udev->cmd_timer);
   1113
   1114	entry->hdr.cmd_id = tcmu_cmd->cmd_id;
   1115
   1116	tcmu_hdr_set_len(&entry->hdr.len_op, command_size);
   1117
   1118	/* All offsets relative to mb_addr, not start of entry! */
   1119	cdb_off = CMDR_OFF + cmd_head + base_command_size;
   1120	memcpy((void *) mb + cdb_off, se_cmd->t_task_cdb, scsi_command_size(se_cmd->t_task_cdb));
   1121	entry->req.cdb_off = cdb_off;
   1122	tcmu_flush_dcache_range(entry, command_size);
   1123
   1124	UPDATE_HEAD(mb->cmd_head, command_size, udev->cmdr_size);
   1125	tcmu_flush_dcache_range(mb, sizeof(*mb));
   1126
   1127	list_add_tail(&tcmu_cmd->queue_entry, &udev->inflight_queue);
   1128
   1129	if (!test_bit(TCMU_DEV_BIT_PLUGGED, &udev->flags))
   1130		uio_event_notify(&udev->uio_info);
   1131
   1132	return 0;
   1133
   1134free_and_queue:
   1135	tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cur);
   1136	tcmu_cmd_reset_dbi_cur(tcmu_cmd);
   1137
   1138queue:
   1139	if (add_to_qfull_queue(tcmu_cmd)) {
   1140		*scsi_err = TCM_OUT_OF_RESOURCES;
   1141		return -1;
   1142	}
   1143
   1144	return 1;
   1145}
   1146
   1147/**
   1148 * queue_tmr_ring - queue tmr info to ring or internally
   1149 * @udev: related tcmu_dev
   1150 * @tmr: tcmu_tmr containing tmr info to queue
   1151 *
   1152 * Returns:
   1153 *  0 success
   1154 *  1 internally queued to wait for ring memory to free.
   1155 */
   1156static int
   1157queue_tmr_ring(struct tcmu_dev *udev, struct tcmu_tmr *tmr)
   1158{
   1159	struct tcmu_tmr_entry *entry;
   1160	int cmd_size;
   1161	int id_list_sz;
   1162	struct tcmu_mailbox *mb = udev->mb_addr;
   1163	uint32_t cmd_head;
   1164
   1165	if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags))
   1166		goto out_free;
   1167
   1168	id_list_sz = sizeof(tmr->tmr_cmd_ids[0]) * tmr->tmr_cmd_cnt;
   1169	cmd_size = round_up(sizeof(*entry) + id_list_sz, TCMU_OP_ALIGN_SIZE);
   1170
   1171	if (!list_empty(&udev->tmr_queue) ||
   1172	    !is_ring_space_avail(udev, cmd_size)) {
   1173		list_add_tail(&tmr->queue_entry, &udev->tmr_queue);
   1174		pr_debug("adding tmr %p on dev %s to TMR ring space wait queue\n",
   1175			 tmr, udev->name);
   1176		return 1;
   1177	}
   1178
   1179	cmd_head = ring_insert_padding(udev, cmd_size);
   1180
   1181	entry = udev->cmdr + cmd_head;
   1182	memset(entry, 0, cmd_size);
   1183	tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_TMR);
   1184	tcmu_hdr_set_len(&entry->hdr.len_op, cmd_size);
   1185	entry->tmr_type = tmr->tmr_type;
   1186	entry->cmd_cnt = tmr->tmr_cmd_cnt;
   1187	memcpy(&entry->cmd_ids[0], &tmr->tmr_cmd_ids[0], id_list_sz);
   1188	tcmu_flush_dcache_range(entry, cmd_size);
   1189
   1190	UPDATE_HEAD(mb->cmd_head, cmd_size, udev->cmdr_size);
   1191	tcmu_flush_dcache_range(mb, sizeof(*mb));
   1192
   1193	uio_event_notify(&udev->uio_info);
   1194
   1195out_free:
   1196	kfree(tmr);
   1197
   1198	return 0;
   1199}
   1200
   1201static sense_reason_t
   1202tcmu_queue_cmd(struct se_cmd *se_cmd)
   1203{
   1204	struct se_device *se_dev = se_cmd->se_dev;
   1205	struct tcmu_dev *udev = TCMU_DEV(se_dev);
   1206	struct tcmu_cmd *tcmu_cmd;
   1207	sense_reason_t scsi_ret = TCM_CHECK_CONDITION_ABORT_CMD;
   1208	int ret = -1;
   1209
   1210	tcmu_cmd = tcmu_alloc_cmd(se_cmd);
   1211	if (!tcmu_cmd)
   1212		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
   1213
   1214	mutex_lock(&udev->cmdr_lock);
   1215	if (!(se_cmd->transport_state & CMD_T_ABORTED))
   1216		ret = queue_cmd_ring(tcmu_cmd, &scsi_ret);
   1217	if (ret < 0)
   1218		tcmu_free_cmd(tcmu_cmd);
   1219	else
   1220		se_cmd->priv = tcmu_cmd;
   1221	mutex_unlock(&udev->cmdr_lock);
   1222	return scsi_ret;
   1223}
   1224
   1225static void tcmu_set_next_deadline(struct list_head *queue,
   1226				   struct timer_list *timer)
   1227{
   1228	struct tcmu_cmd *cmd;
   1229
   1230	if (!list_empty(queue)) {
   1231		cmd = list_first_entry(queue, struct tcmu_cmd, queue_entry);
   1232		mod_timer(timer, cmd->deadline);
   1233	} else
   1234		del_timer(timer);
   1235}
   1236
   1237static int
   1238tcmu_tmr_type(enum tcm_tmreq_table tmf)
   1239{
   1240	switch (tmf) {
   1241	case TMR_ABORT_TASK:		return TCMU_TMR_ABORT_TASK;
   1242	case TMR_ABORT_TASK_SET:	return TCMU_TMR_ABORT_TASK_SET;
   1243	case TMR_CLEAR_ACA:		return TCMU_TMR_CLEAR_ACA;
   1244	case TMR_CLEAR_TASK_SET:	return TCMU_TMR_CLEAR_TASK_SET;
   1245	case TMR_LUN_RESET:		return TCMU_TMR_LUN_RESET;
   1246	case TMR_TARGET_WARM_RESET:	return TCMU_TMR_TARGET_WARM_RESET;
   1247	case TMR_TARGET_COLD_RESET:	return TCMU_TMR_TARGET_COLD_RESET;
   1248	case TMR_LUN_RESET_PRO:		return TCMU_TMR_LUN_RESET_PRO;
   1249	default:			return TCMU_TMR_UNKNOWN;
   1250	}
   1251}
   1252
   1253static void
   1254tcmu_tmr_notify(struct se_device *se_dev, enum tcm_tmreq_table tmf,
   1255		struct list_head *cmd_list)
   1256{
   1257	int i = 0, cmd_cnt = 0;
   1258	bool unqueued = false;
   1259	struct tcmu_cmd *cmd;
   1260	struct se_cmd *se_cmd;
   1261	struct tcmu_tmr *tmr;
   1262	struct tcmu_dev *udev = TCMU_DEV(se_dev);
   1263
   1264	mutex_lock(&udev->cmdr_lock);
   1265
   1266	/* First we check for aborted commands in qfull_queue */
   1267	list_for_each_entry(se_cmd, cmd_list, state_list) {
   1268		i++;
   1269		if (!se_cmd->priv)
   1270			continue;
   1271		cmd = se_cmd->priv;
   1272		/* Commands on qfull queue have no id yet */
   1273		if (cmd->cmd_id) {
   1274			cmd_cnt++;
   1275			continue;
   1276		}
   1277		pr_debug("Removing aborted command %p from queue on dev %s.\n",
   1278			 cmd, udev->name);
   1279
   1280		list_del_init(&cmd->queue_entry);
   1281		tcmu_free_cmd(cmd);
   1282		se_cmd->priv = NULL;
   1283		target_complete_cmd(se_cmd, SAM_STAT_TASK_ABORTED);
   1284		unqueued = true;
   1285	}
   1286	if (unqueued)
   1287		tcmu_set_next_deadline(&udev->qfull_queue, &udev->qfull_timer);
   1288
   1289	if (!test_bit(TCMU_DEV_BIT_TMR_NOTIFY, &udev->flags))
   1290		goto unlock;
   1291
   1292	pr_debug("TMR event %d on dev %s, aborted cmds %d, afflicted cmd_ids %d\n",
   1293		 tcmu_tmr_type(tmf), udev->name, i, cmd_cnt);
   1294
   1295	tmr = kmalloc(struct_size(tmr, tmr_cmd_ids, cmd_cnt), GFP_NOIO);
   1296	if (!tmr)
   1297		goto unlock;
   1298
   1299	tmr->tmr_type = tcmu_tmr_type(tmf);
   1300	tmr->tmr_cmd_cnt = cmd_cnt;
   1301
   1302	if (cmd_cnt != 0) {
   1303		cmd_cnt = 0;
   1304		list_for_each_entry(se_cmd, cmd_list, state_list) {
   1305			if (!se_cmd->priv)
   1306				continue;
   1307			cmd = se_cmd->priv;
   1308			if (cmd->cmd_id)
   1309				tmr->tmr_cmd_ids[cmd_cnt++] = cmd->cmd_id;
   1310		}
   1311	}
   1312
   1313	queue_tmr_ring(udev, tmr);
   1314
   1315unlock:
   1316	mutex_unlock(&udev->cmdr_lock);
   1317}
   1318
   1319static bool tcmu_handle_completion(struct tcmu_cmd *cmd,
   1320				   struct tcmu_cmd_entry *entry, bool keep_buf)
   1321{
   1322	struct se_cmd *se_cmd = cmd->se_cmd;
   1323	struct tcmu_dev *udev = cmd->tcmu_dev;
   1324	bool read_len_valid = false;
   1325	bool ret = true;
   1326	uint32_t read_len;
   1327
   1328	/*
   1329	 * cmd has been completed already from timeout, just reclaim
   1330	 * data area space and free cmd
   1331	 */
   1332	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) {
   1333		WARN_ON_ONCE(se_cmd);
   1334		goto out;
   1335	}
   1336	if (test_bit(TCMU_CMD_BIT_KEEP_BUF, &cmd->flags)) {
   1337		pr_err("cmd_id %u already completed with KEEP_BUF, ring is broken\n",
   1338		       entry->hdr.cmd_id);
   1339		set_bit(TCMU_DEV_BIT_BROKEN, &udev->flags);
   1340		ret = false;
   1341		goto out;
   1342	}
   1343
   1344	list_del_init(&cmd->queue_entry);
   1345
   1346	tcmu_cmd_reset_dbi_cur(cmd);
   1347
   1348	if (entry->hdr.uflags & TCMU_UFLAG_UNKNOWN_OP) {
   1349		pr_warn("TCMU: Userspace set UNKNOWN_OP flag on se_cmd %p\n",
   1350			cmd->se_cmd);
   1351		entry->rsp.scsi_status = SAM_STAT_CHECK_CONDITION;
   1352		goto done;
   1353	}
   1354
   1355	read_len = se_cmd->data_length;
   1356	if (se_cmd->data_direction == DMA_FROM_DEVICE &&
   1357	    (entry->hdr.uflags & TCMU_UFLAG_READ_LEN) && entry->rsp.read_len) {
   1358		read_len_valid = true;
   1359		if (entry->rsp.read_len < read_len)
   1360			read_len = entry->rsp.read_len;
   1361	}
   1362
   1363	if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) {
   1364		transport_copy_sense_to_cmd(se_cmd, entry->rsp.sense_buffer);
    1365		if (!read_len_valid)
   1366			goto done;
   1367		else
   1368			se_cmd->se_cmd_flags |= SCF_TREAT_READ_AS_NORMAL;
   1369	}
   1370	if (se_cmd->se_cmd_flags & SCF_BIDI) {
   1371		/* Get Data-In buffer before clean up */
   1372		gather_data_area(udev, cmd, true, read_len);
   1373	} else if (se_cmd->data_direction == DMA_FROM_DEVICE) {
   1374		gather_data_area(udev, cmd, false, read_len);
   1375	} else if (se_cmd->data_direction == DMA_TO_DEVICE) {
   1376		/* TODO: */
   1377	} else if (se_cmd->data_direction != DMA_NONE) {
   1378		pr_warn("TCMU: data direction was %d!\n",
   1379			se_cmd->data_direction);
   1380	}
   1381
   1382done:
   1383	se_cmd->priv = NULL;
   1384	if (read_len_valid) {
   1385		pr_debug("read_len = %d\n", read_len);
   1386		target_complete_cmd_with_length(cmd->se_cmd,
   1387					entry->rsp.scsi_status, read_len);
   1388	} else
   1389		target_complete_cmd(cmd->se_cmd, entry->rsp.scsi_status);
   1390
   1391out:
   1392	if (!keep_buf) {
   1393		tcmu_cmd_free_data(cmd, cmd->dbi_cnt);
   1394		tcmu_free_cmd(cmd);
   1395	} else {
   1396		/*
   1397		 * Keep this command after completion, since userspace still
   1398		 * needs the data buffer. Mark it with TCMU_CMD_BIT_KEEP_BUF
   1399		 * and reset potential TCMU_CMD_BIT_EXPIRED, so we don't accept
   1400		 * a second completion later.
   1401		 * Userspace can free the buffer later by writing the cmd_id
    1402		 * to the free_kept_buf action attribute.
   1403		 */
   1404		clear_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags);
   1405		set_bit(TCMU_CMD_BIT_KEEP_BUF, &cmd->flags);
   1406	}
   1407	return ret;
   1408}
   1409
   1410static int tcmu_run_tmr_queue(struct tcmu_dev *udev)
   1411{
   1412	struct tcmu_tmr *tmr, *tmp;
   1413	LIST_HEAD(tmrs);
   1414
   1415	if (list_empty(&udev->tmr_queue))
   1416		return 1;
   1417
   1418	pr_debug("running %s's tmr queue\n", udev->name);
   1419
   1420	list_splice_init(&udev->tmr_queue, &tmrs);
   1421
   1422	list_for_each_entry_safe(tmr, tmp, &tmrs, queue_entry) {
   1423		list_del_init(&tmr->queue_entry);
   1424
   1425		pr_debug("removing tmr %p on dev %s from queue\n",
   1426			 tmr, udev->name);
   1427
   1428		if (queue_tmr_ring(udev, tmr)) {
   1429			pr_debug("ran out of space during tmr queue run\n");
   1430			/*
   1431			 * tmr was requeued, so just put all tmrs back in
   1432			 * the queue
   1433			 */
   1434			list_splice_tail(&tmrs, &udev->tmr_queue);
   1435			return 0;
   1436		}
   1437	}
   1438
   1439	return 1;
   1440}
   1441
   1442static bool tcmu_handle_completions(struct tcmu_dev *udev)
   1443{
   1444	struct tcmu_mailbox *mb;
   1445	struct tcmu_cmd *cmd;
   1446	bool free_space = false;
   1447
   1448	if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) {
   1449		pr_err("ring broken, not handling completions\n");
   1450		return false;
   1451	}
   1452
   1453	mb = udev->mb_addr;
   1454	tcmu_flush_dcache_range(mb, sizeof(*mb));
   1455
   1456	while (udev->cmdr_last_cleaned != READ_ONCE(mb->cmd_tail)) {
   1457
   1458		struct tcmu_cmd_entry *entry = udev->cmdr + udev->cmdr_last_cleaned;
   1459		bool keep_buf;
   1460
   1461		/*
   1462		 * Flush max. up to end of cmd ring since current entry might
   1463		 * be a padding that is shorter than sizeof(*entry)
   1464		 */
   1465		size_t ring_left = head_to_end(udev->cmdr_last_cleaned,
   1466					       udev->cmdr_size);
   1467		tcmu_flush_dcache_range(entry, ring_left < sizeof(*entry) ?
   1468					ring_left : sizeof(*entry));
   1469
   1470		free_space = true;
   1471
   1472		if (tcmu_hdr_get_op(entry->hdr.len_op) == TCMU_OP_PAD ||
   1473		    tcmu_hdr_get_op(entry->hdr.len_op) == TCMU_OP_TMR) {
   1474			UPDATE_HEAD(udev->cmdr_last_cleaned,
   1475				    tcmu_hdr_get_len(entry->hdr.len_op),
   1476				    udev->cmdr_size);
   1477			continue;
   1478		}
   1479		WARN_ON(tcmu_hdr_get_op(entry->hdr.len_op) != TCMU_OP_CMD);
   1480
   1481		keep_buf = !!(entry->hdr.uflags & TCMU_UFLAG_KEEP_BUF);
   1482		if (keep_buf)
   1483			cmd = xa_load(&udev->commands, entry->hdr.cmd_id);
   1484		else
   1485			cmd = xa_erase(&udev->commands, entry->hdr.cmd_id);
   1486		if (!cmd) {
   1487			pr_err("cmd_id %u not found, ring is broken\n",
   1488			       entry->hdr.cmd_id);
   1489			set_bit(TCMU_DEV_BIT_BROKEN, &udev->flags);
   1490			return false;
   1491		}
   1492
   1493		if (!tcmu_handle_completion(cmd, entry, keep_buf))
   1494			break;
   1495
   1496		UPDATE_HEAD(udev->cmdr_last_cleaned,
   1497			    tcmu_hdr_get_len(entry->hdr.len_op),
   1498			    udev->cmdr_size);
   1499	}
   1500	if (free_space)
   1501		free_space = tcmu_run_tmr_queue(udev);
   1502
   1503	if (atomic_read(&global_page_count) > tcmu_global_max_pages &&
   1504	    xa_empty(&udev->commands) && list_empty(&udev->qfull_queue)) {
   1505		/*
   1506		 * Allocated blocks exceeded global block limit, currently no
   1507		 * more pending or waiting commands so try to reclaim blocks.
   1508		 */
   1509		schedule_delayed_work(&tcmu_unmap_work, 0);
   1510	}
   1511	if (udev->cmd_time_out)
   1512		tcmu_set_next_deadline(&udev->inflight_queue, &udev->cmd_timer);
   1513
   1514	return free_space;
   1515}
   1516
   1517static void tcmu_check_expired_ring_cmd(struct tcmu_cmd *cmd)
   1518{
   1519	struct se_cmd *se_cmd;
   1520
   1521	if (!time_after_eq(jiffies, cmd->deadline))
   1522		return;
   1523
   1524	set_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags);
   1525	list_del_init(&cmd->queue_entry);
   1526	se_cmd = cmd->se_cmd;
   1527	se_cmd->priv = NULL;
   1528	cmd->se_cmd = NULL;
   1529
   1530	pr_debug("Timing out inflight cmd %u on dev %s.\n",
   1531		 cmd->cmd_id, cmd->tcmu_dev->name);
   1532
   1533	target_complete_cmd(se_cmd, SAM_STAT_CHECK_CONDITION);
   1534}
   1535
   1536static void tcmu_check_expired_queue_cmd(struct tcmu_cmd *cmd)
   1537{
   1538	struct se_cmd *se_cmd;
   1539
   1540	if (!time_after_eq(jiffies, cmd->deadline))
   1541		return;
   1542
   1543	pr_debug("Timing out queued cmd %p on dev %s.\n",
   1544		  cmd, cmd->tcmu_dev->name);
   1545
   1546	list_del_init(&cmd->queue_entry);
   1547	se_cmd = cmd->se_cmd;
   1548	tcmu_free_cmd(cmd);
   1549
   1550	se_cmd->priv = NULL;
   1551	target_complete_cmd(se_cmd, SAM_STAT_TASK_SET_FULL);
   1552}
   1553
   1554static void tcmu_device_timedout(struct tcmu_dev *udev)
   1555{
   1556	spin_lock(&timed_out_udevs_lock);
   1557	if (list_empty(&udev->timedout_entry))
   1558		list_add_tail(&udev->timedout_entry, &timed_out_udevs);
   1559	spin_unlock(&timed_out_udevs_lock);
   1560
   1561	schedule_delayed_work(&tcmu_unmap_work, 0);
   1562}
   1563
   1564static void tcmu_cmd_timedout(struct timer_list *t)
   1565{
   1566	struct tcmu_dev *udev = from_timer(udev, t, cmd_timer);
   1567
   1568	pr_debug("%s cmd timeout has expired\n", udev->name);
   1569	tcmu_device_timedout(udev);
   1570}
   1571
   1572static void tcmu_qfull_timedout(struct timer_list *t)
   1573{
   1574	struct tcmu_dev *udev = from_timer(udev, t, qfull_timer);
   1575
   1576	pr_debug("%s qfull timeout has expired\n", udev->name);
   1577	tcmu_device_timedout(udev);
   1578}
   1579
   1580static int tcmu_attach_hba(struct se_hba *hba, u32 host_id)
   1581{
   1582	struct tcmu_hba *tcmu_hba;
   1583
   1584	tcmu_hba = kzalloc(sizeof(struct tcmu_hba), GFP_KERNEL);
   1585	if (!tcmu_hba)
   1586		return -ENOMEM;
   1587
   1588	tcmu_hba->host_id = host_id;
   1589	hba->hba_ptr = tcmu_hba;
   1590
   1591	return 0;
   1592}
   1593
   1594static void tcmu_detach_hba(struct se_hba *hba)
   1595{
   1596	kfree(hba->hba_ptr);
   1597	hba->hba_ptr = NULL;
   1598}
   1599
   1600static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
   1601{
   1602	struct tcmu_dev *udev;
   1603
   1604	udev = kzalloc(sizeof(struct tcmu_dev), GFP_KERNEL);
   1605	if (!udev)
   1606		return NULL;
   1607	kref_init(&udev->kref);
   1608
   1609	udev->name = kstrdup(name, GFP_KERNEL);
   1610	if (!udev->name) {
   1611		kfree(udev);
   1612		return NULL;
   1613	}
   1614
   1615	udev->hba = hba;
   1616	udev->cmd_time_out = TCMU_TIME_OUT;
   1617	udev->qfull_time_out = -1;
   1618
   1619	udev->data_pages_per_blk = DATA_PAGES_PER_BLK_DEF;
   1620	udev->max_blocks = DATA_AREA_PAGES_DEF / udev->data_pages_per_blk;
   1621	udev->cmdr_size = CMDR_SIZE_DEF;
   1622	udev->data_area_mb = TCMU_PAGES_TO_MBS(DATA_AREA_PAGES_DEF);
   1623
   1624	mutex_init(&udev->cmdr_lock);
   1625
   1626	INIT_LIST_HEAD(&udev->node);
   1627	INIT_LIST_HEAD(&udev->timedout_entry);
   1628	INIT_LIST_HEAD(&udev->qfull_queue);
   1629	INIT_LIST_HEAD(&udev->tmr_queue);
   1630	INIT_LIST_HEAD(&udev->inflight_queue);
   1631	xa_init_flags(&udev->commands, XA_FLAGS_ALLOC1);
   1632
   1633	timer_setup(&udev->qfull_timer, tcmu_qfull_timedout, 0);
   1634	timer_setup(&udev->cmd_timer, tcmu_cmd_timedout, 0);
   1635
   1636	xa_init(&udev->data_pages);
   1637
   1638	return &udev->se_dev;
   1639}
   1640
   1641static void tcmu_dev_call_rcu(struct rcu_head *p)
   1642{
   1643	struct se_device *dev = container_of(p, struct se_device, rcu_head);
   1644	struct tcmu_dev *udev = TCMU_DEV(dev);
   1645
   1646	kfree(udev->uio_info.name);
   1647	kfree(udev->name);
   1648	kfree(udev);
   1649}
   1650
   1651static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd)
   1652{
   1653	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags) ||
   1654	    test_bit(TCMU_CMD_BIT_KEEP_BUF, &cmd->flags)) {
   1655		kmem_cache_free(tcmu_cmd_cache, cmd);
   1656		return 0;
   1657	}
   1658	return -EINVAL;
   1659}
   1660
   1661static u32 tcmu_blocks_release(struct tcmu_dev *udev, unsigned long first,
   1662				unsigned long last)
   1663{
   1664	struct page *page;
   1665	unsigned long dpi;
   1666	u32 pages_freed = 0;
   1667
   1668	first = first * udev->data_pages_per_blk;
   1669	last = (last + 1) * udev->data_pages_per_blk - 1;
   1670	xa_for_each_range(&udev->data_pages, dpi, page, first, last) {
   1671		xa_erase(&udev->data_pages, dpi);
   1672		/*
   1673		 * While reaching here there may be page faults occurring on
   1674		 * the to-be-released pages. A race condition may occur if
   1675		 * unmap_mapping_range() is called before page faults on these
   1676		 * pages have completed; a valid but stale map is created.
   1677		 *
   1678		 * If another command subsequently runs and needs to extend
   1679		 * dbi_thresh, it may reuse the slot corresponding to the
   1680		 * previous page in data_bitmap. Though we will allocate a new
   1681		 * page for the slot in data_area, no page fault will happen
   1682		 * because we have a valid map. Therefore the command's data
   1683		 * will be lost.
   1684		 *
   1685		 * We lock and unlock pages that are to be released to ensure
   1686		 * all page faults have completed. This way
   1687		 * unmap_mapping_range() can ensure stale maps are cleanly
   1688		 * removed.
   1689		 */
   1690		lock_page(page);
   1691		unlock_page(page);
   1692		__free_page(page);
   1693		pages_freed++;
   1694	}
   1695
   1696	atomic_sub(pages_freed, &global_page_count);
   1697
   1698	return pages_freed;
   1699}
   1700
   1701static void tcmu_remove_all_queued_tmr(struct tcmu_dev *udev)
   1702{
   1703	struct tcmu_tmr *tmr, *tmp;
   1704
   1705	list_for_each_entry_safe(tmr, tmp, &udev->tmr_queue, queue_entry) {
   1706		list_del_init(&tmr->queue_entry);
   1707		kfree(tmr);
   1708	}
   1709}
   1710
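        	/*
        	 * Final teardown: runs once the last reference to the tcmu_dev is
        	 * dropped (e.g. from tcmu_vma_close, tcmu_free_device or a configure
        	 * error path). Frees the command ring, any expired or kept-buffer
        	 * commands, queued TMRs and the data area, then frees the device
        	 * itself via RCU.
        	 */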
   1711static void tcmu_dev_kref_release(struct kref *kref)
   1712{
   1713	struct tcmu_dev *udev = container_of(kref, struct tcmu_dev, kref);
   1714	struct se_device *dev = &udev->se_dev;
   1715	struct tcmu_cmd *cmd;
   1716	bool all_expired = true;
   1717	unsigned long i;
   1718
   1719	vfree(udev->mb_addr);
   1720	udev->mb_addr = NULL;
   1721
   1722	spin_lock_bh(&timed_out_udevs_lock);
   1723	if (!list_empty(&udev->timedout_entry))
   1724		list_del(&udev->timedout_entry);
   1725	spin_unlock_bh(&timed_out_udevs_lock);
   1726
   1727	/* Upper layer should drain all requests before calling this */
   1728	mutex_lock(&udev->cmdr_lock);
   1729	xa_for_each(&udev->commands, i, cmd) {
   1730		if (tcmu_check_and_free_pending_cmd(cmd) != 0)
   1731			all_expired = false;
   1732	}
   1733	/* There can be left over TMR cmds. Remove them. */
   1734	tcmu_remove_all_queued_tmr(udev);
   1735	if (!list_empty(&udev->qfull_queue))
   1736		all_expired = false;
   1737	xa_destroy(&udev->commands);
   1738	WARN_ON(!all_expired);
   1739
   1740	tcmu_blocks_release(udev, 0, udev->dbi_max);
   1741	bitmap_free(udev->data_bitmap);
   1742	mutex_unlock(&udev->cmdr_lock);
   1743
   1744	pr_debug("dev_kref_release\n");
   1745
   1746	call_rcu(&dev->rcu_head, tcmu_dev_call_rcu);
   1747}
   1748
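        	/*
        	 * Flush the queue-full queue. With fail == false, try to put each
        	 * waiting command back on the command ring; with fail == true,
        	 * complete them with BUSY so the initiator can retry. Caller must
        	 * hold cmdr_lock.
        	 */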
   1749static void run_qfull_queue(struct tcmu_dev *udev, bool fail)
   1750{
   1751	struct tcmu_cmd *tcmu_cmd, *tmp_cmd;
   1752	LIST_HEAD(cmds);
   1753	sense_reason_t scsi_ret;
   1754	int ret;
   1755
   1756	if (list_empty(&udev->qfull_queue))
   1757		return;
   1758
   1759	pr_debug("running %s's cmdr queue forcefail %d\n", udev->name, fail);
   1760
   1761	list_splice_init(&udev->qfull_queue, &cmds);
   1762
   1763	list_for_each_entry_safe(tcmu_cmd, tmp_cmd, &cmds, queue_entry) {
   1764		list_del_init(&tcmu_cmd->queue_entry);
   1765
   1766		pr_debug("removing cmd %p on dev %s from queue\n",
   1767			 tcmu_cmd, udev->name);
   1768
   1769		if (fail) {
   1770			/*
   1771			 * We were not able to even start the command, so
   1772			 * fail with busy to allow a retry in case runner
   1773			 * was only temporarily down. If the device is being
   1774			 * removed then LIO core will do the right thing and
   1775			 * fail the retry.
   1776			 */
   1777			tcmu_cmd->se_cmd->priv = NULL;
   1778			target_complete_cmd(tcmu_cmd->se_cmd, SAM_STAT_BUSY);
   1779			tcmu_free_cmd(tcmu_cmd);
   1780			continue;
   1781		}
   1782
   1783		ret = queue_cmd_ring(tcmu_cmd, &scsi_ret);
   1784		if (ret < 0) {
   1785			pr_debug("cmd %p on dev %s failed with %u\n",
   1786				 tcmu_cmd, udev->name, scsi_ret);
   1787			/*
   1788			 * Ignore scsi_ret for now. target_complete_cmd
   1789			 * drops it.
   1790			 */
   1791			tcmu_cmd->se_cmd->priv = NULL;
   1792			target_complete_cmd(tcmu_cmd->se_cmd,
   1793					    SAM_STAT_CHECK_CONDITION);
   1794			tcmu_free_cmd(tcmu_cmd);
   1795		} else if (ret > 0) {
   1796			pr_debug("ran out of space during cmdr queue run\n");
   1797			/*
   1798			 * cmd was requeued, so just put all cmds back in
   1799			 * the queue
   1800			 */
   1801			list_splice_tail(&cmds, &udev->qfull_queue);
   1802			break;
   1803		}
   1804	}
   1805
   1806	tcmu_set_next_deadline(&udev->qfull_queue, &udev->qfull_timer);
   1807}
   1808
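        	/*
        	 * UIO irqcontrol hook: called when userspace writes an s32 to the uio
        	 * device after posting completions. Reap them and retry any commands
        	 * that were waiting for ring or data space.
        	 */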
   1809static int tcmu_irqcontrol(struct uio_info *info, s32 irq_on)
   1810{
   1811	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
   1812
   1813	mutex_lock(&udev->cmdr_lock);
   1814	if (tcmu_handle_completions(udev))
   1815		run_qfull_queue(udev, false);
   1816	mutex_unlock(&udev->cmdr_lock);
   1817
   1818	return 0;
   1819}
   1820
   1821/*
   1822 * mmap code from uio.c. Copied here because we want to hook mmap()
   1823 * and this stuff must come along.
   1824 */
   1825static int tcmu_find_mem_index(struct vm_area_struct *vma)
   1826{
   1827	struct tcmu_dev *udev = vma->vm_private_data;
   1828	struct uio_info *info = &udev->uio_info;
   1829
   1830	if (vma->vm_pgoff < MAX_UIO_MAPS) {
   1831		if (info->mem[vma->vm_pgoff].size == 0)
   1832			return -1;
   1833		return (int)vma->vm_pgoff;
   1834	}
   1835	return -1;
   1836}
   1837
   1838static struct page *tcmu_try_get_data_page(struct tcmu_dev *udev, uint32_t dpi)
   1839{
   1840	struct page *page;
   1841
   1842	mutex_lock(&udev->cmdr_lock);
   1843	page = xa_load(&udev->data_pages, dpi);
   1844	if (likely(page)) {
   1845		get_page(page);
   1846		lock_page(page);
   1847		mutex_unlock(&udev->cmdr_lock);
   1848		return page;
   1849	}
   1850
   1851	/*
    1852	 * Userspace messed up and passed in an address that is not in
    1853	 * the data iov it was given.
   1854	 */
   1855	pr_err("Invalid addr to data page mapping (dpi %u) on device %s\n",
   1856	       dpi, udev->name);
   1857	mutex_unlock(&udev->cmdr_lock);
   1858
   1859	return NULL;
   1860}
   1861
   1862static void tcmu_vma_open(struct vm_area_struct *vma)
   1863{
   1864	struct tcmu_dev *udev = vma->vm_private_data;
   1865
   1866	pr_debug("vma_open\n");
   1867
   1868	kref_get(&udev->kref);
   1869}
   1870
   1871static void tcmu_vma_close(struct vm_area_struct *vma)
   1872{
   1873	struct tcmu_dev *udev = vma->vm_private_data;
   1874
   1875	pr_debug("vma_close\n");
   1876
   1877	/* release ref from tcmu_vma_open */
   1878	kref_put(&udev->kref, tcmu_dev_kref_release);
   1879}
   1880
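        	/*
        	 * Fault handler for the mmap-ed region: offsets below data_off hit
        	 * the vmalloc-ed mailbox/command ring, everything above is served
        	 * from the dynamically allocated data pages. Data pages are returned
        	 * locked (VM_FAULT_LOCKED) so the release path can synchronize
        	 * against in-flight faults.
        	 */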
   1881static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf)
   1882{
   1883	struct tcmu_dev *udev = vmf->vma->vm_private_data;
   1884	struct uio_info *info = &udev->uio_info;
   1885	struct page *page;
   1886	unsigned long offset;
   1887	void *addr;
   1888	vm_fault_t ret = 0;
   1889
   1890	int mi = tcmu_find_mem_index(vmf->vma);
   1891	if (mi < 0)
   1892		return VM_FAULT_SIGBUS;
   1893
   1894	/*
   1895	 * We need to subtract mi because userspace uses offset = N*PAGE_SIZE
   1896	 * to use mem[N].
   1897	 */
   1898	offset = (vmf->pgoff - mi) << PAGE_SHIFT;
   1899
   1900	if (offset < udev->data_off) {
   1901		/* For the vmalloc()ed cmd area pages */
   1902		addr = (void *)(unsigned long)info->mem[mi].addr + offset;
   1903		page = vmalloc_to_page(addr);
   1904		get_page(page);
   1905	} else {
   1906		uint32_t dpi;
   1907
   1908		/* For the dynamically growing data area pages */
   1909		dpi = (offset - udev->data_off) / PAGE_SIZE;
   1910		page = tcmu_try_get_data_page(udev, dpi);
   1911		if (!page)
   1912			return VM_FAULT_SIGBUS;
   1913		ret = VM_FAULT_LOCKED;
   1914	}
   1915
   1916	vmf->page = page;
   1917	return ret;
   1918}
   1919
   1920static const struct vm_operations_struct tcmu_vm_ops = {
   1921	.open = tcmu_vma_open,
   1922	.close = tcmu_vma_close,
   1923	.fault = tcmu_vma_fault,
   1924};
   1925
   1926static int tcmu_mmap(struct uio_info *info, struct vm_area_struct *vma)
   1927{
   1928	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
   1929
   1930	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
   1931	vma->vm_ops = &tcmu_vm_ops;
   1932
   1933	vma->vm_private_data = udev;
   1934
   1935	/* Ensure the mmap is exactly the right size */
   1936	if (vma_pages(vma) != udev->mmap_pages)
   1937		return -EINVAL;
   1938
   1939	tcmu_vma_open(vma);
   1940
   1941	return 0;
   1942}
   1943
   1944static int tcmu_open(struct uio_info *info, struct inode *inode)
   1945{
   1946	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
   1947
   1948	/* O_EXCL not supported for char devs, so fake it? */
   1949	if (test_and_set_bit(TCMU_DEV_BIT_OPEN, &udev->flags))
   1950		return -EBUSY;
   1951
   1952	udev->inode = inode;
   1953
   1954	pr_debug("open\n");
   1955
   1956	return 0;
   1957}
   1958
   1959static int tcmu_release(struct uio_info *info, struct inode *inode)
   1960{
   1961	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
   1962	struct tcmu_cmd *cmd;
   1963	unsigned long i;
   1964	bool freed = false;
   1965
   1966	mutex_lock(&udev->cmdr_lock);
   1967
   1968	xa_for_each(&udev->commands, i, cmd) {
   1969		/* Cmds with KEEP_BUF set are no longer on the ring, but
   1970		 * userspace still holds the data buffer. If userspace closes
    1971		 * the device, implicitly free these cmds and buffers: after a
    1972		 * new open, userspace can no longer find the cmd in the ring
    1973		 * and thus will never release the buffer by writing its cmd_id
    1974		 * to the free_kept_buf action attribute.
   1975		 */
   1976		if (!test_bit(TCMU_CMD_BIT_KEEP_BUF, &cmd->flags))
   1977			continue;
   1978		pr_debug("removing KEEP_BUF cmd %u on dev %s from ring\n",
   1979			 cmd->cmd_id, udev->name);
   1980		freed = true;
   1981
   1982		xa_erase(&udev->commands, i);
   1983		tcmu_cmd_free_data(cmd, cmd->dbi_cnt);
   1984		tcmu_free_cmd(cmd);
   1985	}
   1986	/*
    1987	 * We only freed data space, not ring space. Therefore we don't call
   1988	 * run_tmr_queue, but call run_qfull_queue if tmr_list is empty.
   1989	 */
   1990	if (freed && list_empty(&udev->tmr_queue))
   1991		run_qfull_queue(udev, false);
   1992
   1993	mutex_unlock(&udev->cmdr_lock);
   1994
   1995	clear_bit(TCMU_DEV_BIT_OPEN, &udev->flags);
   1996
   1997	pr_debug("close\n");
   1998
   1999	return 0;
   2000}
   2001
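        	/*
        	 * Netlink reply handling: when userspace advertises reply support,
        	 * each device event is tracked in udev->curr_nl_cmd and linked on
        	 * tcmu_nl_cmd_list; the sender then sleeps in
        	 * tcmu_wait_genl_cmd_reply() until the completion fires and returns
        	 * the status filled in for the reply.
        	 */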
   2002static int tcmu_init_genl_cmd_reply(struct tcmu_dev *udev, int cmd)
   2003{
   2004	struct tcmu_nl_cmd *nl_cmd = &udev->curr_nl_cmd;
   2005
   2006	if (!tcmu_kern_cmd_reply_supported)
   2007		return 0;
   2008
   2009	if (udev->nl_reply_supported <= 0)
   2010		return 0;
   2011
   2012	mutex_lock(&tcmu_nl_cmd_mutex);
   2013
   2014	if (tcmu_netlink_blocked) {
   2015		mutex_unlock(&tcmu_nl_cmd_mutex);
   2016		pr_warn("Failing nl cmd %d on %s. Interface is blocked.\n", cmd,
   2017			udev->name);
   2018		return -EAGAIN;
   2019	}
   2020
   2021	if (nl_cmd->cmd != TCMU_CMD_UNSPEC) {
   2022		mutex_unlock(&tcmu_nl_cmd_mutex);
   2023		pr_warn("netlink cmd %d already executing on %s\n",
   2024			 nl_cmd->cmd, udev->name);
   2025		return -EBUSY;
   2026	}
   2027
   2028	memset(nl_cmd, 0, sizeof(*nl_cmd));
   2029	nl_cmd->cmd = cmd;
   2030	nl_cmd->udev = udev;
   2031	init_completion(&nl_cmd->complete);
   2032	INIT_LIST_HEAD(&nl_cmd->nl_list);
   2033
   2034	list_add_tail(&nl_cmd->nl_list, &tcmu_nl_cmd_list);
   2035
   2036	mutex_unlock(&tcmu_nl_cmd_mutex);
   2037	return 0;
   2038}
   2039
   2040static void tcmu_destroy_genl_cmd_reply(struct tcmu_dev *udev)
   2041{
   2042	struct tcmu_nl_cmd *nl_cmd = &udev->curr_nl_cmd;
   2043
   2044	if (!tcmu_kern_cmd_reply_supported)
   2045		return;
   2046
   2047	if (udev->nl_reply_supported <= 0)
   2048		return;
   2049
   2050	mutex_lock(&tcmu_nl_cmd_mutex);
   2051
   2052	list_del(&nl_cmd->nl_list);
   2053	memset(nl_cmd, 0, sizeof(*nl_cmd));
   2054
   2055	mutex_unlock(&tcmu_nl_cmd_mutex);
   2056}
   2057
   2058static int tcmu_wait_genl_cmd_reply(struct tcmu_dev *udev)
   2059{
   2060	struct tcmu_nl_cmd *nl_cmd = &udev->curr_nl_cmd;
   2061	int ret;
   2062
   2063	if (!tcmu_kern_cmd_reply_supported)
   2064		return 0;
   2065
   2066	if (udev->nl_reply_supported <= 0)
   2067		return 0;
   2068
   2069	pr_debug("sleeping for nl reply\n");
   2070	wait_for_completion(&nl_cmd->complete);
   2071
   2072	mutex_lock(&tcmu_nl_cmd_mutex);
   2073	nl_cmd->cmd = TCMU_CMD_UNSPEC;
   2074	ret = nl_cmd->status;
   2075	mutex_unlock(&tcmu_nl_cmd_mutex);
   2076
   2077	return ret;
   2078}
   2079
   2080static int tcmu_netlink_event_init(struct tcmu_dev *udev,
   2081				   enum tcmu_genl_cmd cmd,
   2082				   struct sk_buff **buf, void **hdr)
   2083{
   2084	struct sk_buff *skb;
   2085	void *msg_header;
   2086	int ret = -ENOMEM;
   2087
   2088	skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
   2089	if (!skb)
   2090		return ret;
   2091
   2092	msg_header = genlmsg_put(skb, 0, 0, &tcmu_genl_family, 0, cmd);
   2093	if (!msg_header)
   2094		goto free_skb;
   2095
   2096	ret = nla_put_string(skb, TCMU_ATTR_DEVICE, udev->uio_info.name);
   2097	if (ret < 0)
   2098		goto free_skb;
   2099
   2100	ret = nla_put_u32(skb, TCMU_ATTR_MINOR, udev->uio_info.uio_dev->minor);
   2101	if (ret < 0)
   2102		goto free_skb;
   2103
   2104	ret = nla_put_u32(skb, TCMU_ATTR_DEVICE_ID, udev->se_dev.dev_index);
   2105	if (ret < 0)
   2106		goto free_skb;
   2107
   2108	*buf = skb;
   2109	*hdr = msg_header;
   2110	return ret;
   2111
   2112free_skb:
   2113	nlmsg_free(skb);
   2114	return ret;
   2115}
   2116
   2117static int tcmu_netlink_event_send(struct tcmu_dev *udev,
   2118				   enum tcmu_genl_cmd cmd,
   2119				   struct sk_buff *skb, void *msg_header)
   2120{
   2121	int ret;
   2122
   2123	genlmsg_end(skb, msg_header);
   2124
   2125	ret = tcmu_init_genl_cmd_reply(udev, cmd);
   2126	if (ret) {
   2127		nlmsg_free(skb);
   2128		return ret;
   2129	}
   2130
   2131	ret = genlmsg_multicast_allns(&tcmu_genl_family, skb, 0,
   2132				      TCMU_MCGRP_CONFIG, GFP_KERNEL);
   2133
   2134	/* Wait during an add as the listener may not be up yet */
   2135	if (ret == 0 ||
   2136	   (ret == -ESRCH && cmd == TCMU_CMD_ADDED_DEVICE))
   2137		return tcmu_wait_genl_cmd_reply(udev);
   2138	else
   2139		tcmu_destroy_genl_cmd_reply(udev);
   2140
   2141	return ret;
   2142}
   2143
   2144static int tcmu_send_dev_add_event(struct tcmu_dev *udev)
   2145{
   2146	struct sk_buff *skb = NULL;
   2147	void *msg_header = NULL;
   2148	int ret = 0;
   2149
   2150	ret = tcmu_netlink_event_init(udev, TCMU_CMD_ADDED_DEVICE, &skb,
   2151				      &msg_header);
   2152	if (ret < 0)
   2153		return ret;
   2154	return tcmu_netlink_event_send(udev, TCMU_CMD_ADDED_DEVICE, skb,
   2155				       msg_header);
   2156}
   2157
   2158static int tcmu_send_dev_remove_event(struct tcmu_dev *udev)
   2159{
   2160	struct sk_buff *skb = NULL;
   2161	void *msg_header = NULL;
   2162	int ret = 0;
   2163
   2164	ret = tcmu_netlink_event_init(udev, TCMU_CMD_REMOVED_DEVICE,
   2165				      &skb, &msg_header);
   2166	if (ret < 0)
   2167		return ret;
   2168	return tcmu_netlink_event_send(udev, TCMU_CMD_REMOVED_DEVICE,
   2169				       skb, msg_header);
   2170}
   2171
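        	/*
        	 * (Re)build the UIO device name, "tcm-user/<host_id>/<name>" with an
        	 * optional "/<dev_config>" suffix. It is handed to the UIO core via
        	 * uio_info.name, which userspace reads to identify the right uio
        	 * device.
        	 */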
   2172static int tcmu_update_uio_info(struct tcmu_dev *udev)
   2173{
   2174	struct tcmu_hba *hba = udev->hba->hba_ptr;
   2175	struct uio_info *info;
   2176	char *str;
   2177
   2178	info = &udev->uio_info;
   2179
   2180	if (udev->dev_config[0])
   2181		str = kasprintf(GFP_KERNEL, "tcm-user/%u/%s/%s", hba->host_id,
   2182				udev->name, udev->dev_config);
   2183	else
   2184		str = kasprintf(GFP_KERNEL, "tcm-user/%u/%s", hba->host_id,
   2185				udev->name);
   2186	if (!str)
   2187		return -ENOMEM;
   2188
   2189	/* If the old string exists, free it */
   2190	kfree(info->name);
   2191	info->name = str;
   2192
   2193	return 0;
   2194}
   2195
   2196static int tcmu_configure_device(struct se_device *dev)
   2197{
   2198	struct tcmu_dev *udev = TCMU_DEV(dev);
   2199	struct uio_info *info;
   2200	struct tcmu_mailbox *mb;
   2201	size_t data_size;
   2202	int ret = 0;
   2203
   2204	ret = tcmu_update_uio_info(udev);
   2205	if (ret)
   2206		return ret;
   2207
   2208	info = &udev->uio_info;
   2209
   2210	mutex_lock(&udev->cmdr_lock);
   2211	udev->data_bitmap = bitmap_zalloc(udev->max_blocks, GFP_KERNEL);
   2212	mutex_unlock(&udev->cmdr_lock);
   2213	if (!udev->data_bitmap) {
   2214		ret = -ENOMEM;
   2215		goto err_bitmap_alloc;
   2216	}
   2217
   2218	mb = vzalloc(udev->cmdr_size + CMDR_OFF);
   2219	if (!mb) {
   2220		ret = -ENOMEM;
   2221		goto err_vzalloc;
   2222	}
   2223
   2224	/* mailbox fits in first part of CMDR space */
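        	/*
        	 * Resulting layout of the single UIO mapping (data_off is page
        	 * aligned):
        	 *
        	 *   [ mailbox | command ring | data area ]
        	 *   0         CMDR_OFF       data_off (= cmdr_size + CMDR_OFF)
        	 */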
   2225	udev->mb_addr = mb;
   2226	udev->cmdr = (void *)mb + CMDR_OFF;
   2227	udev->data_off = udev->cmdr_size + CMDR_OFF;
   2228	data_size = TCMU_MBS_TO_PAGES(udev->data_area_mb) << PAGE_SHIFT;
   2229	udev->mmap_pages = (data_size + udev->cmdr_size + CMDR_OFF) >> PAGE_SHIFT;
   2230	udev->data_blk_size = udev->data_pages_per_blk * PAGE_SIZE;
   2231	udev->dbi_thresh = 0; /* Default in Idle state */
   2232
   2233	/* Initialise the mailbox of the ring buffer */
   2234	mb->version = TCMU_MAILBOX_VERSION;
   2235	mb->flags = TCMU_MAILBOX_FLAG_CAP_OOOC |
   2236		    TCMU_MAILBOX_FLAG_CAP_READ_LEN |
   2237		    TCMU_MAILBOX_FLAG_CAP_TMR |
   2238		    TCMU_MAILBOX_FLAG_CAP_KEEP_BUF;
   2239	mb->cmdr_off = CMDR_OFF;
   2240	mb->cmdr_size = udev->cmdr_size;
   2241
   2242	WARN_ON(!PAGE_ALIGNED(udev->data_off));
   2243	WARN_ON(data_size % PAGE_SIZE);
   2244
   2245	info->version = __stringify(TCMU_MAILBOX_VERSION);
   2246
   2247	info->mem[0].name = "tcm-user command & data buffer";
   2248	info->mem[0].addr = (phys_addr_t)(uintptr_t)udev->mb_addr;
   2249	info->mem[0].size = data_size + udev->cmdr_size + CMDR_OFF;
   2250	info->mem[0].memtype = UIO_MEM_NONE;
   2251
   2252	info->irqcontrol = tcmu_irqcontrol;
   2253	info->irq = UIO_IRQ_CUSTOM;
   2254
   2255	info->mmap = tcmu_mmap;
   2256	info->open = tcmu_open;
   2257	info->release = tcmu_release;
   2258
   2259	ret = uio_register_device(tcmu_root_device, info);
   2260	if (ret)
   2261		goto err_register;
   2262
    2263	/* User can set hw_block_size before enabling the device */
   2264	if (dev->dev_attrib.hw_block_size == 0)
   2265		dev->dev_attrib.hw_block_size = 512;
   2266	/* Other attributes can be configured in userspace */
   2267	if (!dev->dev_attrib.hw_max_sectors)
   2268		dev->dev_attrib.hw_max_sectors = 128;
   2269	if (!dev->dev_attrib.emulate_write_cache)
   2270		dev->dev_attrib.emulate_write_cache = 0;
   2271	dev->dev_attrib.hw_queue_depth = 128;
   2272
   2273	/* If user didn't explicitly disable netlink reply support, use
   2274	 * module scope setting.
   2275	 */
   2276	if (udev->nl_reply_supported >= 0)
   2277		udev->nl_reply_supported = tcmu_kern_cmd_reply_supported;
   2278
   2279	/*
    2280	 * Get a ref in case userspace does a close on the uio device before
   2281	 * LIO has initiated tcmu_free_device.
   2282	 */
   2283	kref_get(&udev->kref);
   2284
   2285	ret = tcmu_send_dev_add_event(udev);
   2286	if (ret)
   2287		goto err_netlink;
   2288
   2289	mutex_lock(&root_udev_mutex);
   2290	list_add(&udev->node, &root_udev);
   2291	mutex_unlock(&root_udev_mutex);
   2292
   2293	return 0;
   2294
   2295err_netlink:
   2296	kref_put(&udev->kref, tcmu_dev_kref_release);
   2297	uio_unregister_device(&udev->uio_info);
   2298err_register:
   2299	vfree(udev->mb_addr);
   2300	udev->mb_addr = NULL;
   2301err_vzalloc:
   2302	bitmap_free(udev->data_bitmap);
   2303	udev->data_bitmap = NULL;
   2304err_bitmap_alloc:
   2305	kfree(info->name);
   2306	info->name = NULL;
   2307
   2308	return ret;
   2309}
   2310
   2311static void tcmu_free_device(struct se_device *dev)
   2312{
   2313	struct tcmu_dev *udev = TCMU_DEV(dev);
   2314
   2315	/* release ref from init */
   2316	kref_put(&udev->kref, tcmu_dev_kref_release);
   2317}
   2318
   2319static void tcmu_destroy_device(struct se_device *dev)
   2320{
   2321	struct tcmu_dev *udev = TCMU_DEV(dev);
   2322
   2323	del_timer_sync(&udev->cmd_timer);
   2324	del_timer_sync(&udev->qfull_timer);
   2325
   2326	mutex_lock(&root_udev_mutex);
   2327	list_del(&udev->node);
   2328	mutex_unlock(&root_udev_mutex);
   2329
   2330	tcmu_send_dev_remove_event(udev);
   2331
   2332	uio_unregister_device(&udev->uio_info);
   2333
   2334	/* release ref from configure */
   2335	kref_put(&udev->kref, tcmu_dev_kref_release);
   2336}
   2337
   2338static void tcmu_unblock_dev(struct tcmu_dev *udev)
   2339{
   2340	mutex_lock(&udev->cmdr_lock);
   2341	clear_bit(TCMU_DEV_BIT_BLOCKED, &udev->flags);
   2342	mutex_unlock(&udev->cmdr_lock);
   2343}
   2344
   2345static void tcmu_block_dev(struct tcmu_dev *udev)
   2346{
   2347	mutex_lock(&udev->cmdr_lock);
   2348
   2349	if (test_and_set_bit(TCMU_DEV_BIT_BLOCKED, &udev->flags))
   2350		goto unlock;
   2351
   2352	/* complete IO that has executed successfully */
   2353	tcmu_handle_completions(udev);
   2354	/* fail IO waiting to be queued */
   2355	run_qfull_queue(udev, true);
   2356
   2357unlock:
   2358	mutex_unlock(&udev->cmdr_lock);
   2359}
   2360
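        	/*
        	 * Forget everything userspace has on the ring. Outstanding commands
        	 * are completed with BUSY (err_level 1, retryable) or CHECK CONDITION
        	 * (err_level 2, hard failure); expired and kept-buffer commands are
        	 * simply freed. Ring head/tail and the command timer are then reset
        	 * so userspace can start from scratch.
        	 */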
   2361static void tcmu_reset_ring(struct tcmu_dev *udev, u8 err_level)
   2362{
   2363	struct tcmu_mailbox *mb;
   2364	struct tcmu_cmd *cmd;
   2365	unsigned long i;
   2366
   2367	mutex_lock(&udev->cmdr_lock);
   2368
   2369	xa_for_each(&udev->commands, i, cmd) {
   2370		pr_debug("removing cmd %u on dev %s from ring %s\n",
   2371			 cmd->cmd_id, udev->name,
   2372			 test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags) ?
   2373			 "(is expired)" :
   2374			 (test_bit(TCMU_CMD_BIT_KEEP_BUF, &cmd->flags) ?
   2375			 "(is keep buffer)" : ""));
   2376
   2377		xa_erase(&udev->commands, i);
   2378		if (!test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags) &&
   2379		    !test_bit(TCMU_CMD_BIT_KEEP_BUF, &cmd->flags)) {
   2380			WARN_ON(!cmd->se_cmd);
   2381			list_del_init(&cmd->queue_entry);
   2382			cmd->se_cmd->priv = NULL;
   2383			if (err_level == 1) {
   2384				/*
   2385				 * Userspace was not able to start the
   2386				 * command or it is retryable.
   2387				 */
   2388				target_complete_cmd(cmd->se_cmd, SAM_STAT_BUSY);
   2389			} else {
   2390				/* hard failure */
   2391				target_complete_cmd(cmd->se_cmd,
   2392						    SAM_STAT_CHECK_CONDITION);
   2393			}
   2394		}
   2395		tcmu_cmd_free_data(cmd, cmd->dbi_cnt);
   2396		tcmu_free_cmd(cmd);
   2397	}
   2398
   2399	mb = udev->mb_addr;
   2400	tcmu_flush_dcache_range(mb, sizeof(*mb));
   2401	pr_debug("mb last %u head %u tail %u\n", udev->cmdr_last_cleaned,
   2402		 mb->cmd_tail, mb->cmd_head);
   2403
   2404	udev->cmdr_last_cleaned = 0;
   2405	mb->cmd_tail = 0;
   2406	mb->cmd_head = 0;
   2407	tcmu_flush_dcache_range(mb, sizeof(*mb));
   2408	clear_bit(TCMU_DEV_BIT_BROKEN, &udev->flags);
   2409
   2410	del_timer(&udev->cmd_timer);
   2411
   2412	/*
    2413	 * The ring is now empty and the qfull queue never contains aborted
    2414	 * commands, so no TMR in the tmr queue refers to a relevant cmd_id.
    2415	 * After a ring reset userspace should start from scratch, so even a
    2416	 * LUN RESET message is no longer relevant.
    2417	 * Therefore remove all TMRs from the tmr queue.
   2418	 */
   2419	tcmu_remove_all_queued_tmr(udev);
   2420
   2421	run_qfull_queue(udev, false);
   2422
   2423	mutex_unlock(&udev->cmdr_lock);
   2424}
   2425
   2426enum {
   2427	Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_hw_max_sectors,
   2428	Opt_nl_reply_supported, Opt_max_data_area_mb, Opt_data_pages_per_blk,
   2429	Opt_cmd_ring_size_mb, Opt_err,
   2430};
   2431
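        	/*
        	 * These options are parsed from the configfs "control" attribute of a
        	 * tcm-user backstore. Illustrative example (HBA and device names are
        	 * site-specific, the values shown are arbitrary):
        	 *
        	 *   echo "dev_config=foo/bar,max_data_area_mb=128" > \
        	 *       /sys/kernel/config/target/core/user_0/mydev/control
        	 */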
   2432static match_table_t tokens = {
   2433	{Opt_dev_config, "dev_config=%s"},
   2434	{Opt_dev_size, "dev_size=%s"},
   2435	{Opt_hw_block_size, "hw_block_size=%d"},
   2436	{Opt_hw_max_sectors, "hw_max_sectors=%d"},
   2437	{Opt_nl_reply_supported, "nl_reply_supported=%d"},
   2438	{Opt_max_data_area_mb, "max_data_area_mb=%d"},
   2439	{Opt_data_pages_per_blk, "data_pages_per_blk=%d"},
   2440	{Opt_cmd_ring_size_mb, "cmd_ring_size_mb=%d"},
   2441	{Opt_err, NULL}
   2442};
   2443
   2444static int tcmu_set_dev_attrib(substring_t *arg, u32 *dev_attrib)
   2445{
   2446	int val, ret;
   2447
   2448	ret = match_int(arg, &val);
   2449	if (ret < 0) {
   2450		pr_err("match_int() failed for dev attrib. Error %d.\n",
   2451		       ret);
   2452		return ret;
   2453	}
   2454
   2455	if (val <= 0) {
   2456		pr_err("Invalid dev attrib value %d. Must be greater than zero.\n",
   2457		       val);
   2458		return -EINVAL;
   2459	}
   2460	*dev_attrib = val;
   2461	return 0;
   2462}
   2463
   2464static int tcmu_set_max_blocks_param(struct tcmu_dev *udev, substring_t *arg)
   2465{
   2466	int val, ret;
   2467	uint32_t pages_per_blk = udev->data_pages_per_blk;
   2468
   2469	ret = match_int(arg, &val);
   2470	if (ret < 0) {
   2471		pr_err("match_int() failed for max_data_area_mb=. Error %d.\n",
   2472		       ret);
   2473		return ret;
   2474	}
   2475	if (val <= 0) {
   2476		pr_err("Invalid max_data_area %d.\n", val);
   2477		return -EINVAL;
   2478	}
   2479	if (val > TCMU_PAGES_TO_MBS(tcmu_global_max_pages)) {
   2480		pr_err("%d is too large. Adjusting max_data_area_mb to global limit of %u\n",
   2481		       val, TCMU_PAGES_TO_MBS(tcmu_global_max_pages));
   2482		val = TCMU_PAGES_TO_MBS(tcmu_global_max_pages);
   2483	}
   2484	if (TCMU_MBS_TO_PAGES(val) < pages_per_blk) {
   2485		pr_err("Invalid max_data_area %d (%zu pages): smaller than data_pages_per_blk (%u pages).\n",
   2486		       val, TCMU_MBS_TO_PAGES(val), pages_per_blk);
   2487		return -EINVAL;
   2488	}
   2489
   2490	mutex_lock(&udev->cmdr_lock);
   2491	if (udev->data_bitmap) {
   2492		pr_err("Cannot set max_data_area_mb after it has been enabled.\n");
   2493		ret = -EINVAL;
   2494		goto unlock;
   2495	}
   2496
   2497	udev->data_area_mb = val;
   2498	udev->max_blocks = TCMU_MBS_TO_PAGES(val) / pages_per_blk;
   2499
   2500unlock:
   2501	mutex_unlock(&udev->cmdr_lock);
   2502	return ret;
   2503}
   2504
   2505static int tcmu_set_data_pages_per_blk(struct tcmu_dev *udev, substring_t *arg)
   2506{
   2507	int val, ret;
   2508
   2509	ret = match_int(arg, &val);
   2510	if (ret < 0) {
   2511		pr_err("match_int() failed for data_pages_per_blk=. Error %d.\n",
   2512		       ret);
   2513		return ret;
   2514	}
   2515
   2516	if (val > TCMU_MBS_TO_PAGES(udev->data_area_mb)) {
    2517		pr_err("Invalid data_pages_per_blk %d: greater than max_data_area_mb %d (%zd pages).\n",
   2518		       val, udev->data_area_mb,
   2519		       TCMU_MBS_TO_PAGES(udev->data_area_mb));
   2520		return -EINVAL;
   2521	}
   2522
   2523	mutex_lock(&udev->cmdr_lock);
   2524	if (udev->data_bitmap) {
   2525		pr_err("Cannot set data_pages_per_blk after it has been enabled.\n");
   2526		ret = -EINVAL;
   2527		goto unlock;
   2528	}
   2529
   2530	udev->data_pages_per_blk = val;
   2531	udev->max_blocks = TCMU_MBS_TO_PAGES(udev->data_area_mb) / val;
   2532
   2533unlock:
   2534	mutex_unlock(&udev->cmdr_lock);
   2535	return ret;
   2536}
   2537
   2538static int tcmu_set_cmd_ring_size(struct tcmu_dev *udev, substring_t *arg)
   2539{
   2540	int val, ret;
   2541
   2542	ret = match_int(arg, &val);
   2543	if (ret < 0) {
   2544		pr_err("match_int() failed for cmd_ring_size_mb=. Error %d.\n",
   2545		       ret);
   2546		return ret;
   2547	}
   2548
   2549	if (val <= 0) {
   2550		pr_err("Invalid cmd_ring_size_mb %d.\n", val);
   2551		return -EINVAL;
   2552	}
   2553
   2554	mutex_lock(&udev->cmdr_lock);
   2555	if (udev->data_bitmap) {
   2556		pr_err("Cannot set cmd_ring_size_mb after it has been enabled.\n");
   2557		ret = -EINVAL;
   2558		goto unlock;
   2559	}
   2560
   2561	udev->cmdr_size = (val << 20) - CMDR_OFF;
   2562	if (val > (MB_CMDR_SIZE_DEF >> 20)) {
   2563		pr_err("%d is too large. Adjusting cmd_ring_size_mb to global limit of %u\n",
   2564		       val, (MB_CMDR_SIZE_DEF >> 20));
   2565		udev->cmdr_size = CMDR_SIZE_DEF;
   2566	}
   2567
   2568unlock:
   2569	mutex_unlock(&udev->cmdr_lock);
   2570	return ret;
   2571}
   2572
   2573static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
   2574		const char *page, ssize_t count)
   2575{
   2576	struct tcmu_dev *udev = TCMU_DEV(dev);
   2577	char *orig, *ptr, *opts;
   2578	substring_t args[MAX_OPT_ARGS];
   2579	int ret = 0, token;
   2580
   2581	opts = kstrdup(page, GFP_KERNEL);
   2582	if (!opts)
   2583		return -ENOMEM;
   2584
   2585	orig = opts;
   2586
   2587	while ((ptr = strsep(&opts, ",\n")) != NULL) {
   2588		if (!*ptr)
   2589			continue;
   2590
   2591		token = match_token(ptr, tokens, args);
   2592		switch (token) {
   2593		case Opt_dev_config:
   2594			if (match_strlcpy(udev->dev_config, &args[0],
   2595					  TCMU_CONFIG_LEN) == 0) {
   2596				ret = -EINVAL;
   2597				break;
   2598			}
   2599			pr_debug("TCMU: Referencing Path: %s\n", udev->dev_config);
   2600			break;
   2601		case Opt_dev_size:
   2602			ret = match_u64(&args[0], &udev->dev_size);
   2603			if (ret < 0)
   2604				pr_err("match_u64() failed for dev_size=. Error %d.\n",
   2605				       ret);
   2606			break;
   2607		case Opt_hw_block_size:
   2608			ret = tcmu_set_dev_attrib(&args[0],
   2609					&(dev->dev_attrib.hw_block_size));
   2610			break;
   2611		case Opt_hw_max_sectors:
   2612			ret = tcmu_set_dev_attrib(&args[0],
   2613					&(dev->dev_attrib.hw_max_sectors));
   2614			break;
   2615		case Opt_nl_reply_supported:
   2616			ret = match_int(&args[0], &udev->nl_reply_supported);
   2617			if (ret < 0)
   2618				pr_err("match_int() failed for nl_reply_supported=. Error %d.\n",
   2619				       ret);
   2620			break;
   2621		case Opt_max_data_area_mb:
   2622			ret = tcmu_set_max_blocks_param(udev, &args[0]);
   2623			break;
   2624		case Opt_data_pages_per_blk:
   2625			ret = tcmu_set_data_pages_per_blk(udev, &args[0]);
   2626			break;
   2627		case Opt_cmd_ring_size_mb:
   2628			ret = tcmu_set_cmd_ring_size(udev, &args[0]);
   2629			break;
   2630		default:
   2631			break;
   2632		}
   2633
   2634		if (ret)
   2635			break;
   2636	}
   2637
   2638	kfree(orig);
   2639	return (!ret) ? count : ret;
   2640}
   2641
   2642static ssize_t tcmu_show_configfs_dev_params(struct se_device *dev, char *b)
   2643{
   2644	struct tcmu_dev *udev = TCMU_DEV(dev);
   2645	ssize_t bl = 0;
   2646
   2647	bl = sprintf(b + bl, "Config: %s ",
   2648		     udev->dev_config[0] ? udev->dev_config : "NULL");
   2649	bl += sprintf(b + bl, "Size: %llu ", udev->dev_size);
   2650	bl += sprintf(b + bl, "MaxDataAreaMB: %u ", udev->data_area_mb);
   2651	bl += sprintf(b + bl, "DataPagesPerBlk: %u ", udev->data_pages_per_blk);
   2652	bl += sprintf(b + bl, "CmdRingSizeMB: %u\n",
   2653		      (udev->cmdr_size + CMDR_OFF) >> 20);
   2654
   2655	return bl;
   2656}
   2657
   2658static sector_t tcmu_get_blocks(struct se_device *dev)
   2659{
   2660	struct tcmu_dev *udev = TCMU_DEV(dev);
   2661
   2662	return div_u64(udev->dev_size - dev->dev_attrib.block_size,
   2663		       dev->dev_attrib.block_size);
   2664}
   2665
   2666static sense_reason_t
   2667tcmu_parse_cdb(struct se_cmd *cmd)
   2668{
   2669	return passthrough_parse_cdb(cmd, tcmu_queue_cmd);
   2670}
   2671
   2672static ssize_t tcmu_cmd_time_out_show(struct config_item *item, char *page)
   2673{
   2674	struct se_dev_attrib *da = container_of(to_config_group(item),
   2675					struct se_dev_attrib, da_group);
   2676	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
   2677
   2678	return snprintf(page, PAGE_SIZE, "%lu\n", udev->cmd_time_out / MSEC_PER_SEC);
   2679}
   2680
   2681static ssize_t tcmu_cmd_time_out_store(struct config_item *item, const char *page,
   2682				       size_t count)
   2683{
   2684	struct se_dev_attrib *da = container_of(to_config_group(item),
   2685					struct se_dev_attrib, da_group);
   2686	struct tcmu_dev *udev = container_of(da->da_dev,
   2687					struct tcmu_dev, se_dev);
   2688	u32 val;
   2689	int ret;
   2690
   2691	if (da->da_dev->export_count) {
   2692		pr_err("Unable to set tcmu cmd_time_out while exports exist\n");
   2693		return -EINVAL;
   2694	}
   2695
   2696	ret = kstrtou32(page, 0, &val);
   2697	if (ret < 0)
   2698		return ret;
   2699
   2700	udev->cmd_time_out = val * MSEC_PER_SEC;
   2701	return count;
   2702}
   2703CONFIGFS_ATTR(tcmu_, cmd_time_out);
   2704
   2705static ssize_t tcmu_qfull_time_out_show(struct config_item *item, char *page)
   2706{
   2707	struct se_dev_attrib *da = container_of(to_config_group(item),
   2708						struct se_dev_attrib, da_group);
   2709	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
   2710
   2711	return snprintf(page, PAGE_SIZE, "%ld\n", udev->qfull_time_out <= 0 ?
   2712			udev->qfull_time_out :
   2713			udev->qfull_time_out / MSEC_PER_SEC);
   2714}
   2715
   2716static ssize_t tcmu_qfull_time_out_store(struct config_item *item,
   2717					 const char *page, size_t count)
   2718{
   2719	struct se_dev_attrib *da = container_of(to_config_group(item),
   2720					struct se_dev_attrib, da_group);
   2721	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
   2722	s32 val;
   2723	int ret;
   2724
   2725	ret = kstrtos32(page, 0, &val);
   2726	if (ret < 0)
   2727		return ret;
   2728
   2729	if (val >= 0) {
   2730		udev->qfull_time_out = val * MSEC_PER_SEC;
   2731	} else if (val == -1) {
   2732		udev->qfull_time_out = val;
   2733	} else {
   2734		printk(KERN_ERR "Invalid qfull timeout value %d\n", val);
   2735		return -EINVAL;
   2736	}
   2737	return count;
   2738}
   2739CONFIGFS_ATTR(tcmu_, qfull_time_out);
   2740
   2741static ssize_t tcmu_max_data_area_mb_show(struct config_item *item, char *page)
   2742{
   2743	struct se_dev_attrib *da = container_of(to_config_group(item),
   2744						struct se_dev_attrib, da_group);
   2745	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
   2746
   2747	return snprintf(page, PAGE_SIZE, "%u\n", udev->data_area_mb);
   2748}
   2749CONFIGFS_ATTR_RO(tcmu_, max_data_area_mb);
   2750
   2751static ssize_t tcmu_data_pages_per_blk_show(struct config_item *item,
   2752					    char *page)
   2753{
   2754	struct se_dev_attrib *da = container_of(to_config_group(item),
   2755						struct se_dev_attrib, da_group);
   2756	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
   2757
   2758	return snprintf(page, PAGE_SIZE, "%u\n", udev->data_pages_per_blk);
   2759}
   2760CONFIGFS_ATTR_RO(tcmu_, data_pages_per_blk);
   2761
   2762static ssize_t tcmu_cmd_ring_size_mb_show(struct config_item *item, char *page)
   2763{
   2764	struct se_dev_attrib *da = container_of(to_config_group(item),
   2765						struct se_dev_attrib, da_group);
   2766	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
   2767
   2768	return snprintf(page, PAGE_SIZE, "%u\n",
   2769			(udev->cmdr_size + CMDR_OFF) >> 20);
   2770}
   2771CONFIGFS_ATTR_RO(tcmu_, cmd_ring_size_mb);
   2772
   2773static ssize_t tcmu_dev_config_show(struct config_item *item, char *page)
   2774{
   2775	struct se_dev_attrib *da = container_of(to_config_group(item),
   2776						struct se_dev_attrib, da_group);
   2777	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
   2778
   2779	return snprintf(page, PAGE_SIZE, "%s\n", udev->dev_config);
   2780}
   2781
   2782static int tcmu_send_dev_config_event(struct tcmu_dev *udev,
   2783				      const char *reconfig_data)
   2784{
   2785	struct sk_buff *skb = NULL;
   2786	void *msg_header = NULL;
   2787	int ret = 0;
   2788
   2789	ret = tcmu_netlink_event_init(udev, TCMU_CMD_RECONFIG_DEVICE,
   2790				      &skb, &msg_header);
   2791	if (ret < 0)
   2792		return ret;
   2793	ret = nla_put_string(skb, TCMU_ATTR_DEV_CFG, reconfig_data);
   2794	if (ret < 0) {
   2795		nlmsg_free(skb);
   2796		return ret;
   2797	}
   2798	return tcmu_netlink_event_send(udev, TCMU_CMD_RECONFIG_DEVICE,
   2799				       skb, msg_header);
   2800}
   2801
   2802
   2803static ssize_t tcmu_dev_config_store(struct config_item *item, const char *page,
   2804				     size_t count)
   2805{
   2806	struct se_dev_attrib *da = container_of(to_config_group(item),
   2807						struct se_dev_attrib, da_group);
   2808	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
   2809	int ret, len;
   2810
   2811	len = strlen(page);
   2812	if (!len || len > TCMU_CONFIG_LEN - 1)
   2813		return -EINVAL;
   2814
   2815	/* Check if device has been configured before */
   2816	if (target_dev_configured(&udev->se_dev)) {
   2817		ret = tcmu_send_dev_config_event(udev, page);
   2818		if (ret) {
   2819			pr_err("Unable to reconfigure device\n");
   2820			return ret;
   2821		}
   2822		strlcpy(udev->dev_config, page, TCMU_CONFIG_LEN);
   2823
   2824		ret = tcmu_update_uio_info(udev);
   2825		if (ret)
   2826			return ret;
   2827		return count;
   2828	}
   2829	strlcpy(udev->dev_config, page, TCMU_CONFIG_LEN);
   2830
   2831	return count;
   2832}
   2833CONFIGFS_ATTR(tcmu_, dev_config);
   2834
   2835static ssize_t tcmu_dev_size_show(struct config_item *item, char *page)
   2836{
   2837	struct se_dev_attrib *da = container_of(to_config_group(item),
   2838						struct se_dev_attrib, da_group);
   2839	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
   2840
   2841	return snprintf(page, PAGE_SIZE, "%llu\n", udev->dev_size);
   2842}
   2843
   2844static int tcmu_send_dev_size_event(struct tcmu_dev *udev, u64 size)
   2845{
   2846	struct sk_buff *skb = NULL;
   2847	void *msg_header = NULL;
   2848	int ret = 0;
   2849
   2850	ret = tcmu_netlink_event_init(udev, TCMU_CMD_RECONFIG_DEVICE,
   2851				      &skb, &msg_header);
   2852	if (ret < 0)
   2853		return ret;
   2854	ret = nla_put_u64_64bit(skb, TCMU_ATTR_DEV_SIZE,
   2855				size, TCMU_ATTR_PAD);
   2856	if (ret < 0) {
   2857		nlmsg_free(skb);
   2858		return ret;
   2859	}
   2860	return tcmu_netlink_event_send(udev, TCMU_CMD_RECONFIG_DEVICE,
   2861				       skb, msg_header);
   2862}
   2863
   2864static ssize_t tcmu_dev_size_store(struct config_item *item, const char *page,
   2865				   size_t count)
   2866{
   2867	struct se_dev_attrib *da = container_of(to_config_group(item),
   2868						struct se_dev_attrib, da_group);
   2869	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
   2870	u64 val;
   2871	int ret;
   2872
   2873	ret = kstrtou64(page, 0, &val);
   2874	if (ret < 0)
   2875		return ret;
   2876
   2877	/* Check if device has been configured before */
   2878	if (target_dev_configured(&udev->se_dev)) {
   2879		ret = tcmu_send_dev_size_event(udev, val);
   2880		if (ret) {
   2881			pr_err("Unable to reconfigure device\n");
   2882			return ret;
   2883		}
   2884	}
   2885	udev->dev_size = val;
   2886	return count;
   2887}
   2888CONFIGFS_ATTR(tcmu_, dev_size);
   2889
   2890static ssize_t tcmu_nl_reply_supported_show(struct config_item *item,
   2891		char *page)
   2892{
   2893	struct se_dev_attrib *da = container_of(to_config_group(item),
   2894						struct se_dev_attrib, da_group);
   2895	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
   2896
   2897	return snprintf(page, PAGE_SIZE, "%d\n", udev->nl_reply_supported);
   2898}
   2899
   2900static ssize_t tcmu_nl_reply_supported_store(struct config_item *item,
   2901		const char *page, size_t count)
   2902{
   2903	struct se_dev_attrib *da = container_of(to_config_group(item),
   2904						struct se_dev_attrib, da_group);
   2905	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
   2906	s8 val;
   2907	int ret;
   2908
   2909	ret = kstrtos8(page, 0, &val);
   2910	if (ret < 0)
   2911		return ret;
   2912
   2913	udev->nl_reply_supported = val;
   2914	return count;
   2915}
   2916CONFIGFS_ATTR(tcmu_, nl_reply_supported);
   2917
   2918static ssize_t tcmu_emulate_write_cache_show(struct config_item *item,
   2919					     char *page)
   2920{
   2921	struct se_dev_attrib *da = container_of(to_config_group(item),
   2922					struct se_dev_attrib, da_group);
   2923
   2924	return snprintf(page, PAGE_SIZE, "%i\n", da->emulate_write_cache);
   2925}
   2926
   2927static int tcmu_send_emulate_write_cache(struct tcmu_dev *udev, u8 val)
   2928{
   2929	struct sk_buff *skb = NULL;
   2930	void *msg_header = NULL;
   2931	int ret = 0;
   2932
   2933	ret = tcmu_netlink_event_init(udev, TCMU_CMD_RECONFIG_DEVICE,
   2934				      &skb, &msg_header);
   2935	if (ret < 0)
   2936		return ret;
   2937	ret = nla_put_u8(skb, TCMU_ATTR_WRITECACHE, val);
   2938	if (ret < 0) {
   2939		nlmsg_free(skb);
   2940		return ret;
   2941	}
   2942	return tcmu_netlink_event_send(udev, TCMU_CMD_RECONFIG_DEVICE,
   2943				       skb, msg_header);
   2944}
   2945
   2946static ssize_t tcmu_emulate_write_cache_store(struct config_item *item,
   2947					      const char *page, size_t count)
   2948{
   2949	struct se_dev_attrib *da = container_of(to_config_group(item),
   2950					struct se_dev_attrib, da_group);
   2951	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
   2952	u8 val;
   2953	int ret;
   2954
   2955	ret = kstrtou8(page, 0, &val);
   2956	if (ret < 0)
   2957		return ret;
   2958
   2959	/* Check if device has been configured before */
   2960	if (target_dev_configured(&udev->se_dev)) {
   2961		ret = tcmu_send_emulate_write_cache(udev, val);
   2962		if (ret) {
   2963			pr_err("Unable to reconfigure device\n");
   2964			return ret;
   2965		}
   2966	}
   2967
   2968	da->emulate_write_cache = val;
   2969	return count;
   2970}
   2971CONFIGFS_ATTR(tcmu_, emulate_write_cache);
   2972
   2973static ssize_t tcmu_tmr_notification_show(struct config_item *item, char *page)
   2974{
   2975	struct se_dev_attrib *da = container_of(to_config_group(item),
   2976					struct se_dev_attrib, da_group);
   2977	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
   2978
   2979	return snprintf(page, PAGE_SIZE, "%i\n",
   2980			test_bit(TCMU_DEV_BIT_TMR_NOTIFY, &udev->flags));
   2981}
   2982
   2983static ssize_t tcmu_tmr_notification_store(struct config_item *item,
   2984					   const char *page, size_t count)
   2985{
   2986	struct se_dev_attrib *da = container_of(to_config_group(item),
   2987					struct se_dev_attrib, da_group);
   2988	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
   2989	u8 val;
   2990	int ret;
   2991
   2992	ret = kstrtou8(page, 0, &val);
   2993	if (ret < 0)
   2994		return ret;
   2995	if (val > 1)
   2996		return -EINVAL;
   2997
   2998	if (val)
   2999		set_bit(TCMU_DEV_BIT_TMR_NOTIFY, &udev->flags);
   3000	else
   3001		clear_bit(TCMU_DEV_BIT_TMR_NOTIFY, &udev->flags);
   3002	return count;
   3003}
   3004CONFIGFS_ATTR(tcmu_, tmr_notification);
   3005
   3006static ssize_t tcmu_block_dev_show(struct config_item *item, char *page)
   3007{
   3008	struct se_device *se_dev = container_of(to_config_group(item),
   3009						struct se_device,
   3010						dev_action_group);
   3011	struct tcmu_dev *udev = TCMU_DEV(se_dev);
   3012
   3013	if (test_bit(TCMU_DEV_BIT_BLOCKED, &udev->flags))
   3014		return snprintf(page, PAGE_SIZE, "%s\n", "blocked");
   3015	else
   3016		return snprintf(page, PAGE_SIZE, "%s\n", "unblocked");
   3017}
   3018
   3019static ssize_t tcmu_block_dev_store(struct config_item *item, const char *page,
   3020				    size_t count)
   3021{
   3022	struct se_device *se_dev = container_of(to_config_group(item),
   3023						struct se_device,
   3024						dev_action_group);
   3025	struct tcmu_dev *udev = TCMU_DEV(se_dev);
   3026	u8 val;
   3027	int ret;
   3028
   3029	if (!target_dev_configured(&udev->se_dev)) {
   3030		pr_err("Device is not configured.\n");
   3031		return -EINVAL;
   3032	}
   3033
   3034	ret = kstrtou8(page, 0, &val);
   3035	if (ret < 0)
   3036		return ret;
   3037
   3038	if (val > 1) {
   3039		pr_err("Invalid block value %d\n", val);
   3040		return -EINVAL;
   3041	}
   3042
   3043	if (!val)
   3044		tcmu_unblock_dev(udev);
   3045	else
   3046		tcmu_block_dev(udev);
   3047	return count;
   3048}
   3049CONFIGFS_ATTR(tcmu_, block_dev);
   3050
   3051static ssize_t tcmu_reset_ring_store(struct config_item *item, const char *page,
   3052				     size_t count)
   3053{
   3054	struct se_device *se_dev = container_of(to_config_group(item),
   3055						struct se_device,
   3056						dev_action_group);
   3057	struct tcmu_dev *udev = TCMU_DEV(se_dev);
   3058	u8 val;
   3059	int ret;
   3060
   3061	if (!target_dev_configured(&udev->se_dev)) {
   3062		pr_err("Device is not configured.\n");
   3063		return -EINVAL;
   3064	}
   3065
   3066	ret = kstrtou8(page, 0, &val);
   3067	if (ret < 0)
   3068		return ret;
   3069
   3070	if (val != 1 && val != 2) {
   3071		pr_err("Invalid reset ring value %d\n", val);
   3072		return -EINVAL;
   3073	}
   3074
   3075	tcmu_reset_ring(udev, val);
   3076	return count;
   3077}
   3078CONFIGFS_ATTR_WO(tcmu_, reset_ring);
   3079
   3080static ssize_t tcmu_free_kept_buf_store(struct config_item *item, const char *page,
   3081					size_t count)
   3082{
   3083	struct se_device *se_dev = container_of(to_config_group(item),
   3084						struct se_device,
   3085						dev_action_group);
   3086	struct tcmu_dev *udev = TCMU_DEV(se_dev);
   3087	struct tcmu_cmd *cmd;
   3088	u16 cmd_id;
   3089	int ret;
   3090
   3091	if (!target_dev_configured(&udev->se_dev)) {
   3092		pr_err("Device is not configured.\n");
   3093		return -EINVAL;
   3094	}
   3095
   3096	ret = kstrtou16(page, 0, &cmd_id);
   3097	if (ret < 0)
   3098		return ret;
   3099
   3100	mutex_lock(&udev->cmdr_lock);
   3101
   3102	{
   3103		XA_STATE(xas, &udev->commands, cmd_id);
   3104
   3105		xas_lock(&xas);
   3106		cmd = xas_load(&xas);
   3107		if (!cmd) {
   3108			pr_err("free_kept_buf: cmd_id %d not found\n", cmd_id);
   3109			count = -EINVAL;
   3110			xas_unlock(&xas);
   3111			goto out_unlock;
   3112		}
   3113		if (!test_bit(TCMU_CMD_BIT_KEEP_BUF, &cmd->flags)) {
   3114			pr_err("free_kept_buf: cmd_id %d was not completed with KEEP_BUF\n",
   3115			       cmd_id);
   3116			count = -EINVAL;
   3117			xas_unlock(&xas);
   3118			goto out_unlock;
   3119		}
   3120		xas_store(&xas, NULL);
   3121		xas_unlock(&xas);
   3122	}
   3123
   3124	tcmu_cmd_free_data(cmd, cmd->dbi_cnt);
   3125	tcmu_free_cmd(cmd);
   3126	/*
    3127	 * We only freed data space, not ring space. Therefore we don't call
   3128	 * run_tmr_queue, but call run_qfull_queue if tmr_list is empty.
   3129	 */
   3130	if (list_empty(&udev->tmr_queue))
   3131		run_qfull_queue(udev, false);
   3132
   3133out_unlock:
   3134	mutex_unlock(&udev->cmdr_lock);
   3135	return count;
   3136}
   3137CONFIGFS_ATTR_WO(tcmu_, free_kept_buf);
   3138
   3139static struct configfs_attribute *tcmu_attrib_attrs[] = {
   3140	&tcmu_attr_cmd_time_out,
   3141	&tcmu_attr_qfull_time_out,
   3142	&tcmu_attr_max_data_area_mb,
   3143	&tcmu_attr_data_pages_per_blk,
   3144	&tcmu_attr_cmd_ring_size_mb,
   3145	&tcmu_attr_dev_config,
   3146	&tcmu_attr_dev_size,
   3147	&tcmu_attr_emulate_write_cache,
   3148	&tcmu_attr_tmr_notification,
   3149	&tcmu_attr_nl_reply_supported,
   3150	NULL,
   3151};
   3152
   3153static struct configfs_attribute **tcmu_attrs;
   3154
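        	/*
        	 * Action attributes live in the device's configfs "action" group.
        	 * Illustrative usage (paths depend on the configured HBA and device
        	 * names):
        	 *
        	 *   echo 1        > .../target/core/user_0/mydev/action/block_dev
        	 *   echo 2        > .../target/core/user_0/mydev/action/reset_ring
        	 *   echo <cmd_id> > .../target/core/user_0/mydev/action/free_kept_buf
        	 */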
   3155static struct configfs_attribute *tcmu_action_attrs[] = {
   3156	&tcmu_attr_block_dev,
   3157	&tcmu_attr_reset_ring,
   3158	&tcmu_attr_free_kept_buf,
   3159	NULL,
   3160};
   3161
   3162static struct target_backend_ops tcmu_ops = {
   3163	.name			= "user",
   3164	.owner			= THIS_MODULE,
   3165	.transport_flags_default = TRANSPORT_FLAG_PASSTHROUGH,
   3166	.transport_flags_changeable = TRANSPORT_FLAG_PASSTHROUGH_PGR |
   3167				      TRANSPORT_FLAG_PASSTHROUGH_ALUA,
   3168	.attach_hba		= tcmu_attach_hba,
   3169	.detach_hba		= tcmu_detach_hba,
   3170	.alloc_device		= tcmu_alloc_device,
   3171	.configure_device	= tcmu_configure_device,
   3172	.destroy_device		= tcmu_destroy_device,
   3173	.free_device		= tcmu_free_device,
   3174	.unplug_device		= tcmu_unplug_device,
   3175	.plug_device		= tcmu_plug_device,
   3176	.parse_cdb		= tcmu_parse_cdb,
   3177	.tmr_notify		= tcmu_tmr_notify,
   3178	.set_configfs_dev_params = tcmu_set_configfs_dev_params,
   3179	.show_configfs_dev_params = tcmu_show_configfs_dev_params,
   3180	.get_device_type	= sbc_get_device_type,
   3181	.get_blocks		= tcmu_get_blocks,
   3182	.tb_dev_action_attrs	= tcmu_action_attrs,
   3183};
   3184
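        	/*
        	 * Data area reclaim: once the global page count exceeds
        	 * tcmu_global_max_pages, shrink each configured device's data area
        	 * down to its last used block, unmap the released range from
        	 * userspace and re-arm the delayed work while still over the limit.
        	 */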
   3185static void find_free_blocks(void)
   3186{
   3187	struct tcmu_dev *udev;
   3188	loff_t off;
   3189	u32 pages_freed, total_pages_freed = 0;
   3190	u32 start, end, block, total_blocks_freed = 0;
   3191
   3192	if (atomic_read(&global_page_count) <= tcmu_global_max_pages)
   3193		return;
   3194
   3195	mutex_lock(&root_udev_mutex);
   3196	list_for_each_entry(udev, &root_udev, node) {
   3197		mutex_lock(&udev->cmdr_lock);
   3198
   3199		if (!target_dev_configured(&udev->se_dev)) {
   3200			mutex_unlock(&udev->cmdr_lock);
   3201			continue;
   3202		}
   3203
   3204		/* Try to complete the finished commands first */
   3205		if (tcmu_handle_completions(udev))
   3206			run_qfull_queue(udev, false);
   3207
    3208		/* Skip udevs that are idle */
   3209		if (!udev->dbi_thresh) {
   3210			mutex_unlock(&udev->cmdr_lock);
   3211			continue;
   3212		}
   3213
   3214		end = udev->dbi_max + 1;
   3215		block = find_last_bit(udev->data_bitmap, end);
   3216		if (block == udev->dbi_max) {
   3217			/*
    3218			 * The last bit is dbi_max, so it is not possible to
    3219			 * reclaim any blocks.
   3220			 */
   3221			mutex_unlock(&udev->cmdr_lock);
   3222			continue;
   3223		} else if (block == end) {
    3224			/* The current udev will go to idle state */
   3225			udev->dbi_thresh = start = 0;
   3226			udev->dbi_max = 0;
   3227		} else {
   3228			udev->dbi_thresh = start = block + 1;
   3229			udev->dbi_max = block;
   3230		}
   3231
   3232		/*
   3233		 * Release the block pages.
   3234		 *
   3235		 * Also note that since tcmu_vma_fault() gets an extra page
   3236		 * refcount, tcmu_blocks_release() won't free pages if pages
   3237		 * are mapped. This means it is safe to call
   3238		 * tcmu_blocks_release() before unmap_mapping_range() which
   3239		 * drops the refcount of any pages it unmaps and thus releases
   3240		 * them.
   3241		 */
   3242		pages_freed = tcmu_blocks_release(udev, start, end - 1);
   3243
    3244		/* Unmap the released part of the data area, starting at off */
   3245		off = udev->data_off + (loff_t)start * udev->data_blk_size;
   3246		unmap_mapping_range(udev->inode->i_mapping, off, 0, 1);
   3247
   3248		mutex_unlock(&udev->cmdr_lock);
   3249
   3250		total_pages_freed += pages_freed;
   3251		total_blocks_freed += end - start;
   3252		pr_debug("Freed %u pages (total %u) from %u blocks (total %u) from %s.\n",
   3253			 pages_freed, total_pages_freed, end - start,
   3254			 total_blocks_freed, udev->name);
   3255	}
   3256	mutex_unlock(&root_udev_mutex);
   3257
   3258	if (atomic_read(&global_page_count) > tcmu_global_max_pages)
   3259		schedule_delayed_work(&tcmu_unmap_work, msecs_to_jiffies(5000));
   3260}
   3261
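        	/*
        	 * Expire overdue inflight and queue-full commands on every device
        	 * whose timer fired, then re-arm the per-device deadlines.
        	 */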
   3262static void check_timedout_devices(void)
   3263{
   3264	struct tcmu_dev *udev, *tmp_dev;
   3265	struct tcmu_cmd *cmd, *tmp_cmd;
   3266	LIST_HEAD(devs);
   3267
   3268	spin_lock_bh(&timed_out_udevs_lock);
   3269	list_splice_init(&timed_out_udevs, &devs);
   3270
   3271	list_for_each_entry_safe(udev, tmp_dev, &devs, timedout_entry) {
   3272		list_del_init(&udev->timedout_entry);
   3273		spin_unlock_bh(&timed_out_udevs_lock);
   3274
   3275		mutex_lock(&udev->cmdr_lock);
   3276
   3277		/*
    3278		 * If cmd_time_out is disabled but qfull_time_out is set, the
    3279		 * deadline only reflects the qfull timeout. Ignore it.
   3280		 */
   3281		if (udev->cmd_time_out) {
   3282			list_for_each_entry_safe(cmd, tmp_cmd,
   3283						 &udev->inflight_queue,
   3284						 queue_entry) {
   3285				tcmu_check_expired_ring_cmd(cmd);
   3286			}
   3287			tcmu_set_next_deadline(&udev->inflight_queue,
   3288					       &udev->cmd_timer);
   3289		}
   3290		list_for_each_entry_safe(cmd, tmp_cmd, &udev->qfull_queue,
   3291					 queue_entry) {
   3292			tcmu_check_expired_queue_cmd(cmd);
   3293		}
   3294		tcmu_set_next_deadline(&udev->qfull_queue, &udev->qfull_timer);
   3295
   3296		mutex_unlock(&udev->cmdr_lock);
   3297
   3298		spin_lock_bh(&timed_out_udevs_lock);
   3299	}
   3300
   3301	spin_unlock_bh(&timed_out_udevs_lock);
   3302}
   3303
   3304static void tcmu_unmap_work_fn(struct work_struct *work)
   3305{
   3306	check_timedout_devices();
   3307	find_free_blocks();
   3308}
   3309
   3310static int __init tcmu_module_init(void)
   3311{
   3312	int ret, i, k, len = 0;
   3313
   3314	BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0);
   3315
   3316	INIT_DELAYED_WORK(&tcmu_unmap_work, tcmu_unmap_work_fn);
   3317
   3318	tcmu_cmd_cache = kmem_cache_create("tcmu_cmd_cache",
   3319				sizeof(struct tcmu_cmd),
   3320				__alignof__(struct tcmu_cmd),
   3321				0, NULL);
   3322	if (!tcmu_cmd_cache)
   3323		return -ENOMEM;
   3324
   3325	tcmu_root_device = root_device_register("tcm_user");
   3326	if (IS_ERR(tcmu_root_device)) {
   3327		ret = PTR_ERR(tcmu_root_device);
   3328		goto out_free_cache;
   3329	}
   3330
   3331	ret = genl_register_family(&tcmu_genl_family);
   3332	if (ret < 0) {
   3333		goto out_unreg_device;
   3334	}
   3335
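        	/*
        	 * Build a combined, NULL-terminated attribute array out of the
        	 * generic passthrough attributes, the passthrough PR attributes and
        	 * the tcmu-specific ones; the extra pointer counted below stays
        	 * zeroed by kzalloc() and terminates the array.
        	 */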
   3336	for (i = 0; passthrough_attrib_attrs[i] != NULL; i++)
   3337		len += sizeof(struct configfs_attribute *);
   3338	for (i = 0; passthrough_pr_attrib_attrs[i] != NULL; i++)
   3339		len += sizeof(struct configfs_attribute *);
   3340	for (i = 0; tcmu_attrib_attrs[i] != NULL; i++)
   3341		len += sizeof(struct configfs_attribute *);
   3342	len += sizeof(struct configfs_attribute *);
   3343
   3344	tcmu_attrs = kzalloc(len, GFP_KERNEL);
   3345	if (!tcmu_attrs) {
   3346		ret = -ENOMEM;
   3347		goto out_unreg_genl;
   3348	}
   3349
   3350	for (i = 0; passthrough_attrib_attrs[i] != NULL; i++)
   3351		tcmu_attrs[i] = passthrough_attrib_attrs[i];
   3352	for (k = 0; passthrough_pr_attrib_attrs[k] != NULL; k++)
   3353		tcmu_attrs[i++] = passthrough_pr_attrib_attrs[k];
   3354	for (k = 0; tcmu_attrib_attrs[k] != NULL; k++)
   3355		tcmu_attrs[i++] = tcmu_attrib_attrs[k];
   3356	tcmu_ops.tb_dev_attrib_attrs = tcmu_attrs;
   3357
   3358	ret = transport_backend_register(&tcmu_ops);
   3359	if (ret)
   3360		goto out_attrs;
   3361
   3362	return 0;
   3363
   3364out_attrs:
   3365	kfree(tcmu_attrs);
   3366out_unreg_genl:
   3367	genl_unregister_family(&tcmu_genl_family);
   3368out_unreg_device:
   3369	root_device_unregister(tcmu_root_device);
   3370out_free_cache:
   3371	kmem_cache_destroy(tcmu_cmd_cache);
   3372
   3373	return ret;
   3374}
   3375
   3376static void __exit tcmu_module_exit(void)
   3377{
   3378	cancel_delayed_work_sync(&tcmu_unmap_work);
   3379	target_backend_unregister(&tcmu_ops);
   3380	kfree(tcmu_attrs);
   3381	genl_unregister_family(&tcmu_genl_family);
   3382	root_device_unregister(tcmu_root_device);
   3383	kmem_cache_destroy(tcmu_cmd_cache);
   3384}
   3385
   3386MODULE_DESCRIPTION("TCM USER subsystem plugin");
   3387MODULE_AUTHOR("Shaohua Li <shli@kernel.org>");
   3388MODULE_AUTHOR("Andy Grover <agrover@redhat.com>");
   3389MODULE_LICENSE("GPL");
   3390
   3391module_init(tcmu_module_init);
   3392module_exit(tcmu_module_exit);