cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

vmw_pvscsi.c (44256B)


/*
 * Linux driver for VMware's para-virtualized SCSI HBA.
 *
 * Copyright (C) 2008-2014, VMware, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/pci.h>

#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_tcq.h>

#include "vmw_pvscsi.h"

#define PVSCSI_LINUX_DRIVER_DESC "VMware PVSCSI driver"

MODULE_DESCRIPTION(PVSCSI_LINUX_DRIVER_DESC);
MODULE_AUTHOR("VMware, Inc.");
MODULE_LICENSE("GPL");
MODULE_VERSION(PVSCSI_DRIVER_VERSION_STRING);

#define PVSCSI_DEFAULT_NUM_PAGES_PER_RING	8
#define PVSCSI_DEFAULT_NUM_PAGES_MSG_RING	1
#define PVSCSI_DEFAULT_QUEUE_DEPTH		254
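/*
 * One page of S/G elements per command; pvscsi_allocate_sg() checks at
 * build time that struct pvscsi_sg_list fits in SGL_SIZE.
 */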
#define SGL_SIZE				PAGE_SIZE

struct pvscsi_sg_list {
	struct PVSCSISGElement sge[PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT];
};

struct pvscsi_ctx {
	/*
	 * The index of the context in cmd_map serves as the context ID for a
	 * 1-to-1 mapping of completions back to requests.
	 */
	struct scsi_cmnd	*cmd;
	struct pvscsi_sg_list	*sgl;
	struct list_head	list;
	dma_addr_t		dataPA;
	dma_addr_t		sensePA;
	dma_addr_t		sglPA;
	struct completion	*abort_cmp;
};

struct pvscsi_adapter {
	char				*mmioBase;
	u8				rev;
	bool				use_msg;
	bool				use_req_threshold;

	spinlock_t			hw_lock;

	struct workqueue_struct		*workqueue;
	struct work_struct		work;

	struct PVSCSIRingReqDesc	*req_ring;
	unsigned			req_pages;
	unsigned			req_depth;
	dma_addr_t			reqRingPA;

	struct PVSCSIRingCmpDesc	*cmp_ring;
	unsigned			cmp_pages;
	dma_addr_t			cmpRingPA;

	struct PVSCSIRingMsgDesc	*msg_ring;
	unsigned			msg_pages;
	dma_addr_t			msgRingPA;

	struct PVSCSIRingsState		*rings_state;
	dma_addr_t			ringStatePA;

	struct pci_dev			*dev;
	struct Scsi_Host		*host;

	struct list_head		cmd_pool;
	struct pvscsi_ctx		*cmd_map;
};


/* Command line parameters */
static int pvscsi_ring_pages;
static int pvscsi_msg_ring_pages = PVSCSI_DEFAULT_NUM_PAGES_MSG_RING;
static int pvscsi_cmd_per_lun    = PVSCSI_DEFAULT_QUEUE_DEPTH;
static bool pvscsi_disable_msi;
static bool pvscsi_disable_msix;
static bool pvscsi_use_msg       = true;
static bool pvscsi_use_req_threshold = true;

#define PVSCSI_RW (S_IRUSR | S_IWUSR)

module_param_named(ring_pages, pvscsi_ring_pages, int, PVSCSI_RW);
MODULE_PARM_DESC(ring_pages, "Number of pages per req/cmp ring - (default="
		 __stringify(PVSCSI_DEFAULT_NUM_PAGES_PER_RING)
		 "[up to 16 targets],"
		 __stringify(PVSCSI_SETUP_RINGS_MAX_NUM_PAGES)
		 "[for 16+ targets])");

module_param_named(msg_ring_pages, pvscsi_msg_ring_pages, int, PVSCSI_RW);
MODULE_PARM_DESC(msg_ring_pages, "Number of pages for the msg ring - (default="
		 __stringify(PVSCSI_DEFAULT_NUM_PAGES_MSG_RING) ")");

module_param_named(cmd_per_lun, pvscsi_cmd_per_lun, int, PVSCSI_RW);
MODULE_PARM_DESC(cmd_per_lun, "Maximum commands per lun - (default="
		 __stringify(PVSCSI_DEFAULT_QUEUE_DEPTH) ")");

module_param_named(disable_msi, pvscsi_disable_msi, bool, PVSCSI_RW);
MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");

module_param_named(disable_msix, pvscsi_disable_msix, bool, PVSCSI_RW);
MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");

module_param_named(use_msg, pvscsi_use_msg, bool, PVSCSI_RW);
MODULE_PARM_DESC(use_msg, "Use msg ring when available - (default=1)");

module_param_named(use_req_threshold, pvscsi_use_req_threshold,
		   bool, PVSCSI_RW);
MODULE_PARM_DESC(use_req_threshold, "Use driver-based request coalescing if configured - (default=1)");

static const struct pci_device_id pvscsi_pci_tbl[] = {
	{ PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_PVSCSI) },
	{ 0 }
};

MODULE_DEVICE_TABLE(pci, pvscsi_pci_tbl);

static struct device *
pvscsi_dev(const struct pvscsi_adapter *adapter)
{
	return &(adapter->dev->dev);
}

static struct pvscsi_ctx *
pvscsi_find_context(const struct pvscsi_adapter *adapter, struct scsi_cmnd *cmd)
{
	struct pvscsi_ctx *ctx, *end;

	end = &adapter->cmd_map[adapter->req_depth];
	for (ctx = adapter->cmd_map; ctx < end; ctx++)
		if (ctx->cmd == cmd)
			return ctx;

	return NULL;
}

static struct pvscsi_ctx *
pvscsi_acquire_context(struct pvscsi_adapter *adapter, struct scsi_cmnd *cmd)
{
	struct pvscsi_ctx *ctx;

	if (list_empty(&adapter->cmd_pool))
		return NULL;

	ctx = list_first_entry(&adapter->cmd_pool, struct pvscsi_ctx, list);
	ctx->cmd = cmd;
	list_del(&ctx->list);

	return ctx;
}

static void pvscsi_release_context(struct pvscsi_adapter *adapter,
				   struct pvscsi_ctx *ctx)
{
	ctx->cmd = NULL;
	ctx->abort_cmp = NULL;
	list_add(&ctx->list, &adapter->cmd_pool);
}

/*
 * Map a pvscsi_ctx struct to a context ID field value; we map to a simple
 * non-zero integer. ctx always points to an entry in cmd_map array, hence
 * the return value is always >=1.
 */
static u64 pvscsi_map_context(const struct pvscsi_adapter *adapter,
			      const struct pvscsi_ctx *ctx)
{
	return ctx - adapter->cmd_map + 1;
}

static struct pvscsi_ctx *
pvscsi_get_context(const struct pvscsi_adapter *adapter, u64 context)
{
	return &adapter->cmd_map[context - 1];
}

static void pvscsi_reg_write(const struct pvscsi_adapter *adapter,
			     u32 offset, u32 val)
{
	writel(val, adapter->mmioBase + offset);
}

static u32 pvscsi_reg_read(const struct pvscsi_adapter *adapter, u32 offset)
{
	return readl(adapter->mmioBase + offset);
}

static u32 pvscsi_read_intr_status(const struct pvscsi_adapter *adapter)
{
	return pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_INTR_STATUS);
}

static void pvscsi_write_intr_status(const struct pvscsi_adapter *adapter,
				     u32 val)
{
	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_STATUS, val);
}

static void pvscsi_unmask_intr(const struct pvscsi_adapter *adapter)
{
	u32 intr_bits;

	intr_bits = PVSCSI_INTR_CMPL_MASK;
	if (adapter->use_msg)
		intr_bits |= PVSCSI_INTR_MSG_MASK;

	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_MASK, intr_bits);
}

static void pvscsi_mask_intr(const struct pvscsi_adapter *adapter)
{
	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_MASK, 0);
}

static void pvscsi_write_cmd_desc(const struct pvscsi_adapter *adapter,
				  u32 cmd, const void *desc, size_t len)
{
	const u32 *ptr = desc;
	size_t i;

	len /= sizeof(*ptr);
	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND, cmd);
	for (i = 0; i < len; i++)
		pvscsi_reg_write(adapter,
				 PVSCSI_REG_OFFSET_COMMAND_DATA, ptr[i]);
}

static void pvscsi_abort_cmd(const struct pvscsi_adapter *adapter,
			     const struct pvscsi_ctx *ctx)
{
	struct PVSCSICmdDescAbortCmd cmd = { 0 };

	cmd.target = ctx->cmd->device->id;
	cmd.context = pvscsi_map_context(adapter, ctx);

	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_ABORT_CMD, &cmd, sizeof(cmd));
}

static void pvscsi_kick_rw_io(const struct pvscsi_adapter *adapter)
{
	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_KICK_RW_IO, 0);
}

static void pvscsi_process_request_ring(const struct pvscsi_adapter *adapter)
{
	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_KICK_NON_RW_IO, 0);
}

static int scsi_is_rw(unsigned char op)
{
	return op == READ_6  || op == WRITE_6 ||
	       op == READ_10 || op == WRITE_10 ||
	       op == READ_12 || op == WRITE_12 ||
	       op == READ_16 || op == WRITE_16;
}

static void pvscsi_kick_io(const struct pvscsi_adapter *adapter,
			   unsigned char op)
{
	if (scsi_is_rw(op)) {
		struct PVSCSIRingsState *s = adapter->rings_state;

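		/*
		 * Request coalescing: ring the doorbell only once the number
		 * of outstanding requests reaches the device-configured
		 * threshold, saving VM exits on the fast path.
		 */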
		if (!adapter->use_req_threshold ||
		    s->reqProdIdx - s->reqConsIdx >= s->reqCallThreshold)
			pvscsi_kick_rw_io(adapter);
	} else {
		pvscsi_process_request_ring(adapter);
	}
}

static void ll_adapter_reset(const struct pvscsi_adapter *adapter)
{
	dev_dbg(pvscsi_dev(adapter), "Adapter Reset on %p\n", adapter);

	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_ADAPTER_RESET, NULL, 0);
}

static void ll_bus_reset(const struct pvscsi_adapter *adapter)
{
	dev_dbg(pvscsi_dev(adapter), "Resetting bus on %p\n", adapter);

	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_RESET_BUS, NULL, 0);
}

static void ll_device_reset(const struct pvscsi_adapter *adapter, u32 target)
{
	struct PVSCSICmdDescResetDevice cmd = { 0 };

	dev_dbg(pvscsi_dev(adapter), "Resetting device: target=%u\n", target);

	cmd.target = target;

	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_RESET_DEVICE,
			      &cmd, sizeof(cmd));
}

static void pvscsi_create_sg(struct pvscsi_ctx *ctx,
			     struct scatterlist *sg, unsigned count)
{
	unsigned i;
	struct PVSCSISGElement *sge;

	BUG_ON(count > PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT);

	sge = &ctx->sgl->sge[0];
	for (i = 0; i < count; i++, sg = sg_next(sg)) {
		sge[i].addr   = sg_dma_address(sg);
		sge[i].length = sg_dma_len(sg);
		sge[i].flags  = 0;
	}
}

/*
 * Map all data buffers for a command into PCI space and
 * set up the scatter/gather list if needed.
 */
static int pvscsi_map_buffers(struct pvscsi_adapter *adapter,
			      struct pvscsi_ctx *ctx, struct scsi_cmnd *cmd,
			      struct PVSCSIRingReqDesc *e)
{
	unsigned count;
	unsigned bufflen = scsi_bufflen(cmd);
	struct scatterlist *sg;

	e->dataLen = bufflen;
	e->dataAddr = 0;
	if (bufflen == 0)
		return 0;

	sg = scsi_sglist(cmd);
	count = scsi_sg_count(cmd);
	if (count != 0) {
		int segs = scsi_dma_map(cmd);

		if (segs == -ENOMEM) {
			scmd_printk(KERN_DEBUG, cmd,
				    "vmw_pvscsi: Failed to map cmd sglist for DMA.\n");
			return -ENOMEM;
		} else if (segs > 1) {
			pvscsi_create_sg(ctx, sg, segs);

			e->flags |= PVSCSI_FLAG_CMD_WITH_SG_LIST;
			ctx->sglPA = dma_map_single(&adapter->dev->dev,
					ctx->sgl, SGL_SIZE, DMA_TO_DEVICE);
			if (dma_mapping_error(&adapter->dev->dev, ctx->sglPA)) {
				scmd_printk(KERN_ERR, cmd,
					    "vmw_pvscsi: Failed to map ctx sglist for DMA.\n");
				scsi_dma_unmap(cmd);
				ctx->sglPA = 0;
				return -ENOMEM;
			}
			e->dataAddr = ctx->sglPA;
		} else
			e->dataAddr = sg_dma_address(sg);
	} else {
		/*
		 * In case there is no S/G list, scsi_sglist points
		 * directly to the buffer.
		 */
		ctx->dataPA = dma_map_single(&adapter->dev->dev, sg, bufflen,
					     cmd->sc_data_direction);
		if (dma_mapping_error(&adapter->dev->dev, ctx->dataPA)) {
			scmd_printk(KERN_DEBUG, cmd,
				    "vmw_pvscsi: Failed to map direct data buffer for DMA.\n");
			return -ENOMEM;
		}
		e->dataAddr = ctx->dataPA;
	}

	return 0;
}

/*
 * The device incorrectly doesn't clear the first byte of the sense
 * buffer in some cases. We have to do it ourselves.
 * Otherwise we run into trouble when SWIOTLB is forced.
 */
static void pvscsi_patch_sense(struct scsi_cmnd *cmd)
{
	if (cmd->sense_buffer)
		cmd->sense_buffer[0] = 0;
}

static void pvscsi_unmap_buffers(const struct pvscsi_adapter *adapter,
				 struct pvscsi_ctx *ctx)
{
	struct scsi_cmnd *cmd;
	unsigned bufflen;

	cmd = ctx->cmd;
	bufflen = scsi_bufflen(cmd);

	if (bufflen != 0) {
		unsigned count = scsi_sg_count(cmd);

		if (count != 0) {
			scsi_dma_unmap(cmd);
			if (ctx->sglPA) {
				dma_unmap_single(&adapter->dev->dev, ctx->sglPA,
						 SGL_SIZE, DMA_TO_DEVICE);
				ctx->sglPA = 0;
			}
		} else
			dma_unmap_single(&adapter->dev->dev, ctx->dataPA,
					 bufflen, cmd->sc_data_direction);
	}
	if (cmd->sense_buffer)
		dma_unmap_single(&adapter->dev->dev, ctx->sensePA,
				 SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE);
}

static int pvscsi_allocate_rings(struct pvscsi_adapter *adapter)
{
	adapter->rings_state = dma_alloc_coherent(&adapter->dev->dev, PAGE_SIZE,
			&adapter->ringStatePA, GFP_KERNEL);
	if (!adapter->rings_state)
		return -ENOMEM;

	adapter->req_pages = min(PVSCSI_MAX_NUM_PAGES_REQ_RING,
				 pvscsi_ring_pages);
	adapter->req_depth = adapter->req_pages
					* PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
	adapter->req_ring = dma_alloc_coherent(&adapter->dev->dev,
			adapter->req_pages * PAGE_SIZE, &adapter->reqRingPA,
			GFP_KERNEL);
	if (!adapter->req_ring)
		return -ENOMEM;

	adapter->cmp_pages = min(PVSCSI_MAX_NUM_PAGES_CMP_RING,
				 pvscsi_ring_pages);
	adapter->cmp_ring = dma_alloc_coherent(&adapter->dev->dev,
			adapter->cmp_pages * PAGE_SIZE, &adapter->cmpRingPA,
			GFP_KERNEL);
	if (!adapter->cmp_ring)
		return -ENOMEM;

	BUG_ON(!IS_ALIGNED(adapter->ringStatePA, PAGE_SIZE));
	BUG_ON(!IS_ALIGNED(adapter->reqRingPA, PAGE_SIZE));
	BUG_ON(!IS_ALIGNED(adapter->cmpRingPA, PAGE_SIZE));

	if (!adapter->use_msg)
		return 0;

	adapter->msg_pages = min(PVSCSI_MAX_NUM_PAGES_MSG_RING,
				 pvscsi_msg_ring_pages);
	adapter->msg_ring = dma_alloc_coherent(&adapter->dev->dev,
			adapter->msg_pages * PAGE_SIZE, &adapter->msgRingPA,
			GFP_KERNEL);
	if (!adapter->msg_ring)
		return -ENOMEM;
	BUG_ON(!IS_ALIGNED(adapter->msgRingPA, PAGE_SIZE));

	return 0;
}

static void pvscsi_setup_all_rings(const struct pvscsi_adapter *adapter)
{
	struct PVSCSICmdDescSetupRings cmd = { 0 };
	dma_addr_t base;
	unsigned i;

	cmd.ringsStatePPN   = adapter->ringStatePA >> PAGE_SHIFT;
	cmd.reqRingNumPages = adapter->req_pages;
	cmd.cmpRingNumPages = adapter->cmp_pages;

	base = adapter->reqRingPA;
	for (i = 0; i < adapter->req_pages; i++) {
		cmd.reqRingPPNs[i] = base >> PAGE_SHIFT;
		base += PAGE_SIZE;
	}

	base = adapter->cmpRingPA;
	for (i = 0; i < adapter->cmp_pages; i++) {
		cmd.cmpRingPPNs[i] = base >> PAGE_SHIFT;
		base += PAGE_SIZE;
	}

	memset(adapter->rings_state, 0, PAGE_SIZE);
	memset(adapter->req_ring, 0, adapter->req_pages * PAGE_SIZE);
	memset(adapter->cmp_ring, 0, adapter->cmp_pages * PAGE_SIZE);

	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_SETUP_RINGS,
			      &cmd, sizeof(cmd));

	if (adapter->use_msg) {
		struct PVSCSICmdDescSetupMsgRing cmd_msg = { 0 };

		cmd_msg.numPages = adapter->msg_pages;

		base = adapter->msgRingPA;
		for (i = 0; i < adapter->msg_pages; i++) {
			cmd_msg.ringPPNs[i] = base >> PAGE_SHIFT;
			base += PAGE_SIZE;
		}
		memset(adapter->msg_ring, 0, adapter->msg_pages * PAGE_SIZE);

		pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_SETUP_MSG_RING,
				      &cmd_msg, sizeof(cmd_msg));
	}
}

static int pvscsi_change_queue_depth(struct scsi_device *sdev, int qdepth)
{
	if (!sdev->tagged_supported)
		qdepth = 1;
	return scsi_change_queue_depth(sdev, qdepth);
}

/*
 * Pull a completion descriptor off and pass the completion back
 * to the SCSI mid layer.
 */
static void pvscsi_complete_request(struct pvscsi_adapter *adapter,
				    const struct PVSCSIRingCmpDesc *e)
{
	struct pvscsi_ctx *ctx;
	struct scsi_cmnd *cmd;
	struct completion *abort_cmp;
	u32 btstat = e->hostStatus;
	u32 sdstat = e->scsiStatus;

	ctx = pvscsi_get_context(adapter, e->context);
	cmd = ctx->cmd;
	abort_cmp = ctx->abort_cmp;
	pvscsi_unmap_buffers(adapter, ctx);
	if (sdstat != SAM_STAT_CHECK_CONDITION)
		pvscsi_patch_sense(cmd);
	pvscsi_release_context(adapter, ctx);
	if (abort_cmp) {
		/*
		 * The command was requested to be aborted. Just signal that
		 * the request completed and swallow the actual cmd completion
		 * here. The abort handler will post a completion for this
		 * command indicating that it got successfully aborted.
		 */
		complete(abort_cmp);
		return;
	}

	cmd->result = 0;
	if (sdstat != SAM_STAT_GOOD &&
	    (btstat == BTSTAT_SUCCESS ||
	     btstat == BTSTAT_LINKED_COMMAND_COMPLETED ||
	     btstat == BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG)) {
		if (sdstat == SAM_STAT_COMMAND_TERMINATED) {
			cmd->result = (DID_RESET << 16);
		} else {
			cmd->result = (DID_OK << 16) | sdstat;
		}
	} else
		switch (btstat) {
		case BTSTAT_SUCCESS:
		case BTSTAT_LINKED_COMMAND_COMPLETED:
		case BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG:
			/*
			 * Commands like INQUIRY may transfer less data than
			 * requested by the initiator via bufflen. Set the
			 * residual count to make the upper layer aware of the
			 * actual amount of data returned. There are cases
			 * when the controller returns zero dataLen with
			 * non-zero data - do not set the residual count in
			 * that case.
			 */
			if (e->dataLen && (e->dataLen < scsi_bufflen(cmd)))
				scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
			cmd->result = (DID_OK << 16);
			break;

		case BTSTAT_DATARUN:
		case BTSTAT_DATA_UNDERRUN:
			/* Report residual data in underruns */
			scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
			cmd->result = (DID_ERROR << 16);
			break;

		case BTSTAT_SELTIMEO:
			/* Our emulation returns this for non-connected devs */
			cmd->result = (DID_BAD_TARGET << 16);
			break;

		case BTSTAT_LUNMISMATCH:
		case BTSTAT_TAGREJECT:
		case BTSTAT_BADMSG:
		case BTSTAT_HAHARDWARE:
		case BTSTAT_INVPHASE:
		case BTSTAT_HATIMEOUT:
		case BTSTAT_NORESPONSE:
		case BTSTAT_DISCONNECT:
		case BTSTAT_HASOFTWARE:
		case BTSTAT_BUSFREE:
		case BTSTAT_SENSFAILED:
			cmd->result |= (DID_ERROR << 16);
			break;

		case BTSTAT_SENTRST:
		case BTSTAT_RECVRST:
		case BTSTAT_BUSRESET:
			cmd->result = (DID_RESET << 16);
			break;

		case BTSTAT_ABORTQUEUE:
			cmd->result = (DID_BUS_BUSY << 16);
			break;

		case BTSTAT_SCSIPARITY:
			cmd->result = (DID_PARITY << 16);
			break;

		default:
			cmd->result = (DID_ERROR << 16);
			scmd_printk(KERN_DEBUG, cmd,
				    "Unknown completion status: 0x%x\n",
				    btstat);
	}

	dev_dbg(&cmd->device->sdev_gendev,
		"cmd=%p %x ctx=%p result=0x%x status=0x%x,%x\n",
		cmd, cmd->cmnd[0], ctx, cmd->result, btstat, sdstat);

	scsi_done(cmd);
}

/*
 * Barrier usage: since the PVSCSI device is emulated, there could be cases
 * where we may want to serialize some accesses between the driver and the
 * emulation layer. We use compiler barriers instead of the more expensive
 * memory barriers because PVSCSI is only supported on x86, which has strong
 * memory access ordering.
 */
static void pvscsi_process_completion_ring(struct pvscsi_adapter *adapter)
{
	struct PVSCSIRingsState *s = adapter->rings_state;
	struct PVSCSIRingCmpDesc *ring = adapter->cmp_ring;
	u32 cmp_entries = s->cmpNumEntriesLog2;

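	/*
	 * cmpConsIdx and cmpProdIdx are free-running counters; MASK() below
	 * reduces them modulo the power-of-two ring size to locate the slot.
	 */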
	while (s->cmpConsIdx != s->cmpProdIdx) {
		struct PVSCSIRingCmpDesc *e = ring + (s->cmpConsIdx &
						      MASK(cmp_entries));
		/*
		 * This barrier() ensures that *e is not dereferenced while
		 * the device emulation still writes data into the slot.
		 * Since the device emulation advances s->cmpProdIdx only after
		 * updating the slot we want to check it first.
		 */
		barrier();
		pvscsi_complete_request(adapter, e);
		/*
		 * This barrier() ensures that compiler doesn't reorder write
		 * to s->cmpConsIdx before the read of (*e) inside
		 * pvscsi_complete_request. Otherwise, device emulation may
		 * overwrite *e before we had a chance to read it.
		 */
		barrier();
		s->cmpConsIdx++;
	}
}

/*
 * Translate a Linux SCSI request into a request ring entry.
 */
static int pvscsi_queue_ring(struct pvscsi_adapter *adapter,
			     struct pvscsi_ctx *ctx, struct scsi_cmnd *cmd)
{
	struct PVSCSIRingsState *s;
	struct PVSCSIRingReqDesc *e;
	struct scsi_device *sdev;
	u32 req_entries;

	s = adapter->rings_state;
	sdev = cmd->device;
	req_entries = s->reqNumEntriesLog2;

	/*
	 * If this condition holds, we might have room on the request ring, but
	 * we might not have room on the completion ring for the response.
	 * However, we have already ruled out this possibility - we would not
	 * have successfully allocated a context if it were true, since we only
	 * have one context per request entry.  Check for it anyway, since it
	 * would be a serious bug.
	 */
	if (s->reqProdIdx - s->cmpConsIdx >= 1 << req_entries) {
		scmd_printk(KERN_ERR, cmd, "vmw_pvscsi: "
			    "ring full: reqProdIdx=%d cmpConsIdx=%d\n",
			    s->reqProdIdx, s->cmpConsIdx);
		return -1;
	}

	e = adapter->req_ring + (s->reqProdIdx & MASK(req_entries));

	e->bus    = sdev->channel;
	e->target = sdev->id;
	memset(e->lun, 0, sizeof(e->lun));
	e->lun[1] = sdev->lun;

	if (cmd->sense_buffer) {
		ctx->sensePA = dma_map_single(&adapter->dev->dev,
				cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE,
				DMA_FROM_DEVICE);
		if (dma_mapping_error(&adapter->dev->dev, ctx->sensePA)) {
			scmd_printk(KERN_DEBUG, cmd,
				    "vmw_pvscsi: Failed to map sense buffer for DMA.\n");
			ctx->sensePA = 0;
			return -ENOMEM;
		}
		e->senseAddr = ctx->sensePA;
		e->senseLen = SCSI_SENSE_BUFFERSIZE;
	} else {
		e->senseLen  = 0;
		e->senseAddr = 0;
	}
	e->cdbLen   = cmd->cmd_len;
	e->vcpuHint = smp_processor_id();
	memcpy(e->cdb, cmd->cmnd, e->cdbLen);

	e->tag = SIMPLE_QUEUE_TAG;

	if (cmd->sc_data_direction == DMA_FROM_DEVICE)
		e->flags = PVSCSI_FLAG_CMD_DIR_TOHOST;
	else if (cmd->sc_data_direction == DMA_TO_DEVICE)
		e->flags = PVSCSI_FLAG_CMD_DIR_TODEVICE;
	else if (cmd->sc_data_direction == DMA_NONE)
		e->flags = PVSCSI_FLAG_CMD_DIR_NONE;
	else
		e->flags = 0;

	if (pvscsi_map_buffers(adapter, ctx, cmd, e) != 0) {
		if (cmd->sense_buffer) {
			dma_unmap_single(&adapter->dev->dev, ctx->sensePA,
					 SCSI_SENSE_BUFFERSIZE,
					 DMA_FROM_DEVICE);
			ctx->sensePA = 0;
		}
		return -ENOMEM;
	}

	e->context = pvscsi_map_context(adapter, ctx);

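	/*
	 * Publish the descriptor: the compiler barrier makes sure all fields
	 * of *e are written before the device can observe the incremented
	 * reqProdIdx (see the barrier usage note above
	 * pvscsi_process_completion_ring).
	 */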
	barrier();

	s->reqProdIdx++;

	return 0;
}

static int pvscsi_queue_lck(struct scsi_cmnd *cmd)
{
	struct Scsi_Host *host = cmd->device->host;
	struct pvscsi_adapter *adapter = shost_priv(host);
	struct pvscsi_ctx *ctx;
	unsigned long flags;
	unsigned char op;

	spin_lock_irqsave(&adapter->hw_lock, flags);

	ctx = pvscsi_acquire_context(adapter, cmd);
	if (!ctx || pvscsi_queue_ring(adapter, ctx, cmd) != 0) {
		if (ctx)
			pvscsi_release_context(adapter, ctx);
		spin_unlock_irqrestore(&adapter->hw_lock, flags);
		return SCSI_MLQUEUE_HOST_BUSY;
	}

	op = cmd->cmnd[0];

	dev_dbg(&cmd->device->sdev_gendev,
		"queued cmd %p, ctx %p, op=%x\n", cmd, ctx, op);

	spin_unlock_irqrestore(&adapter->hw_lock, flags);

	pvscsi_kick_io(adapter, op);

	return 0;
}

static DEF_SCSI_QCMD(pvscsi_queue)

static int pvscsi_abort(struct scsi_cmnd *cmd)
{
	struct pvscsi_adapter *adapter = shost_priv(cmd->device->host);
	struct pvscsi_ctx *ctx;
	unsigned long flags;
	int result = SUCCESS;
	DECLARE_COMPLETION_ONSTACK(abort_cmp);
	int done;

	scmd_printk(KERN_DEBUG, cmd, "task abort on host %u, %p\n",
		    adapter->host->host_no, cmd);

	spin_lock_irqsave(&adapter->hw_lock, flags);

	/*
	 * Poll the completion ring first - we might be trying to abort
	 * a command that is waiting to be dispatched in the completion ring.
	 */
	pvscsi_process_completion_ring(adapter);

	/*
	 * If there is no context for the command, it either already succeeded
	 * or else was never properly issued.  Not our problem.
	 */
	ctx = pvscsi_find_context(adapter, cmd);
	if (!ctx) {
		scmd_printk(KERN_DEBUG, cmd, "Failed to abort cmd %p\n", cmd);
		goto out;
	}

	/*
	 * Mark that the command has been requested to be aborted and issue
	 * the abort.
	 */
	ctx->abort_cmp = &abort_cmp;

	pvscsi_abort_cmd(adapter, ctx);
	spin_unlock_irqrestore(&adapter->hw_lock, flags);
	/* Wait for 2 secs for the completion. */
	done = wait_for_completion_timeout(&abort_cmp, msecs_to_jiffies(2000));
	spin_lock_irqsave(&adapter->hw_lock, flags);

	if (!done) {
		/*
		 * Failed to abort the command, unmark the fact that it
		 * was requested to be aborted.
		 */
		ctx->abort_cmp = NULL;
		result = FAILED;
		scmd_printk(KERN_DEBUG, cmd,
			    "Failed to get completion for aborted cmd %p\n",
			    cmd);
		goto out;
	}

	/*
	 * Successfully aborted the command.
	 */
	cmd->result = (DID_ABORT << 16);
	scsi_done(cmd);

out:
	spin_unlock_irqrestore(&adapter->hw_lock, flags);
	return result;
}

/*
 * Abort all outstanding requests.  This is only safe to use if the completion
 * ring will never be walked again or the device has been reset, because it
 * destroys the 1-1 mapping between context field passed to emulation and our
 * request structure.
 */
static void pvscsi_reset_all(struct pvscsi_adapter *adapter)
{
	unsigned i;

	for (i = 0; i < adapter->req_depth; i++) {
		struct pvscsi_ctx *ctx = &adapter->cmd_map[i];
		struct scsi_cmnd *cmd = ctx->cmd;
		if (cmd) {
			scmd_printk(KERN_ERR, cmd,
				    "Forced reset on cmd %p\n", cmd);
			pvscsi_unmap_buffers(adapter, ctx);
			pvscsi_patch_sense(cmd);
			pvscsi_release_context(adapter, ctx);
			cmd->result = (DID_RESET << 16);
			scsi_done(cmd);
		}
	}
}

static int pvscsi_host_reset(struct scsi_cmnd *cmd)
{
	struct Scsi_Host *host = cmd->device->host;
	struct pvscsi_adapter *adapter = shost_priv(host);
	unsigned long flags;
	bool use_msg;

	scmd_printk(KERN_INFO, cmd, "SCSI Host reset\n");

	spin_lock_irqsave(&adapter->hw_lock, flags);

	use_msg = adapter->use_msg;

	if (use_msg) {
		adapter->use_msg = false;
		spin_unlock_irqrestore(&adapter->hw_lock, flags);

		/*
		 * Now that we know that the ISR won't add more work on the
		 * workqueue we can safely flush any outstanding work.
		 */
		flush_workqueue(adapter->workqueue);
		spin_lock_irqsave(&adapter->hw_lock, flags);
	}

	/*
	 * We're going to tear down the entire ring structure and set it back
	 * up, so we stall new requests until all completions are flushed and
	 * the rings are back in place.
	 */

	pvscsi_process_request_ring(adapter);

	ll_adapter_reset(adapter);

	/*
	 * Now process any completions.  Note we do this AFTER adapter reset,
	 * which is strange, but stops races where completions get posted
	 * between processing the ring and issuing the reset.  The backend will
	 * not touch the ring memory after reset, so the immediately pre-reset
	 * completion ring state is still valid.
	 */
	pvscsi_process_completion_ring(adapter);

	pvscsi_reset_all(adapter);
	adapter->use_msg = use_msg;
	pvscsi_setup_all_rings(adapter);
	pvscsi_unmask_intr(adapter);

	spin_unlock_irqrestore(&adapter->hw_lock, flags);

	return SUCCESS;
}

static int pvscsi_bus_reset(struct scsi_cmnd *cmd)
{
	struct Scsi_Host *host = cmd->device->host;
	struct pvscsi_adapter *adapter = shost_priv(host);
	unsigned long flags;

	scmd_printk(KERN_INFO, cmd, "SCSI Bus reset\n");

	/*
	 * We don't want to queue new requests for this bus after
	 * flushing all pending requests to emulation, since new
	 * requests could then sneak in during this bus reset phase,
	 * so take the lock now.
	 */
	spin_lock_irqsave(&adapter->hw_lock, flags);

	pvscsi_process_request_ring(adapter);
	ll_bus_reset(adapter);
	pvscsi_process_completion_ring(adapter);

	spin_unlock_irqrestore(&adapter->hw_lock, flags);

	return SUCCESS;
}

static int pvscsi_device_reset(struct scsi_cmnd *cmd)
{
	struct Scsi_Host *host = cmd->device->host;
	struct pvscsi_adapter *adapter = shost_priv(host);
	unsigned long flags;

	scmd_printk(KERN_INFO, cmd, "SCSI device reset on scsi%u:%u\n",
		    host->host_no, cmd->device->id);

	/*
	 * We don't want to queue new requests for this device after flushing
	 * all pending requests to emulation, since new requests could then
	 * sneak in during this device reset phase, so take the lock now.
	 */
	spin_lock_irqsave(&adapter->hw_lock, flags);

	pvscsi_process_request_ring(adapter);
	ll_device_reset(adapter, cmd->device->id);
	pvscsi_process_completion_ring(adapter);

	spin_unlock_irqrestore(&adapter->hw_lock, flags);

	return SUCCESS;
}

static struct scsi_host_template pvscsi_template;

static const char *pvscsi_info(struct Scsi_Host *host)
{
	struct pvscsi_adapter *adapter = shost_priv(host);
	static char buf[256];

	sprintf(buf, "VMware PVSCSI storage adapter rev %d, req/cmp/msg rings: "
		"%u/%u/%u pages, cmd_per_lun=%u", adapter->rev,
		adapter->req_pages, adapter->cmp_pages, adapter->msg_pages,
		pvscsi_template.cmd_per_lun);

	return buf;
}

static struct scsi_host_template pvscsi_template = {
	.module				= THIS_MODULE,
	.name				= "VMware PVSCSI Host Adapter",
	.proc_name			= "vmw_pvscsi",
	.info				= pvscsi_info,
	.queuecommand			= pvscsi_queue,
	.this_id			= -1,
	.sg_tablesize			= PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT,
	.dma_boundary			= UINT_MAX,
	.max_sectors			= 0xffff,
	.change_queue_depth		= pvscsi_change_queue_depth,
	.eh_abort_handler		= pvscsi_abort,
	.eh_device_reset_handler	= pvscsi_device_reset,
	.eh_bus_reset_handler		= pvscsi_bus_reset,
	.eh_host_reset_handler		= pvscsi_host_reset,
};

static void pvscsi_process_msg(const struct pvscsi_adapter *adapter,
			       const struct PVSCSIRingMsgDesc *e)
{
	struct PVSCSIRingsState *s = adapter->rings_state;
	struct Scsi_Host *host = adapter->host;
	struct scsi_device *sdev;

	printk(KERN_INFO "vmw_pvscsi: msg type: 0x%x - MSG RING: %u/%u (%u) \n",
	       e->type, s->msgProdIdx, s->msgConsIdx, s->msgNumEntriesLog2);

	BUILD_BUG_ON(PVSCSI_MSG_LAST != 2);

	if (e->type == PVSCSI_MSG_DEV_ADDED) {
		struct PVSCSIMsgDescDevStatusChanged *desc;
		desc = (struct PVSCSIMsgDescDevStatusChanged *)e;

		printk(KERN_INFO
		       "vmw_pvscsi: msg: device added at scsi%u:%u:%u\n",
		       desc->bus, desc->target, desc->lun[1]);

		if (!scsi_host_get(host))
			return;

		sdev = scsi_device_lookup(host, desc->bus, desc->target,
					  desc->lun[1]);
		if (sdev) {
			printk(KERN_INFO "vmw_pvscsi: device already exists\n");
			scsi_device_put(sdev);
		} else
			scsi_add_device(adapter->host, desc->bus,
					desc->target, desc->lun[1]);

		scsi_host_put(host);
	} else if (e->type == PVSCSI_MSG_DEV_REMOVED) {
		struct PVSCSIMsgDescDevStatusChanged *desc;
		desc = (struct PVSCSIMsgDescDevStatusChanged *)e;

		printk(KERN_INFO
		       "vmw_pvscsi: msg: device removed at scsi%u:%u:%u\n",
		       desc->bus, desc->target, desc->lun[1]);

		if (!scsi_host_get(host))
			return;

		sdev = scsi_device_lookup(host, desc->bus, desc->target,
					  desc->lun[1]);
		if (sdev) {
			scsi_remove_device(sdev);
			scsi_device_put(sdev);
		} else
			printk(KERN_INFO
			       "vmw_pvscsi: failed to lookup scsi%u:%u:%u\n",
			       desc->bus, desc->target, desc->lun[1]);

		scsi_host_put(host);
	}
}

static int pvscsi_msg_pending(const struct pvscsi_adapter *adapter)
{
	struct PVSCSIRingsState *s = adapter->rings_state;

	return s->msgProdIdx != s->msgConsIdx;
}

static void pvscsi_process_msg_ring(const struct pvscsi_adapter *adapter)
{
	struct PVSCSIRingsState *s = adapter->rings_state;
	struct PVSCSIRingMsgDesc *ring = adapter->msg_ring;
	u32 msg_entries = s->msgNumEntriesLog2;

	while (pvscsi_msg_pending(adapter)) {
		struct PVSCSIRingMsgDesc *e = ring + (s->msgConsIdx &
						      MASK(msg_entries));

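		/*
		 * Same barrier() pairing as in
		 * pvscsi_process_completion_ring: don't read *e before the
		 * device has filled the slot, and don't publish msgConsIdx
		 * before we are done with *e.
		 */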
		barrier();
		pvscsi_process_msg(adapter, e);
		barrier();
		s->msgConsIdx++;
	}
}

static void pvscsi_msg_workqueue_handler(struct work_struct *data)
{
	struct pvscsi_adapter *adapter;

	adapter = container_of(data, struct pvscsi_adapter, work);

	pvscsi_process_msg_ring(adapter);
}

static int pvscsi_setup_msg_workqueue(struct pvscsi_adapter *adapter)
{
	char name[32];

	if (!pvscsi_use_msg)
		return 0;

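	/*
	 * Probe for msg-ring support: issue the setup command and read back
	 * the status register; a value of -1 means the device does not
	 * implement this command.
	 */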
	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND,
			 PVSCSI_CMD_SETUP_MSG_RING);

	if (pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_COMMAND_STATUS) == -1)
		return 0;

	snprintf(name, sizeof(name),
		 "vmw_pvscsi_wq_%u", adapter->host->host_no);

	adapter->workqueue = create_singlethread_workqueue(name);
	if (!adapter->workqueue) {
		printk(KERN_ERR "vmw_pvscsi: failed to create work queue\n");
		return 0;
	}
	INIT_WORK(&adapter->work, pvscsi_msg_workqueue_handler);

	return 1;
}

static bool pvscsi_setup_req_threshold(struct pvscsi_adapter *adapter,
				      bool enable)
{
	u32 val;

	if (!pvscsi_use_req_threshold)
		return false;

	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND,
			 PVSCSI_CMD_SETUP_REQCALLTHRESHOLD);
	val = pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_COMMAND_STATUS);
	if (val == -1) {
		printk(KERN_INFO "vmw_pvscsi: device does not support req_threshold\n");
		return false;
	} else {
		struct PVSCSICmdDescSetupReqCall cmd_msg = { 0 };
		cmd_msg.enable = enable;
		printk(KERN_INFO
		       "vmw_pvscsi: %sabling reqCallThreshold\n",
			enable ? "en" : "dis");
		pvscsi_write_cmd_desc(adapter,
				      PVSCSI_CMD_SETUP_REQCALLTHRESHOLD,
				      &cmd_msg, sizeof(cmd_msg));
		return pvscsi_reg_read(adapter,
				       PVSCSI_REG_OFFSET_COMMAND_STATUS) != 0;
	}
}

static irqreturn_t pvscsi_isr(int irq, void *devp)
{
	struct pvscsi_adapter *adapter = devp;
	unsigned long flags;

	spin_lock_irqsave(&adapter->hw_lock, flags);
	pvscsi_process_completion_ring(adapter);
	if (adapter->use_msg && pvscsi_msg_pending(adapter))
		queue_work(adapter->workqueue, &adapter->work);
	spin_unlock_irqrestore(&adapter->hw_lock, flags);

	return IRQ_HANDLED;
}

static irqreturn_t pvscsi_shared_isr(int irq, void *devp)
{
	struct pvscsi_adapter *adapter = devp;
	u32 val = pvscsi_read_intr_status(adapter);

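	/*
	 * On a shared (INTx) line, first check whether this device actually
	 * raised the interrupt, then acknowledge it before handling.
	 */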
	if (!(val & PVSCSI_INTR_ALL_SUPPORTED))
		return IRQ_NONE;
	pvscsi_write_intr_status(devp, val);
	return pvscsi_isr(irq, devp);
}

static void pvscsi_free_sgls(const struct pvscsi_adapter *adapter)
{
	struct pvscsi_ctx *ctx = adapter->cmd_map;
	unsigned i;

	for (i = 0; i < adapter->req_depth; ++i, ++ctx)
		free_pages((unsigned long)ctx->sgl, get_order(SGL_SIZE));
}

static void pvscsi_shutdown_intr(struct pvscsi_adapter *adapter)
{
	free_irq(pci_irq_vector(adapter->dev, 0), adapter);
	pci_free_irq_vectors(adapter->dev);
}

static void pvscsi_release_resources(struct pvscsi_adapter *adapter)
{
	if (adapter->workqueue)
		destroy_workqueue(adapter->workqueue);

	if (adapter->mmioBase)
		pci_iounmap(adapter->dev, adapter->mmioBase);

	pci_release_regions(adapter->dev);

	if (adapter->cmd_map) {
		pvscsi_free_sgls(adapter);
		kfree(adapter->cmd_map);
	}

	if (adapter->rings_state)
		dma_free_coherent(&adapter->dev->dev, PAGE_SIZE,
				    adapter->rings_state, adapter->ringStatePA);

	if (adapter->req_ring)
		dma_free_coherent(&adapter->dev->dev,
				    adapter->req_pages * PAGE_SIZE,
				    adapter->req_ring, adapter->reqRingPA);

	if (adapter->cmp_ring)
		dma_free_coherent(&adapter->dev->dev,
				    adapter->cmp_pages * PAGE_SIZE,
				    adapter->cmp_ring, adapter->cmpRingPA);

	if (adapter->msg_ring)
		dma_free_coherent(&adapter->dev->dev,
				    adapter->msg_pages * PAGE_SIZE,
				    adapter->msg_ring, adapter->msgRingPA);
}

/*
 * Allocate scatter gather lists.
 *
 * These are statically allocated.  Trying to be clever was not worth it.
 *
 * Dynamic allocation can fail, and we can't go deep into the memory
 * allocator, since we're a SCSI driver, and trying too hard to allocate
 * memory might generate disk I/O.  We also don't want to fail disk I/O
 * in that case because we can't get an allocation - the I/O could be
 * trying to swap out data to free memory.  Since that is pathological,
 * just use a statically allocated scatter list.
 *
 */
static int pvscsi_allocate_sg(struct pvscsi_adapter *adapter)
{
	struct pvscsi_ctx *ctx;
	int i;

	ctx = adapter->cmd_map;
	BUILD_BUG_ON(sizeof(struct pvscsi_sg_list) > SGL_SIZE);

	for (i = 0; i < adapter->req_depth; ++i, ++ctx) {
		ctx->sgl = (void *)__get_free_pages(GFP_KERNEL,
						    get_order(SGL_SIZE));
		ctx->sglPA = 0;
		BUG_ON(!IS_ALIGNED(((unsigned long)ctx->sgl), PAGE_SIZE));
		if (!ctx->sgl) {
			for (; i >= 0; --i, --ctx) {
				free_pages((unsigned long)ctx->sgl,
					   get_order(SGL_SIZE));
				ctx->sgl = NULL;
			}
			return -ENOMEM;
		}
	}

	return 0;
}

/*
 * Query the device, fetch the config info and return the
 * maximum number of targets on the adapter. In case of
 * failure for any reason, return the default of 16.
 */
static u32 pvscsi_get_max_targets(struct pvscsi_adapter *adapter)
{
	struct PVSCSICmdDescConfigCmd cmd;
	struct PVSCSIConfigPageHeader *header;
	struct device *dev;
	dma_addr_t configPagePA;
	void *config_page;
	u32 numPhys = 16;

	dev = pvscsi_dev(adapter);
	config_page = dma_alloc_coherent(&adapter->dev->dev, PAGE_SIZE,
			&configPagePA, GFP_KERNEL);
	if (!config_page) {
		dev_warn(dev, "vmw_pvscsi: failed to allocate memory for config page\n");
		goto exit;
	}
	BUG_ON(configPagePA & ~PAGE_MASK);

	/* Fetch config info from the device. */
	cmd.configPageAddress = ((u64)PVSCSI_CONFIG_CONTROLLER_ADDRESS) << 32;
	cmd.configPageNum = PVSCSI_CONFIG_PAGE_CONTROLLER;
	cmd.cmpAddr = configPagePA;
	cmd._pad = 0;

	/*
	 * Mark the completion page header with error values. If the device
	 * completes the command successfully, it sets the status values to
	 * indicate success.
	 */
	header = config_page;
	header->hostStatus = BTSTAT_INVPARAM;
	header->scsiStatus = SDSTAT_CHECK;

	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_CONFIG, &cmd, sizeof cmd);

	if (header->hostStatus == BTSTAT_SUCCESS &&
	    header->scsiStatus == SDSTAT_GOOD) {
		struct PVSCSIConfigPageController *config;

		config = config_page;
		numPhys = config->numPhys;
	} else
		dev_warn(dev, "vmw_pvscsi: PVSCSI_CMD_CONFIG failed. hostStatus = 0x%x, scsiStatus = 0x%x\n",
			 header->hostStatus, header->scsiStatus);
	dma_free_coherent(&adapter->dev->dev, PAGE_SIZE, config_page,
			  configPagePA);
exit:
	return numPhys;
}

static int pvscsi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	unsigned int irq_flag = PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY;
	struct pvscsi_adapter *adapter;
	struct pvscsi_adapter adapter_temp;
	struct Scsi_Host *host = NULL;
	unsigned int i;
	int error;
	u32 max_id;

	error = -ENODEV;

	if (pci_enable_device(pdev))
		return error;

	if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
		printk(KERN_INFO "vmw_pvscsi: using 64bit dma\n");
	} else if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) {
		printk(KERN_INFO "vmw_pvscsi: using 32bit dma\n");
	} else {
		printk(KERN_ERR "vmw_pvscsi: failed to set DMA mask\n");
		goto out_disable_device;
	}

	/*
	 * Let's use a temp pvscsi_adapter struct until we find the number of
	 * targets on the adapter, after that we will switch to the real
	 * allocated struct.
	 */
	adapter = &adapter_temp;
	memset(adapter, 0, sizeof(*adapter));
	adapter->dev  = pdev;
	adapter->rev = pdev->revision;

	if (pci_request_regions(pdev, "vmw_pvscsi")) {
		printk(KERN_ERR "vmw_pvscsi: pci memory selection failed\n");
		goto out_disable_device;
	}

	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
		if ((pci_resource_flags(pdev, i) & PCI_BASE_ADDRESS_SPACE_IO))
			continue;

		if (pci_resource_len(pdev, i) < PVSCSI_MEM_SPACE_SIZE)
			continue;

		break;
	}

	if (i == DEVICE_COUNT_RESOURCE) {
		printk(KERN_ERR
		       "vmw_pvscsi: adapter has no suitable MMIO region\n");
		goto out_release_resources_and_disable;
	}

	adapter->mmioBase = pci_iomap(pdev, i, PVSCSI_MEM_SPACE_SIZE);

	if (!adapter->mmioBase) {
		printk(KERN_ERR
		       "vmw_pvscsi: can't iomap for BAR %d memsize %lu\n",
		       i, PVSCSI_MEM_SPACE_SIZE);
		goto out_release_resources_and_disable;
	}

	pci_set_master(pdev);

	/*
	 * Ask the device for max number of targets before deciding the
	 * default pvscsi_ring_pages value.
	 */
	max_id = pvscsi_get_max_targets(adapter);
	printk(KERN_INFO "vmw_pvscsi: max_id: %u\n", max_id);

	if (pvscsi_ring_pages == 0)
		/*
		 * Set the right default value: for up to 16 targets use 8
		 * pages, above that use the maximum.
		 */
		pvscsi_ring_pages = (max_id > 16) ?
			PVSCSI_SETUP_RINGS_MAX_NUM_PAGES :
			PVSCSI_DEFAULT_NUM_PAGES_PER_RING;
	printk(KERN_INFO
	       "vmw_pvscsi: setting ring_pages to %d\n",
	       pvscsi_ring_pages);

	pvscsi_template.can_queue =
		min(PVSCSI_MAX_NUM_PAGES_REQ_RING, pvscsi_ring_pages) *
		PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
	pvscsi_template.cmd_per_lun =
		min(pvscsi_template.can_queue, pvscsi_cmd_per_lun);
	host = scsi_host_alloc(&pvscsi_template, sizeof(struct pvscsi_adapter));
	if (!host) {
		printk(KERN_ERR "vmw_pvscsi: failed to allocate host\n");
		goto out_release_resources_and_disable;
	}

	/*
	 * Let's use the real pvscsi_adapter struct here onwards.
	 */
	adapter = shost_priv(host);
	memset(adapter, 0, sizeof(*adapter));
	adapter->dev  = pdev;
	adapter->host = host;
	/*
	 * Copy back what we already have to the allocated adapter struct.
	 */
	adapter->rev = adapter_temp.rev;
	adapter->mmioBase = adapter_temp.mmioBase;

	spin_lock_init(&adapter->hw_lock);
	host->max_channel = 0;
	host->max_lun     = 1;
	host->max_cmd_len = 16;
	host->max_id      = max_id;

	pci_set_drvdata(pdev, host);

	ll_adapter_reset(adapter);

	adapter->use_msg = pvscsi_setup_msg_workqueue(adapter);

	error = pvscsi_allocate_rings(adapter);
	if (error) {
		printk(KERN_ERR "vmw_pvscsi: unable to allocate ring memory\n");
		goto out_release_resources;
	}

	/*
	 * From this point on we should reset the adapter if anything goes
	 * wrong.
	 */
	pvscsi_setup_all_rings(adapter);

	adapter->cmd_map = kcalloc(adapter->req_depth,
				   sizeof(struct pvscsi_ctx), GFP_KERNEL);
	if (!adapter->cmd_map) {
		printk(KERN_ERR "vmw_pvscsi: failed to allocate memory.\n");
		error = -ENOMEM;
		goto out_reset_adapter;
	}

	INIT_LIST_HEAD(&adapter->cmd_pool);
	for (i = 0; i < adapter->req_depth; i++) {
		struct pvscsi_ctx *ctx = adapter->cmd_map + i;
		list_add(&ctx->list, &adapter->cmd_pool);
	}

	error = pvscsi_allocate_sg(adapter);
	if (error) {
		printk(KERN_ERR "vmw_pvscsi: unable to allocate s/g table\n");
		goto out_reset_adapter;
	}

	if (pvscsi_disable_msix)
		irq_flag &= ~PCI_IRQ_MSIX;
	if (pvscsi_disable_msi)
		irq_flag &= ~PCI_IRQ_MSI;

	error = pci_alloc_irq_vectors(adapter->dev, 1, 1, irq_flag);
	if (error < 0)
		goto out_reset_adapter;

	adapter->use_req_threshold = pvscsi_setup_req_threshold(adapter, true);
	printk(KERN_DEBUG "vmw_pvscsi: driver-based request coalescing %sabled\n",
	       adapter->use_req_threshold ? "en" : "dis");

	if (adapter->dev->msix_enabled || adapter->dev->msi_enabled) {
		printk(KERN_INFO "vmw_pvscsi: using MSI%s\n",
			adapter->dev->msix_enabled ? "-X" : "");
		error = request_irq(pci_irq_vector(pdev, 0), pvscsi_isr,
				0, "vmw_pvscsi", adapter);
	} else {
		printk(KERN_INFO "vmw_pvscsi: using INTx\n");
		error = request_irq(pci_irq_vector(pdev, 0), pvscsi_shared_isr,
				IRQF_SHARED, "vmw_pvscsi", adapter);
	}

	if (error) {
		printk(KERN_ERR
		       "vmw_pvscsi: unable to request IRQ: %d\n", error);
		goto out_reset_adapter;
	}

	error = scsi_add_host(host, &pdev->dev);
	if (error) {
		printk(KERN_ERR
		       "vmw_pvscsi: scsi_add_host failed: %d\n", error);
		goto out_reset_adapter;
	}

	dev_info(&pdev->dev, "VMware PVSCSI rev %d host #%u\n",
		 adapter->rev, host->host_no);

	pvscsi_unmask_intr(adapter);

	scsi_scan_host(host);

	return 0;

out_reset_adapter:
	ll_adapter_reset(adapter);
out_release_resources:
	pvscsi_shutdown_intr(adapter);
	pvscsi_release_resources(adapter);
	scsi_host_put(host);
out_disable_device:
	pci_disable_device(pdev);

	return error;

out_release_resources_and_disable:
	pvscsi_shutdown_intr(adapter);
	pvscsi_release_resources(adapter);
	goto out_disable_device;
}

static void __pvscsi_shutdown(struct pvscsi_adapter *adapter)
{
	pvscsi_mask_intr(adapter);

	if (adapter->workqueue)
		flush_workqueue(adapter->workqueue);

	pvscsi_shutdown_intr(adapter);

	pvscsi_process_request_ring(adapter);
	pvscsi_process_completion_ring(adapter);
	ll_adapter_reset(adapter);
}

static void pvscsi_shutdown(struct pci_dev *dev)
{
	struct Scsi_Host *host = pci_get_drvdata(dev);
	struct pvscsi_adapter *adapter = shost_priv(host);

	__pvscsi_shutdown(adapter);
}

static void pvscsi_remove(struct pci_dev *pdev)
{
	struct Scsi_Host *host = pci_get_drvdata(pdev);
	struct pvscsi_adapter *adapter = shost_priv(host);

	scsi_remove_host(host);

	__pvscsi_shutdown(adapter);
	pvscsi_release_resources(adapter);

	scsi_host_put(host);

	pci_disable_device(pdev);
}

static struct pci_driver pvscsi_pci_driver = {
	.name		= "vmw_pvscsi",
	.id_table	= pvscsi_pci_tbl,
	.probe		= pvscsi_probe,
	.remove		= pvscsi_remove,
	.shutdown       = pvscsi_shutdown,
};

static int __init pvscsi_init(void)
{
	pr_info("%s - version %s\n",
		PVSCSI_LINUX_DRIVER_DESC, PVSCSI_DRIVER_VERSION_STRING);
	return pci_register_driver(&pvscsi_pci_driver);
}

static void __exit pvscsi_exit(void)
{
	pci_unregister_driver(&pvscsi_pci_driver);
}

module_init(pvscsi_init);
module_exit(pvscsi_exit);