Merge tag 'for-4.11/linus-merge-signed' of git://git.kernel.dk/linux-block

Pull block layer updates from Jens Axboe: - blk-mq scheduling framework from me and Omar, with a port of the deadline scheduler for this framework. A port of BFQ from Paolo is in the works, and should be ready for 4.12. - Various fixups and improvements to the above scheduling framework from Omar, Paolo, Bart, me, others. - Cleanup of the exported sysfs blk-mq data into debugfs, from Omar. This allows us to export more information that helps debug hangs or performance issues, without cluttering or abusing the sysfs API. - Fixes for the sbitmap code, the scalable bitmap code that was migrated from blk-mq, from Omar. - Removal of the BLOCK_PC support in struct request, and refactoring of carrying SCSI payloads in the block layer. This cleans up the code nicely, and enables us to kill the SCSI specific parts of struct request, shrinking it down nicely. From Christoph mainly, with help from Hannes. - Support for ranged discard requests and discard merging, also from Christoph. - Support for OPAL in the block layer, and for NVMe as well. Mainly from Scott Bauer, with fixes/updates from various others folks. - Error code fixup for gdrom from Christophe. - cciss pci irq allocation cleanup from Christoph. - Making the cdrom device operations read only, from Kees Cook. - Fixes for duplicate bdi registrations and bdi/queue life time problems from Jan and Dan. - Set of fixes and updates for lightnvm, from Matias and Javier. - A few fixes for nbd from Josef, using idr to name devices and a workqueue deadlock fix on receive. Also marks Josef as the current maintainer of nbd. - Fix from Josef, overwriting queue settings when the number of hardware queues is updated for a blk-mq device. - NVMe fix from Keith, ensuring that we don't repeatedly mark and IO aborted, if we didn't end up aborting it. - SG gap merging fix from Ming Lei for block. - Loop fix also from Ming, fixing a race and crash between setting loop status and IO. - Two block race fixes from Tahsin, fixing request list iteration and fixing a race between device registration and udev device add notifiations. - Double free fix from cgroup writeback, from Tejun. - Another double free fix in blkcg, from Hou Tao. - Partition overflow fix for EFI from Alden Tondettar. * tag 'for-4.11/linus-merge-signed' of git://git.kernel.dk/linux-block: (156 commits) nvme: Check for Security send/recv support before issuing commands. block/sed-opal: allocate struct opal_dev dynamically block/sed-opal: tone down not supported warnings block: don't defer flushes on blk-mq + scheduling blk-mq-sched: ask scheduler for work, if we failed dispatching leftovers blk-mq: don't special case flush inserts for blk-mq-sched blk-mq-sched: don't add flushes to the head of requeue queue blk-mq: have blk_mq_dispatch_rq_list() return if we queued IO or not block: do not allow updates through sysfs until registration completes lightnvm: set default lun range when no luns are specified lightnvm: fix off-by-one error on target initialization Maintainers: Modify SED list from nvme to block Move stack parameters for sed_ioctl to prevent oversized stack with CONFIG_KASAN uapi: sed-opal fix IOW for activate lsp to use correct struct cdrom: Make device operations read-only elevator: fix loading wrong elevator type for blk-mq devices cciss: switch to pci_irq_alloc_vectors block/loop: fix race between I/O and set_status blk-mq-sched: don't hold queue_lock when calling exit_icq block: set make_request_fn manually in blk_mq_update_nr_hw_queues ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2017-02-21 10:57:33 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2017-02-21 10:57:33 -0800
commit: 772c8f6f3bbd3ceb94a89373473083e3e1113554 (patch)
tree: d2b34e8f1841a169d59adf53074de217a9e0f977 /include/linux/blkdev.h
parent: fd4a61e08aa79f2b7835b25c6f94f27bd2d65990 (diff)
parent: 818551e2b2c662a1b26de6b4f7d6b8411a838d18 (diff)
download: cachepc-linux-772c8f6f3bbd3ceb94a89373473083e3e1113554.tar.gz
cachepc-linux-772c8f6f3bbd3ceb94a89373473083e3e1113554.zip
1 files changed, 89 insertions, 35 deletions
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1ca8e8fd1078..aecca0e7d9ca 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -71,15 +71,6 @@ struct request_list {
 };
 
 /*
- * request command types
- */
-enum rq_cmd_type_bits {
-	REQ_TYPE_FS		= 1,	/* fs request */
-	REQ_TYPE_BLOCK_PC,		/* scsi command */
-	REQ_TYPE_DRV_PRIV,		/* driver defined types from here */
-};
-
-/*
  * request flags */
 typedef __u32 __bitwise req_flags_t;
 
@@ -128,8 +119,6 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_NOMERGE_FLAGS \
 	(RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
 
-#define BLK_MAX_CDB	16
-
 /*
  * Try to put the fields that are referenced together in the same cacheline.
  *
@@ -147,13 +136,16 @@ struct request {
 	struct blk_mq_ctx *mq_ctx;
 
 	int cpu;
-	unsigned cmd_type;
 	unsigned int cmd_flags;		/* op and common flags */
 	req_flags_t rq_flags;
+
+	int internal_tag;
+
 	unsigned long atomic_flags;
 
 	/* the following two fields are internal, NEVER access directly */
 	unsigned int __data_len;	/* total data len */
+	int tag;
 	sector_t __sector;		/* sector cursor */
 
 	struct bio *bio;
@@ -222,20 +214,9 @@ struct request {
 
 	void *special;		/* opaque pointer available for LLD use */
 
-	int tag;
 	int errors;
 
-	/*
-	 * when request is used as a packet command carrier
-	 */
-	unsigned char __cmd[BLK_MAX_CDB];
-	unsigned char *cmd;
-	unsigned short cmd_len;
-
 	unsigned int extra_len;	/* length of alignment and padding */
-	unsigned int sense_len;
-	unsigned int resid_len;	/* residual count */
-	void *sense;
 
 	unsigned long deadline;
 	struct list_head timeout_list;
@@ -252,6 +233,21 @@ struct request {
 	struct request *next_rq;
 };
 
+static inline bool blk_rq_is_scsi(struct request *rq)
+{
+	return req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT;
+}
+
+static inline bool blk_rq_is_private(struct request *rq)
+{
+	return req_op(rq) == REQ_OP_DRV_IN || req_op(rq) == REQ_OP_DRV_OUT;
+}
+
+static inline bool blk_rq_is_passthrough(struct request *rq)
+{
+	return blk_rq_is_scsi(rq) || blk_rq_is_private(rq);
+}
+
 static inline unsigned short req_get_ioprio(struct request *req)
 {
 	return req->ioprio;
@@ -271,6 +267,8 @@ typedef void (softirq_done_fn)(struct request *);
 typedef int (dma_drain_needed_fn)(struct request *);
 typedef int (lld_busy_fn) (struct request_queue *q);
 typedef int (bsg_job_fn) (struct bsg_job *);
+typedef int (init_rq_fn)(struct request_queue *, struct request *, gfp_t);
+typedef void (exit_rq_fn)(struct request_queue *, struct request *);
 
 enum blk_eh_timer_return {
 	BLK_EH_NOT_HANDLED,
@@ -333,6 +331,7 @@ struct queue_limits {
 	unsigned short		logical_block_size;
 	unsigned short		max_segments;
 	unsigned short		max_integrity_segments;
+	unsigned short		max_discard_segments;
 
 	unsigned char		misaligned;
 	unsigned char		discard_misaligned;
@@ -406,8 +405,10 @@ struct request_queue {
 	rq_timed_out_fn		*rq_timed_out_fn;
 	dma_drain_needed_fn	*dma_drain_needed;
 	lld_busy_fn		*lld_busy_fn;
+	init_rq_fn		*init_rq_fn;
+	exit_rq_fn		*exit_rq_fn;
 
-	struct blk_mq_ops	*mq_ops;
+	const struct blk_mq_ops	*mq_ops;
 
 	unsigned int		*mq_map;
 
@@ -432,7 +433,8 @@ struct request_queue {
 	 */
 	struct delayed_work	delay_work;
 
-	struct backing_dev_info	backing_dev_info;
+	struct backing_dev_info	*backing_dev_info;
+	struct disk_devt	*disk_devt;
 
 	/*
 	 * The queue owner gets to use this for whatever they like.
@@ -569,7 +571,15 @@ struct request_queue {
 	struct list_head	tag_set_list;
 	struct bio_set		*bio_split;
 
+#ifdef CONFIG_BLK_DEBUG_FS
+	struct dentry		*debugfs_dir;
+	struct dentry		*mq_debugfs_dir;
+#endif
+
 	bool			mq_sysfs_init_done;
+
+	size_t			cmd_size;
+	void			*rq_alloc_data;
 };
 
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
@@ -600,6 +610,7 @@ struct request_queue {
 #define QUEUE_FLAG_FLUSH_NQ    25	/* flush not queueuable */
 #define QUEUE_FLAG_DAX         26	/* device supports DAX */
 #define QUEUE_FLAG_STATS       27	/* track rq completion times */
+#define QUEUE_FLAG_RESTART     28	/* queue needs restart at completion */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
@@ -695,9 +706,10 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 	((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
 			     REQ_FAILFAST_DRIVER))
 
-#define blk_account_rq(rq) \
-	(((rq)->rq_flags & RQF_STARTED) && \
-	 ((rq)->cmd_type == REQ_TYPE_FS))
+static inline bool blk_account_rq(struct request *rq)
+{
+	return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq);
+}
 
 #define blk_rq_cpu_valid(rq)	((rq)->cpu != -1)
 #define blk_bidi_rq(rq)		((rq)->next_rq != NULL)
@@ -772,7 +784,7 @@ static inline void blk_clear_rl_full(struct request_list *rl, bool sync)
 
 static inline bool rq_mergeable(struct request *rq)
 {
-	if (rq->cmd_type != REQ_TYPE_FS)
+	if (blk_rq_is_passthrough(rq))
 		return false;
 
 	if (req_op(rq) == REQ_OP_FLUSH)
@@ -910,7 +922,6 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq);
 extern void blk_put_request(struct request *);
 extern void __blk_put_request(struct request_queue *, struct request *);
 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
-extern void blk_rq_set_block_pc(struct request *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
 extern int blk_lld_busy(struct request_queue *q);
 extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
@@ -1047,7 +1058,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
 {
 	struct request_queue *q = rq->q;
 
-	if (unlikely(rq->cmd_type != REQ_TYPE_FS))
+	if (blk_rq_is_passthrough(rq))
 		return q->limits.max_hw_sectors;
 
 	if (!q->limits.chunk_sectors ||
@@ -1129,14 +1140,15 @@ extern void blk_unprep_request(struct request *);
 extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn,
 					spinlock_t *lock, int node_id);
 extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *);
-extern struct request_queue *blk_init_allocated_queue(struct request_queue *,
-						      request_fn_proc *, spinlock_t *);
+extern int blk_init_allocated_queue(struct request_queue *);
 extern void blk_cleanup_queue(struct request_queue *);
 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_segments(struct request_queue *, unsigned short);
+extern void blk_queue_max_discard_segments(struct request_queue *,
+		unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
 extern void blk_queue_max_discard_sectors(struct request_queue *q,
 		unsigned int max_discard_sectors);
@@ -1179,8 +1191,16 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
 extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
-extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 
+/*
+ * Number of physical segments as sent to the device.
+ *
+ * Normally this is the number of discontiguous data segments sent by the
+ * submitter.  But for data-less command like discard we might have no
+ * actual data segments submitted, but the driver might have to add it's
+ * own special payload.  In that case we still return 1 here so that this
+ * special payload will be mapped.
+ */
 static inline unsigned short blk_rq_nr_phys_segments(struct request *rq)
 {
 	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
@@ -1188,6 +1208,15 @@ static inline unsigned short blk_rq_nr_phys_segments(struct request *rq)
 	return rq->nr_phys_segments;
 }
 
+/*
+ * Number of discard segments (or ranges) the driver needs to fill in.
+ * Each discard bio merged into a request is counted as one segment.
+ */
+static inline unsigned short blk_rq_nr_discard_segments(struct request *rq)
+{
+	return max_t(unsigned short, rq->nr_phys_segments, 1);
+}
+
 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
 extern void blk_dump_rq_flags(struct request *, char *);
 extern long nr_blockdev_pages(void);
@@ -1376,6 +1405,11 @@ static inline unsigned short queue_max_segments(struct request_queue *q)
 	return q->limits.max_segments;
 }
 
+static inline unsigned short queue_max_discard_segments(struct request_queue *q)
+{
+	return q->limits.max_discard_segments;
+}
+
 static inline unsigned int queue_max_segment_size(struct request_queue *q)
 {
 	return q->limits.max_segment_size;
@@ -1620,6 +1654,25 @@ static inline bool bvec_gap_to_prev(struct request_queue *q,
 	return __bvec_gap_to_prev(q, bprv, offset);
 }
 
+/*
+ * Check if the two bvecs from two bios can be merged to one segment.
+ * If yes, no need to check gap between the two bios since the 1st bio
+ * and the 1st bvec in the 2nd bio can be handled in one segment.
+ */
+static inline bool bios_segs_mergeable(struct request_queue *q,
+		struct bio *prev, struct bio_vec *prev_last_bv,
+		struct bio_vec *next_first_bv)
+{
+	if (!BIOVEC_PHYS_MERGEABLE(prev_last_bv, next_first_bv))
+		return false;
+	if (!BIOVEC_SEG_BOUNDARY(q, prev_last_bv, next_first_bv))
+		return false;
+	if (prev->bi_seg_back_size + next_first_bv->bv_len >
+			queue_max_segment_size(q))
+		return false;
+	return true;
+}
+
 static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
 			 struct bio *next)
 {
@@ -1629,7 +1682,8 @@ static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
 		bio_get_last_bvec(prev, &pb);
 		bio_get_first_bvec(next, &nb);
 
-		return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
+		if (!bios_segs_mergeable(q, prev, &pb, &nb))
+			return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
 	}
 
 	return false;
author	Linus Torvalds <torvalds@linux-foundation.org>	2017-02-21 10:57:33 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-02-21 10:57:33 -0800
commit	772c8f6f3bbd3ceb94a89373473083e3e1113554 (patch)
tree	d2b34e8f1841a169d59adf53074de217a9e0f977 /include/linux/blkdev.h
parent	fd4a61e08aa79f2b7835b25c6f94f27bd2d65990 (diff)
parent	818551e2b2c662a1b26de6b4f7d6b8411a838d18 (diff)
download	cachepc-linux-772c8f6f3bbd3ceb94a89373473083e3e1113554.tar.gz cachepc-linux-772c8f6f3bbd3ceb94a89373473083e3e1113554.zip