diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-21 10:57:33 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-21 10:57:33 -0800 |
| commit | 772c8f6f3bbd3ceb94a89373473083e3e1113554 (patch) | |
| tree | d2b34e8f1841a169d59adf53074de217a9e0f977 /include/linux/blkdev.h | |
| parent | fd4a61e08aa79f2b7835b25c6f94f27bd2d65990 (diff) | |
| parent | 818551e2b2c662a1b26de6b4f7d6b8411a838d18 (diff) | |
| download | cachepc-linux-772c8f6f3bbd3ceb94a89373473083e3e1113554.tar.gz cachepc-linux-772c8f6f3bbd3ceb94a89373473083e3e1113554.zip | |
Merge tag 'for-4.11/linus-merge-signed' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe:
- blk-mq scheduling framework from me and Omar, with a port of the
deadline scheduler for this framework. A port of BFQ from Paolo is in
the works, and should be ready for 4.12.
- Various fixups and improvements to the above scheduling framework
from Omar, Paolo, Bart, me, others.
- Cleanup of the exported sysfs blk-mq data into debugfs, from Omar.
This allows us to export more information that helps debug hangs or
performance issues, without cluttering or abusing the sysfs API.
- Fixes for the sbitmap code, the scalable bitmap code that was
migrated from blk-mq, from Omar.
- Removal of the BLOCK_PC support in struct request, and refactoring of
carrying SCSI payloads in the block layer. This cleans up the code
nicely, and enables us to kill the SCSI specific parts of struct
request, shrinking it down nicely. From Christoph mainly, with help
from Hannes.
- Support for ranged discard requests and discard merging, also from
Christoph.
- Support for OPAL in the block layer, and for NVMe as well. Mainly
from Scott Bauer, with fixes/updates from various others folks.
- Error code fixup for gdrom from Christophe.
- cciss pci irq allocation cleanup from Christoph.
- Making the cdrom device operations read only, from Kees Cook.
- Fixes for duplicate bdi registrations and bdi/queue life time
problems from Jan and Dan.
- Set of fixes and updates for lightnvm, from Matias and Javier.
- A few fixes for nbd from Josef, using idr to name devices and a
workqueue deadlock fix on receive. Also marks Josef as the current
maintainer of nbd.
- Fix from Josef, overwriting queue settings when the number of
hardware queues is updated for a blk-mq device.
- NVMe fix from Keith, ensuring that we don't repeatedly mark and IO
aborted, if we didn't end up aborting it.
- SG gap merging fix from Ming Lei for block.
- Loop fix also from Ming, fixing a race and crash between setting loop
status and IO.
- Two block race fixes from Tahsin, fixing request list iteration and
fixing a race between device registration and udev device add
notifiations.
- Double free fix from cgroup writeback, from Tejun.
- Another double free fix in blkcg, from Hou Tao.
- Partition overflow fix for EFI from Alden Tondettar.
* tag 'for-4.11/linus-merge-signed' of git://git.kernel.dk/linux-block: (156 commits)
nvme: Check for Security send/recv support before issuing commands.
block/sed-opal: allocate struct opal_dev dynamically
block/sed-opal: tone down not supported warnings
block: don't defer flushes on blk-mq + scheduling
blk-mq-sched: ask scheduler for work, if we failed dispatching leftovers
blk-mq: don't special case flush inserts for blk-mq-sched
blk-mq-sched: don't add flushes to the head of requeue queue
blk-mq: have blk_mq_dispatch_rq_list() return if we queued IO or not
block: do not allow updates through sysfs until registration completes
lightnvm: set default lun range when no luns are specified
lightnvm: fix off-by-one error on target initialization
Maintainers: Modify SED list from nvme to block
Move stack parameters for sed_ioctl to prevent oversized stack with CONFIG_KASAN
uapi: sed-opal fix IOW for activate lsp to use correct struct
cdrom: Make device operations read-only
elevator: fix loading wrong elevator type for blk-mq devices
cciss: switch to pci_irq_alloc_vectors
block/loop: fix race between I/O and set_status
blk-mq-sched: don't hold queue_lock when calling exit_icq
block: set make_request_fn manually in blk_mq_update_nr_hw_queues
...
Diffstat (limited to 'include/linux/blkdev.h')
| -rw-r--r-- | include/linux/blkdev.h | 124 |
1 files changed, 89 insertions, 35 deletions
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1ca8e8fd1078..aecca0e7d9ca 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -71,15 +71,6 @@ struct request_list { }; /* - * request command types - */ -enum rq_cmd_type_bits { - REQ_TYPE_FS = 1, /* fs request */ - REQ_TYPE_BLOCK_PC, /* scsi command */ - REQ_TYPE_DRV_PRIV, /* driver defined types from here */ -}; - -/* * request flags */ typedef __u32 __bitwise req_flags_t; @@ -128,8 +119,6 @@ typedef __u32 __bitwise req_flags_t; #define RQF_NOMERGE_FLAGS \ (RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD) -#define BLK_MAX_CDB 16 - /* * Try to put the fields that are referenced together in the same cacheline. * @@ -147,13 +136,16 @@ struct request { struct blk_mq_ctx *mq_ctx; int cpu; - unsigned cmd_type; unsigned int cmd_flags; /* op and common flags */ req_flags_t rq_flags; + + int internal_tag; + unsigned long atomic_flags; /* the following two fields are internal, NEVER access directly */ unsigned int __data_len; /* total data len */ + int tag; sector_t __sector; /* sector cursor */ struct bio *bio; @@ -222,20 +214,9 @@ struct request { void *special; /* opaque pointer available for LLD use */ - int tag; int errors; - /* - * when request is used as a packet command carrier - */ - unsigned char __cmd[BLK_MAX_CDB]; - unsigned char *cmd; - unsigned short cmd_len; - unsigned int extra_len; /* length of alignment and padding */ - unsigned int sense_len; - unsigned int resid_len; /* residual count */ - void *sense; unsigned long deadline; struct list_head timeout_list; @@ -252,6 +233,21 @@ struct request { struct request *next_rq; }; +static inline bool blk_rq_is_scsi(struct request *rq) +{ + return req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT; +} + +static inline bool blk_rq_is_private(struct request *rq) +{ + return req_op(rq) == REQ_OP_DRV_IN || req_op(rq) == REQ_OP_DRV_OUT; +} + +static inline bool blk_rq_is_passthrough(struct request *rq) +{ + return blk_rq_is_scsi(rq) || blk_rq_is_private(rq); +} + static inline unsigned short req_get_ioprio(struct request *req) { return req->ioprio; @@ -271,6 +267,8 @@ typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); typedef int (lld_busy_fn) (struct request_queue *q); typedef int (bsg_job_fn) (struct bsg_job *); +typedef int (init_rq_fn)(struct request_queue *, struct request *, gfp_t); +typedef void (exit_rq_fn)(struct request_queue *, struct request *); enum blk_eh_timer_return { BLK_EH_NOT_HANDLED, @@ -333,6 +331,7 @@ struct queue_limits { unsigned short logical_block_size; unsigned short max_segments; unsigned short max_integrity_segments; + unsigned short max_discard_segments; unsigned char misaligned; unsigned char discard_misaligned; @@ -406,8 +405,10 @@ struct request_queue { rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; lld_busy_fn *lld_busy_fn; + init_rq_fn *init_rq_fn; + exit_rq_fn *exit_rq_fn; - struct blk_mq_ops *mq_ops; + const struct blk_mq_ops *mq_ops; unsigned int *mq_map; @@ -432,7 +433,8 @@ struct request_queue { */ struct delayed_work delay_work; - struct backing_dev_info backing_dev_info; + struct backing_dev_info *backing_dev_info; + struct disk_devt *disk_devt; /* * The queue owner gets to use this for whatever they like. @@ -569,7 +571,15 @@ struct request_queue { struct list_head tag_set_list; struct bio_set *bio_split; +#ifdef CONFIG_BLK_DEBUG_FS + struct dentry *debugfs_dir; + struct dentry *mq_debugfs_dir; +#endif + bool mq_sysfs_init_done; + + size_t cmd_size; + void *rq_alloc_data; }; #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ @@ -600,6 +610,7 @@ struct request_queue { #define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */ #define QUEUE_FLAG_DAX 26 /* device supports DAX */ #define QUEUE_FLAG_STATS 27 /* track rq completion times */ +#define QUEUE_FLAG_RESTART 28 /* queue needs restart at completion */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -695,9 +706,10 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ REQ_FAILFAST_DRIVER)) -#define blk_account_rq(rq) \ - (((rq)->rq_flags & RQF_STARTED) && \ - ((rq)->cmd_type == REQ_TYPE_FS)) +static inline bool blk_account_rq(struct request *rq) +{ + return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq); +} #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) @@ -772,7 +784,7 @@ static inline void blk_clear_rl_full(struct request_list *rl, bool sync) static inline bool rq_mergeable(struct request *rq) { - if (rq->cmd_type != REQ_TYPE_FS) + if (blk_rq_is_passthrough(rq)) return false; if (req_op(rq) == REQ_OP_FLUSH) @@ -910,7 +922,6 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); extern struct request *blk_get_request(struct request_queue *, int, gfp_t); -extern void blk_rq_set_block_pc(struct request *); extern void blk_requeue_request(struct request_queue *, struct request *); extern int blk_lld_busy(struct request_queue *q); extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, @@ -1047,7 +1058,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq, { struct request_queue *q = rq->q; - if (unlikely(rq->cmd_type != REQ_TYPE_FS)) + if (blk_rq_is_passthrough(rq)) return q->limits.max_hw_sectors; if (!q->limits.chunk_sectors || @@ -1129,14 +1140,15 @@ extern void blk_unprep_request(struct request *); extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id); extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); -extern struct request_queue *blk_init_allocated_queue(struct request_queue *, - request_fn_proc *, spinlock_t *); +extern int blk_init_allocated_queue(struct request_queue *); extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_segments(struct request_queue *, unsigned short); +extern void blk_queue_max_discard_segments(struct request_queue *, + unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors); @@ -1179,8 +1191,16 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); -extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); +/* + * Number of physical segments as sent to the device. + * + * Normally this is the number of discontiguous data segments sent by the + * submitter. But for data-less command like discard we might have no + * actual data segments submitted, but the driver might have to add it's + * own special payload. In that case we still return 1 here so that this + * special payload will be mapped. + */ static inline unsigned short blk_rq_nr_phys_segments(struct request *rq) { if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) @@ -1188,6 +1208,15 @@ static inline unsigned short blk_rq_nr_phys_segments(struct request *rq) return rq->nr_phys_segments; } +/* + * Number of discard segments (or ranges) the driver needs to fill in. + * Each discard bio merged into a request is counted as one segment. + */ +static inline unsigned short blk_rq_nr_discard_segments(struct request *rq) +{ + return max_t(unsigned short, rq->nr_phys_segments, 1); +} + extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); extern void blk_dump_rq_flags(struct request *, char *); extern long nr_blockdev_pages(void); @@ -1376,6 +1405,11 @@ static inline unsigned short queue_max_segments(struct request_queue *q) return q->limits.max_segments; } +static inline unsigned short queue_max_discard_segments(struct request_queue *q) +{ + return q->limits.max_discard_segments; +} + static inline unsigned int queue_max_segment_size(struct request_queue *q) { return q->limits.max_segment_size; @@ -1620,6 +1654,25 @@ static inline bool bvec_gap_to_prev(struct request_queue *q, return __bvec_gap_to_prev(q, bprv, offset); } +/* + * Check if the two bvecs from two bios can be merged to one segment. + * If yes, no need to check gap between the two bios since the 1st bio + * and the 1st bvec in the 2nd bio can be handled in one segment. + */ +static inline bool bios_segs_mergeable(struct request_queue *q, + struct bio *prev, struct bio_vec *prev_last_bv, + struct bio_vec *next_first_bv) +{ + if (!BIOVEC_PHYS_MERGEABLE(prev_last_bv, next_first_bv)) + return false; + if (!BIOVEC_SEG_BOUNDARY(q, prev_last_bv, next_first_bv)) + return false; + if (prev->bi_seg_back_size + next_first_bv->bv_len > + queue_max_segment_size(q)) + return false; + return true; +} + static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, struct bio *next) { @@ -1629,7 +1682,8 @@ static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, bio_get_last_bvec(prev, &pb); bio_get_first_bvec(next, &nb); - return __bvec_gap_to_prev(q, &pb, nb.bv_offset); + if (!bios_segs_mergeable(q, prev, &pb, &nb)) + return __bvec_gap_to_prev(q, &pb, nb.bv_offset); } return false; |
