From c4cf5261f8bffd9de132b50660a69148e7575bd6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 17 Apr 2015 16:15:18 -0600 Subject: bio: skip atomic inc/dec of ->bi_remaining for non-chains Struct bio has an atomic ref count for chained bio's, and we use this to know when to end IO on the bio. However, most bio's are not chained, so we don't need to always introduce this atomic operation as part of ending IO. Add a helper to elevate the bi_remaining count, and flag the bio as now actually needing the decrement at end_io time. Rename the field to __bi_remaining to catch any current users of this doing the incrementing manually. For high IOPS workloads, this reduces the overhead of bio_endio() substantially. Tested-by: Robert Elliott Acked-by: Kent Overstreet Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/bio.h | 11 +++++++++++ include/linux/blk_types.h | 3 ++- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index da3a127c9958..8bfe9eee6d1a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -644,6 +644,17 @@ static inline struct bio *bio_list_get(struct bio_list *bl) return bio; } +/* + * Increment chain count for the bio. Make sure the CHAIN flag update + * is visible before the raised count. + */ +static inline void bio_inc_remaining(struct bio *bio) +{ + bio->bi_flags |= (1 << BIO_CHAIN); + smp_mb__before_atomic(); + atomic_inc(&bio->__bi_remaining); +} + /* * bio_set is used to allow other portions of the IO system to * allocate their own private memory pools for bio and iovec structures. diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index a1b25e35ea5f..8b07e0603887 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -65,7 +65,7 @@ struct bio { unsigned int bi_seg_front_size; unsigned int bi_seg_back_size; - atomic_t bi_remaining; + atomic_t __bi_remaining; bio_end_io_t *bi_end_io; @@ -122,6 +122,7 @@ struct bio { #define BIO_NULL_MAPPED 8 /* contains invalid user pages */ #define BIO_QUIET 9 /* Make BIO Quiet */ #define BIO_SNAP_STABLE 10 /* bio data must be snapshotted during write */ +#define BIO_CHAIN 11 /* chained bio, ->bi_remaining in effect */ /* * Flags starting here get preserved by bio_reset() - this includes -- cgit v1.2.3-71-gd317 From dac56212e8127dbc0bff7be35c508bc280213309 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 17 Apr 2015 16:23:59 -0600 Subject: bio: skip atomic inc/dec of ->bi_cnt for most use cases Struct bio has a reference count that controls when it can be freed. Most uses cases is allocating the bio, which then returns with a single reference to it, doing IO, and then dropping that single reference. We can remove this atomic_dec_and_test() in the completion path, if nobody else is holding a reference to the bio. If someone does call bio_get() on the bio, then we flag the bio as now having valid count and that we must properly honor the reference count when it's being put. Tested-by: Robert Elliott Signed-off-by: Jens Axboe --- block/bio.c | 18 +++++++++++------- drivers/md/bcache/request.c | 2 +- fs/btrfs/volumes.c | 2 +- fs/xfs/xfs_aops.c | 1 - include/linux/bio.h | 16 +++++++++++++++- include/linux/blk_types.h | 3 ++- 6 files changed, 30 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/block/bio.c b/block/bio.c index 117da319afb6..c2ff8a88aef1 100644 --- a/block/bio.c +++ b/block/bio.c @@ -271,7 +271,7 @@ void bio_init(struct bio *bio) memset(bio, 0, sizeof(*bio)); bio->bi_flags = 1 << BIO_UPTODATE; atomic_set(&bio->__bi_remaining, 1); - atomic_set(&bio->bi_cnt, 1); + atomic_set(&bio->__bi_cnt, 1); } EXPORT_SYMBOL(bio_init); @@ -524,13 +524,17 @@ EXPORT_SYMBOL(zero_fill_bio); **/ void bio_put(struct bio *bio) { - BIO_BUG_ON(!atomic_read(&bio->bi_cnt)); - - /* - * last put frees it - */ - if (atomic_dec_and_test(&bio->bi_cnt)) + if (!bio_flagged(bio, BIO_REFFED)) bio_free(bio); + else { + BIO_BUG_ON(!atomic_read(&bio->__bi_cnt)); + + /* + * last put frees it + */ + if (atomic_dec_and_test(&bio->__bi_cnt)) + bio_free(bio); + } } EXPORT_SYMBOL(bio_put); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index ab43faddb447..1616f668a4cb 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -619,7 +619,7 @@ static void do_bio_hook(struct search *s, struct bio *orig_bio) bio->bi_end_io = request_endio; bio->bi_private = &s->cl; - atomic_set(&bio->bi_cnt, 3); + bio_cnt_set(bio, 3); } static void search_free(struct closure *cl) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 96aebf3bcd5b..8e8d1d1e28a5 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -345,7 +345,7 @@ loop_lock: waitqueue_active(&fs_info->async_submit_wait)) wake_up(&fs_info->async_submit_wait); - BUG_ON(atomic_read(&cur->bi_cnt) == 0); + BUG_ON(atomic_read(&cur->__bi_cnt) == 0); /* * if we're doing the sync list, record that our diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index a56960dd1684..095f94c2d8b5 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -356,7 +356,6 @@ xfs_end_bio( { xfs_ioend_t *ioend = bio->bi_private; - ASSERT(atomic_read(&bio->bi_cnt) >= 1); ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error; /* Toss bio and pass work off to an xfsdatad thread */ diff --git a/include/linux/bio.h b/include/linux/bio.h index 8bfe9eee6d1a..7486ea103f6e 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -290,7 +290,21 @@ static inline unsigned bio_segments(struct bio *bio) * returns. and then bio would be freed memory when if (bio->bi_flags ...) * runs */ -#define bio_get(bio) atomic_inc(&(bio)->bi_cnt) +static inline void bio_get(struct bio *bio) +{ + bio->bi_flags |= (1 << BIO_REFFED); + smp_mb__before_atomic(); + atomic_inc(&bio->__bi_cnt); +} + +static inline void bio_cnt_set(struct bio *bio, unsigned int count) +{ + if (count != 1) { + bio->bi_flags |= (1 << BIO_REFFED); + smp_mb__before_atomic(); + } + atomic_set(&bio->__bi_cnt, count); +} enum bip_flags { BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 8b07e0603887..93d2e7153816 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -92,7 +92,7 @@ struct bio { unsigned short bi_max_vecs; /* max bvl_vecs we can hold */ - atomic_t bi_cnt; /* pin count */ + atomic_t __bi_cnt; /* pin count */ struct bio_vec *bi_io_vec; /* the actual vec list */ @@ -123,6 +123,7 @@ struct bio { #define BIO_QUIET 9 /* Make BIO Quiet */ #define BIO_SNAP_STABLE 10 /* bio data must be snapshotted during write */ #define BIO_CHAIN 11 /* chained bio, ->bi_remaining in effect */ +#define BIO_REFFED 12 /* bio has elevated ->bi_cnt */ /* * Flags starting here get preserved by bio_reset() - this includes -- cgit v1.2.3-71-gd317 From 4f8c9510ba71bb54477841bebb90154ef140860f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Apr 2015 22:37:16 +0200 Subject: block: rename REQ_TYPE_SPECIAL to REQ_TYPE_DRV_PRIV Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 2 +- drivers/block/paride/pd.c | 4 ++-- drivers/block/sx8.c | 4 ++-- drivers/block/virtio_blk.c | 6 +++--- drivers/ide/ide-atapi.c | 4 ++-- drivers/ide/ide-cd.c | 2 +- drivers/ide/ide-cd_ioctl.c | 2 +- drivers/ide/ide-devsets.c | 2 +- drivers/ide/ide-eh.c | 2 +- drivers/ide/ide-floppy.c | 6 +++--- drivers/ide/ide-io.c | 4 ++-- drivers/ide/ide-ioctls.c | 2 +- drivers/ide/ide-park.c | 4 ++-- drivers/ide/ide-tape.c | 4 ++-- include/linux/blkdev.h | 4 ++-- 15 files changed, 26 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 39e5f7fae3ef..9cf52ac328fe 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -592,7 +592,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, fsync_bdev(bdev); mutex_lock(&nbd->tx_lock); blk_rq_init(NULL, &sreq); - sreq.cmd_type = REQ_TYPE_SPECIAL; + sreq.cmd_type = REQ_TYPE_DRV_PRIV; nbd_cmd(&sreq) = NBD_CMD_DISC; /* Check again after getting mutex back. */ diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index d48715b287e6..dbb4da1cdca8 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -442,7 +442,7 @@ static char *pd_buf; /* buffer for request in progress */ static enum action do_pd_io_start(void) { - if (pd_req->cmd_type == REQ_TYPE_SPECIAL) { + if (pd_req->cmd_type == REQ_TYPE_DRV_PRIV) { phase = pd_special; return pd_special(); } @@ -725,7 +725,7 @@ static int pd_special_command(struct pd_unit *disk, if (IS_ERR(rq)) return PTR_ERR(rq); - rq->cmd_type = REQ_TYPE_SPECIAL; + rq->cmd_type = REQ_TYPE_DRV_PRIV; rq->special = func; err = blk_execute_rq(disk->gd->queue, disk->gd, rq, 0); diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 5d552857de41..59c91d49b14b 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -620,7 +620,7 @@ static int carm_array_info (struct carm_host *host, unsigned int array_idx) spin_unlock_irq(&host->lock); DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx); - crq->rq->cmd_type = REQ_TYPE_SPECIAL; + crq->rq->cmd_type = REQ_TYPE_DRV_PRIV; crq->rq->special = crq; blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL); @@ -661,7 +661,7 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func) crq->msg_bucket = (u32) rc; DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx); - crq->rq->cmd_type = REQ_TYPE_SPECIAL; + crq->rq->cmd_type = REQ_TYPE_DRV_PRIV; crq->rq->special = crq; blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 5ea2f0bbbc7c..d4d05f064d39 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -124,7 +124,7 @@ static inline void virtblk_request_done(struct request *req) req->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual); req->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len); req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors); - } else if (req->cmd_type == REQ_TYPE_SPECIAL) { + } else if (req->cmd_type == REQ_TYPE_DRV_PRIV) { req->errors = (error != 0); } @@ -188,7 +188,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, vbr->out_hdr.sector = 0; vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req)); break; - case REQ_TYPE_SPECIAL: + case REQ_TYPE_DRV_PRIV: vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID); vbr->out_hdr.sector = 0; vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req)); @@ -251,7 +251,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str) return PTR_ERR(req); } - req->cmd_type = REQ_TYPE_SPECIAL; + req->cmd_type = REQ_TYPE_DRV_PRIV; err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false); blk_put_request(req); diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index fac3d9da2e07..b367300ce479 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -93,7 +93,7 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, int error; rq = blk_get_request(drive->queue, READ, __GFP_WAIT); - rq->cmd_type = REQ_TYPE_SPECIAL; + rq->cmd_type = REQ_TYPE_DRV_PRIV; rq->special = (char *)pc; if (buf && bufflen) { @@ -477,7 +477,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) if (uptodate == 0) drive->failed_pc = NULL; - if (rq->cmd_type == REQ_TYPE_SPECIAL) { + if (rq->cmd_type == REQ_TYPE_DRV_PRIV) { rq->errors = 0; error = 0; } else { diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 0b510bafd90e..9a32603c6c74 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -799,7 +799,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, cdrom_do_block_pc(drive, rq); break; - case REQ_TYPE_SPECIAL: + case REQ_TYPE_DRV_PRIV: /* right now this can only be a reset... */ uptodate = 1; goto out_end; diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c index 02caa7dd51c8..066e39036518 100644 --- a/drivers/ide/ide-cd_ioctl.c +++ b/drivers/ide/ide-cd_ioctl.c @@ -304,7 +304,7 @@ int ide_cdrom_reset(struct cdrom_device_info *cdi) int ret; rq = blk_get_request(drive->queue, READ, __GFP_WAIT); - rq->cmd_type = REQ_TYPE_SPECIAL; + rq->cmd_type = REQ_TYPE_DRV_PRIV; rq->cmd_flags = REQ_QUIET; ret = blk_execute_rq(drive->queue, cd->disk, rq, 0); blk_put_request(rq); diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c index 9e98122f646e..b05a74d78ef5 100644 --- a/drivers/ide/ide-devsets.c +++ b/drivers/ide/ide-devsets.c @@ -166,7 +166,7 @@ int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting, return setting->set(drive, arg); rq = blk_get_request(q, READ, __GFP_WAIT); - rq->cmd_type = REQ_TYPE_SPECIAL; + rq->cmd_type = REQ_TYPE_DRV_PRIV; rq->cmd_len = 5; rq->cmd[0] = REQ_DEVSET_EXEC; *(int *)&rq->cmd[1] = arg; diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c index 32970664c275..19d809c48a8d 100644 --- a/drivers/ide/ide-eh.c +++ b/drivers/ide/ide-eh.c @@ -147,7 +147,7 @@ static inline void ide_complete_drive_reset(ide_drive_t *drive, int err) { struct request *rq = drive->hwif->rq; - if (rq && rq->cmd_type == REQ_TYPE_SPECIAL && + if (rq && rq->cmd_type == REQ_TYPE_DRV_PRIV && rq->cmd[0] == REQ_DRIVE_RESET) { if (err <= 0 && rq->errors == 0) rq->errors = -EIO; diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 8c6363cdd208..3dbfa5b6db6a 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -97,7 +97,7 @@ static int ide_floppy_callback(ide_drive_t *drive, int dsc) "Aborting request!\n"); } - if (rq->cmd_type == REQ_TYPE_SPECIAL) + if (rq->cmd_type == REQ_TYPE_DRV_PRIV) rq->errors = uptodate ? 0 : IDE_DRV_ERROR_GENERAL; return uptodate; @@ -246,7 +246,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, } else printk(KERN_ERR PFX "%s: I/O error\n", drive->name); - if (rq->cmd_type == REQ_TYPE_SPECIAL) { + if (rq->cmd_type == REQ_TYPE_DRV_PRIV) { rq->errors = 0; ide_complete_rq(drive, 0, blk_rq_bytes(rq)); return ide_stopped; @@ -265,7 +265,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, pc = &floppy->queued_pc; idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block); break; - case REQ_TYPE_SPECIAL: + case REQ_TYPE_DRV_PRIV: case REQ_TYPE_SENSE: pc = (struct ide_atapi_pc *)rq->special; break; diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 177db6d5b2f5..8e55abd03a24 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -135,7 +135,7 @@ EXPORT_SYMBOL(ide_complete_rq); void ide_kill_rq(ide_drive_t *drive, struct request *rq) { - u8 drv_req = (rq->cmd_type == REQ_TYPE_SPECIAL) && rq->rq_disk; + u8 drv_req = (rq->cmd_type == REQ_TYPE_DRV_PRIV) && rq->rq_disk; u8 media = drive->media; drive->failed_pc = NULL; @@ -353,7 +353,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) pm->pm_step == IDE_PM_COMPLETED) ide_complete_pm_rq(drive, rq); return startstop; - } else if (!rq->rq_disk && rq->cmd_type == REQ_TYPE_SPECIAL) + } else if (!rq->rq_disk && rq->cmd_type == REQ_TYPE_DRV_PRIV) /* * TODO: Once all ULDs have been modified to * check for specific op codes rather than diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c index 6233fa2cb8a9..aa2e9b77b20d 100644 --- a/drivers/ide/ide-ioctls.c +++ b/drivers/ide/ide-ioctls.c @@ -222,7 +222,7 @@ static int generic_drive_reset(ide_drive_t *drive) int ret = 0; rq = blk_get_request(drive->queue, READ, __GFP_WAIT); - rq->cmd_type = REQ_TYPE_SPECIAL; + rq->cmd_type = REQ_TYPE_DRV_PRIV; rq->cmd_len = 1; rq->cmd[0] = REQ_DRIVE_RESET; if (blk_execute_rq(drive->queue, NULL, rq, 1)) diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index ca958604cda2..c80868520488 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -34,7 +34,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) rq = blk_get_request(q, READ, __GFP_WAIT); rq->cmd[0] = REQ_PARK_HEADS; rq->cmd_len = 1; - rq->cmd_type = REQ_TYPE_SPECIAL; + rq->cmd_type = REQ_TYPE_DRV_PRIV; rq->special = &timeout; rc = blk_execute_rq(q, NULL, rq, 1); blk_put_request(rq); @@ -51,7 +51,7 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) rq->cmd[0] = REQ_UNPARK_HEADS; rq->cmd_len = 1; - rq->cmd_type = REQ_TYPE_SPECIAL; + rq->cmd_type = REQ_TYPE_DRV_PRIV; elv_add_request(q, rq, ELEVATOR_INSERT_FRONT); out: diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 6eb738ca6d2f..2a5d543db9f5 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -576,7 +576,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, rq->cmd[0], (unsigned long long)blk_rq_pos(rq), blk_rq_sectors(rq)); - BUG_ON(!(rq->cmd_type == REQ_TYPE_SPECIAL || + BUG_ON(!(rq->cmd_type == REQ_TYPE_DRV_PRIV || rq->cmd_type == REQ_TYPE_SENSE)); /* Retry a failed packet command */ @@ -853,7 +853,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size) BUG_ON(size < 0 || size % tape->blk_size); rq = blk_get_request(drive->queue, READ, __GFP_WAIT); - rq->cmd_type = REQ_TYPE_SPECIAL; + rq->cmd_type = REQ_TYPE_DRV_PRIV; rq->cmd[13] = cmd; rq->rq_disk = tape->disk; rq->__sector = tape->first_frame; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7f9a516f24de..98c90272443b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -79,10 +79,10 @@ enum rq_cmd_type_bits { REQ_TYPE_PM_SUSPEND, /* suspend request */ REQ_TYPE_PM_RESUME, /* resume request */ REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ - REQ_TYPE_SPECIAL, /* driver defined type */ + REQ_TYPE_DRV_PRIV, /* driver defined type */ /* * for ATA/ATAPI devices. this really doesn't belong here, ide should - * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver + * use REQ_TYPE_DRV_PRIV and use rq->cmd[0] with the range of driver * private REQ_LB opcodes to differentiate what type of request this is */ REQ_TYPE_ATA_TASKFILE, -- cgit v1.2.3-71-gd317 From b42171ef7d938a66fa52e66a3d911ed63770b5ca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Apr 2015 22:37:17 +0200 Subject: block: move REQ_TYPE_ATA_TASKFILE and REQ_TYPE_ATA_PC to ide.h These values are only used by the IDE driver, so move them into it by allowing drivers to take cmd_type values after the first private one. Note that we have to turn cmd_type into a plain unsigned integer so that gcc doesn't complain about mismatching enum types. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 11 ++--------- include/linux/ide.h | 7 +++++++ 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 98c90272443b..9cb4d80a4987 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -79,14 +79,7 @@ enum rq_cmd_type_bits { REQ_TYPE_PM_SUSPEND, /* suspend request */ REQ_TYPE_PM_RESUME, /* resume request */ REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ - REQ_TYPE_DRV_PRIV, /* driver defined type */ - /* - * for ATA/ATAPI devices. this really doesn't belong here, ide should - * use REQ_TYPE_DRV_PRIV and use rq->cmd[0] with the range of driver - * private REQ_LB opcodes to differentiate what type of request this is - */ - REQ_TYPE_ATA_TASKFILE, - REQ_TYPE_ATA_PC, + REQ_TYPE_DRV_PRIV, /* driver defined types from here */ }; #define BLK_MAX_CDB 16 @@ -108,7 +101,7 @@ struct request { struct blk_mq_ctx *mq_ctx; u64 cmd_flags; - enum rq_cmd_type_bits cmd_type; + unsigned cmd_type; unsigned long atomic_flags; int cpu; diff --git a/include/linux/ide.h b/include/linux/ide.h index 93b5ca754b5b..62ac399144a6 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -39,6 +39,12 @@ struct device; +/* IDE-specific values for req->cmd_type */ +enum ata_cmd_type_bits { + REQ_TYPE_ATA_TASKFILE = REQ_TYPE_DRV_PRIV + 1, + REQ_TYPE_ATA_PC, +}; + /* Error codes returned in rq->errors to the higher part of the driver. */ enum { IDE_DRV_ERROR_GENERAL = 101, @@ -1551,4 +1557,5 @@ static inline void ide_set_drivedata(ide_drive_t *drive, void *data) #define ide_host_for_each_port(i, port, host) \ for ((i) = 0; ((port) = (host)->ports[i]) || (i) < MAX_HOST_PORTS; (i)++) + #endif /* _IDE_H */ -- cgit v1.2.3-71-gd317 From b0b93b48a30e809240ddd7449a6ad60a5ddf7b4d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Apr 2015 22:37:18 +0200 Subject: block: move REQ_TYPE_SENSE to the ide driver Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/ide/ide-atapi.c | 6 +++--- drivers/ide/ide-cd.c | 8 ++++---- drivers/ide/ide-floppy.c | 2 +- drivers/ide/ide-tape.c | 2 +- include/linux/blkdev.h | 1 - include/linux/ide.h | 1 + 6 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index b367300ce479..1362ad80a76c 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -191,7 +191,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) BUG_ON(sense_len > sizeof(*sense)); - if (rq->cmd_type == REQ_TYPE_SENSE || drive->sense_rq_armed) + if (rq->cmd_type == REQ_TYPE_ATA_SENSE || drive->sense_rq_armed) return; memset(sense, 0, sizeof(*sense)); @@ -210,7 +210,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) sense_rq->rq_disk = rq->rq_disk; sense_rq->cmd[0] = GPCMD_REQUEST_SENSE; sense_rq->cmd[4] = cmd_len; - sense_rq->cmd_type = REQ_TYPE_SENSE; + sense_rq->cmd_type = REQ_TYPE_ATA_SENSE; sense_rq->cmd_flags |= REQ_PREEMPT; if (drive->media == ide_tape) @@ -310,7 +310,7 @@ int ide_cd_get_xferlen(struct request *rq) switch (rq->cmd_type) { case REQ_TYPE_FS: return 32768; - case REQ_TYPE_SENSE: + case REQ_TYPE_ATA_SENSE: case REQ_TYPE_BLOCK_PC: case REQ_TYPE_ATA_PC: return blk_rq_bytes(rq); diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 9a32603c6c74..64a6b827b3dd 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -210,7 +210,7 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive, static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) { /* - * For REQ_TYPE_SENSE, "rq->special" points to the original + * For REQ_TYPE_ATA_SENSE, "rq->special" points to the original * failed request. Also, the sense data should be read * directly from rq which might be different from the original * sense buffer if it got copied during mapping. @@ -285,7 +285,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) "stat 0x%x", rq->cmd[0], rq->cmd_type, err, stat); - if (rq->cmd_type == REQ_TYPE_SENSE) { + if (rq->cmd_type == REQ_TYPE_ATA_SENSE) { /* * We got an error trying to get sense info from the drive * (probably while trying to recover from a former error). @@ -526,7 +526,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) ide_expiry_t *expiry = NULL; int dma_error = 0, dma, thislen, uptodate = 0; int write = (rq_data_dir(rq) == WRITE) ? 1 : 0, rc = 0; - int sense = (rq->cmd_type == REQ_TYPE_SENSE); + int sense = (rq->cmd_type == REQ_TYPE_ATA_SENSE); unsigned int timeout; u16 len; u8 ireason, stat; @@ -791,7 +791,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, if (cdrom_start_rw(drive, rq) == ide_stopped) goto out_end; break; - case REQ_TYPE_SENSE: + case REQ_TYPE_ATA_SENSE: case REQ_TYPE_BLOCK_PC: case REQ_TYPE_ATA_PC: if (!rq->timeout) diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 3dbfa5b6db6a..2fb5350c5410 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -266,7 +266,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block); break; case REQ_TYPE_DRV_PRIV: - case REQ_TYPE_SENSE: + case REQ_TYPE_ATA_SENSE: pc = (struct ide_atapi_pc *)rq->special; break; case REQ_TYPE_BLOCK_PC: diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 2a5d543db9f5..f5d51d1d09ee 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -577,7 +577,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, blk_rq_sectors(rq)); BUG_ON(!(rq->cmd_type == REQ_TYPE_DRV_PRIV || - rq->cmd_type == REQ_TYPE_SENSE)); + rq->cmd_type == REQ_TYPE_ATA_SENSE)); /* Retry a failed packet command */ if (drive->failed_pc && drive->pc->c[0] == REQUEST_SENSE) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9cb4d80a4987..6076b9e18dcb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -75,7 +75,6 @@ struct request_list { enum rq_cmd_type_bits { REQ_TYPE_FS = 1, /* fs request */ REQ_TYPE_BLOCK_PC, /* scsi command */ - REQ_TYPE_SENSE, /* sense request */ REQ_TYPE_PM_SUSPEND, /* suspend request */ REQ_TYPE_PM_RESUME, /* resume request */ REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ diff --git a/include/linux/ide.h b/include/linux/ide.h index 62ac399144a6..9856b7d455d9 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -43,6 +43,7 @@ struct device; enum ata_cmd_type_bits { REQ_TYPE_ATA_TASKFILE = REQ_TYPE_DRV_PRIV + 1, REQ_TYPE_ATA_PC, + REQ_TYPE_ATA_SENSE, /* sense request */ }; /* Error codes returned in rq->errors to the higher part of the driver. */ -- cgit v1.2.3-71-gd317 From ac7cdff00a33d48d27217560fa3b16d802e5f535 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Apr 2015 22:37:19 +0200 Subject: block: remove REQ_TYPE_PM_SHUTDOWN Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6076b9e18dcb..c2829ba5e738 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -77,7 +77,6 @@ enum rq_cmd_type_bits { REQ_TYPE_BLOCK_PC, /* scsi command */ REQ_TYPE_PM_SUSPEND, /* suspend request */ REQ_TYPE_PM_RESUME, /* resume request */ - REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ REQ_TYPE_DRV_PRIV, /* driver defined types from here */ }; -- cgit v1.2.3-71-gd317 From a7928c1578c550bd6f4dec62d65132e6db226c57 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Apr 2015 22:37:20 +0200 Subject: block: move PM request support to IDE This removes the request types and hacks from the block code and into the old IDE driver. There is a small amunt of code duplication due to this, but it's not too bad. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 1 + block/blk-exec.c | 10 --------- block/blk.h | 2 -- drivers/ide/ide-eh.c | 2 +- drivers/ide/ide-io.c | 8 +++---- drivers/ide/ide-pm.c | 56 +++++++++++++++++++++++++++++++++++----------- drivers/ide/ide-taskfile.c | 2 +- include/linux/blkdev.h | 21 +---------------- include/linux/ide.h | 19 ++++++++++++++++ 9 files changed, 70 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index fd154b94447a..2e5020f37d55 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -285,6 +285,7 @@ inline void __blk_run_queue_uncond(struct request_queue *q) q->request_fn(q); q->request_fn_active--; } +EXPORT_SYMBOL_GPL(__blk_run_queue_uncond); /** * __blk_run_queue - run a single device queue diff --git a/block/blk-exec.c b/block/blk-exec.c index 9924725fa50d..3fec8a29d0fa 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -53,7 +53,6 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, rq_end_io_fn *done) { int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; - bool is_pm_resume; WARN_ON(irqs_disabled()); WARN_ON(rq->cmd_type == REQ_TYPE_FS); @@ -70,12 +69,6 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, return; } - /* - * need to check this before __blk_run_queue(), because rq can - * be freed before that returns. - */ - is_pm_resume = rq->cmd_type == REQ_TYPE_PM_RESUME; - spin_lock_irq(q->queue_lock); if (unlikely(blk_queue_dying(q))) { @@ -88,9 +81,6 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, __elv_add_request(q, rq, where); __blk_run_queue(q); - /* the queue is stopped so it won't be run */ - if (is_pm_resume) - __blk_run_queue_uncond(q); spin_unlock_irq(q->queue_lock); } EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); diff --git a/block/blk.h b/block/blk.h index 43b036185712..4b48d55e588e 100644 --- a/block/blk.h +++ b/block/blk.h @@ -193,8 +193,6 @@ int blk_try_merge(struct request *rq, struct bio *bio); void blk_queue_congestion_threshold(struct request_queue *q); -void __blk_run_queue_uncond(struct request_queue *q); - int blk_dev_init(void); diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c index 19d809c48a8d..d6da011299f5 100644 --- a/drivers/ide/ide-eh.c +++ b/drivers/ide/ide-eh.c @@ -129,7 +129,7 @@ ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat) if (cmd) ide_complete_cmd(drive, cmd, stat, err); - } else if (blk_pm_request(rq)) { + } else if (ata_pm_request(rq)) { rq->errors = 1; ide_complete_pm_rq(drive, rq); return ide_stopped; diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 8e55abd03a24..669ea1e45795 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -320,7 +320,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) goto kill_rq; } - if (blk_pm_request(rq)) + if (ata_pm_request(rq)) ide_check_pm_state(drive, rq); drive->hwif->tp_ops->dev_select(drive); @@ -342,8 +342,8 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) return execute_drive_cmd(drive, rq); - else if (blk_pm_request(rq)) { - struct request_pm_state *pm = rq->special; + else if (ata_pm_request(rq)) { + struct ide_pm_state *pm = rq->special; #ifdef DEBUG_PM printk("%s: start_power_step(step: %d)\n", drive->name, pm->pm_step); @@ -538,7 +538,7 @@ repeat: * state machine. */ if ((drive->dev_flags & IDE_DFLAG_BLOCKED) && - blk_pm_request(rq) == 0 && + ata_pm_request(rq) == 0 && (rq->cmd_flags & REQ_PREEMPT) == 0) { /* there should be no pending command at this point */ ide_unlock_port(hwif); diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index 8d1e32d7cd97..081e43458d50 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -8,7 +8,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) ide_drive_t *pair = ide_get_pair_dev(drive); ide_hwif_t *hwif = drive->hwif; struct request *rq; - struct request_pm_state rqpm; + struct ide_pm_state rqpm; int ret; if (ide_port_acpi(hwif)) { @@ -19,7 +19,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) memset(&rqpm, 0, sizeof(rqpm)); rq = blk_get_request(drive->queue, READ, __GFP_WAIT); - rq->cmd_type = REQ_TYPE_PM_SUSPEND; + rq->cmd_type = REQ_TYPE_ATA_PM_SUSPEND; rq->special = &rqpm; rqpm.pm_step = IDE_PM_START_SUSPEND; if (mesg.event == PM_EVENT_PRETHAW) @@ -38,13 +38,43 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) return ret; } +static void ide_end_sync_rq(struct request *rq, int error) +{ + complete(rq->end_io_data); +} + +static int ide_pm_execute_rq(struct request *rq) +{ + struct request_queue *q = rq->q; + DECLARE_COMPLETION_ONSTACK(wait); + + rq->end_io_data = &wait; + rq->end_io = ide_end_sync_rq; + + spin_lock_irq(q->queue_lock); + if (unlikely(blk_queue_dying(q))) { + rq->cmd_flags |= REQ_QUIET; + rq->errors = -ENXIO; + __blk_end_request_all(rq, rq->errors); + spin_unlock_irq(q->queue_lock); + return -ENXIO; + } + __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT); + __blk_run_queue_uncond(q); + spin_unlock_irq(q->queue_lock); + + wait_for_completion_io(&wait); + + return rq->errors ? -EIO : 0; +} + int generic_ide_resume(struct device *dev) { ide_drive_t *drive = to_ide_device(dev); ide_drive_t *pair = ide_get_pair_dev(drive); ide_hwif_t *hwif = drive->hwif; struct request *rq; - struct request_pm_state rqpm; + struct ide_pm_state rqpm; int err; if (ide_port_acpi(hwif)) { @@ -59,13 +89,13 @@ int generic_ide_resume(struct device *dev) memset(&rqpm, 0, sizeof(rqpm)); rq = blk_get_request(drive->queue, READ, __GFP_WAIT); - rq->cmd_type = REQ_TYPE_PM_RESUME; + rq->cmd_type = REQ_TYPE_ATA_PM_RESUME; rq->cmd_flags |= REQ_PREEMPT; rq->special = &rqpm; rqpm.pm_step = IDE_PM_START_RESUME; rqpm.pm_state = PM_EVENT_ON; - err = blk_execute_rq(drive->queue, NULL, rq, 1); + err = ide_pm_execute_rq(rq); blk_put_request(rq); if (err == 0 && dev->driver) { @@ -80,7 +110,7 @@ int generic_ide_resume(struct device *dev) void ide_complete_power_step(ide_drive_t *drive, struct request *rq) { - struct request_pm_state *pm = rq->special; + struct ide_pm_state *pm = rq->special; #ifdef DEBUG_PM printk(KERN_INFO "%s: complete_power_step(step: %d)\n", @@ -110,7 +140,7 @@ void ide_complete_power_step(ide_drive_t *drive, struct request *rq) ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) { - struct request_pm_state *pm = rq->special; + struct ide_pm_state *pm = rq->special; struct ide_cmd cmd = { }; switch (pm->pm_step) { @@ -182,7 +212,7 @@ out_do_tf: void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) { struct request_queue *q = drive->queue; - struct request_pm_state *pm = rq->special; + struct ide_pm_state *pm = rq->special; unsigned long flags; ide_complete_power_step(drive, rq); @@ -191,10 +221,10 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) #ifdef DEBUG_PM printk("%s: completing PM request, %s\n", drive->name, - (rq->cmd_type == REQ_TYPE_PM_SUSPEND) ? "suspend" : "resume"); + (rq->cmd_type == REQ_TYPE_ATA_PM_SUSPEND) ? "suspend" : "resume"); #endif spin_lock_irqsave(q->queue_lock, flags); - if (rq->cmd_type == REQ_TYPE_PM_SUSPEND) + if (rq->cmd_type == REQ_TYPE_ATA_PM_SUSPEND) blk_stop_queue(q); else drive->dev_flags &= ~IDE_DFLAG_BLOCKED; @@ -208,13 +238,13 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) void ide_check_pm_state(ide_drive_t *drive, struct request *rq) { - struct request_pm_state *pm = rq->special; + struct ide_pm_state *pm = rq->special; - if (rq->cmd_type == REQ_TYPE_PM_SUSPEND && + if (rq->cmd_type == REQ_TYPE_ATA_PM_SUSPEND && pm->pm_step == IDE_PM_START_SUSPEND) /* Mark drive blocked when starting the suspend sequence. */ drive->dev_flags |= IDE_DFLAG_BLOCKED; - else if (rq->cmd_type == REQ_TYPE_PM_RESUME && + else if (rq->cmd_type == REQ_TYPE_ATA_PM_RESUME && pm->pm_step == IDE_PM_START_RESUME) { /* * The first thing we do on wakeup is to wait for BSY bit to diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index dabb88b1cbec..0979e126fff1 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -186,7 +186,7 @@ static ide_startstop_t task_no_data_intr(ide_drive_t *drive) tf->command == ATA_CMD_CHK_POWER) { struct request *rq = hwif->rq; - if (blk_pm_request(rq)) + if (ata_pm_request(rq)) ide_complete_pm_rq(drive, rq); else ide_finish_cmd(drive, cmd, stat); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c2829ba5e738..2da818a48097 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -30,7 +30,6 @@ struct scsi_ioctl_command; struct request_queue; struct elevator_queue; -struct request_pm_state; struct blk_trace; struct request; struct sg_io_hdr; @@ -75,8 +74,6 @@ struct request_list { enum rq_cmd_type_bits { REQ_TYPE_FS = 1, /* fs request */ REQ_TYPE_BLOCK_PC, /* scsi command */ - REQ_TYPE_PM_SUSPEND, /* suspend request */ - REQ_TYPE_PM_RESUME, /* resume request */ REQ_TYPE_DRV_PRIV, /* driver defined types from here */ }; @@ -207,19 +204,6 @@ static inline unsigned short req_get_ioprio(struct request *req) return req->ioprio; } -/* - * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME - * requests. Some step values could eventually be made generic. - */ -struct request_pm_state -{ - /* PM state machine step value, currently driver specific */ - int pm_step; - /* requested PM state value (S1, S2, S3, S4, ...) */ - u32 pm_state; - void* data; /* for driver use */ -}; - #include struct blk_queue_ctx; @@ -601,10 +585,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) (((rq)->cmd_flags & REQ_STARTED) && \ ((rq)->cmd_type == REQ_TYPE_FS)) -#define blk_pm_request(rq) \ - ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \ - (rq)->cmd_type == REQ_TYPE_PM_RESUME) - #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) /* rq->queuelist of dequeued request must be list_empty() */ @@ -838,6 +818,7 @@ extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(struct request_queue *q); extern void __blk_run_queue(struct request_queue *q); +extern void __blk_run_queue_uncond(struct request_queue *q); extern void blk_run_queue(struct request_queue *); extern void blk_run_queue_async(struct request_queue *q); extern int blk_rq_map_user(struct request_queue *, struct request *, diff --git a/include/linux/ide.h b/include/linux/ide.h index 9856b7d455d9..a633898f36ac 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -44,8 +44,14 @@ enum ata_cmd_type_bits { REQ_TYPE_ATA_TASKFILE = REQ_TYPE_DRV_PRIV + 1, REQ_TYPE_ATA_PC, REQ_TYPE_ATA_SENSE, /* sense request */ + REQ_TYPE_ATA_PM_SUSPEND,/* suspend request */ + REQ_TYPE_ATA_PM_RESUME, /* resume request */ }; +#define ata_pm_request(rq) \ + ((rq)->cmd_type == REQ_TYPE_ATA_PM_SUSPEND || \ + (rq)->cmd_type == REQ_TYPE_ATA_PM_RESUME) + /* Error codes returned in rq->errors to the higher part of the driver. */ enum { IDE_DRV_ERROR_GENERAL = 101, @@ -1321,6 +1327,19 @@ struct ide_port_info { u8 udma_mask; }; +/* + * State information carried for REQ_TYPE_ATA_PM_SUSPEND and REQ_TYPE_ATA_PM_RESUME + * requests. + */ +struct ide_pm_state { + /* PM state machine step value, currently driver specific */ + int pm_step; + /* requested PM state value (S1, S2, S3, S4, ...) */ + u32 pm_state; + void* data; /* for driver use */ +}; + + int ide_pci_init_one(struct pci_dev *, const struct ide_port_info *, void *); int ide_pci_init_two(struct pci_dev *, struct pci_dev *, const struct ide_port_info *, void *); -- cgit v1.2.3-71-gd317 From 4ecd4fef3a074c8bb43c391a57742c422469ebbd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 May 2015 09:38:13 +0200 Subject: block: use an atomic_t for mq_freeze_depth lockdep gets unhappy about the not disabling irqs when using the queue_lock around it. Instead of trying to fix that up just switch to an atomic_t and get rid of the lock. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 24 ++++++++++-------------- include/linux/blkdev.h | 2 +- 2 files changed, 11 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq.c b/block/blk-mq.c index 31df47443699..c382a34fe5ac 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -89,7 +89,8 @@ static int blk_mq_queue_enter(struct request_queue *q, gfp_t gfp) return -EBUSY; ret = wait_event_interruptible(q->mq_freeze_wq, - !q->mq_freeze_depth || blk_queue_dying(q)); + !atomic_read(&q->mq_freeze_depth) || + blk_queue_dying(q)); if (blk_queue_dying(q)) return -ENODEV; if (ret) @@ -112,13 +113,10 @@ static void blk_mq_usage_counter_release(struct percpu_ref *ref) void blk_mq_freeze_queue_start(struct request_queue *q) { - bool freeze; + int freeze_depth; - spin_lock_irq(q->queue_lock); - freeze = !q->mq_freeze_depth++; - spin_unlock_irq(q->queue_lock); - - if (freeze) { + freeze_depth = atomic_inc_return(&q->mq_freeze_depth); + if (freeze_depth == 1) { percpu_ref_kill(&q->mq_usage_counter); blk_mq_run_hw_queues(q, false); } @@ -143,13 +141,11 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue); void blk_mq_unfreeze_queue(struct request_queue *q) { - bool wake; + int freeze_depth; - spin_lock_irq(q->queue_lock); - wake = !--q->mq_freeze_depth; - WARN_ON_ONCE(q->mq_freeze_depth < 0); - spin_unlock_irq(q->queue_lock); - if (wake) { + freeze_depth = atomic_dec_return(&q->mq_freeze_depth); + WARN_ON_ONCE(freeze_depth < 0); + if (!freeze_depth) { percpu_ref_reinit(&q->mq_usage_counter); wake_up_all(&q->mq_freeze_wq); } @@ -2081,7 +2077,7 @@ void blk_mq_free_queue(struct request_queue *q) /* Basically redo blk_mq_init_queue with queue frozen */ static void blk_mq_queue_reinit(struct request_queue *q) { - WARN_ON_ONCE(!q->mq_freeze_depth); + WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth)); blk_mq_sysfs_unregister(q); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2da818a48097..bc917956a6d0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -444,7 +444,7 @@ struct request_queue { struct mutex sysfs_lock; int bypass_depth; - int mq_freeze_depth; + atomic_t mq_freeze_depth; #if defined(CONFIG_BLK_DEV_BSG) bsg_job_fn *bsg_job_fn; -- cgit v1.2.3-71-gd317 From b25de9d6da49b1a8760a89672283128aa8c78345 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Apr 2015 21:41:01 +0200 Subject: block: remove BIO_EOPNOTSUPP Since the big barrier rewrite/removal in 2007 we never fail FLUSH or FUA requests, which means we can remove the magic BIO_EOPNOTSUPP flag to help propagating those to the buffer_head layer. Signed-off-by: Christoph Hellwig Reviewed-by: Jeff Moyer Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/bounce.c | 3 --- fs/btrfs/disk-io.c | 11 ++--------- fs/btrfs/extent_io.c | 2 -- fs/buffer.c | 10 ---------- fs/ext4/page-io.c | 1 - fs/nilfs2/segbuf.c | 12 ------------ include/linux/blk_types.h | 1 - 7 files changed, 2 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/block/bounce.c b/block/bounce.c index ab21ba203d5c..4bac72579c1f 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -128,9 +128,6 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err) struct bio_vec *bvec, *org_vec; int i; - if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags)) - set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags); - /* * free up bounce indirect pages used */ diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2ef9a4b72d06..e08a926fe12c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3269,11 +3269,8 @@ static int write_dev_supers(struct btrfs_device *device, */ static void btrfs_end_empty_barrier(struct bio *bio, int err) { - if (err) { - if (err == -EOPNOTSUPP) - set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); + if (err) clear_bit(BIO_UPTODATE, &bio->bi_flags); - } if (bio->bi_private) complete(bio->bi_private); bio_put(bio); @@ -3301,11 +3298,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait) wait_for_completion(&device->flush_wait); - if (bio_flagged(bio, BIO_EOPNOTSUPP)) { - printk_in_rcu("BTRFS: disabling barriers on dev %s\n", - rcu_str_deref(device->name)); - device->nobarriers = 1; - } else if (!bio_flagged(bio, BIO_UPTODATE)) { + if (!bio_flagged(bio, BIO_UPTODATE)) { ret = -EIO; btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_FLUSH_ERRS); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 43af5a61ad25..1e155299abc0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2767,8 +2767,6 @@ static int __must_check submit_one_bio(int rw, struct bio *bio, else btrfsic_submit_bio(rw, bio); - if (bio_flagged(bio, BIO_EOPNOTSUPP)) - ret = -EOPNOTSUPP; bio_put(bio); return ret; } diff --git a/fs/buffer.c b/fs/buffer.c index c7a5602d01ee..efd85e0e8660 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2938,10 +2938,6 @@ static void end_bio_bh_io_sync(struct bio *bio, int err) { struct buffer_head *bh = bio->bi_private; - if (err == -EOPNOTSUPP) { - set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); - } - if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags))) set_bit(BH_Quiet, &bh->b_state); @@ -3041,13 +3037,7 @@ int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags) if (buffer_prio(bh)) rw |= REQ_PRIO; - bio_get(bio); submit_bio(rw, bio); - - if (bio_flagged(bio, BIO_EOPNOTSUPP)) - ret = -EOPNOTSUPP; - - bio_put(bio); return ret; } EXPORT_SYMBOL_GPL(_submit_bh); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 5765f88b3904..c5d81e8d84c3 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -359,7 +359,6 @@ void ext4_io_submit(struct ext4_io_submit *io) if (bio) { bio_get(io->io_bio); submit_bio(io->io_op, io->io_bio); - BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP)); bio_put(io->io_bio); } io->io_bio = NULL; diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index dc3a9efdaab8..42468e5ab3e7 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -343,11 +343,6 @@ static void nilfs_end_bio_write(struct bio *bio, int err) const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct nilfs_segment_buffer *segbuf = bio->bi_private; - if (err == -EOPNOTSUPP) { - set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); - /* to be detected by nilfs_segbuf_submit_bio() */ - } - if (!uptodate) atomic_inc(&segbuf->sb_err); @@ -374,15 +369,8 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf, bio->bi_end_io = nilfs_end_bio_write; bio->bi_private = segbuf; - bio_get(bio); submit_bio(mode, bio); segbuf->sb_nbio++; - if (bio_flagged(bio, BIO_EOPNOTSUPP)) { - bio_put(bio); - err = -EOPNOTSUPP; - goto failed; - } - bio_put(bio); wi->bio = NULL; wi->rest_blocks -= wi->end - wi->start; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 93d2e7153816..daf95915d104 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -118,7 +118,6 @@ struct bio { #define BIO_CLONED 4 /* doesn't own data */ #define BIO_BOUNCED 5 /* bio is a bounce bio */ #define BIO_USER_MAPPED 6 /* contains user pages */ -#define BIO_EOPNOTSUPP 7 /* not supported */ #define BIO_NULL_MAPPED 8 /* contains invalid user pages */ #define BIO_QUIET 9 /* Make BIO Quiet */ #define BIO_SNAP_STABLE 10 /* bio data must be snapshotted during write */ -- cgit v1.2.3-71-gd317 From 97ca223c3b37ed12a5b67a5dc6247e5a4799d337 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Apr 2015 21:41:02 +0200 Subject: block: remove unused BIO_RW_BLOCK and BIO_EOF flags Signed-off-by: Christoph Hellwig Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 2 -- include/linux/blk_types.h | 2 -- 2 files changed, 4 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index f0be754c7781..de474b5dee2b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1725,8 +1725,6 @@ static void handle_bad_sector(struct bio *bio) bio->bi_rw, (unsigned long long)bio_end_sector(bio), (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9)); - - set_bit(BIO_EOF, &bio->bi_flags); } #ifdef CONFIG_FAIL_MAKE_REQUEST diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index daf95915d104..09c7a2cd48ef 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -112,8 +112,6 @@ struct bio { * bio flags */ #define BIO_UPTODATE 0 /* ok after I/O completion */ -#define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ -#define BIO_EOF 2 /* out-out-bounds error */ #define BIO_SEG_VALID 3 /* bi_phys_segments valid */ #define BIO_CLONED 4 /* doesn't own data */ #define BIO_BOUNCED 5 /* bio is a bounce bio */ -- cgit v1.2.3-71-gd317 From b2dbe0a60f1bcf4db5c701f1577b3135c3159eb5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 19 May 2015 09:18:28 -0600 Subject: block: collapse bio bit space Various previous patches removed bits and left holes, collapse them all. Leave the reset start bit where it is, we don't need to change that. Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 09c7a2cd48ef..3f4ded0b1a34 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -112,15 +112,15 @@ struct bio { * bio flags */ #define BIO_UPTODATE 0 /* ok after I/O completion */ -#define BIO_SEG_VALID 3 /* bi_phys_segments valid */ -#define BIO_CLONED 4 /* doesn't own data */ -#define BIO_BOUNCED 5 /* bio is a bounce bio */ -#define BIO_USER_MAPPED 6 /* contains user pages */ -#define BIO_NULL_MAPPED 8 /* contains invalid user pages */ -#define BIO_QUIET 9 /* Make BIO Quiet */ -#define BIO_SNAP_STABLE 10 /* bio data must be snapshotted during write */ -#define BIO_CHAIN 11 /* chained bio, ->bi_remaining in effect */ -#define BIO_REFFED 12 /* bio has elevated ->bi_cnt */ +#define BIO_SEG_VALID 1 /* bi_phys_segments valid */ +#define BIO_CLONED 2 /* doesn't own data */ +#define BIO_BOUNCED 3 /* bio is a bounce bio */ +#define BIO_USER_MAPPED 4 /* contains user pages */ +#define BIO_NULL_MAPPED 5 /* contains invalid user pages */ +#define BIO_QUIET 6 /* Make BIO Quiet */ +#define BIO_SNAP_STABLE 7 /* bio data must be snapshotted during write */ +#define BIO_CHAIN 8 /* chained bio, ->bi_remaining in effect */ +#define BIO_REFFED 9 /* bio has elevated ->bi_cnt */ /* * Flags starting here get preserved by bio_reset() - this includes -- cgit v1.2.3-71-gd317 From 343df3c79c62b644ce6ff5dff96c9e0be1ecb242 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 May 2015 09:23:23 +0200 Subject: suspend: simplify block I/O handling Stop abusing struct page functionality and the swap end_io handler, and instead add a modified version of the blk-lib.c bio_batch helpers. Also move the block I/O code into swap.c as they are directly tied into each other. Signed-off-by: Christoph Hellwig Tested-by: Pavel Machek Tested-by: Ming Lin Acked-by: Pavel Machek Acked-by: Rafael J. Wysocki Signed-off-by: Jens Axboe --- include/linux/swap.h | 1 - kernel/power/Makefile | 3 +- kernel/power/block_io.c | 103 ------------------------------- kernel/power/power.h | 9 --- kernel/power/swap.c | 159 ++++++++++++++++++++++++++++++++++++------------ mm/page_io.c | 2 +- 6 files changed, 122 insertions(+), 155 deletions(-) delete mode 100644 kernel/power/block_io.c (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index cee108cbe2d5..38874729dc5f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -377,7 +377,6 @@ extern void end_swap_bio_write(struct bio *bio, int err); extern int __swap_writepage(struct page *page, struct writeback_control *wbc, void (*end_write_func)(struct bio *, int)); extern int swap_set_page_dirty(struct page *page); -extern void end_swap_bio_read(struct bio *bio, int err); int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, unsigned long nr_pages, sector_t start_block); diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 29472bff11ef..cb880a14cc39 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -7,8 +7,7 @@ obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o obj-$(CONFIG_FREEZER) += process.o obj-$(CONFIG_SUSPEND) += suspend.o obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o -obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ - block_io.o +obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c deleted file mode 100644 index 9a58bc258810..000000000000 --- a/kernel/power/block_io.c +++ /dev/null @@ -1,103 +0,0 @@ -/* - * This file provides functions for block I/O operations on swap/file. - * - * Copyright (C) 1998,2001-2005 Pavel Machek - * Copyright (C) 2006 Rafael J. Wysocki - * - * This file is released under the GPLv2. - */ - -#include -#include -#include -#include - -#include "power.h" - -/** - * submit - submit BIO request. - * @rw: READ or WRITE. - * @off physical offset of page. - * @page: page we're reading or writing. - * @bio_chain: list of pending biod (for async reading) - * - * Straight from the textbook - allocate and initialize the bio. - * If we're reading, make sure the page is marked as dirty. - * Then submit it and, if @bio_chain == NULL, wait. - */ -static int submit(int rw, struct block_device *bdev, sector_t sector, - struct page *page, struct bio **bio_chain) -{ - const int bio_rw = rw | REQ_SYNC; - struct bio *bio; - - bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); - bio->bi_iter.bi_sector = sector; - bio->bi_bdev = bdev; - bio->bi_end_io = end_swap_bio_read; - - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { - printk(KERN_ERR "PM: Adding page to bio failed at %llu\n", - (unsigned long long)sector); - bio_put(bio); - return -EFAULT; - } - - lock_page(page); - bio_get(bio); - - if (bio_chain == NULL) { - submit_bio(bio_rw, bio); - wait_on_page_locked(page); - if (rw == READ) - bio_set_pages_dirty(bio); - bio_put(bio); - } else { - if (rw == READ) - get_page(page); /* These pages are freed later */ - bio->bi_private = *bio_chain; - *bio_chain = bio; - submit_bio(bio_rw, bio); - } - return 0; -} - -int hib_bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain) -{ - return submit(READ, hib_resume_bdev, page_off * (PAGE_SIZE >> 9), - virt_to_page(addr), bio_chain); -} - -int hib_bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain) -{ - return submit(WRITE, hib_resume_bdev, page_off * (PAGE_SIZE >> 9), - virt_to_page(addr), bio_chain); -} - -int hib_wait_on_bio_chain(struct bio **bio_chain) -{ - struct bio *bio; - struct bio *next_bio; - int ret = 0; - - if (bio_chain == NULL) - return 0; - - bio = *bio_chain; - if (bio == NULL) - return 0; - while (bio) { - struct page *page; - - next_bio = bio->bi_private; - page = bio->bi_io_vec[0].bv_page; - wait_on_page_locked(page); - if (!PageUptodate(page) || PageError(page)) - ret = -EIO; - put_page(page); - bio_put(bio); - bio = next_bio; - } - *bio_chain = NULL; - return ret; -} diff --git a/kernel/power/power.h b/kernel/power/power.h index ce9b8328a689..caadb566e82b 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -163,15 +163,6 @@ extern void swsusp_close(fmode_t); extern int swsusp_unmark(void); #endif -/* kernel/power/block_io.c */ -extern struct block_device *hib_resume_bdev; - -extern int hib_bio_read_page(pgoff_t page_off, void *addr, - struct bio **bio_chain); -extern int hib_bio_write_page(pgoff_t page_off, void *addr, - struct bio **bio_chain); -extern int hib_wait_on_bio_chain(struct bio **bio_chain); - struct timeval; /* kernel/power/swsusp.c */ extern void swsusp_show_speed(ktime_t, ktime_t, unsigned int, char *); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 570aff817543..2f30ca91e4fa 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -212,7 +212,84 @@ int swsusp_swap_in_use(void) */ static unsigned short root_swap = 0xffff; -struct block_device *hib_resume_bdev; +static struct block_device *hib_resume_bdev; + +struct hib_bio_batch { + atomic_t count; + wait_queue_head_t wait; + int error; +}; + +static void hib_init_batch(struct hib_bio_batch *hb) +{ + atomic_set(&hb->count, 0); + init_waitqueue_head(&hb->wait); + hb->error = 0; +} + +static void hib_end_io(struct bio *bio, int error) +{ + struct hib_bio_batch *hb = bio->bi_private; + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct page *page = bio->bi_io_vec[0].bv_page; + + if (!uptodate || error) { + printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n", + imajor(bio->bi_bdev->bd_inode), + iminor(bio->bi_bdev->bd_inode), + (unsigned long long)bio->bi_iter.bi_sector); + + if (!error) + error = -EIO; + } + + if (bio_data_dir(bio) == WRITE) + put_page(page); + + if (error && !hb->error) + hb->error = error; + if (atomic_dec_and_test(&hb->count)) + wake_up(&hb->wait); + + bio_put(bio); +} + +static int hib_submit_io(int rw, pgoff_t page_off, void *addr, + struct hib_bio_batch *hb) +{ + struct page *page = virt_to_page(addr); + struct bio *bio; + int error = 0; + + bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); + bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9); + bio->bi_bdev = hib_resume_bdev; + + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { + printk(KERN_ERR "PM: Adding page to bio failed at %llu\n", + (unsigned long long)bio->bi_iter.bi_sector); + bio_put(bio); + return -EFAULT; + } + + if (hb) { + bio->bi_end_io = hib_end_io; + bio->bi_private = hb; + atomic_inc(&hb->count); + submit_bio(rw, bio); + } else { + error = submit_bio_wait(rw, bio); + bio_put(bio); + } + + return error; +} + +static int hib_wait_io(struct hib_bio_batch *hb) +{ + wait_event(hb->wait, atomic_read(&hb->count) == 0); + return hb->error; +} /* * Saving part @@ -222,7 +299,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) { int error; - hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL); + hib_submit_io(READ_SYNC, swsusp_resume_block, swsusp_header, NULL); if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) || !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) { memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); @@ -231,7 +308,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) swsusp_header->flags = flags; if (flags & SF_CRC32_MODE) swsusp_header->crc32 = handle->crc32; - error = hib_bio_write_page(swsusp_resume_block, + error = hib_submit_io(WRITE_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { printk(KERN_ERR "PM: Swap header not found!\n"); @@ -271,10 +348,10 @@ static int swsusp_swap_check(void) * write_page - Write one page to given swap location. * @buf: Address we're writing. * @offset: Offset of the swap page we're writing to. - * @bio_chain: Link the next write BIO here + * @hb: bio completion batch */ -static int write_page(void *buf, sector_t offset, struct bio **bio_chain) +static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb) { void *src; int ret; @@ -282,13 +359,13 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain) if (!offset) return -ENOSPC; - if (bio_chain) { + if (hb) { src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN | __GFP_NORETRY); if (src) { copy_page(src, buf); } else { - ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */ + ret = hib_wait_io(hb); /* Free pages */ if (ret) return ret; src = (void *)__get_free_page(__GFP_WAIT | @@ -298,14 +375,14 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain) copy_page(src, buf); } else { WARN_ON_ONCE(1); - bio_chain = NULL; /* Go synchronous */ + hb = NULL; /* Go synchronous */ src = buf; } } } else { src = buf; } - return hib_bio_write_page(offset, src, bio_chain); + return hib_submit_io(WRITE_SYNC, offset, src, hb); } static void release_swap_writer(struct swap_map_handle *handle) @@ -348,7 +425,7 @@ err_close: } static int swap_write_page(struct swap_map_handle *handle, void *buf, - struct bio **bio_chain) + struct hib_bio_batch *hb) { int error = 0; sector_t offset; @@ -356,7 +433,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, if (!handle->cur) return -EINVAL; offset = alloc_swapdev_block(root_swap); - error = write_page(buf, offset, bio_chain); + error = write_page(buf, offset, hb); if (error) return error; handle->cur->entries[handle->k++] = offset; @@ -365,15 +442,15 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, if (!offset) return -ENOSPC; handle->cur->next_swap = offset; - error = write_page(handle->cur, handle->cur_swap, bio_chain); + error = write_page(handle->cur, handle->cur_swap, hb); if (error) goto out; clear_page(handle->cur); handle->cur_swap = offset; handle->k = 0; - if (bio_chain && low_free_pages() <= handle->reqd_free_pages) { - error = hib_wait_on_bio_chain(bio_chain); + if (hb && low_free_pages() <= handle->reqd_free_pages) { + error = hib_wait_io(hb); if (error) goto out; /* @@ -445,23 +522,24 @@ static int save_image(struct swap_map_handle *handle, int ret; int nr_pages; int err2; - struct bio *bio; + struct hib_bio_batch hb; ktime_t start; ktime_t stop; + hib_init_batch(&hb); + printk(KERN_INFO "PM: Saving image data pages (%u pages)...\n", nr_to_write); m = nr_to_write / 10; if (!m) m = 1; nr_pages = 0; - bio = NULL; start = ktime_get(); while (1) { ret = snapshot_read_next(snapshot); if (ret <= 0) break; - ret = swap_write_page(handle, data_of(*snapshot), &bio); + ret = swap_write_page(handle, data_of(*snapshot), &hb); if (ret) break; if (!(nr_pages % m)) @@ -469,7 +547,7 @@ static int save_image(struct swap_map_handle *handle, nr_pages / m * 10); nr_pages++; } - err2 = hib_wait_on_bio_chain(&bio); + err2 = hib_wait_io(&hb); stop = ktime_get(); if (!ret) ret = err2; @@ -580,7 +658,7 @@ static int save_image_lzo(struct swap_map_handle *handle, int ret = 0; int nr_pages; int err2; - struct bio *bio; + struct hib_bio_batch hb; ktime_t start; ktime_t stop; size_t off; @@ -589,6 +667,8 @@ static int save_image_lzo(struct swap_map_handle *handle, struct cmp_data *data = NULL; struct crc_data *crc = NULL; + hib_init_batch(&hb); + /* * We'll limit the number of threads for compression to limit memory * footprint. @@ -674,7 +754,6 @@ static int save_image_lzo(struct swap_map_handle *handle, if (!m) m = 1; nr_pages = 0; - bio = NULL; start = ktime_get(); for (;;) { for (thr = 0; thr < nr_threads; thr++) { @@ -748,7 +827,7 @@ static int save_image_lzo(struct swap_map_handle *handle, off += PAGE_SIZE) { memcpy(page, data[thr].cmp + off, PAGE_SIZE); - ret = swap_write_page(handle, page, &bio); + ret = swap_write_page(handle, page, &hb); if (ret) goto out_finish; } @@ -759,7 +838,7 @@ static int save_image_lzo(struct swap_map_handle *handle, } out_finish: - err2 = hib_wait_on_bio_chain(&bio); + err2 = hib_wait_io(&hb); stop = ktime_get(); if (!ret) ret = err2; @@ -906,7 +985,7 @@ static int get_swap_reader(struct swap_map_handle *handle, return -ENOMEM; } - error = hib_bio_read_page(offset, tmp->map, NULL); + error = hib_submit_io(READ_SYNC, offset, tmp->map, NULL); if (error) { release_swap_reader(handle); return error; @@ -919,7 +998,7 @@ static int get_swap_reader(struct swap_map_handle *handle, } static int swap_read_page(struct swap_map_handle *handle, void *buf, - struct bio **bio_chain) + struct hib_bio_batch *hb) { sector_t offset; int error; @@ -930,7 +1009,7 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf, offset = handle->cur->entries[handle->k]; if (!offset) return -EFAULT; - error = hib_bio_read_page(offset, buf, bio_chain); + error = hib_submit_io(READ_SYNC, offset, buf, hb); if (error) return error; if (++handle->k >= MAP_PAGE_ENTRIES) { @@ -968,27 +1047,28 @@ static int load_image(struct swap_map_handle *handle, int ret = 0; ktime_t start; ktime_t stop; - struct bio *bio; + struct hib_bio_batch hb; int err2; unsigned nr_pages; + hib_init_batch(&hb); + printk(KERN_INFO "PM: Loading image data pages (%u pages)...\n", nr_to_read); m = nr_to_read / 10; if (!m) m = 1; nr_pages = 0; - bio = NULL; start = ktime_get(); for ( ; ; ) { ret = snapshot_write_next(snapshot); if (ret <= 0) break; - ret = swap_read_page(handle, data_of(*snapshot), &bio); + ret = swap_read_page(handle, data_of(*snapshot), &hb); if (ret) break; if (snapshot->sync_read) - ret = hib_wait_on_bio_chain(&bio); + ret = hib_wait_io(&hb); if (ret) break; if (!(nr_pages % m)) @@ -996,7 +1076,7 @@ static int load_image(struct swap_map_handle *handle, nr_pages / m * 10); nr_pages++; } - err2 = hib_wait_on_bio_chain(&bio); + err2 = hib_wait_io(&hb); stop = ktime_get(); if (!ret) ret = err2; @@ -1067,7 +1147,7 @@ static int load_image_lzo(struct swap_map_handle *handle, unsigned int m; int ret = 0; int eof = 0; - struct bio *bio; + struct hib_bio_batch hb; ktime_t start; ktime_t stop; unsigned nr_pages; @@ -1080,6 +1160,8 @@ static int load_image_lzo(struct swap_map_handle *handle, struct dec_data *data = NULL; struct crc_data *crc = NULL; + hib_init_batch(&hb); + /* * We'll limit the number of threads for decompression to limit memory * footprint. @@ -1190,7 +1272,6 @@ static int load_image_lzo(struct swap_map_handle *handle, if (!m) m = 1; nr_pages = 0; - bio = NULL; start = ktime_get(); ret = snapshot_write_next(snapshot); @@ -1199,7 +1280,7 @@ static int load_image_lzo(struct swap_map_handle *handle, for(;;) { for (i = 0; !eof && i < want; i++) { - ret = swap_read_page(handle, page[ring], &bio); + ret = swap_read_page(handle, page[ring], &hb); if (ret) { /* * On real read error, finish. On end of data, @@ -1226,7 +1307,7 @@ static int load_image_lzo(struct swap_map_handle *handle, if (!asked) break; - ret = hib_wait_on_bio_chain(&bio); + ret = hib_wait_io(&hb); if (ret) goto out_finish; have += asked; @@ -1281,7 +1362,7 @@ static int load_image_lzo(struct swap_map_handle *handle, * Wait for more data while we are decompressing. */ if (have < LZO_CMP_PAGES && asked) { - ret = hib_wait_on_bio_chain(&bio); + ret = hib_wait_io(&hb); if (ret) goto out_finish; have += asked; @@ -1430,7 +1511,7 @@ int swsusp_check(void) if (!IS_ERR(hib_resume_bdev)) { set_blocksize(hib_resume_bdev, PAGE_SIZE); clear_page(swsusp_header); - error = hib_bio_read_page(swsusp_resume_block, + error = hib_submit_io(READ_SYNC, swsusp_resume_block, swsusp_header, NULL); if (error) goto put; @@ -1438,7 +1519,7 @@ int swsusp_check(void) if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) { memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); /* Reset swap signature now */ - error = hib_bio_write_page(swsusp_resume_block, + error = hib_submit_io(WRITE_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { error = -EINVAL; @@ -1482,10 +1563,10 @@ int swsusp_unmark(void) { int error; - hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL); + hib_submit_io(READ_SYNC, swsusp_resume_block, swsusp_header, NULL); if (!memcmp(HIBERNATE_SIG,swsusp_header->sig, 10)) { memcpy(swsusp_header->sig,swsusp_header->orig_sig, 10); - error = hib_bio_write_page(swsusp_resume_block, + error = hib_submit_io(WRITE_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { printk(KERN_ERR "PM: Cannot find swsusp signature!\n"); diff --git a/mm/page_io.c b/mm/page_io.c index 6424869e275e..520baa4b04d7 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -69,7 +69,7 @@ void end_swap_bio_write(struct bio *bio, int err) bio_put(bio); } -void end_swap_bio_read(struct bio *bio, int err) +static void end_swap_bio_read(struct bio *bio, int err) { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct page *page = bio->bi_io_vec[0].bv_page; -- cgit v1.2.3-71-gd317 From be32417796c2b8a83fe4cbece83bea96ab9e378f Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Wed, 6 May 2015 12:26:22 +0800 Subject: block: export blkdev_reread_part() and __blkdev_reread_part() This patch exports blkdev_reread_part() for block drivers, also introduce __blkdev_reread_part(). For some drivers, such as loop, reread of partitions can be run from the release path, and bd_mutex may already be held prior to calling ioctl_by_bdev(bdev, BLKRRPART, 0), so introduce __blkdev_reread_part for use in such cases. CC: Christoph Hellwig CC: Jens Axboe CC: Tejun Heo CC: Alexander Viro CC: Markus Pargmann CC: Stefan Weinhuber CC: Stefan Haberland CC: Sebastian Ott CC: Fabian Frederick CC: Ming Lei CC: David Herrmann CC: Andrew Morton CC: Peter Zijlstra CC: nbd-general@lists.sourceforge.net CC: linux-s390@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Jarod Wilson Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/ioctl.c | 28 +++++++++++++++++++++++++--- include/linux/fs.h | 3 +++ 2 files changed, 28 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/block/ioctl.c b/block/ioctl.c index 7d8befde2aca..203cb4aeea8b 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -150,21 +150,43 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user } } -static int blkdev_reread_part(struct block_device *bdev) +/* + * This is an exported API for the block driver, and will not + * acquire bd_mutex. This API should be used in case that + * caller has held bd_mutex already. + */ +int __blkdev_reread_part(struct block_device *bdev) { struct gendisk *disk = bdev->bd_disk; - int res; if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; + + lockdep_assert_held(&bdev->bd_mutex); + + return rescan_partitions(disk, bdev); +} +EXPORT_SYMBOL(__blkdev_reread_part); + +/* + * This is an exported API for the block driver, and will + * try to acquire bd_mutex. If bd_mutex has been held already + * in current context, please call __blkdev_reread_part(). + */ +int blkdev_reread_part(struct block_device *bdev) +{ + int res; + if (!mutex_trylock(&bdev->bd_mutex)) return -EBUSY; - res = rescan_partitions(disk, bdev); + res = __blkdev_reread_part(bdev); mutex_unlock(&bdev->bd_mutex); + return res; } +EXPORT_SYMBOL(blkdev_reread_part); static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, uint64_t len, int secure) diff --git a/include/linux/fs.h b/include/linux/fs.h index 35ec87e490b1..1ef63900243c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2279,6 +2279,9 @@ extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder); extern void blkdev_put(struct block_device *bdev, fmode_t mode); +extern int __blkdev_reread_part(struct block_device *bdev); +extern int blkdev_reread_part(struct block_device *bdev); + #ifdef CONFIG_SYSFS extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); extern void bd_unlink_disk_holder(struct block_device *bdev, -- cgit v1.2.3-71-gd317 From 326e1dbb57368087a36607aaebe9795b8d5453e5 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 22 May 2015 09:14:03 -0400 Subject: block: remove management of bi_remaining when restoring original bi_end_io Commit c4cf5261 ("bio: skip atomic inc/dec of ->bi_remaining for non-chains") regressed all existing callers that followed this pattern: 1) saving a bio's original bi_end_io 2) wiring up an intermediate bi_end_io 3) restoring the original bi_end_io from intermediate bi_end_io 4) calling bio_endio() to execute the restored original bi_end_io The regression was due to BIO_CHAIN only ever getting set if bio_inc_remaining() is called. For the above pattern it isn't set until step 3 above (step 2 would've needed to establish BIO_CHAIN). As such the first bio_endio(), in step 2 above, never decremented __bi_remaining before calling the intermediate bi_end_io -- leaving __bi_remaining with the value 1 instead of 0. When bio_inc_remaining() occurred during step 3 it brought it to a value of 2. When the second bio_endio() was called, in step 4 above, it should've called the original bi_end_io but it didn't because there was an extra reference that wasn't dropped (due to atomic operations being optimized away since BIO_CHAIN wasn't set upfront). Fix this issue by removing the __bi_remaining management complexity for all callers that use the above pattern -- bio_chain() is the only interface that _needs_ to be concerned with __bi_remaining. For the above pattern callers just expect the bi_end_io they set to get called! Remove bio_endio_nodec() and also remove all bio_inc_remaining() calls that aren't associated with the bio_chain() interface. Also, the bio_inc_remaining() interface has been moved local to bio.c. Fixes: c4cf5261 ("bio: skip atomic inc/dec of ->bi_remaining for non-chains") Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/bio-integrity.c | 4 ++-- block/bio.c | 35 ++++++++++++++--------------------- drivers/md/bcache/io.c | 2 +- drivers/md/dm-cache-target.c | 6 ------ drivers/md/dm-raid1.c | 2 -- drivers/md/dm-snap.c | 1 - drivers/md/dm-thin.c | 9 +++------ drivers/md/dm-verity.c | 2 +- fs/btrfs/disk-io.c | 2 +- fs/btrfs/volumes.c | 16 +++++----------- fs/btrfs/volumes.h | 2 -- include/linux/bio.h | 12 ------------ 12 files changed, 27 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 5cbd5d9ea61d..0436c21db7f2 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -361,7 +361,7 @@ static void bio_integrity_verify_fn(struct work_struct *work) /* Restore original bio completion handler */ bio->bi_end_io = bip->bip_end_io; - bio_endio_nodec(bio, error); + bio_endio(bio, error); } /** @@ -388,7 +388,7 @@ void bio_integrity_endio(struct bio *bio, int error) */ if (error) { bio->bi_end_io = bip->bip_end_io; - bio_endio_nodec(bio, error); + bio_endio(bio, error); return; } diff --git a/block/bio.c b/block/bio.c index c2ff8a88aef1..259197d97de1 100644 --- a/block/bio.c +++ b/block/bio.c @@ -303,6 +303,17 @@ static void bio_chain_endio(struct bio *bio, int error) bio_put(bio); } +/* + * Increment chain count for the bio. Make sure the CHAIN flag update + * is visible before the raised count. + */ +static inline void bio_inc_remaining(struct bio *bio) +{ + bio->bi_flags |= (1 << BIO_CHAIN); + smp_mb__before_atomic(); + atomic_inc(&bio->__bi_remaining); +} + /** * bio_chain - chain bio completions * @bio: the target bio @@ -1756,8 +1767,10 @@ static inline bool bio_remaining_done(struct bio *bio) BUG_ON(atomic_read(&bio->__bi_remaining) <= 0); - if (atomic_dec_and_test(&bio->__bi_remaining)) + if (atomic_dec_and_test(&bio->__bi_remaining)) { + clear_bit(BIO_CHAIN, &bio->bi_flags); return true; + } return false; } @@ -1808,26 +1821,6 @@ void bio_endio(struct bio *bio, int error) } EXPORT_SYMBOL(bio_endio); -/** - * bio_endio_nodec - end I/O on a bio, without decrementing bi_remaining - * @bio: bio - * @error: error, if any - * - * For code that has saved and restored bi_end_io; thing hard before using this - * function, probably you should've cloned the entire bio. - **/ -void bio_endio_nodec(struct bio *bio, int error) -{ - /* - * If it's not flagged as a chain, we are not going to dec the count - */ - if (bio_flagged(bio, BIO_CHAIN)) - bio_inc_remaining(bio); - - bio_endio(bio, error); -} -EXPORT_SYMBOL(bio_endio_nodec); - /** * bio_split - split a bio * @bio: bio to split diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index fa028fa82df4..cb64e64a4789 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -55,7 +55,7 @@ static void bch_bio_submit_split_done(struct closure *cl) s->bio->bi_end_io = s->bi_end_io; s->bio->bi_private = s->bi_private; - bio_endio_nodec(s->bio, 0); + bio_endio(s->bio, 0); closure_debug_destroy(&s->cl); mempool_free(s, s->p->bio_split_hook); diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 705eb7b99d69..41b2594a80c6 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -86,12 +86,6 @@ static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio) { bio->bi_end_io = h->bi_end_io; bio->bi_private = h->bi_private; - - /* - * Must bump bi_remaining to allow bio to complete with - * restored bi_end_io. - */ - bio_inc_remaining(bio); } /*----------------------------------------------------------------*/ diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index d6a1c096b777..743fa9bbae9e 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1254,8 +1254,6 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) dm_bio_restore(bd, bio); bio_record->details.bi_bdev = NULL; - bio_inc_remaining(bio); - queue_bio(ms, bio, rw); return DM_ENDIO_INCOMPLETE; } diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 8bfeae218531..7c82d3ccce87 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1478,7 +1478,6 @@ out: if (full_bio) { full_bio->bi_end_io = pe->full_bio_end_io; full_bio->bi_private = pe->full_bio_private; - bio_inc_remaining(full_bio); } increment_pending_exceptions_done_count(); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 342dbdad6131..e852602c0091 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -793,10 +793,9 @@ static void inc_remap_and_issue_cell(struct thin_c *tc, static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) { - if (m->bio) { + if (m->bio) m->bio->bi_end_io = m->saved_bi_end_io; - bio_inc_remaining(m->bio); - } + cell_error(m->tc->pool, m->cell); list_del(&m->list); mempool_free(m, m->tc->pool->mapping_pool); @@ -810,10 +809,8 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) int r; bio = m->bio; - if (bio) { + if (bio) bio->bi_end_io = m->saved_bi_end_io; - bio_inc_remaining(bio); - } if (m->err) { cell_error(pool, m->cell); diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 66616db33e6f..bb9c6a00e4b0 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -459,7 +459,7 @@ static void verity_finish_io(struct dm_verity_io *io, int error) bio->bi_end_io = io->orig_bi_end_io; bio->bi_private = io->orig_bi_private; - bio_endio_nodec(bio, error); + bio_endio(bio, error); } static void verity_work(struct work_struct *w) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e08a926fe12c..0bccf18dc1dc 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1745,7 +1745,7 @@ static void end_workqueue_fn(struct btrfs_work *work) bio->bi_private = end_io_wq->private; bio->bi_end_io = end_io_wq->end_io; kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq); - bio_endio_nodec(bio, error); + bio_endio(bio, error); } static int cleaner_kthread(void *arg) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8e8d1d1e28a5..dac77d42a9ab 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5585,10 +5585,10 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int err) { - if (likely(bbio->flags & BTRFS_BIO_ORIG_BIO_SUBMITTED)) - bio_endio_nodec(bio, err); - else - bio_endio(bio, err); + bio->bi_private = bbio->private; + bio->bi_end_io = bbio->end_io; + bio_endio(bio, err); + btrfs_put_bbio(bbio); } @@ -5632,8 +5632,6 @@ static void btrfs_end_bio(struct bio *bio, int err) bio = bbio->orig_bio; } - bio->bi_private = bbio->private; - bio->bi_end_io = bbio->end_io; btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; /* only send an error to the higher layers if it is * beyond the tolerance of the btrfs bio @@ -5815,8 +5813,6 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) /* Shoud be the original bio. */ WARN_ON(bio != bbio->orig_bio); - bio->bi_private = bbio->private; - bio->bi_end_io = bbio->end_io; btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; bio->bi_iter.bi_sector = logical >> 9; @@ -5897,10 +5893,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, if (dev_nr < total_devs - 1) { bio = btrfs_bio_clone(first_bio, GFP_NOFS); BUG_ON(!bio); /* -ENOMEM */ - } else { + } else bio = first_bio; - bbio->flags |= BTRFS_BIO_ORIG_BIO_SUBMITTED; - } submit_stripe_bio(root, bbio, bio, bbio->stripes[dev_nr].physical, dev_nr, rw, diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index ebc31331a837..cedae0356558 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -292,8 +292,6 @@ struct btrfs_bio_stripe { struct btrfs_bio; typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); -#define BTRFS_BIO_ORIG_BIO_SUBMITTED (1 << 0) - struct btrfs_bio { atomic_t refs; atomic_t stripes_pending; diff --git a/include/linux/bio.h b/include/linux/bio.h index 7486ea103f6e..f0291cf64cc5 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -427,7 +427,6 @@ static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask) } extern void bio_endio(struct bio *, int); -extern void bio_endio_nodec(struct bio *, int); struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); @@ -658,17 +657,6 @@ static inline struct bio *bio_list_get(struct bio_list *bl) return bio; } -/* - * Increment chain count for the bio. Make sure the CHAIN flag update - * is visible before the raised count. - */ -static inline void bio_inc_remaining(struct bio *bio) -{ - bio->bi_flags |= (1 << BIO_CHAIN); - smp_mb__before_atomic(); - atomic_inc(&bio->__bi_remaining); -} - /* * bio_set is used to allow other portions of the IO system to * allocate their own private memory pools for bio and iovec structures. -- cgit v1.2.3-71-gd317 From 5f1b670d0bef508a5554d92525f5f6d00d640b38 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 May 2015 09:14:04 -0400 Subject: block, dm: don't copy bios for request clones Currently dm-multipath has to clone the bios for every request sent to the lower devices, which wastes cpu cycles and ties down memory. This patch instead adds a new REQ_CLONE flag that instructs req_bio_endio to not complete bios attached to a request, which we set on clone requests similar to bios in a flush sequence. With this change I/O errors on a path failure only get propagated to dm-multipath, which can then either resubmit the I/O or complete the bios on the original request. I've done some basic testing of this on a Linux target with ALUA support, and it survives path failures during I/O nicely. Signed-off-by: Christoph Hellwig Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-core.c | 94 +++---------------------- drivers/md/dm-table.c | 25 ++++--- drivers/md/dm.c | 171 +++++++++++----------------------------------- drivers/md/dm.h | 5 +- include/linux/blk_types.h | 2 + include/linux/blkdev.h | 6 +- 6 files changed, 73 insertions(+), 230 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index de474b5dee2b..aa819a58ea24 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -117,7 +117,7 @@ EXPORT_SYMBOL(blk_rq_init); static void req_bio_endio(struct request *rq, struct bio *bio, unsigned int nbytes, int error) { - if (error) + if (error && !(rq->cmd_flags & REQ_CLONE)) clear_bit(BIO_UPTODATE, &bio->bi_flags); else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) error = -EIO; @@ -128,7 +128,8 @@ static void req_bio_endio(struct request *rq, struct bio *bio, bio_advance(bio, nbytes); /* don't actually finish bio if it's part of flush sequence */ - if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) + if (bio->bi_iter.bi_size == 0 && + !(rq->cmd_flags & (REQ_FLUSH_SEQ|REQ_CLONE))) bio_endio(bio, error); } @@ -2909,95 +2910,22 @@ int blk_lld_busy(struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_lld_busy); -/** - * blk_rq_unprep_clone - Helper function to free all bios in a cloned request - * @rq: the clone request to be cleaned up - * - * Description: - * Free all bios in @rq for a cloned request. - */ -void blk_rq_unprep_clone(struct request *rq) -{ - struct bio *bio; - - while ((bio = rq->bio) != NULL) { - rq->bio = bio->bi_next; - - bio_put(bio); - } -} -EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); - -/* - * Copy attributes of the original request to the clone request. - * The actual data parts (e.g. ->cmd, ->sense) are not copied. - */ -static void __blk_rq_prep_clone(struct request *dst, struct request *src) +void blk_rq_prep_clone(struct request *dst, struct request *src) { dst->cpu = src->cpu; - dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; + dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK); + dst->cmd_flags |= REQ_NOMERGE | REQ_CLONE; dst->cmd_type = src->cmd_type; dst->__sector = blk_rq_pos(src); dst->__data_len = blk_rq_bytes(src); dst->nr_phys_segments = src->nr_phys_segments; dst->ioprio = src->ioprio; dst->extra_len = src->extra_len; -} - -/** - * blk_rq_prep_clone - Helper function to setup clone request - * @rq: the request to be setup - * @rq_src: original request to be cloned - * @bs: bio_set that bios for clone are allocated from - * @gfp_mask: memory allocation mask for bio - * @bio_ctr: setup function to be called for each clone bio. - * Returns %0 for success, non %0 for failure. - * @data: private data to be passed to @bio_ctr - * - * Description: - * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. - * The actual data parts of @rq_src (e.g. ->cmd, ->sense) - * are not copied, and copying such parts is the caller's responsibility. - * Also, pages which the original bios are pointing to are not copied - * and the cloned bios just point same pages. - * So cloned bios must be completed before original bios, which means - * the caller must complete @rq before @rq_src. - */ -int blk_rq_prep_clone(struct request *rq, struct request *rq_src, - struct bio_set *bs, gfp_t gfp_mask, - int (*bio_ctr)(struct bio *, struct bio *, void *), - void *data) -{ - struct bio *bio, *bio_src; - - if (!bs) - bs = fs_bio_set; - - __rq_for_each_bio(bio_src, rq_src) { - bio = bio_clone_fast(bio_src, gfp_mask, bs); - if (!bio) - goto free_and_out; - - if (bio_ctr && bio_ctr(bio, bio_src, data)) - goto free_and_out; - - if (rq->bio) { - rq->biotail->bi_next = bio; - rq->biotail = bio; - } else - rq->bio = rq->biotail = bio; - } - - __blk_rq_prep_clone(rq, rq_src); - - return 0; - -free_and_out: - if (bio) - bio_put(bio); - blk_rq_unprep_clone(rq); - - return -ENOMEM; + dst->bio = src->bio; + dst->biotail = src->biotail; + dst->cmd = src->cmd; + dst->cmd_len = src->cmd_len; + dst->sense = src->sense; } EXPORT_SYMBOL_GPL(blk_rq_prep_clone); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index d9b00b8565c6..3662b2e49b8d 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -940,21 +940,28 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device * { unsigned type = dm_table_get_type(t); unsigned per_bio_data_size = 0; - struct dm_target *tgt; unsigned i; - if (unlikely(type == DM_TYPE_NONE)) { + switch (type) { + case DM_TYPE_BIO_BASED: + for (i = 0; i < t->num_targets; i++) { + struct dm_target *tgt = t->targets + i; + + per_bio_data_size = max(per_bio_data_size, + tgt->per_bio_data_size); + } + t->mempools = dm_alloc_bio_mempools(t->integrity_supported, + per_bio_data_size); + break; + case DM_TYPE_REQUEST_BASED: + case DM_TYPE_MQ_REQUEST_BASED: + t->mempools = dm_alloc_rq_mempools(md, type); + break; + default: DMWARN("no table type is set, can't allocate mempools"); return -EINVAL; } - if (type == DM_TYPE_BIO_BASED) - for (i = 0; i < t->num_targets; i++) { - tgt = t->targets + i; - per_bio_data_size = max(per_bio_data_size, tgt->per_bio_data_size); - } - - t->mempools = dm_alloc_md_mempools(md, type, t->integrity_supported, per_bio_data_size); if (!t->mempools) return -ENOMEM; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index a930b72314ac..38837f8ea327 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -990,57 +990,6 @@ static void clone_endio(struct bio *bio, int error) dec_pending(io, error); } -/* - * Partial completion handling for request-based dm - */ -static void end_clone_bio(struct bio *clone, int error) -{ - struct dm_rq_clone_bio_info *info = - container_of(clone, struct dm_rq_clone_bio_info, clone); - struct dm_rq_target_io *tio = info->tio; - struct bio *bio = info->orig; - unsigned int nr_bytes = info->orig->bi_iter.bi_size; - - bio_put(clone); - - if (tio->error) - /* - * An error has already been detected on the request. - * Once error occurred, just let clone->end_io() handle - * the remainder. - */ - return; - else if (error) { - /* - * Don't notice the error to the upper layer yet. - * The error handling decision is made by the target driver, - * when the request is completed. - */ - tio->error = error; - return; - } - - /* - * I/O for the bio successfully completed. - * Notice the data completion to the upper layer. - */ - - /* - * bios are processed from the head of the list. - * So the completing bio should always be rq->bio. - * If it's not, something wrong is happening. - */ - if (tio->orig->bio != bio) - DMERR("bio completion is going in the middle of the request"); - - /* - * Update the original request. - * Do not use blk_end_request() here, because it may complete - * the original request before the clone, and break the ordering. - */ - blk_update_request(tio->orig, 0, nr_bytes); -} - static struct dm_rq_target_io *tio_from_request(struct request *rq) { return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special); @@ -1089,8 +1038,6 @@ static void free_rq_clone(struct request *clone, bool must_be_mapped) WARN_ON_ONCE(must_be_mapped && !clone->q); - blk_rq_unprep_clone(clone); - if (md->type == DM_TYPE_MQ_REQUEST_BASED) /* stacked on blk-mq queue(s) */ tio->ti->type->release_clone_rq(clone); @@ -1821,39 +1768,13 @@ static void dm_dispatch_clone_request(struct request *clone, struct request *rq) dm_complete_request(rq, r); } -static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, - void *data) +static void setup_clone(struct request *clone, struct request *rq, + struct dm_rq_target_io *tio) { - struct dm_rq_target_io *tio = data; - struct dm_rq_clone_bio_info *info = - container_of(bio, struct dm_rq_clone_bio_info, clone); - - info->orig = bio_orig; - info->tio = tio; - bio->bi_end_io = end_clone_bio; - - return 0; -} - -static int setup_clone(struct request *clone, struct request *rq, - struct dm_rq_target_io *tio, gfp_t gfp_mask) -{ - int r; - - r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask, - dm_rq_bio_constructor, tio); - if (r) - return r; - - clone->cmd = rq->cmd; - clone->cmd_len = rq->cmd_len; - clone->sense = rq->sense; + blk_rq_prep_clone(clone, rq); clone->end_io = end_clone_request; clone->end_io_data = tio; - tio->clone = clone; - - return 0; } static struct request *clone_rq(struct request *rq, struct mapped_device *md, @@ -1874,12 +1795,7 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md, clone = tio->clone; blk_rq_init(NULL, clone); - if (setup_clone(clone, rq, tio, gfp_mask)) { - /* -ENOMEM */ - if (alloc_clone) - free_clone_request(md, clone); - return NULL; - } + setup_clone(clone, rq, tio); return clone; } @@ -1973,11 +1889,7 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq, } if (IS_ERR(clone)) return DM_MAPIO_REQUEUE; - if (setup_clone(clone, rq, tio, GFP_ATOMIC)) { - /* -ENOMEM */ - ti->type->release_clone_rq(clone); - return DM_MAPIO_REQUEUE; - } + setup_clone(clone, rq, tio); } switch (r) { @@ -2431,8 +2343,6 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t) goto out; } - BUG_ON(!p || md->io_pool || md->rq_pool || md->bs); - md->io_pool = p->io_pool; p->io_pool = NULL; md->rq_pool = p->rq_pool; @@ -3536,48 +3446,23 @@ int dm_noflush_suspending(struct dm_target *ti) } EXPORT_SYMBOL_GPL(dm_noflush_suspending); -struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type, - unsigned integrity, unsigned per_bio_data_size) +struct dm_md_mempools *dm_alloc_bio_mempools(unsigned integrity, + unsigned per_bio_data_size) { - struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL); - struct kmem_cache *cachep = NULL; - unsigned int pool_size = 0; + struct dm_md_mempools *pools; + unsigned int pool_size = dm_get_reserved_bio_based_ios(); unsigned int front_pad; + pools = kzalloc(sizeof(*pools), GFP_KERNEL); if (!pools) return NULL; - type = filter_md_type(type, md); + front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + + offsetof(struct dm_target_io, clone); - switch (type) { - case DM_TYPE_BIO_BASED: - cachep = _io_cache; - pool_size = dm_get_reserved_bio_based_ios(); - front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); - break; - case DM_TYPE_REQUEST_BASED: - cachep = _rq_tio_cache; - pool_size = dm_get_reserved_rq_based_ios(); - pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache); - if (!pools->rq_pool) - goto out; - /* fall through to setup remaining rq-based pools */ - case DM_TYPE_MQ_REQUEST_BASED: - if (!pool_size) - pool_size = dm_get_reserved_rq_based_ios(); - front_pad = offsetof(struct dm_rq_clone_bio_info, clone); - /* per_bio_data_size is not used. See __bind_mempools(). */ - WARN_ON(per_bio_data_size != 0); - break; - default: - BUG(); - } - - if (cachep) { - pools->io_pool = mempool_create_slab_pool(pool_size, cachep); - if (!pools->io_pool) - goto out; - } + pools->io_pool = mempool_create_slab_pool(pool_size, _io_cache); + if (!pools->io_pool) + goto out; pools->bs = bioset_create_nobvec(pool_size, front_pad); if (!pools->bs) @@ -3587,10 +3472,34 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t goto out; return pools; - out: dm_free_md_mempools(pools); + return NULL; +} + +struct dm_md_mempools *dm_alloc_rq_mempools(struct mapped_device *md, + unsigned type) +{ + unsigned int pool_size = dm_get_reserved_rq_based_ios(); + struct dm_md_mempools *pools; + pools = kzalloc(sizeof(*pools), GFP_KERNEL); + if (!pools) + return NULL; + + if (filter_md_type(type, md) == DM_TYPE_REQUEST_BASED) { + pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache); + if (!pools->rq_pool) + goto out; + } + + pools->io_pool = mempool_create_slab_pool(pool_size, _rq_tio_cache); + if (!pools->io_pool) + goto out; + + return pools; +out: + dm_free_md_mempools(pools); return NULL; } diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 6123c2bf9150..e6e66d087b26 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -222,8 +222,9 @@ void dm_kcopyd_exit(void); /* * Mempool operations */ -struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type, - unsigned integrity, unsigned per_bio_data_size); +struct dm_md_mempools *dm_alloc_bio_mempools(unsigned integrity, + unsigned per_bio_data_size); +struct dm_md_mempools *dm_alloc_rq_mempools(struct mapped_device *md, unsigned type); void dm_free_md_mempools(struct dm_md_mempools *pools); /* diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 3f4ded0b1a34..45a6be89957c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -192,6 +192,7 @@ enum rq_flag_bits { __REQ_HASHED, /* on IO scheduler merge hash */ __REQ_MQ_INFLIGHT, /* track inflight for MQ */ __REQ_NO_TIMEOUT, /* requests may never expire */ + __REQ_CLONE, /* cloned bios */ __REQ_NR_BITS, /* stops here */ }; @@ -246,5 +247,6 @@ enum rq_flag_bits { #define REQ_HASHED (1ULL << __REQ_HASHED) #define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT) #define REQ_NO_TIMEOUT (1ULL << __REQ_NO_TIMEOUT) +#define REQ_CLONE (1ULL << __REQ_CLONE) #endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bc917956a6d0..9ded80da2c16 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -775,11 +775,7 @@ extern void blk_add_request_payload(struct request *rq, struct page *page, unsigned int len); extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); extern int blk_lld_busy(struct request_queue *q); -extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, - struct bio_set *bs, gfp_t gfp_mask, - int (*bio_ctr)(struct bio *, struct bio *, void *), - void *data); -extern void blk_rq_unprep_clone(struct request *rq); +extern void blk_rq_prep_clone(struct request *rq, struct request *rq_src); extern int blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern void blk_delay_queue(struct request_queue *, unsigned long); -- cgit v1.2.3-71-gd317 From e548ca4ee4595f65b262661d166310ad8a149bec Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 29 May 2015 13:11:32 -0600 Subject: block: don't honor chunk sizes for data-less IO We don't need to honor chunk sizes for IO that doesn't carry any data. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9ded80da2c16..ccaa9aecd593 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -903,7 +903,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq) if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC)) return q->limits.max_hw_sectors; - if (!q->limits.chunk_sectors) + if (!q->limits.chunk_sectors || (rq->cmd_flags & REQ_DISCARD)) return blk_queue_get_max_sectors(q, rq->cmd_flags); return min(blk_max_size_offset(q, blk_rq_pos(rq)), -- cgit v1.2.3-71-gd317 From f26cdc8536ad50fb802a0445f836b4f94ca09ae7 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 1 Jun 2015 09:29:53 -0600 Subject: blk-mq: Shared tag enhancements Storage controllers may expose multiple block devices that share hardware resources managed by blk-mq. This patch enhances the shared tags so a low-level driver can access the shared resources not tied to the unshared h/w contexts. This way the LLD can dynamically add and delete disks and request queues without having to track all the request_queue hctx's to iterate outstanding tags. Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 38 ++++++++++++++++++++++++++++++++++++++ block/blk-mq-tag.h | 1 + block/blk-mq.c | 12 ++++++++++-- include/linux/blk-mq.h | 4 ++++ 4 files changed, 53 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index be3290cc0644..9b6e28830b82 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -438,6 +438,39 @@ static void bt_for_each(struct blk_mq_hw_ctx *hctx, } } +static void bt_tags_for_each(struct blk_mq_tags *tags, + struct blk_mq_bitmap_tags *bt, unsigned int off, + busy_tag_iter_fn *fn, void *data, bool reserved) +{ + struct request *rq; + int bit, i; + + if (!tags->rqs) + return; + for (i = 0; i < bt->map_nr; i++) { + struct blk_align_bitmap *bm = &bt->map[i]; + + for (bit = find_first_bit(&bm->word, bm->depth); + bit < bm->depth; + bit = find_next_bit(&bm->word, bm->depth, bit + 1)) { + rq = blk_mq_tag_to_rq(tags, off + bit); + fn(rq, data, reserved); + } + + off += (1 << bt->bits_per_word); + } +} + +void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, + void *priv) +{ + if (tags->nr_reserved_tags) + bt_tags_for_each(tags, &tags->breserved_tags, 0, fn, priv, true); + bt_tags_for_each(tags, &tags->bitmap_tags, tags->nr_reserved_tags, fn, priv, + false); +} +EXPORT_SYMBOL(blk_mq_all_tag_busy_iter); + void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn, void *priv) { @@ -580,6 +613,11 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, if (!tags) return NULL; + if (!zalloc_cpumask_var(&tags->cpumask, GFP_KERNEL)) { + kfree(tags); + return NULL; + } + tags->nr_tags = total_tags; tags->nr_reserved_tags = reserved_tags; diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 90767b370308..75893a34237d 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -44,6 +44,7 @@ struct blk_mq_tags { struct list_head page_list; int alloc_policy; + cpumask_var_t cpumask; }; diff --git a/block/blk-mq.c b/block/blk-mq.c index c382a34fe5ac..ef100fd2cb86 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1525,7 +1525,6 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, i++; } } - return tags; fail: @@ -1821,6 +1820,7 @@ static void blk_mq_map_swqueue(struct request_queue *q) hctx = q->mq_ops->map_queue(q, i); cpumask_set_cpu(i, hctx->cpumask); + cpumask_set_cpu(i, hctx->tags->cpumask); ctx->index_hw = hctx->nr_ctx; hctx->ctxs[hctx->nr_ctx++] = ctx; } @@ -2187,6 +2187,12 @@ static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) return 0; } +struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags) +{ + return tags->cpumask; +} +EXPORT_SYMBOL_GPL(blk_mq_tags_cpumask); + /* * Alloc a tag set to be associated with one or more request queues. * May fail with EINVAL for various error conditions. May adjust the @@ -2248,8 +2254,10 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set) int i; for (i = 0; i < set->nr_hw_queues; i++) { - if (set->tags[i]) + if (set->tags[i]) { blk_mq_free_rq_map(set, set->tags[i], i); + free_cpumask_var(set->tags[i]->cpumask); + } } kfree(set->tags); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 2056a99b92f8..37d1602c4f7a 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -96,6 +96,7 @@ typedef void (exit_request_fn)(void *, struct request *, unsigned int, typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, bool); +typedef void (busy_tag_iter_fn)(struct request *, void *, bool); struct blk_mq_ops { /* @@ -182,6 +183,7 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *); struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, bool reserved); struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); +struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags); enum { BLK_MQ_UNIQUE_TAG_BITS = 16, @@ -224,6 +226,8 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn, void *priv); +void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, + void *priv); void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_unfreeze_queue(struct request_queue *q); void blk_mq_freeze_queue_start(struct request_queue *q); -- cgit v1.2.3-71-gd317 From 3f21c265cd5f7ae867cc0e86a1f6d5093f1963cc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 5 Jun 2015 10:57:37 -0600 Subject: block: add blk_set_queue_dying() to blkdev.h We export this function and NVMe wants to use it, but for some reason it was never added to the block header. Do that. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ccaa9aecd593..a31380c35918 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1024,6 +1024,7 @@ bool __must_check blk_get_queue(struct request_queue *); struct request_queue *blk_alloc_queue(gfp_t); struct request_queue *blk_alloc_queue_node(gfp_t, int); extern void blk_put_queue(struct request_queue *); +extern void blk_set_queue_dying(struct request_queue *); /* * block layer runtime pm functions -- cgit v1.2.3-71-gd317 From 0bb979472a7401022109e81dd89d777adea58710 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 10 Jun 2015 08:01:20 -0600 Subject: cfq-iosched: fix the setting of IOPS mode on SSDs A previous commit wanted to make CFQ default to IOPS mode on non-rotational storage, however it did so when the queue was initialized and the non-rotational flag is only set later on in the probe. Add an elevator hook that gets called off the add_disk() path, at that point we know that feature probing has finished, and we can reliably check for the various flags that drivers can set. Fixes: 41c0126b ("block: Make CFQ default to IOPS mode on SSDs") Tested-by: Romain Francoise Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 15 ++++++++++++++- block/elevator.c | 2 ++ include/linux/elevator.h | 2 ++ 3 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index c808ad87652d..d1d0cb235cd2 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -4508,7 +4508,7 @@ static int cfq_init_queue(struct request_queue *q, struct elevator_type *e) cfqd->cfq_slice[1] = cfq_slice_sync; cfqd->cfq_target_latency = cfq_target_latency; cfqd->cfq_slice_async_rq = cfq_slice_async_rq; - cfqd->cfq_slice_idle = blk_queue_nonrot(q) ? 0 : cfq_slice_idle; + cfqd->cfq_slice_idle = cfq_slice_idle; cfqd->cfq_group_idle = cfq_group_idle; cfqd->cfq_latency = 1; cfqd->hw_tag = -1; @@ -4525,6 +4525,18 @@ out_free: return ret; } +static void cfq_registered_queue(struct request_queue *q) +{ + struct elevator_queue *e = q->elevator; + struct cfq_data *cfqd = e->elevator_data; + + /* + * Default to IOPS mode with no idling for SSDs + */ + if (blk_queue_nonrot(q)) + cfqd->cfq_slice_idle = 0; +} + /* * sysfs parts below --> */ @@ -4640,6 +4652,7 @@ static struct elevator_type iosched_cfq = { .elevator_may_queue_fn = cfq_may_queue, .elevator_init_fn = cfq_init_queue, .elevator_exit_fn = cfq_exit_queue, + .elevator_registered_fn = cfq_registered_queue, }, .icq_size = sizeof(struct cfq_io_cq), .icq_align = __alignof__(struct cfq_io_cq), diff --git a/block/elevator.c b/block/elevator.c index 59794d0d38e3..5f0452734a40 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -810,6 +810,8 @@ int elv_register_queue(struct request_queue *q) } kobject_uevent(&e->kobj, KOBJ_ADD); e->registered = 1; + if (e->type->ops.elevator_registered_fn) + e->type->ops.elevator_registered_fn(q); } return error; } diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 45a91474487d..638b324f0291 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -39,6 +39,7 @@ typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct reques typedef int (elevator_init_fn) (struct request_queue *, struct elevator_type *e); typedef void (elevator_exit_fn) (struct elevator_queue *); +typedef void (elevator_registered_fn) (struct request_queue *); struct elevator_ops { @@ -68,6 +69,7 @@ struct elevator_ops elevator_init_fn *elevator_init_fn; elevator_exit_fn *elevator_exit_fn; + elevator_registered_fn *elevator_registered_fn; }; #define ELV_NAME_MAX (16) -- cgit v1.2.3-71-gd317