summaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/Makefile1
-rw-r--r--block/badblocks.c2
-rw-r--r--block/bfq-iosched.c51
-rw-r--r--block/bfq-iosched.h5
-rw-r--r--block/bio.c11
-rw-r--r--block/blk-cgroup-fc-appid.c57
-rw-r--r--block/blk-cgroup.c168
-rw-r--r--block/blk-cgroup.h140
-rw-r--r--block/blk-core.c81
-rw-r--r--block/blk-crypto-fallback.c1
-rw-r--r--block/blk-iocost.c88
-rw-r--r--block/blk-iolatency.c8
-rw-r--r--block/blk-map.c5
-rw-r--r--block/blk-mq.c120
-rw-r--r--block/blk-throttle.c5
-rw-r--r--block/bounce.c1
-rw-r--r--block/fops.c22
-rw-r--r--block/mq-deadline.c1
-rw-r--r--block/partitions/acorn.c4
-rw-r--r--block/partitions/atari.c1
-rw-r--r--block/partitions/ldm.c15
21 files changed, 471 insertions, 316 deletions
diff --git a/block/Makefile b/block/Makefile
index 3950ecbc5c26..4e01bb71ad6e 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o
obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
obj-$(CONFIG_BLK_CGROUP_RWSTAT) += blk-cgroup-rwstat.o
+obj-$(CONFIG_BLK_CGROUP_FC_APPID) += blk-cgroup-fc-appid.o
obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
obj-$(CONFIG_BLK_CGROUP_IOPRIO) += blk-ioprio.o
obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o
diff --git a/block/badblocks.c b/block/badblocks.c
index d39056630d9c..3afb550c0f7b 100644
--- a/block/badblocks.c
+++ b/block/badblocks.c
@@ -65,7 +65,6 @@ int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
s >>= bb->shift;
target += (1<<bb->shift) - 1;
target >>= bb->shift;
- sectors = target - s;
}
/* 'target' is now the first block after the bad range */
@@ -345,7 +344,6 @@ int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
s += (1<<bb->shift) - 1;
s >>= bb->shift;
target >>= bb->shift;
- sectors = target - s;
}
write_seqlock_irq(&bb->lock);
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index e47c75f1fa0f..0d46cb728bbf 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -374,7 +374,7 @@ static const unsigned long bfq_activation_stable_merging = 600;
*/
static const unsigned long bfq_late_stable_merging = 600;
-#define RQ_BIC(rq) icq_to_bic((rq)->elv.priv[0])
+#define RQ_BIC(rq) ((struct bfq_io_cq *)((rq)->elv.priv[0]))
#define RQ_BFQQ(rq) ((rq)->elv.priv[1])
struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync)
@@ -456,6 +456,8 @@ static struct bfq_io_cq *bfq_bic_lookup(struct request_queue *q)
*/
void bfq_schedule_dispatch(struct bfq_data *bfqd)
{
+ lockdep_assert_held(&bfqd->lock);
+
if (bfqd->queued != 0) {
bfq_log(bfqd, "schedule dispatch");
blk_mq_run_hw_queues(bfqd->queue, true);
@@ -569,7 +571,7 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
struct bfq_entity *entity = &bfqq->entity;
struct bfq_entity *inline_entities[BFQ_LIMIT_INLINE_DEPTH];
struct bfq_entity **entities = inline_entities;
- int depth, level;
+ int depth, level, alloc_depth = BFQ_LIMIT_INLINE_DEPTH;
int class_idx = bfqq->ioprio_class - 1;
struct bfq_sched_data *sched_data;
unsigned long wsum;
@@ -578,15 +580,21 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit)
if (!entity->on_st_or_in_serv)
return false;
+retry:
+ spin_lock_irq(&bfqd->lock);
/* +1 for bfqq entity, root cgroup not included */
depth = bfqg_to_blkg(bfqq_group(bfqq))->blkcg->css.cgroup->level + 1;
- if (depth > BFQ_LIMIT_INLINE_DEPTH) {
+ if (depth > alloc_depth) {
+ spin_unlock_irq(&bfqd->lock);
+ if (entities != inline_entities)
+ kfree(entities);
entities = kmalloc_array(depth, sizeof(*entities), GFP_NOIO);
if (!entities)
return false;
+ alloc_depth = depth;
+ goto retry;
}
- spin_lock_irq(&bfqd->lock);
sched_data = entity->sched_data;
/* Gather our ancestors as we need to traverse them in reverse order */
level = 0;
@@ -2127,9 +2135,7 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq,
if (!bfqd->last_completed_rq_bfqq ||
bfqd->last_completed_rq_bfqq == bfqq ||
bfq_bfqq_has_short_ttime(bfqq) ||
- bfqq->dispatched > 0 ||
- now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC ||
- bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq)
+ now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC)
return;
/*
@@ -2202,9 +2208,13 @@ static void bfq_add_request(struct request *rq)
bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq));
bfqq->queued[rq_is_sync(rq)]++;
- bfqd->queued++;
+ /*
+ * Updating of 'bfqd->queued' is protected by 'bfqd->lock', however, it
+ * may be read without holding the lock in bfq_has_work().
+ */
+ WRITE_ONCE(bfqd->queued, bfqd->queued + 1);
- if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) {
+ if (bfq_bfqq_sync(bfqq) && RQ_BIC(rq)->requests <= 1) {
bfq_check_waker(bfqd, bfqq, now_ns);
/*
@@ -2394,7 +2404,11 @@ static void bfq_remove_request(struct request_queue *q,
if (rq->queuelist.prev != &rq->queuelist)
list_del_init(&rq->queuelist);
bfqq->queued[sync]--;
- bfqd->queued--;
+ /*
+ * Updating of 'bfqd->queued' is protected by 'bfqd->lock', however, it
+ * may be read without holding the lock in bfq_has_work().
+ */
+ WRITE_ONCE(bfqd->queued, bfqd->queued - 1);
elv_rb_del(&bfqq->sort_list, rq);
elv_rqhash_del(q, rq);
@@ -5055,11 +5069,11 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx)
struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
/*
- * Avoiding lock: a race on bfqd->busy_queues should cause at
+ * Avoiding lock: a race on bfqd->queued should cause at
* most a call to dispatch for nothing
*/
return !list_empty_careful(&bfqd->dispatch) ||
- bfq_tot_busy_queues(bfqd) > 0;
+ READ_ONCE(bfqd->queued);
}
static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
@@ -6360,12 +6374,6 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
bfq_schedule_dispatch(bfqd);
}
-static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq)
-{
- bfqq_request_freed(bfqq);
- bfq_put_queue(bfqq);
-}
-
/*
* The processes associated with bfqq may happen to generate their
* cumulative I/O at a lower rate than the rate at which the device
@@ -6562,7 +6570,9 @@ static void bfq_finish_requeue_request(struct request *rq)
bfq_completed_request(bfqq, bfqd);
}
- bfq_finish_requeue_request_body(bfqq);
+ bfqq_request_freed(bfqq);
+ bfq_put_queue(bfqq);
+ RQ_BIC(rq)->requests--;
spin_unlock_irqrestore(&bfqd->lock, flags);
/*
@@ -6796,6 +6806,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq)
bfqq_request_allocated(bfqq);
bfqq->ref++;
+ bic->requests++;
bfq_log_bfqq(bfqd, bfqq, "get_request %p: bfqq %p, %d",
rq, bfqq, bfqq->ref);
@@ -6892,8 +6903,8 @@ bfq_idle_slice_timer_body(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_bfqq_expire(bfqd, bfqq, true, reason);
schedule_dispatch:
- spin_unlock_irqrestore(&bfqd->lock, flags);
bfq_schedule_dispatch(bfqd);
+ spin_unlock_irqrestore(&bfqd->lock, flags);
}
/*
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 978ef5d6fe6a..ca8177d7bf7c 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -468,6 +468,7 @@ struct bfq_io_cq {
struct bfq_queue *stable_merge_bfqq;
bool stably_merged; /* non splittable if true */
+ unsigned int requests; /* Number of requests this process has in flight */
};
/**
@@ -1102,13 +1103,13 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
break; \
bfq_bfqq_name((bfqq), pid_str, MAX_BFQQ_NAME_LENGTH); \
blk_add_cgroup_trace_msg((bfqd)->queue, \
- bfqg_to_blkg(bfqq_group(bfqq))->blkcg, \
+ &bfqg_to_blkg(bfqq_group(bfqq))->blkcg->css, \
"%s " fmt, pid_str, ##args); \
} while (0)
#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \
blk_add_cgroup_trace_msg((bfqd)->queue, \
- bfqg_to_blkg(bfqg)->blkcg, fmt, ##args); \
+ &bfqg_to_blkg(bfqg)->blkcg->css, fmt, ##args); \
} while (0)
#else /* CONFIG_BFQ_GROUP_IOSCHED */
diff --git a/block/bio.c b/block/bio.c
index 212ccd5b5212..a3893d80dccc 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -761,14 +761,15 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
bio_set_flag(bio, BIO_CLONED);
if (bio_flagged(bio_src, BIO_THROTTLED))
bio_set_flag(bio, BIO_THROTTLED);
- if (bio->bi_bdev == bio_src->bi_bdev &&
- bio_flagged(bio_src, BIO_REMAPPED))
- bio_set_flag(bio, BIO_REMAPPED);
bio->bi_ioprio = bio_src->bi_ioprio;
bio->bi_iter = bio_src->bi_iter;
- bio_clone_blkg_association(bio, bio_src);
- blkcg_bio_issue_init(bio);
+ if (bio->bi_bdev) {
+ if (bio->bi_bdev == bio_src->bi_bdev &&
+ bio_flagged(bio_src, BIO_REMAPPED))
+ bio_set_flag(bio, BIO_REMAPPED);
+ bio_clone_blkg_association(bio, bio_src);
+ }
if (bio_crypt_clone(bio, bio_src, gfp) < 0)
return -ENOMEM;
diff --git a/block/blk-cgroup-fc-appid.c b/block/blk-cgroup-fc-appid.c
new file mode 100644
index 000000000000..760a2e1878dd
--- /dev/null
+++ b/block/blk-cgroup-fc-appid.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "blk-cgroup.h"
+
+/**
+ * blkcg_set_fc_appid - set the fc_app_id field associted to blkcg
+ * @app_id: application identifier
+ * @cgrp_id: cgroup id
+ * @app_id_len: size of application identifier
+ */
+int blkcg_set_fc_appid(char *app_id, u64 cgrp_id, size_t app_id_len)
+{
+ struct cgroup *cgrp;
+ struct cgroup_subsys_state *css;
+ struct blkcg *blkcg;
+ int ret = 0;
+
+ if (app_id_len > FC_APPID_LEN)
+ return -EINVAL;
+
+ cgrp = cgroup_get_from_id(cgrp_id);
+ if (!cgrp)
+ return -ENOENT;
+ css = cgroup_get_e_css(cgrp, &io_cgrp_subsys);
+ if (!css) {
+ ret = -ENOENT;
+ goto out_cgrp_put;
+ }
+ blkcg = css_to_blkcg(css);
+ /*
+ * There is a slight race condition on setting the appid.
+ * Worst case an I/O may not find the right id.
+ * This is no different from the I/O we let pass while obtaining
+ * the vmid from the fabric.
+ * Adding the overhead of a lock is not necessary.
+ */
+ strlcpy(blkcg->fc_app_id, app_id, app_id_len);
+ css_put(css);
+out_cgrp_put:
+ cgroup_put(cgrp);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(blkcg_set_fc_appid);
+
+/**
+ * blkcg_get_fc_appid - get the fc app identifier associated with a bio
+ * @bio: target bio
+ *
+ * On success return the fc_app_id, on failure return NULL
+ */
+char *blkcg_get_fc_appid(struct bio *bio)
+{
+ if (!bio->bi_blkg || bio->bi_blkg->blkcg->fc_app_id[0] == '\0')
+ return NULL;
+ return bio->bi_blkg->blkcg->fc_app_id;
+}
+EXPORT_SYMBOL_GPL(blkcg_get_fc_appid);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 8dfe62786cd5..40161a3f68d0 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -59,6 +59,23 @@ static struct workqueue_struct *blkcg_punt_bio_wq;
#define BLKG_DESTROY_BATCH_SIZE 64
+/**
+ * blkcg_css - find the current css
+ *
+ * Find the css associated with either the kthread or the current task.
+ * This may return a dying css, so it is up to the caller to use tryget logic
+ * to confirm it is alive and well.
+ */
+static struct cgroup_subsys_state *blkcg_css(void)
+{
+ struct cgroup_subsys_state *css;
+
+ css = kthread_blkcg();
+ if (css)
+ return css;
+ return task_css(current, io_cgrp_id);
+}
+
static bool blkcg_policy_enabled(struct request_queue *q,
const struct blkcg_policy *pol)
{
@@ -156,6 +173,33 @@ static void blkg_async_bio_workfn(struct work_struct *work)
}
/**
+ * bio_blkcg_css - return the blkcg CSS associated with a bio
+ * @bio: target bio
+ *
+ * This returns the CSS for the blkcg associated with a bio, or %NULL if not
+ * associated. Callers are expected to either handle %NULL or know association
+ * has been done prior to calling this.
+ */
+struct cgroup_subsys_state *bio_blkcg_css(struct bio *bio)
+{
+ if (!bio || !bio->bi_blkg)
+ return NULL;
+ return &bio->bi_blkg->blkcg->css;
+}
+EXPORT_SYMBOL_GPL(bio_blkcg_css);
+
+/**
+ * blkcg_parent - get the parent of a blkcg
+ * @blkcg: blkcg of interest
+ *
+ * Return the parent blkcg of @blkcg. Can be called anytime.
+ */
+static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
+{
+ return css_to_blkcg(blkcg->css.parent);
+}
+
+/**
* blkg_alloc - allocate a blkg
* @blkcg: block cgroup the new blkg is associated with
* @q: request_queue the new blkg is associated with
@@ -254,7 +298,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
struct blkcg_gq *blkg;
int i, ret;
- WARN_ON_ONCE(!rcu_read_lock_held());
lockdep_assert_held(&q->queue_lock);
/* request_queue is dying, do not create/recreate a blkg */
@@ -905,7 +948,6 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
{
struct blkg_iostat_set *bis = &blkg->iostat;
u64 rbytes, wbytes, rios, wios, dbytes, dios;
- bool has_stats = false;
const char *dname;
unsigned seq;
int i;
@@ -931,14 +973,12 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
} while (u64_stats_fetch_retry(&bis->sync, seq));
if (rbytes || wbytes || rios || wios) {
- has_stats = true;
seq_printf(s, "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu",
rbytes, wbytes, rios, wios,
dbytes, dios);
}
if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
- has_stats = true;
seq_printf(s, " use_delay=%d delay_nsec=%llu",
atomic_read(&blkg->use_delay),
atomic64_read(&blkg->delay_nsec));
@@ -950,12 +990,10 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
if (!blkg->pd[i] || !pol->pd_stat_fn)
continue;
- if (pol->pd_stat_fn(blkg->pd[i], s))
- has_stats = true;
+ pol->pd_stat_fn(blkg->pd[i], s);
}
- if (has_stats)
- seq_printf(s, "\n");
+ seq_puts(s, "\n");
}
static int blkcg_print_stat(struct seq_file *sf, void *v)
@@ -994,6 +1032,13 @@ static struct cftype blkcg_legacy_files[] = {
{ } /* terminate */
};
+#ifdef CONFIG_CGROUP_WRITEBACK
+struct list_head *blkcg_get_cgwb_list(struct cgroup_subsys_state *css)
+{
+ return &css_to_blkcg(css)->cgwb_list;
+}
+#endif
+
/*
* blkcg destruction is a three-stage process.
*
@@ -1016,25 +1061,6 @@ static struct cftype blkcg_legacy_files[] = {
*/
/**
- * blkcg_css_offline - cgroup css_offline callback
- * @css: css of interest
- *
- * This function is called when @css is about to go away. Here the cgwbs are
- * offlined first and only once writeback associated with the blkcg has
- * finished do we start step 2 (see above).
- */
-static void blkcg_css_offline(struct cgroup_subsys_state *css)
-{
- struct blkcg *blkcg = css_to_blkcg(css);
-
- /* this prevents anyone from attaching or migrating to this blkcg */
- wb_blkcg_offline(blkcg);
-
- /* put the base online pin allowing step 2 to be triggered */
- blkcg_unpin_online(blkcg);
-}
-
-/**
* blkcg_destroy_blkgs - responsible for shooting down blkgs
* @blkcg: blkcg of interest
*
@@ -1045,7 +1071,7 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css)
*
* This is the blkcg counterpart of ioc_release_fn().
*/
-void blkcg_destroy_blkgs(struct blkcg *blkcg)
+static void blkcg_destroy_blkgs(struct blkcg *blkcg)
{
might_sleep();
@@ -1075,6 +1101,57 @@ void blkcg_destroy_blkgs(struct blkcg *blkcg)
spin_unlock_irq(&blkcg->lock);
}
+/**
+ * blkcg_pin_online - pin online state
+ * @blkcg_css: blkcg of interest
+ *
+ * While pinned, a blkcg is kept online. This is primarily used to
+ * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline
+ * while an associated cgwb is still active.
+ */
+void blkcg_pin_online(struct cgroup_subsys_state *blkcg_css)
+{
+ refcount_inc(&css_to_blkcg(blkcg_css)->online_pin);
+}
+
+/**
+ * blkcg_unpin_online - unpin online state
+ * @blkcg_css: blkcg of interest
+ *
+ * This is primarily used to impedance-match blkg and cgwb lifetimes so
+ * that blkg doesn't go offline while an associated cgwb is still active.
+ * When this count goes to zero, all active cgwbs have finished so the
+ * blkcg can continue destruction by calling blkcg_destroy_blkgs().
+ */
+void blkcg_unpin_online(struct cgroup_subsys_state *blkcg_css)
+{
+ struct blkcg *blkcg = css_to_blkcg(blkcg_css);
+
+ do {
+ if (!refcount_dec_and_test(&blkcg->online_pin))
+ break;
+ blkcg_destroy_blkgs(blkcg);
+ blkcg = blkcg_parent(blkcg);
+ } while (blkcg);
+}
+
+/**
+ * blkcg_css_offline - cgroup css_offline callback
+ * @css: css of interest
+ *
+ * This function is called when @css is about to go away. Here the cgwbs are
+ * offlined first and only once writeback associated with the blkcg has
+ * finished do we start step 2 (see above).
+ */
+static void blkcg_css_offline(struct cgroup_subsys_state *css)
+{
+ /* this prevents anyone from attaching or migrating to this blkcg */
+ wb_blkcg_offline(css);
+
+ /* put the base online pin allowing step 2 to be triggered */
+ blkcg_unpin_online(css);
+}
+
static void blkcg_css_free(struct cgroup_subsys_state *css)
{
struct blkcg *blkcg = css_to_blkcg(css);
@@ -1163,8 +1240,7 @@ unlock:
static int blkcg_css_online(struct cgroup_subsys_state *css)
{
- struct blkcg *blkcg = css_to_blkcg(css);
- struct blkcg *parent = blkcg_parent(blkcg);
+ struct blkcg *parent = blkcg_parent(css_to_blkcg(css));
/*
* blkcg_pin_online() is used to delay blkcg offline so that blkgs
@@ -1172,7 +1248,7 @@ static int blkcg_css_online(struct cgroup_subsys_state *css)
* parent so that offline always happens towards the root.
*/
if (parent)
- blkcg_pin_online(parent);
+ blkcg_pin_online(css);
return 0;
}
@@ -1201,14 +1277,13 @@ int blkcg_init_queue(struct request_queue *q)
preloaded = !radix_tree_preload(GFP_KERNEL);
/* Make sure the root blkg exists. */
- rcu_read_lock();
+ /* spin_lock_irq can serve as RCU read-side critical section. */
spin_lock_irq(&q->queue_lock);
blkg = blkg_create(&blkcg_root, q, new_blkg);
if (IS_ERR(blkg))
goto err_unlock;
q->root_blkg = blkg;
spin_unlock_irq(&q->queue_lock);
- rcu_read_unlock();
if (preloaded)
radix_tree_preload_end();
@@ -1234,7 +1309,6 @@ err_destroy_all:
return ret;
err_unlock:
spin_unlock_irq(&q->queue_lock);
- rcu_read_unlock();
if (preloaded)
radix_tree_preload_end();
return PTR_ERR(blkg);
@@ -1726,7 +1800,6 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
void blkcg_maybe_throttle_current(void)
{
struct request_queue *q = current->throttle_queue;
- struct cgroup_subsys_state *css;
struct blkcg *blkcg;
struct blkcg_gq *blkg;
bool use_memdelay = current->use_memdelay;
@@ -1738,12 +1811,7 @@ void blkcg_maybe_throttle_current(void)
current->use_memdelay = false;
rcu_read_lock();
- css = kthread_blkcg();
- if (css)
- blkcg = css_to_blkcg(css);
- else
- blkcg = css_to_blkcg(task_css(current, io_cgrp_id));
-
+ blkcg = css_to_blkcg(blkcg_css());
if (!blkcg)
goto out;
blkg = blkg_lookup(blkcg, q);
@@ -1889,7 +1957,7 @@ void bio_associate_blkg(struct bio *bio)
rcu_read_lock();
if (bio->bi_blkg)
- css = &bio_blkcg(bio)->css;
+ css = bio_blkcg_css(bio);
else
css = blkcg_css();
@@ -1950,6 +2018,22 @@ void blk_cgroup_bio_start(struct bio *bio)
put_cpu();
}
+bool blk_cgroup_congested(void)
+{
+ struct cgroup_subsys_state *css;
+ bool ret = false;
+
+ rcu_read_lock();
+ for (css = blkcg_css(); css; css = css->parent) {
+ if (atomic_read(&css->cgroup->congestion_count)) {
+ ret = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
static int __init blkcg_init(void)
{
blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 47e1e38390c9..d4de0a35e066 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -15,13 +15,101 @@
*/
#include <linux/blk-cgroup.h>
+#include <linux/cgroup.h>
+#include <linux/kthread.h>
#include <linux/blk-mq.h>
+struct blkcg_gq;
+struct blkg_policy_data;
+
+
/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
#define BLKG_STAT_CPU_BATCH (INT_MAX / 2)
#ifdef CONFIG_BLK_CGROUP
+enum blkg_iostat_type {
+ BLKG_IOSTAT_READ,
+ BLKG_IOSTAT_WRITE,
+ BLKG_IOSTAT_DISCARD,
+
+ BLKG_IOSTAT_NR,
+};
+
+struct blkg_iostat {
+ u64 bytes[BLKG_IOSTAT_NR];
+ u64 ios[BLKG_IOSTAT_NR];
+};
+
+struct blkg_iostat_set {
+ struct u64_stats_sync sync;
+ struct blkg_iostat cur;
+ struct blkg_iostat last;
+};
+
+/* association between a blk cgroup and a request queue */
+struct blkcg_gq {
+ /* Pointer to the associated request_queue */
+ struct request_queue *q;
+ struct list_head q_node;
+ struct hlist_node blkcg_node;
+ struct blkcg *blkcg;
+
+ /* all non-root blkcg_gq's are guaranteed to have access to parent */
+ struct blkcg_gq *parent;
+
+ /* reference count */
+ struct percpu_ref refcnt;
+
+ /* is this blkg online? protected by both blkcg and q locks */
+ bool online;
+
+ struct blkg_iostat_set __percpu *iostat_cpu;
+ struct blkg_iostat_set iostat;
+
+ struct blkg_policy_data *pd[BLKCG_MAX_POLS];
+
+ spinlock_t async_bio_lock;
+ struct bio_list async_bios;
+ union {
+ struct work_struct async_bio_work;
+ struct work_struct free_work;
+ };
+
+ atomic_t use_delay;
+ atomic64_t delay_nsec;
+ atomic64_t delay_start;
+ u64 last_delay;
+ int last_use;
+
+ struct rcu_head rcu_head;
+};
+
+struct blkcg {
+ struct cgroup_subsys_state css;
+ spinlock_t lock;
+ refcount_t online_pin;
+
+ struct radix_tree_root blkg_tree;
+ struct blkcg_gq __rcu *blkg_hint;
+ struct hlist_head blkg_list;
+
+ struct blkcg_policy_data *cpd[BLKCG_MAX_POLS];
+
+ struct list_head all_blkcgs_node;
+#ifdef CONFIG_BLK_CGROUP_FC_APPID
+ char fc_app_id[FC_APPID_LEN];
+#endif
+#ifdef CONFIG_CGROUP_WRITEBACK
+ struct list_head cgwb_list;
+#endif
+};
+
+static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
+{
+ return css ? container_of(css, struct blkcg, css) : NULL;
+}
+
/*
* A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a
* request_queue (q). This is used by blkcg policies which need to track
@@ -63,7 +151,7 @@ typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
-typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
+typedef void (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd,
struct seq_file *s);
struct blkcg_policy {
@@ -123,52 +211,14 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
/**
- * blkcg_css - find the current css
- *
- * Find the css associated with either the kthread or the current task.
- * This may return a dying css, so it is up to the caller to use tryget logic
- * to confirm it is alive and well.
- */
-static inline struct cgroup_subsys_state *blkcg_css(void)
-{
- struct cgroup_subsys_state *css;
-
- css = kthread_blkcg();
- if (css)
- return css;
- return task_css(current, io_cgrp_id);
-}
-
-/**
- * __bio_blkcg - internal, inconsistent version to get blkcg
- *
- * DO NOT USE.
- * This function is inconsistent and consequently is dangerous to use. The
- * first part of the function returns a blkcg where a reference is owned by the
- * bio. This means it does not need to be rcu protected as it cannot go away
- * with the bio owning a reference to it. However, the latter potentially gets
- * it from task_css(). This can race against task migration and the cgroup
- * dying. It is also semantically different as it must be called rcu protected
- * and is susceptible to failure when trying to get a reference to it.
- * Therefore, it is not ok to assume that *_get() will always succeed on the
- * blkcg returned here.
- */
-static inline struct blkcg *__bio_blkcg(struct bio *bio)
-{
- if (bio && bio->bi_blkg)
- return bio->bi_blkg->blkcg;
- return css_to_blkcg(blkcg_css());
-}
-
-/**
* bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
* @return: true if this bio needs to be submitted with the root blkg context.
*
* In order to avoid priority inversions we sometimes need to issue a bio as if
* it were attached to the root blkg, and then backcharge to the actual owning
- * blkg. The idea is we do bio_blkcg() to look up the actual context for the
- * bio and attach the appropriate blkg to the bio. Then we call this helper and
- * if it is true run with the root blkg for that queue and then do any
+ * blkg. The idea is we do bio_blkcg_css() to look up the actual context for
+ * the bio and attach the appropriate blkg to the bio. Then we call this helper
+ * and if it is true run with the root blkg for that queue and then do any
* backcharging to the originating cgroup once the io is complete.
*/
static inline bool bio_issue_as_root_blkg(struct bio *bio)
@@ -457,7 +507,8 @@ struct blkcg_policy_data {
struct blkcg_policy {
};
-#ifdef CONFIG_BLOCK
+struct blkcg {
+};
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
@@ -471,8 +522,6 @@ static inline int blkcg_activate_policy(struct request_queue *q,
static inline void blkcg_deactivate_policy(struct request_queue *q,
const struct blkcg_policy *pol) { }
-static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
-
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
@@ -488,7 +537,6 @@ static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { r
#define blk_queue_for_each_rl(rl, q) \
for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
-#endif /* CONFIG_BLOCK */
#endif /* CONFIG_BLK_CGROUP */
#endif /* _BLK_CGROUP_PRIVATE_H */
diff --git a/block/blk-core.c b/block/blk-core.c
index ee18b6a699bd..80fa73c419a9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -50,7 +50,6 @@
#include "blk-pm.h"
#include "blk-cgroup.h"
#include "blk-throttle.h"
-#include "blk-rq-qos.h"
struct dentry *blk_debugfs_root;
@@ -315,9 +314,6 @@ void blk_cleanup_queue(struct request_queue *q)
*/
blk_freeze_queue(q);
- /* cleanup rq qos structures for queue without disk */
- rq_qos_exit(q);
-
blk_queue_flag_set(QUEUE_FLAG_DEAD, q);
blk_sync_queue(q);
@@ -592,10 +588,9 @@ static inline int bio_check_eod(struct bio *bio)
(nr_sectors > maxsector ||
bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
pr_info_ratelimited("%s: attempt to access beyond end of device\n"
- "%pg: rw=%d, want=%llu, limit=%llu\n",
- current->comm,
- bio->bi_bdev, bio->bi_opf,
- bio_end_sector(bio), maxsector);
+ "%pg: rw=%d, sector=%llu, nr_sectors = %u limit=%llu\n",
+ current->comm, bio->bi_bdev, bio->bi_opf,
+ bio->bi_iter.bi_sector, nr_sectors, maxsector);
return -EIO;
}
return 0;
@@ -893,19 +888,11 @@ void submit_bio(struct bio *bio)
if (blkcg_punt_bio_submit(bio))
return;
- /*
- * If it's a regular read/write or a barrier with data attached,
- * go through the normal accounting stuff before submission.
- */
- if (bio_has_data(bio)) {
- unsigned int count = bio_sectors(bio);
-
- if (op_is_write(bio_op(bio))) {
- count_vm_events(PGPGOUT, count);
- } else {
- task_io_account_read(bio->bi_iter.bi_size);
- count_vm_events(PGPGIN, count);
- }
+ if (bio_op(bio) == REQ_OP_READ) {
+ task_io_account_read(bio->bi_iter.bi_size);
+ count_vm_events(PGPGIN, bio_sectors(bio));
+ } else if (bio_op(bio) == REQ_OP_WRITE) {
+ count_vm_events(PGPGOUT, bio_sectors(bio));
}
/*
@@ -1022,21 +1009,22 @@ again:
}
}
-static unsigned long __part_start_io_acct(struct block_device *part,
- unsigned int sectors, unsigned int op,
- unsigned long start_time)
+unsigned long bdev_start_io_acct(struct block_device *bdev,
+ unsigned int sectors, unsigned int op,
+ unsigned long start_time)
{
const int sgrp = op_stat_group(op);
part_stat_lock();
- update_io_ticks(part, start_time, false);
- part_stat_inc(part, ios[sgrp]);
- part_stat_add(part, sectors[sgrp], sectors);
- part_stat_local_inc(part, in_flight[op_is_write(op)]);
+ update_io_ticks(bdev, start_time, false);
+ part_stat_inc(bdev, ios[sgrp]);
+ part_stat_add(bdev, sectors[sgrp], sectors);
+ part_stat_local_inc(bdev, in_flight[op_is_write(op)]);
part_stat_unlock();
return start_time;
}
+EXPORT_SYMBOL(bdev_start_io_acct);
/**
* bio_start_io_acct_time - start I/O accounting for bio based drivers
@@ -1045,8 +1033,8 @@ static unsigned long __part_start_io_acct(struct block_device *part,
*/
void bio_start_io_acct_time(struct bio *bio, unsigned long start_time)
{
- __part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
- bio_op(bio), start_time);
+ bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
+ bio_op(bio), start_time);
}
EXPORT_SYMBOL_GPL(bio_start_io_acct_time);
@@ -1058,46 +1046,33 @@ EXPORT_SYMBOL_GPL(bio_start_io_acct_time);
*/
unsigned long bio_start_io_acct(struct bio *bio)
{
- return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio),
- bio_op(bio), jiffies);
+ return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
+ bio_op(bio), jiffies);
}
EXPORT_SYMBOL_GPL(bio_start_io_acct);
-unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
- unsigned int op)
-{
- return __part_start_io_acct(disk->part0, sectors, op, jiffies);
-}
-EXPORT_SYMBOL(disk_start_io_acct);
-
-static void __part_end_io_acct(struct block_device *part, unsigned int op,
- unsigned long start_time)
+void bdev_end_io_acct(struct block_device *bdev, unsigned int op,
+ unsigned long start_time)
{
const int sgrp = op_stat_group(op);
unsigned long now = READ_ONCE(jiffies);
unsigned long duration = now - start_time;
part_stat_lock();
- update_io_ticks(part, now, true);
- part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
- part_stat_local_dec(part, in_flight[op_is_write(op)]);
+ update_io_ticks(bdev, now, true);
+ part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration));
+ part_stat_local_dec(bdev, in_flight[op_is_write(op)]);
part_stat_unlock();
}
+EXPORT_SYMBOL(bdev_end_io_acct);
void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
- struct block_device *orig_bdev)
+ struct block_device *orig_bdev)
{
- __part_end_io_acct(orig_bdev, bio_op(bio), start_time);
+ bdev_end_io_acct(orig_bdev, bio_op(bio), start_time);
}
EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped);
-void disk_end_io_acct(struct gendisk *disk, unsigned int op,
- unsigned long start_time)
-{
- __part_end_io_acct(disk->part0, op, start_time);
-}
-EXPORT_SYMBOL(disk_end_io_acct);
-
/**
* blk_lld_busy - Check if underlying low-level drivers of a device are busy
* @q : the queue of the device being checked
diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c
index 5d1aa5b1d30a..621abd1b0e4d 100644
--- a/block/blk-crypto-fallback.c
+++ b/block/blk-crypto-fallback.c
@@ -179,7 +179,6 @@ static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src)
bio->bi_io_vec[bio->bi_vcnt++] = bv;
bio_clone_blkg_association(bio, bio_src);
- blkcg_bio_issue_init(bio);
return bio;
}
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 70a0a3d680a3..33a11ba971ea 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -533,8 +533,7 @@ struct ioc_gq {
/* statistics */
struct iocg_pcpu_stat __percpu *pcpu_stat;
- struct iocg_stat local_stat;
- struct iocg_stat desc_stat;
+ struct iocg_stat stat;
struct iocg_stat last_stat;
u64 last_stat_abs_vusage;
u64 usage_delta_us;
@@ -1371,7 +1370,7 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
return true;
} else {
if (iocg->indelay_since) {
- iocg->local_stat.indelay_us += now->now - iocg->indelay_since;
+ iocg->stat.indelay_us += now->now - iocg->indelay_since;
iocg->indelay_since = 0;
}
iocg->delay = 0;
@@ -1419,7 +1418,7 @@ static void iocg_pay_debt(struct ioc_gq *iocg, u64 abs_vpay,
/* if debt is paid in full, restore inuse */
if (!iocg->abs_vdebt) {
- iocg->local_stat.indebt_us += now->now - iocg->indebt_since;
+ iocg->stat.indebt_us += now->now - iocg->indebt_since;
iocg->indebt_since = 0;
propagate_weights(iocg, iocg->active, iocg->last_inuse,
@@ -1513,7 +1512,7 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, bool pay_debt,
if (!waitqueue_active(&iocg->waitq)) {
if (iocg->wait_since) {
- iocg->local_stat.wait_us += now->now - iocg->wait_since;
+ iocg->stat.wait_us += now->now - iocg->wait_since;
iocg->wait_since = 0;
}
return;
@@ -1641,11 +1640,30 @@ static void iocg_build_inner_walk(struct ioc_gq *iocg,
}
}
+/* propagate the deltas to the parent */
+static void iocg_flush_stat_upward(struct ioc_gq *iocg)
+{
+ if (iocg->level > 0) {
+ struct iocg_stat *parent_stat =
+ &iocg->ancestors[iocg->level - 1]->stat;
+
+ parent_stat->usage_us +=
+ iocg->stat.usage_us - iocg->last_stat.usage_us;
+ parent_stat->wait_us +=
+ iocg->stat.wait_us - iocg->last_stat.wait_us;
+ parent_stat->indebt_us +=
+ iocg->stat.indebt_us - iocg->last_stat.indebt_us;
+ parent_stat->indelay_us +=
+ iocg->stat.indelay_us - iocg->last_stat.indelay_us;
+ }
+
+ iocg->last_stat = iocg->stat;
+}
+
/* collect per-cpu counters and propagate the deltas to the parent */
-static void iocg_flush_stat_one(struct ioc_gq *iocg, struct ioc_now *now)
+static void iocg_flush_stat_leaf(struct ioc_gq *iocg, struct ioc_now *now)
{
struct ioc *ioc = iocg->ioc;
- struct iocg_stat new_stat;
u64 abs_vusage = 0;
u64 vusage_delta;
int cpu;
@@ -1661,34 +1679,9 @@ static void iocg_flush_stat_one(struct ioc_gq *iocg, struct ioc_now *now)
iocg->last_stat_abs_vusage = abs_vusage;
iocg->usage_delta_us = div64_u64(vusage_delta, ioc->vtime_base_rate);
- iocg->local_stat.usage_us += iocg->usage_delta_us;
-
- /* propagate upwards */
- new_stat.usage_us =
- iocg->local_stat.usage_us + iocg->desc_stat.usage_us;
- new_stat.wait_us =
- iocg->local_stat.wait_us + iocg->desc_stat.wait_us;
- new_stat.indebt_us =
- iocg->local_stat.indebt_us + iocg->desc_stat.indebt_us;
- new_stat.indelay_us =
- iocg->local_stat.indelay_us + iocg->desc_stat.indelay_us;
-
- /* propagate the deltas to the parent */
- if (iocg->level > 0) {
- struct iocg_stat *parent_stat =
- &iocg->ancestors[iocg->level - 1]->desc_stat;
+ iocg->stat.usage_us += iocg->usage_delta_us;
- parent_stat->usage_us +=
- new_stat.usage_us - iocg->last_stat.usage_us;
- parent_stat->wait_us +=
- new_stat.wait_us - iocg->last_stat.wait_us;
- parent_stat->indebt_us +=
- new_stat.indebt_us - iocg->last_stat.indebt_us;
- parent_stat->indelay_us +=
- new_stat.indelay_us - iocg->last_stat.indelay_us;
- }
-
- iocg->last_stat = new_stat;
+ iocg_flush_stat_upward(iocg);
}
/* get stat counters ready for reading on all active iocgs */
@@ -1699,13 +1692,13 @@ static void iocg_flush_stat(struct list_head *target_iocgs, struct ioc_now *now)
/* flush leaves and build inner node walk list */
list_for_each_entry(iocg, target_iocgs, active_list) {
- iocg_flush_stat_one(iocg, now);
+ iocg_flush_stat_leaf(iocg, now);
iocg_build_inner_walk(iocg, &inner_walk);
}
/* keep flushing upwards by walking the inner list backwards */
list_for_each_entry_safe_reverse(iocg, tiocg, &inner_walk, walk_list) {
- iocg_flush_stat_one(iocg, now);
+ iocg_flush_stat_upward(iocg);
list_del_init(&iocg->walk_list);
}
}
@@ -2152,16 +2145,16 @@ static int ioc_check_iocgs(struct ioc *ioc, struct ioc_now *now)
/* flush wait and indebt stat deltas */
if (iocg->wait_since) {
- iocg->local_stat.wait_us += now->now - iocg->wait_since;
+ iocg->stat.wait_us += now->now - iocg->wait_since;
iocg->wait_since = now->now;
}
if (iocg->indebt_since) {
- iocg->local_stat.indebt_us +=
+ iocg->stat.indebt_us +=
now->now - iocg->indebt_since;
iocg->indebt_since = now->now;
}
if (iocg->indelay_since) {
- iocg->local_stat.indelay_us +=
+ iocg->stat.indelay_us +=
now->now - iocg->indelay_since;
iocg->indelay_since = now->now;
}
@@ -2322,7 +2315,17 @@ static void ioc_timer_fn(struct timer_list *timer)
iocg->hweight_donating = hwa;
iocg->hweight_after_donation = new_hwi;
list_add(&iocg->surplus_list, &surpluses);
- } else {
+ } else if (!iocg->abs_vdebt) {
+ /*
+ * @iocg doesn't have enough to donate. Reset
+ * its inuse to active.
+ *
+ * Don't reset debtors as their inuse's are
+ * owned by debt handling. This shouldn't affect
+ * donation calculuation in any meaningful way
+ * as @iocg doesn't have a meaningful amount of
+ * share anyway.
+ */
TRACE_IOCG_PATH(inuse_shortage, iocg, &now,
iocg->inuse, iocg->active,
iocg->hweight_inuse, new_hwi);
@@ -2995,13 +2998,13 @@ static void ioc_pd_free(struct blkg_policy_data *pd)
kfree(iocg);
}
-static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
+static void ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
{
struct ioc_gq *iocg = pd_to_iocg(pd);
struct ioc *ioc = iocg->ioc;
if (!ioc->enabled)
- return false;
+ return;
if (iocg->level == 0) {
unsigned vp10k = DIV64_U64_ROUND_CLOSEST(
@@ -3017,7 +3020,6 @@ static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
iocg->last_stat.wait_us,
iocg->last_stat.indebt_us,
iocg->last_stat.indelay_us);
- return true;
}
static u64 ioc_weight_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 2f33932e72e3..5b676c7cf2b6 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -891,7 +891,7 @@ static int iolatency_print_limit(struct seq_file *sf, void *v)
return 0;
}
-static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
+static void iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
{
struct latency_stat stat;
int cpu;
@@ -914,17 +914,16 @@ static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s)
(unsigned long long)stat.ps.missed,
(unsigned long long)stat.ps.total,
iolat->rq_depth.max_depth);
- return true;
}
-static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
+static void iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
{
struct iolatency_grp *iolat = pd_to_lat(pd);
unsigned long long avg_lat;
unsigned long long cur_win;
if (!blkcg_debug_stats)
- return false;
+ return;
if (iolat->ssd)
return iolatency_ssd_stat(iolat, s);
@@ -937,7 +936,6 @@ static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
else
seq_printf(s, " depth=%u avg_lat=%llu win=%llu",
iolat->rq_depth.max_depth, avg_lat, cur_win);
- return true;
}
static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp,
diff --git a/block/blk-map.c b/block/blk-map.c
index 7ffde64f9019..df8b066cd548 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -262,10 +262,9 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
- if (unlikely(offs & queue_dma_alignment(rq->q))) {
- ret = -EINVAL;
+ if (unlikely(offs & queue_dma_alignment(rq->q)))
j = 0;
- } else {
+ else {
for (j = 0; j < npages; j++) {
struct page *page = pages[j];
unsigned int n = PAGE_SIZE - offs;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c4370d276170..ae116b755648 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1083,7 +1083,7 @@ bool blk_mq_complete_request_remote(struct request *rq)
WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
/*
- * For a polled request, always complete locallly, it's pointless
+ * For a polled request, always complete locally, it's pointless
* to redirect the completion.
*/
if (rq->cmd_flags & REQ_POLLED)
@@ -1131,14 +1131,7 @@ void blk_mq_start_request(struct request *rq)
trace_block_rq_issue(rq);
if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
- u64 start_time;
-#ifdef CONFIG_BLK_CGROUP
- if (rq->bio)
- start_time = bio_issue_time(&rq->bio->bi_issue);
- else
-#endif
- start_time = ktime_get_ns();
- rq->io_start_time_ns = start_time;
+ rq->io_start_time_ns = ktime_get_ns();
rq->stats_sectors = blk_rq_sectors(rq);
rq->rq_flags |= RQF_STATS;
rq_qos_issue(q, rq);
@@ -1176,6 +1169,62 @@ static void blk_end_sync_rq(struct request *rq, blk_status_t error)
complete(waiting);
}
+/*
+ * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
+ * queues. This is important for md arrays to benefit from merging
+ * requests.
+ */
+static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
+{
+ if (plug->multiple_queues)
+ return BLK_MAX_REQUEST_COUNT * 2;
+ return BLK_MAX_REQUEST_COUNT;
+}
+
+static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
+{
+ struct request *last = rq_list_peek(&plug->mq_list);
+
+ if (!plug->rq_count) {
+ trace_block_plug(rq->q);
+ } else if (plug->rq_count >= blk_plug_max_rq_count(plug) ||
+ (!blk_queue_nomerges(rq->q) &&
+ blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
+ blk_mq_flush_plug_list(plug, false);
+ trace_block_plug(rq->q);
+ }
+
+ if (!plug->multiple_queues && last && last->q != rq->q)
+ plug->multiple_queues = true;
+ if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
+ plug->has_elevator = true;
+ rq->rq_next = NULL;
+ rq_list_add(&plug->mq_list, rq);
+ plug->rq_count++;
+}
+
+static void __blk_execute_rq_nowait(struct request *rq, bool at_head,
+ rq_end_io_fn *done, bool use_plug)
+{
+ WARN_ON(irqs_disabled());
+ WARN_ON(!blk_rq_is_passthrough(rq));
+
+ rq->end_io = done;
+
+ blk_account_io_start(rq);
+
+ if (use_plug && current->plug) {
+ blk_add_rq_to_plug(current->plug, rq);
+ return;
+ }
+ /*
+ * don't check dying flag for MQ because the request won't
+ * be reused after dying flag is set
+ */
+ blk_mq_sched_insert_request(rq, at_head, true, false);
+}
+
+
/**
* blk_execute_rq_nowait - insert a request to I/O scheduler for execution
* @rq: request to insert
@@ -1191,18 +1240,8 @@ static void blk_end_sync_rq(struct request *rq, blk_status_t error)
*/
void blk_execute_rq_nowait(struct request *rq, bool at_head, rq_end_io_fn *done)
{
- WARN_ON(irqs_disabled());
- WARN_ON(!blk_rq_is_passthrough(rq));
-
- rq->end_io = done;
+ __blk_execute_rq_nowait(rq, at_head, done, true);
- blk_account_io_start(rq);
-
- /*
- * don't check dying flag for MQ because the request won't
- * be reused after dying flag is set
- */
- blk_mq_sched_insert_request(rq, at_head, true, false);
}
EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
@@ -1240,8 +1279,13 @@ blk_status_t blk_execute_rq(struct request *rq, bool at_head)
DECLARE_COMPLETION_ONSTACK(wait);
unsigned long hang_check;
+ /*
+ * iopoll requires request to be submitted to driver, so can't
+ * use plug
+ */
rq->end_io_data = &wait;
- blk_execute_rq_nowait(rq, at_head, blk_end_sync_rq);
+ __blk_execute_rq_nowait(rq, at_head, blk_end_sync_rq,
+ !blk_rq_is_poll(rq));
/* Prevent hang_check timer from firing at us during very long I/O */
hang_check = sysctl_hung_task_timeout_secs;
@@ -2683,40 +2727,6 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
hctx->queue->mq_ops->commit_rqs(hctx);
}
-/*
- * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
- * queues. This is important for md arrays to benefit from merging
- * requests.
- */
-static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
-{
- if (plug->multiple_queues)
- return BLK_MAX_REQUEST_COUNT * 2;
- return BLK_MAX_REQUEST_COUNT;
-}
-
-static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
-{
- struct request *last = rq_list_peek(&plug->mq_list);
-
- if (!plug->rq_count) {
- trace_block_plug(rq->q);
- } else if (plug->rq_count >= blk_plug_max_rq_count(plug) ||
- (!blk_queue_nomerges(rq->q) &&
- blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
- blk_mq_flush_plug_list(plug, false);
- trace_block_plug(rq->q);
- }
-
- if (!plug->multiple_queues && last && last->q != rq->q)
- plug->multiple_queues = true;
- if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
- plug->has_elevator = true;
- rq->rq_next = NULL;
- rq_list_add(&plug->mq_list, rq);
- plug->rq_count++;
-}
-
static bool blk_mq_attempt_bio_merge(struct request_queue *q,
struct bio *bio, unsigned int nr_segs)
{
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 469c483719be..139b2d7a99e2 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -227,7 +227,7 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
break; \
if ((__tg)) { \
blk_add_cgroup_trace_msg(__td->queue, \
- tg_to_blkg(__tg)->blkcg, "throtl " fmt, ##args);\
+ &tg_to_blkg(__tg)->blkcg->css, "throtl " fmt, ##args);\
} else { \
blk_add_trace_msg(__td->queue, "throtl " fmt, ##args); \
} \
@@ -2189,13 +2189,14 @@ again:
}
out_unlock:
- spin_unlock_irq(&q->queue_lock);
bio_set_flag(bio, BIO_THROTTLED);
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
if (throttled || !td->track_bio_latency)
bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY;
#endif
+ spin_unlock_irq(&q->queue_lock);
+
rcu_read_unlock();
return throttled;
}
diff --git a/block/bounce.c b/block/bounce.c
index 467be46d0e65..8f7b6fe3b4db 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -191,7 +191,6 @@ static struct bio *bounce_clone_bio(struct bio *bio_src)
goto err_put;
bio_clone_blkg_association(bio, bio_src);
- blkcg_bio_issue_init(bio);
return bio;
diff --git a/block/fops.c b/block/fops.c
index e3643362c244..b9b83030e0df 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -44,14 +44,6 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb)
#define DIO_INLINE_BIO_VECS 4
-static void blkdev_bio_end_io_simple(struct bio *bio)
-{
- struct task_struct *waiter = bio->bi_private;
-
- WRITE_ONCE(bio->bi_private, NULL);
- blk_wake_io_task(waiter);
-}
-
static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
struct iov_iter *iter, unsigned int nr_pages)
{
@@ -83,8 +75,6 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
}
bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
- bio.bi_private = current;
- bio.bi_end_io = blkdev_bio_end_io_simple;
bio.bi_ioprio = iocb->ki_ioprio;
ret = bio_iov_iter_get_pages(&bio, iter);
@@ -97,18 +87,8 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
if (iocb->ki_flags & IOCB_NOWAIT)
bio.bi_opf |= REQ_NOWAIT;
- if (iocb->ki_flags & IOCB_HIPRI)
- bio_set_polled(&bio, iocb);
- submit_bio(&bio);
- for (;;) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (!READ_ONCE(bio.bi_private))
- break;
- if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, NULL, 0))
- blk_io_schedule();
- }
- __set_current_state(TASK_RUNNING);
+ submit_bio_wait(&bio);
bio_release_pages(&bio, should_dirty);
if (unlikely(bio.bi_status))
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 3ed5eaf3446a..6ed602b2f80a 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -742,6 +742,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
if (at_head) {
list_add(&rq->queuelist, &per_prio->dispatch);
+ rq->fifo_time = jiffies;
} else {
deadline_add_rq_rb(per_prio, rq);
diff --git a/block/partitions/acorn.c b/block/partitions/acorn.c
index 2c381c694c57..d2fc122d7426 100644
--- a/block/partitions/acorn.c
+++ b/block/partitions/acorn.c
@@ -282,13 +282,13 @@ int adfspart_check_ADFS(struct parsed_partitions *state)
#ifdef CONFIG_ACORN_PARTITION_RISCIX
case PARTITION_RISCIX_SCSI:
case PARTITION_RISCIX_MFM:
- slot = riscix_partition(state, start_sect, slot,
+ riscix_partition(state, start_sect, slot,
nr_sects);
break;
#endif
case PARTITION_LINUX:
- slot = linux_partition(state, start_sect, slot,
+ linux_partition(state, start_sect, slot,
nr_sects);
break;
}
diff --git a/block/partitions/atari.c b/block/partitions/atari.c
index da5994175416..9655c728262a 100644
--- a/block/partitions/atari.c
+++ b/block/partitions/atari.c
@@ -140,7 +140,6 @@ int atari_partition(struct parsed_partitions *state)
/* accept only GEM,BGM,RAW,LNX,SWP partitions */
if (!((pi->flg & 1) && OK_id(pi->id)))
continue;
- part_fmt = 2;
put_partition (state, slot,
be32_to_cpu(pi->st),
be32_to_cpu(pi->siz));
diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c
index 27f6c7d9c776..38e58960ae03 100644
--- a/block/partitions/ldm.c
+++ b/block/partitions/ldm.c
@@ -736,7 +736,6 @@ static bool ldm_parse_cmp3 (const u8 *buffer, int buflen, struct vblk *vb)
len = r_cols;
} else {
r_stripe = 0;
- r_cols = 0;
len = r_parent;
}
if (len < 0)
@@ -783,11 +782,8 @@ static int ldm_parse_dgr3 (const u8 *buffer, int buflen, struct vblk *vb)
r_id1 = ldm_relative (buffer, buflen, 0x24, r_diskid);
r_id2 = ldm_relative (buffer, buflen, 0x24, r_id1);
len = r_id2;
- } else {
- r_id1 = 0;
- r_id2 = 0;
+ } else
len = r_diskid;
- }
if (len < 0)
return false;
@@ -826,11 +822,8 @@ static bool ldm_parse_dgr4 (const u8 *buffer, int buflen, struct vblk *vb)
r_id1 = ldm_relative (buffer, buflen, 0x44, r_name);
r_id2 = ldm_relative (buffer, buflen, 0x44, r_id1);
len = r_id2;
- } else {
- r_id1 = 0;
- r_id2 = 0;
+ } else
len = r_name;
- }
if (len < 0)
return false;
@@ -963,10 +956,8 @@ static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb)
return false;
}
len = r_index;
- } else {
- r_index = 0;
+ } else
len = r_diskid;
- }
if (len < 0) {
ldm_error("len %d < 0", len);
return false;