diff --git a/block/blk-flush.c b/block/blk-flush.c
index dc71da0e6b0e..ebdf158a2603 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -291,6 +291,11 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
 	spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
 }
 
+bool is_flush_rq(struct request *rq)
+{
+	return rq->end_io == flush_end_io;
+}
+
 /**
  * blk_kick_flush - consider issuing flush request
  * @q: request_queue being kicked
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index fe5ef6df5b80..46a98797d421 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -218,6 +218,20 @@ struct bt_iter_data {
 	bool reserved;
 };
 
+static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags,
+		unsigned int bitnr)
+{
+	struct request *rq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tags->lock, flags);
+	rq = tags->rqs[bitnr];
+	if (!rq || !refcount_inc_not_zero(&rq->ref))
+		rq = NULL;
+	spin_unlock_irqrestore(&tags->lock, flags);
+	return rq;
+}
+
 static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 {
 	struct bt_iter_data *iter_data = data;
@@ -225,18 +239,23 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 	struct blk_mq_tags *tags = hctx->tags;
 	bool reserved = iter_data->reserved;
 	struct request *rq;
+	bool ret = true;
 
 	if (!reserved)
 		bitnr += tags->nr_reserved_tags;
-	rq = tags->rqs[bitnr];
 
 	/*
 	 * We can hit rq == NULL here, because the tagging functions
 	 * test and set the bit before assining ->rqs[].
 	 */
-	if (rq && rq->q == hctx->queue)
+	rq = blk_mq_find_and_get_req(tags, bitnr);
+	if (!rq)
+		return true;
+
+	if (rq->q == hctx->queue)
 		iter_data->fn(hctx, rq, iter_data->data, reserved);
-	return true;
+	blk_mq_put_rq_ref(rq);
+	return ret;
 }
 
 static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt,
@@ -265,6 +284,7 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 	struct blk_mq_tags *tags = iter_data->tags;
 	bool reserved = iter_data->reserved;
 	struct request *rq;
+	bool ret = true;
 
 	if (!reserved)
 		bitnr += tags->nr_reserved_tags;
@@ -273,11 +293,13 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 	 * We can hit rq == NULL here, because the tagging functions
 	 * test and set the bit before assining ->rqs[].
 	 */
-	rq = tags->rqs[bitnr];
-	if (rq && blk_mq_request_started(rq))
+	rq = blk_mq_find_and_get_req(tags, bitnr);
+	if (!rq)
+		return true;
+	if (blk_mq_request_started(rq))
 		iter_data->fn(rq, iter_data->data, reserved);
-
-	return true;
+	blk_mq_put_rq_ref(rq);
+	return ret;
 }
 
 static void bt_tags_for_each(struct blk_mq_tags *tags, struct sbitmap_queue *bt,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3baa7584caf8..086a0be25f65 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -812,6 +812,14 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
 	return false;
 }
 
+void blk_mq_put_rq_ref(struct request *rq)
+{
+	if (is_flush_rq(rq))
+		rq->end_io(rq, 0);
+	else if (refcount_dec_and_test(&rq->ref))
+		__blk_mq_free_request(rq);
+}
+
 static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 		struct request *rq, void *priv, bool reserved)
 {
@@ -844,11 +852,9 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 	 */
 	if (blk_mq_req_expired(rq, next))
 		blk_mq_rq_timed_out(rq, reserved);
+	blk_mq_put_rq_ref(rq);
+	return;
 
-	if (is_flush_rq(rq, hctx))
-		rq->end_io(rq, 0);
-	else if (refcount_dec_and_test(&rq->ref))
-		__blk_mq_free_request(rq);
 }
 
 static void blk_mq_timeout_work(struct work_struct *work)
@@ -2198,18 +2204,51 @@ static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
 					    &hctx->cpuhp_dead);
 }
 
+/*
+ * Before freeing hw queue, clear the flush request reference in
+ * tags->rqs[] to avoid a potential UAF.
+ */
+static void blk_mq_clear_flush_rq_mapping(struct blk_mq_tags *tags,
+		unsigned int queue_depth, struct request *flush_rq)
+{
+	int i;
+	unsigned long flags;
+
+	/* The hw queue may not be mapped yet */
+	if (!tags)
+		return;
+
+	WARN_ON_ONCE(refcount_read(&flush_rq->ref) != 0);
+
+	for (i = 0; i < queue_depth; i++)
+		cmpxchg(&tags->rqs[i], flush_rq, NULL);
+
+	/*
+	 * Wait until all pending iteration is done.
+	 *
+	 * Request reference is cleared and it is guaranteed to be observed
+	 * after the ->lock is released.
+	 */
+	spin_lock_irqsave(&tags->lock, flags);
+	spin_unlock_irqrestore(&tags->lock, flags);
+}
+
 /* hctx->ctxs will be freed in queue's release handler */
 static void blk_mq_exit_hctx(struct request_queue *q,
 		struct blk_mq_tag_set *set,
 		struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 {
+	struct request *flush_rq = hctx->fq->flush_rq;
+
 	blk_mq_debugfs_unregister_hctx(hctx);
 
 	if (blk_mq_hw_queue_mapped(hctx))
 		blk_mq_tag_idle(hctx);
 
+	blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx],
+			set->queue_depth, flush_rq);
 	if (set->ops->exit_request)
-		set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx);
+		set->ops->exit_request(set, flush_rq, hctx_idx);
 
 	if (set->ops->exit_hctx)
 		set->ops->exit_hctx(hctx, hctx_idx);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 5ad9251627f8..31a576be0c90 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -39,6 +39,7 @@ void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 bool blk_mq_get_driver_tag(struct request *rq);
 struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
 					struct blk_mq_ctx *start);
+void blk_mq_put_rq_ref(struct request *rq);
 
 /*
  * Internal helpers for allocating/freeing the request map
diff --git a/block/blk.h b/block/blk.h
index 1a5b67b57e6b..249dc315fbee 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -124,11 +124,7 @@ static inline void __blk_get_queue(struct request_queue *q)
 	kobject_get(&q->kobj);
 }
 
-static inline bool
-is_flush_rq(struct request *req, struct blk_mq_hw_ctx *hctx)
-{
-	return hctx->fq->flush_rq == req;
-}
+bool is_flush_rq(struct request *req);
 
 struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
 		int node, int cmd_size, gfp_t flags);
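
The core of the fix is a lookup/put pairing: bt_iter() and bt_tags_iter() now take a request reference under tags->lock via blk_mq_find_and_get_req() and drop it with blk_mq_put_rq_ref(), while blk_mq_clear_flush_rq_mapping() clears stale tags->rqs[] entries and then takes and releases tags->lock once so that any iterator already inside the locked lookup has finished before the flush request memory is reused. The sketch below is a minimal, self-contained user-space illustration of that pattern only, not part of the patch; every name in it (struct obj, slots[], table_lock, obj_get_not_zero(), slot_find_and_get(), obj_put(), slot_drain()) is invented for the example, standing in for rq->ref, tags->rqs[], tags->lock, refcount_inc_not_zero(), blk_mq_find_and_get_req(), blk_mq_put_rq_ref() and blk_mq_clear_flush_rq_mapping().

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
	atomic_int ref;				/* plays the role of rq->ref */
};

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;	/* tags->lock */
static _Atomic(struct obj *) slots[4];		/* plays the role of tags->rqs[] */

/* Like refcount_inc_not_zero(): take a reference only while one is still held. */
static bool obj_get_not_zero(struct obj *o)
{
	int old = atomic_load(&o->ref);

	while (old != 0)
		if (atomic_compare_exchange_weak(&o->ref, &old, old + 1))
			return true;
	return false;
}

/* Like blk_mq_put_rq_ref() for a normal request: free on the last put. */
static void obj_put(struct obj *o)
{
	if (atomic_fetch_sub(&o->ref, 1) == 1)
		free(o);
}

/*
 * Like blk_mq_find_and_get_req(): the slot is read and the reference taken
 * while table_lock is held, so a racing slot_drain() can wait us out.
 */
static struct obj *slot_find_and_get(unsigned int i)
{
	struct obj *o;

	pthread_mutex_lock(&table_lock);
	o = atomic_load(&slots[i]);
	if (o && !obj_get_not_zero(o))
		o = NULL;
	pthread_mutex_unlock(&table_lock);
	return o;
}

/*
 * Like blk_mq_clear_flush_rq_mapping(): clear stale pointers lock-free, then
 * take and release the lock once so that any lookup that already loaded a
 * stale pointer has finished before the caller frees or reuses the object.
 */
static void slot_drain(struct obj *victim, unsigned int depth)
{
	unsigned int i;

	for (i = 0; i < depth; i++) {
		struct obj *expected = victim;

		atomic_compare_exchange_strong(&slots[i], &expected,
					       (struct obj *)NULL);
	}
	pthread_mutex_lock(&table_lock);	/* empty lock/unlock = drain barrier */
	pthread_mutex_unlock(&table_lock);
}

int main(void)
{
	struct obj *o = calloc(1, sizeof(*o));

	atomic_store(&o->ref, 1);			/* creation reference */
	atomic_store(&slots[0], o);

	struct obj *found = slot_find_and_get(0);	/* ref 1 -> 2 */
	if (found)
		obj_put(found);				/* ref 2 -> 1 */

	slot_drain(o, 4);	/* no table slot or in-flight lookup uses o anymore */
	obj_put(o);		/* drop the creation reference; o is freed here */
	printf("slots[0] after drain: %p\n", (void *)atomic_load(&slots[0]));
	return 0;
}

A single-threaded main() is enough to show the call order; the guarantees that matter under concurrency are the ones stated in the comments on slot_find_and_get() and slot_drain(), which mirror why the patch can safely free the flush request after clearing tags->rqs[] and cycling tags->lock.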