blk-mq: fix race with timeouts and requeue events
If a requeue event races with a timeout, we can get into the situation where we attempt to complete a request from the timeout handler when it's not start anymore. This causes a crash. So have the timeout handler check that REQ_ATOM_STARTED is still set on the request - if not, we ignore the event. If this happens, the request has now been marked as complete. As a consequence, we need to ensure to clear REQ_ATOM_COMPLETE in blk_mq_start_request(), as to maintain proper request state. Signed-off-by: Jens Axboe <axboe@fb.com>
This commit is contained in:
parent
70ab0b2d51
commit
87ee7b1121
@ -378,7 +378,15 @@ static void blk_mq_start_request(struct request *rq, bool last)
|
|||||||
* REQ_ATOMIC_STARTED is seen.
|
* REQ_ATOMIC_STARTED is seen.
|
||||||
*/
|
*/
|
||||||
rq->deadline = jiffies + q->rq_timeout;
|
rq->deadline = jiffies + q->rq_timeout;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Mark us as started and clear complete. Complete might have been
|
||||||
|
* set if requeue raced with timeout, which then marked it as
|
||||||
|
* complete. So be sure to clear complete again when we start
|
||||||
|
* the request, otherwise we'll ignore the completion event.
|
||||||
|
*/
|
||||||
set_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
|
set_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
|
||||||
|
clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
|
||||||
|
|
||||||
if (q->dma_drain_size && blk_rq_bytes(rq)) {
|
if (q->dma_drain_size && blk_rq_bytes(rq)) {
|
||||||
/*
|
/*
|
||||||
@ -485,6 +493,28 @@ static void blk_mq_hw_ctx_check_timeout(struct blk_mq_hw_ctx *hctx,
|
|||||||
blk_mq_tag_busy_iter(hctx->tags, blk_mq_timeout_check, &data);
|
blk_mq_tag_busy_iter(hctx->tags, blk_mq_timeout_check, &data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static enum blk_eh_timer_return blk_mq_rq_timed_out(struct request *rq)
|
||||||
|
{
|
||||||
|
struct request_queue *q = rq->q;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We know that complete is set at this point. If STARTED isn't set
|
||||||
|
* anymore, then the request isn't active and the "timeout" should
|
||||||
|
* just be ignored. This can happen due to the bitflag ordering.
|
||||||
|
* Timeout first checks if STARTED is set, and if it is, assumes
|
||||||
|
* the request is active. But if we race with completion, then
|
||||||
|
* we both flags will get cleared. So check here again, and ignore
|
||||||
|
* a timeout event with a request that isn't active.
|
||||||
|
*/
|
||||||
|
if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
|
||||||
|
return BLK_EH_NOT_HANDLED;
|
||||||
|
|
||||||
|
if (!q->mq_ops->timeout)
|
||||||
|
return BLK_EH_RESET_TIMER;
|
||||||
|
|
||||||
|
return q->mq_ops->timeout(rq);
|
||||||
|
}
|
||||||
|
|
||||||
static void blk_mq_rq_timer(unsigned long data)
|
static void blk_mq_rq_timer(unsigned long data)
|
||||||
{
|
{
|
||||||
struct request_queue *q = (struct request_queue *) data;
|
struct request_queue *q = (struct request_queue *) data;
|
||||||
@ -538,11 +568,6 @@ static bool blk_mq_attempt_merge(struct request_queue *q,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void blk_mq_add_timer(struct request *rq)
|
|
||||||
{
|
|
||||||
__blk_add_timer(rq, NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Run this hardware queue, pulling any software queues mapped to it in.
|
* Run this hardware queue, pulling any software queues mapped to it in.
|
||||||
* Note that this function currently has various problems around ordering
|
* Note that this function currently has various problems around ordering
|
||||||
@ -799,7 +824,7 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
|
|||||||
/*
|
/*
|
||||||
* We do this early, to ensure we are on the right CPU.
|
* We do this early, to ensure we are on the right CPU.
|
||||||
*/
|
*/
|
||||||
blk_mq_add_timer(rq);
|
blk_add_timer(rq);
|
||||||
}
|
}
|
||||||
|
|
||||||
void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
|
void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
|
||||||
@ -1400,7 +1425,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
|
|||||||
q->sg_reserved_size = INT_MAX;
|
q->sg_reserved_size = INT_MAX;
|
||||||
|
|
||||||
blk_queue_make_request(q, blk_mq_make_request);
|
blk_queue_make_request(q, blk_mq_make_request);
|
||||||
blk_queue_rq_timed_out(q, set->ops->timeout);
|
blk_queue_rq_timed_out(q, blk_mq_rq_timed_out);
|
||||||
if (set->timeout)
|
if (set->timeout)
|
||||||
blk_queue_rq_timeout(q, set->timeout);
|
blk_queue_rq_timeout(q, set->timeout);
|
||||||
|
|
||||||
|
@ -51,6 +51,4 @@ void blk_mq_disable_hotplug(void);
|
|||||||
extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set);
|
extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set);
|
||||||
extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues);
|
extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues);
|
||||||
|
|
||||||
void blk_mq_add_timer(struct request *rq);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -96,11 +96,7 @@ static void blk_rq_timed_out(struct request *req)
|
|||||||
__blk_complete_request(req);
|
__blk_complete_request(req);
|
||||||
break;
|
break;
|
||||||
case BLK_EH_RESET_TIMER:
|
case BLK_EH_RESET_TIMER:
|
||||||
if (q->mq_ops)
|
blk_add_timer(req);
|
||||||
blk_mq_add_timer(req);
|
|
||||||
else
|
|
||||||
blk_add_timer(req);
|
|
||||||
|
|
||||||
blk_clear_rq_complete(req);
|
blk_clear_rq_complete(req);
|
||||||
break;
|
break;
|
||||||
case BLK_EH_NOT_HANDLED:
|
case BLK_EH_NOT_HANDLED:
|
||||||
@ -170,7 +166,8 @@ void blk_abort_request(struct request *req)
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(blk_abort_request);
|
EXPORT_SYMBOL_GPL(blk_abort_request);
|
||||||
|
|
||||||
void __blk_add_timer(struct request *req, struct list_head *timeout_list)
|
static void __blk_add_timer(struct request *req,
|
||||||
|
struct list_head *timeout_list)
|
||||||
{
|
{
|
||||||
struct request_queue *q = req->q;
|
struct request_queue *q = req->q;
|
||||||
unsigned long expiry;
|
unsigned long expiry;
|
||||||
@ -225,6 +222,11 @@ void __blk_add_timer(struct request *req, struct list_head *timeout_list)
|
|||||||
*/
|
*/
|
||||||
void blk_add_timer(struct request *req)
|
void blk_add_timer(struct request *req)
|
||||||
{
|
{
|
||||||
__blk_add_timer(req, &req->q->timeout_list);
|
struct request_queue *q = req->q;
|
||||||
|
|
||||||
|
if (q->mq_ops)
|
||||||
|
__blk_add_timer(req, NULL);
|
||||||
|
else
|
||||||
|
__blk_add_timer(req, &req->q->timeout_list);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -37,9 +37,8 @@ bool __blk_end_bidi_request(struct request *rq, int error,
|
|||||||
void blk_rq_timed_out_timer(unsigned long data);
|
void blk_rq_timed_out_timer(unsigned long data);
|
||||||
void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout,
|
void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout,
|
||||||
unsigned int *next_set);
|
unsigned int *next_set);
|
||||||
void __blk_add_timer(struct request *req, struct list_head *timeout_list);
|
void blk_add_timer(struct request *req);
|
||||||
void blk_delete_timer(struct request *);
|
void blk_delete_timer(struct request *);
|
||||||
void blk_add_timer(struct request *);
|
|
||||||
|
|
||||||
|
|
||||||
bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
|
bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
|
||||||
|
Loading…
Reference in New Issue
Block a user