Skip to content

Commit

Permalink
block: force noio scope in blk_mq_freeze_queue
Browse files Browse the repository at this point in the history
When block drivers or the core block code perform allocations with a
frozen queue, this could try to recurse into the block device to
reclaim memory and deadlock.  Thus all allocations done by a process
that froze a queue need to be done without __GFP_IO and __GFP_FS.
Instead of trying to track all of them down, force a noio scope as
part of freezing the queue.

Note that nvme is a bit of a mess here due to the non-owner freezes,
and they will be addressed separately.

Signed-off-by: Christoph Hellwig <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jens Axboe <[email protected]>
  • Loading branch information
Christoph Hellwig authored and axboe committed Jan 31, 2025
1 parent 14ef496 commit 1e1a9ce
Show file tree
Hide file tree
Showing 26 changed files with 136 additions and 84 deletions.
10 changes: 6 additions & 4 deletions block/blk-cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -1545,6 +1545,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
struct request_queue *q = disk->queue;
struct blkg_policy_data *pd_prealloc = NULL;
struct blkcg_gq *blkg, *pinned_blkg = NULL;
unsigned int memflags;
int ret;

if (blkcg_policy_enabled(q, pol))
Expand All @@ -1559,7 +1560,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
return -EINVAL;

if (queue_is_mq(q))
blk_mq_freeze_queue(q);
memflags = blk_mq_freeze_queue(q);
retry:
spin_lock_irq(&q->queue_lock);

Expand Down Expand Up @@ -1623,7 +1624,7 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
spin_unlock_irq(&q->queue_lock);
out:
if (queue_is_mq(q))
blk_mq_unfreeze_queue(q);
blk_mq_unfreeze_queue(q, memflags);
if (pinned_blkg)
blkg_put(pinned_blkg);
if (pd_prealloc)
Expand Down Expand Up @@ -1667,12 +1668,13 @@ void blkcg_deactivate_policy(struct gendisk *disk,
{
struct request_queue *q = disk->queue;
struct blkcg_gq *blkg;
unsigned int memflags;

if (!blkcg_policy_enabled(q, pol))
return;

if (queue_is_mq(q))
blk_mq_freeze_queue(q);
memflags = blk_mq_freeze_queue(q);

mutex_lock(&q->blkcg_mutex);
spin_lock_irq(&q->queue_lock);
Expand All @@ -1696,7 +1698,7 @@ void blkcg_deactivate_policy(struct gendisk *disk,
mutex_unlock(&q->blkcg_mutex);

if (queue_is_mq(q))
blk_mq_unfreeze_queue(q);
blk_mq_unfreeze_queue(q, memflags);
}
EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);

Expand Down
14 changes: 8 additions & 6 deletions block/blk-iocost.c
Original file line number Diff line number Diff line change
Expand Up @@ -3224,6 +3224,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
u32 qos[NR_QOS_PARAMS];
bool enable, user;
char *body, *p;
unsigned int memflags;
int ret;

blkg_conf_init(&ctx, input);
Expand All @@ -3247,7 +3248,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
ioc = q_to_ioc(disk->queue);
}

blk_mq_freeze_queue(disk->queue);
memflags = blk_mq_freeze_queue(disk->queue);
blk_mq_quiesce_queue(disk->queue);

spin_lock_irq(&ioc->lock);
Expand Down Expand Up @@ -3347,15 +3348,15 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
wbt_enable_default(disk);

blk_mq_unquiesce_queue(disk->queue);
blk_mq_unfreeze_queue(disk->queue);
blk_mq_unfreeze_queue(disk->queue, memflags);

blkg_conf_exit(&ctx);
return nbytes;
einval:
spin_unlock_irq(&ioc->lock);

blk_mq_unquiesce_queue(disk->queue);
blk_mq_unfreeze_queue(disk->queue);
blk_mq_unfreeze_queue(disk->queue, memflags);

ret = -EINVAL;
err:
Expand Down Expand Up @@ -3414,6 +3415,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
{
struct blkg_conf_ctx ctx;
struct request_queue *q;
unsigned int memflags;
struct ioc *ioc;
u64 u[NR_I_LCOEFS];
bool user;
Expand Down Expand Up @@ -3441,7 +3443,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
ioc = q_to_ioc(q);
}

blk_mq_freeze_queue(q);
memflags = blk_mq_freeze_queue(q);
blk_mq_quiesce_queue(q);

spin_lock_irq(&ioc->lock);
Expand Down Expand Up @@ -3493,7 +3495,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
spin_unlock_irq(&ioc->lock);

blk_mq_unquiesce_queue(q);
blk_mq_unfreeze_queue(q);
blk_mq_unfreeze_queue(q, memflags);

blkg_conf_exit(&ctx);
return nbytes;
Expand All @@ -3502,7 +3504,7 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
spin_unlock_irq(&ioc->lock);

blk_mq_unquiesce_queue(q);
blk_mq_unfreeze_queue(q);
blk_mq_unfreeze_queue(q, memflags);

ret = -EINVAL;
err:
Expand Down
6 changes: 4 additions & 2 deletions block/blk-iolatency.c
Original file line number Diff line number Diff line change
Expand Up @@ -749,9 +749,11 @@ static void blkiolatency_enable_work_fn(struct work_struct *work)
*/
enabled = atomic_read(&blkiolat->enable_cnt);
if (enabled != blkiolat->enabled) {
blk_mq_freeze_queue(blkiolat->rqos.disk->queue);
unsigned int memflags;

memflags = blk_mq_freeze_queue(blkiolat->rqos.disk->queue);
blkiolat->enabled = enabled;
blk_mq_unfreeze_queue(blkiolat->rqos.disk->queue);
blk_mq_unfreeze_queue(blkiolat->rqos.disk->queue, memflags);
}
}

Expand Down
21 changes: 13 additions & 8 deletions block/blk-mq.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,12 +210,12 @@ int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
}
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout);

void blk_mq_freeze_queue(struct request_queue *q)
void blk_mq_freeze_queue_nomemsave(struct request_queue *q)
{
blk_freeze_queue_start(q);
blk_mq_freeze_queue_wait(q);
}
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_nomemsave);

bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
{
Expand All @@ -236,12 +236,12 @@ bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
return unfreeze;
}

void blk_mq_unfreeze_queue(struct request_queue *q)
void blk_mq_unfreeze_queue_nomemrestore(struct request_queue *q)
{
if (__blk_mq_unfreeze_queue(q, false))
blk_unfreeze_release_lock(q);
}
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue_nomemrestore);

/*
* non_owner variant of blk_freeze_queue_start
Expand Down Expand Up @@ -4223,13 +4223,14 @@ static void blk_mq_update_tag_set_shared(struct blk_mq_tag_set *set,
bool shared)
{
struct request_queue *q;
unsigned int memflags;

lockdep_assert_held(&set->tag_list_lock);

list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_freeze_queue(q);
memflags = blk_mq_freeze_queue(q);
queue_set_hctx_shared(q, shared);
blk_mq_unfreeze_queue(q);
blk_mq_unfreeze_queue(q, memflags);
}
}

Expand Down Expand Up @@ -4992,6 +4993,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
struct request_queue *q;
LIST_HEAD(head);
int prev_nr_hw_queues = set->nr_hw_queues;
unsigned int memflags;
int i;

lockdep_assert_held(&set->tag_list_lock);
Expand All @@ -5003,8 +5005,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
if (set->nr_maps == 1 && nr_hw_queues == set->nr_hw_queues)
return;

memflags = memalloc_noio_save();
list_for_each_entry(q, &set->tag_list, tag_set_list)
blk_mq_freeze_queue(q);
blk_mq_freeze_queue_nomemsave(q);

/*
* Switch IO scheduler to 'none', cleaning up the data associated
* with the previous scheduler. We will switch back once we are done
Expand Down Expand Up @@ -5052,7 +5056,8 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
blk_mq_elv_switch_back(&head, q);

list_for_each_entry(q, &set->tag_list, tag_set_list)
blk_mq_unfreeze_queue(q);
blk_mq_unfreeze_queue_nomemrestore(q);
memalloc_noio_restore(memflags);

/* Free the excess tags when nr_hw_queues shrink. */
for (i = set->nr_hw_queues; i < prev_nr_hw_queues; i++)
Expand Down
2 changes: 1 addition & 1 deletion block/blk-pm.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ int blk_pre_runtime_suspend(struct request_queue *q)
if (percpu_ref_is_zero(&q->q_usage_counter))
ret = 0;
/* Switch q_usage_counter back to per-cpu mode. */
blk_mq_unfreeze_queue(q);
blk_mq_unfreeze_queue_nomemrestore(q);

if (ret < 0) {
spin_lock_irq(&q->queue_lock);
Expand Down
12 changes: 7 additions & 5 deletions block/blk-rq-qos.c
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
const struct rq_qos_ops *ops)
{
struct request_queue *q = disk->queue;
unsigned int memflags;

lockdep_assert_held(&q->rq_qos_mutex);

Expand All @@ -310,14 +311,14 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
* No IO can be in-flight when adding rqos, so freeze queue, which
* is fine since we only support rq_qos for blk-mq queue.
*/
blk_mq_freeze_queue(q);
memflags = blk_mq_freeze_queue(q);

if (rq_qos_id(q, rqos->id))
goto ebusy;
rqos->next = q->rq_qos;
q->rq_qos = rqos;

blk_mq_unfreeze_queue(q);
blk_mq_unfreeze_queue(q, memflags);

if (rqos->ops->debugfs_attrs) {
mutex_lock(&q->debugfs_mutex);
Expand All @@ -327,25 +328,26 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,

return 0;
ebusy:
blk_mq_unfreeze_queue(q);
blk_mq_unfreeze_queue(q, memflags);
return -EBUSY;
}

void rq_qos_del(struct rq_qos *rqos)
{
struct request_queue *q = rqos->disk->queue;
struct rq_qos **cur;
unsigned int memflags;

lockdep_assert_held(&q->rq_qos_mutex);

blk_mq_freeze_queue(q);
memflags = blk_mq_freeze_queue(q);
for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
if (*cur == rqos) {
*cur = rqos->next;
break;
}
}
blk_mq_unfreeze_queue(q);
blk_mq_unfreeze_queue(q, memflags);

mutex_lock(&q->debugfs_mutex);
blk_mq_debugfs_unregister_rqos(rqos);
Expand Down
5 changes: 3 additions & 2 deletions block/blk-settings.c
Original file line number Diff line number Diff line change
Expand Up @@ -461,11 +461,12 @@ EXPORT_SYMBOL_GPL(queue_limits_commit_update);
int queue_limits_commit_update_frozen(struct request_queue *q,
struct queue_limits *lim)
{
unsigned int memflags;
int ret;

blk_mq_freeze_queue(q);
memflags = blk_mq_freeze_queue(q);
ret = queue_limits_commit_update(q, lim);
blk_mq_unfreeze_queue(q);
blk_mq_unfreeze_queue(q, memflags);

return ret;
}
Expand Down
8 changes: 3 additions & 5 deletions block/blk-sysfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,7 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
struct queue_sysfs_entry *entry = to_queue(attr);
struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
struct request_queue *q = disk->queue;
unsigned int noio_flag;
unsigned int memflags;
ssize_t res;

if (!entry->store_limit && !entry->store)
Expand Down Expand Up @@ -711,11 +711,9 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
}

mutex_lock(&q->sysfs_lock);
blk_mq_freeze_queue(q);
noio_flag = memalloc_noio_save();
memflags = blk_mq_freeze_queue(q);
res = entry->store(disk, page, length);
memalloc_noio_restore(noio_flag);
blk_mq_unfreeze_queue(q);
blk_mq_unfreeze_queue(q, memflags);
mutex_unlock(&q->sysfs_lock);
return res;
}
Expand Down
5 changes: 3 additions & 2 deletions block/blk-throttle.c
Original file line number Diff line number Diff line change
Expand Up @@ -1202,6 +1202,7 @@ static int blk_throtl_init(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
struct throtl_data *td;
unsigned int memflags;
int ret;

td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
Expand All @@ -1215,7 +1216,7 @@ static int blk_throtl_init(struct gendisk *disk)
* Freeze queue before activating policy, to synchronize with IO path,
* which is protected by 'q_usage_counter'.
*/
blk_mq_freeze_queue(disk->queue);
memflags = blk_mq_freeze_queue(disk->queue);
blk_mq_quiesce_queue(disk->queue);

q->td = td;
Expand All @@ -1239,7 +1240,7 @@ static int blk_throtl_init(struct gendisk *disk)

out:
blk_mq_unquiesce_queue(disk->queue);
blk_mq_unfreeze_queue(disk->queue);
blk_mq_unfreeze_queue(disk->queue, memflags);

return ret;
}
Expand Down
5 changes: 3 additions & 2 deletions block/blk-zoned.c
Original file line number Diff line number Diff line change
Expand Up @@ -1717,9 +1717,10 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
else
pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
if (ret) {
blk_mq_freeze_queue(q);
unsigned int memflags = blk_mq_freeze_queue(q);

disk_free_zone_resources(disk);
blk_mq_unfreeze_queue(q);
blk_mq_unfreeze_queue(q, memflags);
}

return ret;
Expand Down
Loading

0 comments on commit 1e1a9ce

Please sign in to comment.