From a909c179535383dc72a8f8e155ed3d35f298af86 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 28 Nov 2024 04:58:16 +0000 Subject: [PATCH 01/32] f2fs: Use a folio in f2fs_all_cluster_page_ready() Remove references to page->index and use folio_test_uptodate() instead of PageUptodate(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 7f26440e8595a..9f8d0f4c8564c 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -846,7 +846,7 @@ bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index) bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages, int index, int nr_pages, bool uptodate) { - unsigned long pgidx = pages[index]->index; + unsigned long pgidx = page_folio(pages[index])->index; int i = uptodate ? 0 : 1; /* @@ -860,9 +860,11 @@ bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages, return false; for (; i < cc->cluster_size; i++) { - if (pages[index + i]->index != pgidx + i) + struct folio *folio = page_folio(pages[index + i]); + + if (folio->index != pgidx + i) return false; - if (uptodate && !PageUptodate(pages[index + i])) + if (uptodate && !folio_test_uptodate(folio)) return false; } From ff6c82a934f7b5df8702579d921209c5ca336102 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 28 Nov 2024 04:58:17 +0000 Subject: [PATCH 02/32] f2fs: Use a folio in f2fs_compress_write_end() This removes an access of page->index. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 9f8d0f4c8564c..c91a2e4fe60c3 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1197,7 +1197,8 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata, .cluster_size = F2FS_I(inode)->i_cluster_size, .rpages = fsdata, }; - bool first_index = (index == cc.rpages[0]->index); + struct folio *folio = page_folio(cc.rpages[0]); + bool first_index = (index == folio->index); if (copied) set_cluster_dirty(&cc); From 1cda5bc0b2fe93cdcb5f05a02f814a282d32742c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 28 Nov 2024 04:58:18 +0000 Subject: [PATCH 03/32] f2fs: Use a folio in f2fs_truncate_partial_cluster() Convert the incoming page to a folio and use it throughout. Removes an access to page->index. 
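As a rough sketch of the shape these first three conversions take (illustrative only, not part of the diffs that follow; page_folio(), folio_zero_segment() and folio_size() are the generic folio helpers):

    struct folio *folio = page_folio(page);        /* resolve the folio once */
    loff_t start = folio->index << PAGE_SHIFT;     /* folio->index replaces page->index */

    if (from <= start)                             /* zero the whole folio ... */
        folio_zero_segment(folio, 0, folio_size(folio));
    else                                           /* ... or only its tail */
        folio_zero_segment(folio, from - start, folio_size(folio));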
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index c91a2e4fe60c3..494baa1e8bd3d 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1242,13 +1242,14 @@ int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock) int i; for (i = cluster_size - 1; i >= 0; i--) { - loff_t start = rpages[i]->index << PAGE_SHIFT; + struct folio *folio = page_folio(rpages[i]); + loff_t start = folio->index << PAGE_SHIFT; if (from <= start) { - zero_user_segment(rpages[i], 0, PAGE_SIZE); + folio_zero_segment(folio, 0, folio_size(folio)); } else { - zero_user_segment(rpages[i], from - start, - PAGE_SIZE); + folio_zero_segment(folio, from - start, + folio_size(folio)); break; } } From ac866908d7a92b6b2be1127a2d5e85e23da86fa3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 28 Nov 2024 04:58:19 +0000 Subject: [PATCH 04/32] f2fs: Use a folio in f2fs_write_compressed_pages() Remove accesses to page->index and an unnecessary reference to page->mapping. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 494baa1e8bd3d..0b55b2695c9ba 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1282,6 +1282,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, .encrypted = fscrypt_inode_uses_fs_layer_crypto(cc->inode) ? 1 : 0, }; + struct folio *folio; struct dnode_of_data dn; struct node_info ni; struct compress_io_ctx *cic; @@ -1293,7 +1294,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, /* we should bypass data pages to proceed the kworker jobs */ if (unlikely(f2fs_cp_error(sbi))) { - mapping_set_error(cc->rpages[0]->mapping, -EIO); + mapping_set_error(inode->i_mapping, -EIO); goto out_free; } @@ -1320,7 +1321,8 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, goto out_put_dnode; } - psize = (loff_t)(cc->rpages[last_index]->index + 1) << PAGE_SHIFT; + folio = page_folio(cc->rpages[last_index]); + psize = folio_pos(folio) + folio_size(folio); err = f2fs_get_node_info(fio.sbi, dn.nid, &ni, false); if (err) @@ -1343,7 +1345,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, for (i = 0; i < cc->valid_nr_cpages; i++) { f2fs_set_compressed_page(cc->cpages[i], inode, - cc->rpages[i + 1]->index, cic); + page_folio(cc->rpages[i + 1])->index, cic); fio.compressed_page = cc->cpages[i]; fio.old_blkaddr = data_blkaddr(dn.inode, dn.node_page, From 87e2a15bc00840762b082399493597e8d3c1e42c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 28 Nov 2024 04:58:20 +0000 Subject: [PATCH 05/32] f2fs: Convert submit tracepoints to take a folio Remove accesses to page->index and page->mapping as well as unnecessary calls to page_file_mapping(). 
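The psize calculation in PATCH 04/32 above leans on a small identity worth spelling out (an illustrative aside, not part of the patch; folio_pos() and folio_size() are the generic folio helpers):

    /*
     * folio_pos(folio)  == (loff_t)folio->index << PAGE_SHIFT
     * folio_size(folio) == PAGE_SIZE for an order-0 folio
     *
     * so folio_pos(folio) + folio_size(folio) matches the old
     * (index + 1) << PAGE_SHIFT for single-page folios, and keeps
     * working if larger folios ever show up here.
     */
    psize = folio_pos(folio) + folio_size(folio);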
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 6 +++--- include/trace/events/f2fs.h | 30 +++++++++++++++--------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a2478c2afb3a0..7cb2272c723e0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -697,7 +697,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) META_GENERIC : DATA_GENERIC_ENHANCE))) return -EFSCORRUPTED; - trace_f2fs_submit_page_bio(page, fio); + trace_f2fs_submit_folio_bio(page_folio(page), fio); /* Allocate a new bio */ bio = __bio_alloc(fio, 1); @@ -894,7 +894,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) return -EFSCORRUPTED; - trace_f2fs_submit_page_bio(page, fio); + trace_f2fs_submit_folio_bio(page_folio(page), fio); if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block, fio->new_blkaddr)) @@ -1018,7 +1018,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) io->last_block_in_bio = fio->new_blkaddr; - trace_f2fs_submit_page_write(fio->page, fio); + trace_f2fs_submit_folio_write(page_folio(fio->page), fio); #ifdef CONFIG_BLK_DEV_ZONED if (f2fs_sb_has_blkzoned(sbi) && btype < META && is_end_zone_blkaddr(sbi, fio->new_blkaddr)) { diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 2851c823095bc..3253f45508e8e 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1119,11 +1119,11 @@ TRACE_EVENT(f2fs_reserve_new_blocks, (unsigned long long)__entry->count) ); -DECLARE_EVENT_CLASS(f2fs__submit_page_bio, +DECLARE_EVENT_CLASS(f2fs__submit_folio_bio, - TP_PROTO(struct page *page, struct f2fs_io_info *fio), + TP_PROTO(struct folio *folio, struct f2fs_io_info *fio), - TP_ARGS(page, fio), + TP_ARGS(folio, fio), TP_STRUCT__entry( __field(dev_t, dev) @@ -1138,9 +1138,9 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, ), TP_fast_assign( - __entry->dev = page_file_mapping(page)->host->i_sb->s_dev; - __entry->ino = page_file_mapping(page)->host->i_ino; - __entry->index = page->index; + __entry->dev = folio->mapping->host->i_sb->s_dev; + __entry->ino = folio->mapping->host->i_ino; + __entry->index = folio->index; __entry->old_blkaddr = fio->old_blkaddr; __entry->new_blkaddr = fio->new_blkaddr; __entry->op = fio->op; @@ -1149,7 +1149,7 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, __entry->type = fio->type; ), - TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " + TP_printk("dev = (%d,%d), ino = %lu, folio_index = 0x%lx, " "oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s(%s), type = %s_%s", show_dev_ino(__entry), (unsigned long)__entry->index, @@ -1160,22 +1160,22 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, show_block_type(__entry->type)) ); -DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_bio, +DEFINE_EVENT_CONDITION(f2fs__submit_folio_bio, f2fs_submit_folio_bio, - TP_PROTO(struct page *page, struct f2fs_io_info *fio), + TP_PROTO(struct folio *folio, struct f2fs_io_info *fio), - TP_ARGS(page, fio), + TP_ARGS(folio, fio), - TP_CONDITION(page->mapping) + TP_CONDITION(folio->mapping) ); -DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_write, +DEFINE_EVENT_CONDITION(f2fs__submit_folio_bio, f2fs_submit_folio_write, - TP_PROTO(struct page *page, struct f2fs_io_info *fio), + TP_PROTO(struct folio *folio, struct f2fs_io_info *fio), - TP_ARGS(page, fio), + TP_ARGS(folio, fio), - TP_CONDITION(page->mapping) + TP_CONDITION(folio->mapping) ); DECLARE_EVENT_CLASS(f2fs__bio, From 
1cf746007005593aa51395302ca0d31814f4ce42 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 28 Nov 2024 04:58:21 +0000 Subject: [PATCH 06/32] f2fs: Add F2FS_F_SB() This is the folio equivalent of F2FS_P_SB(). Removes a call to page_file_mapping() as we know folios seen by f2fs are never part of the swap cache. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6f2cbf4c57402..f523dd302bf6d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2003,9 +2003,14 @@ static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping) return F2FS_I_SB(mapping->host); } +static inline struct f2fs_sb_info *F2FS_F_SB(struct folio *folio) +{ + return F2FS_M_SB(folio->mapping); +} + static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page) { - return F2FS_M_SB(page_file_mapping(page)); + return F2FS_F_SB(page_folio(page)); } static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) From e0821645dd2d79180418dd9389e3e9e7e10e7281 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 28 Nov 2024 04:58:22 +0000 Subject: [PATCH 07/32] f2fs: Convert f2fs_finish_read_bio() to use folios Use bio_for_each_folio_all() to iterate over each folio in the bio. This lets us use folio_end_read() which saves an atomic operation and memory barrier compared to marking the folio uptodate and unlocking it as two separate operations. This also removes a few hidden calls to compound_head(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7cb2272c723e0..aa08ab387e58e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -136,27 +136,22 @@ struct bio_post_read_ctx { */ static void f2fs_finish_read_bio(struct bio *bio, bool in_task) { - struct bio_vec *bv; - struct bvec_iter_all iter_all; + struct folio_iter fi; struct bio_post_read_ctx *ctx = bio->bi_private; - bio_for_each_segment_all(bv, bio, iter_all) { - struct page *page = bv->bv_page; + bio_for_each_folio_all(fi, bio) { + struct folio *folio = fi.folio; - if (f2fs_is_compressed_page(page)) { + if (f2fs_is_compressed_page(&folio->page)) { if (ctx && !ctx->decompression_attempted) - f2fs_end_read_compressed_page(page, true, 0, + f2fs_end_read_compressed_page(&folio->page, true, 0, in_task); - f2fs_put_page_dic(page, in_task); + f2fs_put_page_dic(&folio->page, in_task); continue; } - if (bio->bi_status) - ClearPageUptodate(page); - else - SetPageUptodate(page); - dec_page_count(F2FS_P_SB(page), __read_io_type(page)); - unlock_page(page); + dec_page_count(F2FS_F_SB(folio), __read_io_type(&folio->page)); + folio_end_read(folio, bio->bi_status == 0); } if (ctx) From 0765b3f989a7eb757252951b21a244bfa3224561 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 28 Nov 2024 04:58:23 +0000 Subject: [PATCH 08/32] f2fs: Use a folio more in f2fs_submit_page_bio() Cache the result of page_folio(fio->page) in a local variable so we don't have to keep calling it. Saves a couple of calls to compound_head() and removes an access to page->mapping. 
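A condensed view of what folio_end_read() buys in PATCH 07/32 above (illustrative fragment, not part of the diff; folio_end_read() marks the folio uptodate on success and unlocks it in one atomic operation):

    /* before: two separate atomics plus the implied barriers */
    if (bio->bi_status)
        ClearPageUptodate(page);
    else
        SetPageUptodate(page);
    unlock_page(page);

    /* after: one atomic operation covering both */
    folio_end_read(folio, bio->bi_status == 0);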
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index aa08ab387e58e..cae5fa895b971 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -684,6 +684,7 @@ void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi) int f2fs_submit_page_bio(struct f2fs_io_info *fio) { struct bio *bio; + struct folio *fio_folio = page_folio(fio->page); struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page; @@ -697,8 +698,8 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) /* Allocate a new bio */ bio = __bio_alloc(fio, 1); - f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, - page_folio(fio->page)->index, fio, GFP_NOIO); + f2fs_set_bio_crypt_ctx(bio, fio_folio->mapping->host, + fio_folio->index, fio, GFP_NOIO); if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); @@ -706,8 +707,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) } if (fio->io_wbc && !is_read_io(fio->op)) - wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page), - PAGE_SIZE); + wbc_account_cgroup_owner(fio->io_wbc, fio_folio, PAGE_SIZE); inc_page_count(fio->sbi, is_read_io(fio->op) ? __read_io_type(page) : WB_DATA_TYPE(fio->page, false)); From f58d8645824b4885caa9e24989f8e601b5e7ed50 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 28 Nov 2024 04:58:24 +0000 Subject: [PATCH 09/32] f2fs: Use a data folio in f2fs_submit_page_bio() Remove a call to compound_head(). We can call bio_add_folio_nofail() here because we just allocated the bio, so we know it can't fail and thus the error path can never be taken. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cae5fa895b971..30d727808c928 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -685,32 +685,28 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) { struct bio *bio; struct folio *fio_folio = page_folio(fio->page); - struct page *page = fio->encrypted_page ? - fio->encrypted_page : fio->page; + struct folio *data_folio = fio->encrypted_page ? + page_folio(fio->encrypted_page) : fio_folio; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, fio->is_por ? META_POR : (__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC_ENHANCE))) return -EFSCORRUPTED; - trace_f2fs_submit_folio_bio(page_folio(page), fio); + trace_f2fs_submit_folio_bio(data_folio, fio); /* Allocate a new bio */ bio = __bio_alloc(fio, 1); f2fs_set_bio_crypt_ctx(bio, fio_folio->mapping->host, fio_folio->index, fio, GFP_NOIO); - - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { - bio_put(bio); - return -EFAULT; - } + bio_add_folio_nofail(bio, data_folio, folio_size(data_folio), 0); if (fio->io_wbc && !is_read_io(fio->op)) wbc_account_cgroup_owner(fio->io_wbc, fio_folio, PAGE_SIZE); inc_page_count(fio->sbi, is_read_io(fio->op) ? - __read_io_type(page) : WB_DATA_TYPE(fio->page, false)); + __read_io_type(&data_folio->page) : WB_DATA_TYPE(fio->page, false)); if (is_read_io(bio_op(bio))) f2fs_submit_read_bio(fio->sbi, bio, fio->type); From 19bbd306ddfd50a2f6cf0c3ccaaa079f22ddf4c5 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 28 Nov 2024 04:58:25 +0000 Subject: [PATCH 10/32] f2fs: Convert __read_io_type() to take a folio Remove the last call to page_file_mapping() as both callers can now pass in a folio. 
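The reasoning in PATCH 09/32 above is worth a short sketch (illustrative fragment mirroring the new f2fs_submit_page_bio() flow; bio_add_folio_nofail() is the generic block-layer helper used in that diff):

    bio = __bio_alloc(fio, 1);          /* just allocated, sized for this I/O */
    f2fs_set_bio_crypt_ctx(bio, fio_folio->mapping->host,
                           fio_folio->index, fio, GFP_NOIO);
    /*
     * bio_add_folio_nofail() returns no residual count; it is only
     * appropriate when the caller knows the add cannot fail, as with a
     * freshly allocated bio, which is why the old -EFAULT path became
     * dead code and could be dropped.
     */
    bio_add_folio_nofail(bio, data_folio, folio_size(data_folio), 0);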
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 30d727808c928..e68a4dd9bc177 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -70,9 +70,9 @@ bool f2fs_is_cp_guaranteed(struct page *page) return false; } -static enum count_type __read_io_type(struct page *page) +static enum count_type __read_io_type(struct folio *folio) { - struct address_space *mapping = page_file_mapping(page); + struct address_space *mapping = folio->mapping; if (mapping) { struct inode *inode = mapping->host; @@ -150,7 +150,7 @@ static void f2fs_finish_read_bio(struct bio *bio, bool in_task) continue; } - dec_page_count(F2FS_F_SB(folio), __read_io_type(&folio->page)); + dec_page_count(F2FS_F_SB(folio), __read_io_type(folio)); folio_end_read(folio, bio->bi_status == 0); } @@ -706,7 +706,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) wbc_account_cgroup_owner(fio->io_wbc, fio_folio, PAGE_SIZE); inc_page_count(fio->sbi, is_read_io(fio->op) ? - __read_io_type(&data_folio->page) : WB_DATA_TYPE(fio->page, false)); + __read_io_type(data_folio) : WB_DATA_TYPE(fio->page, false)); if (is_read_io(bio_op(bio))) f2fs_submit_read_bio(fio->sbi, bio, fio->type); From c910a64bc4e21782959221b6ea2d6c4cce0506c7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 28 Nov 2024 04:58:26 +0000 Subject: [PATCH 11/32] f2fs: Remove calls to folio_file_mapping() All folios that f2fs sees belong to f2fs and not to the swapcache so it can dereference folio->mapping directly like all other filesystems do. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/inline.c | 2 +- include/trace/events/f2fs.h | 9 ++++----- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e68a4dd9bc177..0657f731d4b7b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2455,7 +2455,7 @@ static int f2fs_mpage_readpages(struct inode *inode, static int f2fs_read_data_folio(struct file *file, struct folio *folio) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; int ret = -EAGAIN; trace_f2fs_readpage(folio, DATA); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 005babf1bed1e..cbd2a0d348045 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -81,7 +81,7 @@ bool f2fs_may_inline_dentry(struct inode *inode) void f2fs_do_read_inline_data(struct folio *folio, struct page *ipage) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; if (folio_test_uptodate(folio)) return; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 3253f45508e8e..eb3b2f1326b19 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1322,12 +1322,11 @@ DECLARE_EVENT_CLASS(f2fs__folio, ), TP_fast_assign( - __entry->dev = folio_file_mapping(folio)->host->i_sb->s_dev; - __entry->ino = folio_file_mapping(folio)->host->i_ino; + __entry->dev = folio->mapping->host->i_sb->s_dev; + __entry->ino = folio->mapping->host->i_ino; __entry->type = type; - __entry->dir = - S_ISDIR(folio_file_mapping(folio)->host->i_mode); - __entry->index = folio_index(folio); + __entry->dir = S_ISDIR(folio->mapping->host->i_mode); + __entry->index = folio->index; __entry->dirty = folio_test_dirty(folio); __entry->uptodate = folio_test_uptodate(folio); ), From 
5f6594542779e69c6c6e2b57341c352174135eed Mon Sep 17 00:00:00 2001 From: zangyangyang1 Date: Fri, 22 Nov 2024 18:58:22 +0800 Subject: [PATCH 12/32] f2fs: cache more dentry pages While traversing dir entries in dentry page, it's better to refresh current accessed page in lru list by using FGP_ACCESSED flag, otherwise, such page may has less chance to survive during memory reclaim, result in causing additional IO when revisiting dentry page. Signed-off-by: zangyangyang1 Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0657f731d4b7b..2ec0cfb41260d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1280,7 +1280,7 @@ struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index, struct address_space *mapping = inode->i_mapping; struct page *page; - page = find_get_page(mapping, index); + page = find_get_page_flags(mapping, index, FGP_ACCESSED); if (page && PageUptodate(page)) return page; f2fs_put_page(page, 0); From e9a844f6e487ee0f64d995d1d8ffde2e270e2479 Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Wed, 4 Dec 2024 11:31:13 +0800 Subject: [PATCH 13/32] f2fs: The GC triggered by ioctl also needs to mark the segno as victim In SSR mode, the segment selected for allocation might be the same as the target segment of the GC triggered by ioctl, resulting in the GC moving the CURSEG_I(sbi, type)->segno. Thread A Thread B or Thread A - f2fs_ioc_gc_range - __f2fs_ioc_gc_range(.victim_segno=segno#N) - f2fs_gc - __get_victim - f2fs_get_victim : segno#N is valid, return segno#N as source segment of GC - f2fs_allocate_data_block - need_new_seg - get_ssr_segment - f2fs_get_victim : get segno #N as destination segment - change_curseg Fixes: e066b83c9b40 ("f2fs: add ioctl to flush data from faster device to cold area") Signed-off-by: Yongpeng Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 3e1b6d2ff3a73..8029369bb71df 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -806,11 +806,14 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, goto out; } - if (sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result))) + if (sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result))) { ret = -EBUSY; - else - p.min_segno = *result; - goto out; + goto out; + } + if (gc_type == FG_GC) + clear_bit(GET_SEC_FROM_SEG(sbi, *result), dirty_i->victim_secmap); + p.min_segno = *result; + goto got_result; } ret = -ENODATA; From 76f01376df398304972bf337ba430a62062add31 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Thu, 12 Dec 2024 20:57:48 +0300 Subject: [PATCH 14/32] f2fs: ensure that node info flags are always initialized Syzbot has reported the following KMSAN splat: BUG: KMSAN: uninit-value in f2fs_new_node_page+0x1494/0x1630 f2fs_new_node_page+0x1494/0x1630 f2fs_new_inode_page+0xb9/0x100 f2fs_init_inode_metadata+0x176/0x1e90 f2fs_add_inline_entry+0x723/0xc90 f2fs_do_add_link+0x48f/0xa70 f2fs_symlink+0x6af/0xfc0 vfs_symlink+0x1f1/0x470 do_symlinkat+0x471/0xbc0 __x64_sys_symlink+0xcf/0x140 x64_sys_call+0x2fcc/0x3d90 do_syscall_64+0xd9/0x1b0 entry_SYSCALL_64_after_hwframe+0x77/0x7f Local variable new_ni created at: f2fs_new_node_page+0x9d/0x1630 f2fs_new_inode_page+0xb9/0x100 So adjust 'f2fs_get_node_info()' to ensure that 'flag' field of 'struct node_info' is always initialized. 
Reported-by: syzbot+5141f6db57a2f7614352@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=5141f6db57a2f7614352 Fixes: e05df3b115e7 ("f2fs: add node operations") Suggested-by: Chao Yu Signed-off-by: Dmitry Antipov Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0b900a7a48e59..c04ee1a7ce57e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -558,6 +558,7 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, block_t blkaddr; int i; + ni->flag = 0; ni->nid = nid; retry: /* Check nat cache */ From 3d56fbb1f03f2abf8c806aee14df2f462350dfba Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Mon, 4 Nov 2024 11:45:41 +0800 Subject: [PATCH 15/32] f2fs: expand f2fs_invalidate_compress_page() to f2fs_invalidate_compress_pages_range() New function f2fs_invalidate_compress_pages_range() adds the @len parameter. So it can process some consecutive blocks at a time. Signed-off-by: Yi Sun Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 5 +++-- fs/f2fs/f2fs.h | 9 +++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 0b55b2695c9ba..be3e6f4d33a2d 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1909,11 +1909,12 @@ struct address_space *COMPRESS_MAPPING(struct f2fs_sb_info *sbi) return sbi->compress_inode->i_mapping; } -void f2fs_invalidate_compress_page(struct f2fs_sb_info *sbi, block_t blkaddr) +void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi, + block_t blkaddr, unsigned int len) { if (!sbi->compress_inode) return; - invalidate_mapping_pages(COMPRESS_MAPPING(sbi), blkaddr, blkaddr); + invalidate_mapping_pages(COMPRESS_MAPPING(sbi), blkaddr, blkaddr + len - 1); } void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f523dd302bf6d..b88e3d1fea998 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4391,7 +4391,8 @@ void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi); int __init f2fs_init_compress_cache(void); void f2fs_destroy_compress_cache(void); struct address_space *COMPRESS_MAPPING(struct f2fs_sb_info *sbi); -void f2fs_invalidate_compress_page(struct f2fs_sb_info *sbi, block_t blkaddr); +void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi, + block_t blkaddr, unsigned int len); void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, nid_t ino, block_t blkaddr); bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, struct page *page, @@ -4446,8 +4447,8 @@ static inline int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { return static inline void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) { } static inline int __init f2fs_init_compress_cache(void) { return 0; } static inline void f2fs_destroy_compress_cache(void) { } -static inline void f2fs_invalidate_compress_page(struct f2fs_sb_info *sbi, - block_t blkaddr) { } +static inline void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi, + block_t blkaddr, unsigned int len) { } static inline void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, nid_t ino, block_t blkaddr) { } static inline bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, @@ -4766,7 +4767,7 @@ static inline void f2fs_invalidate_internal_cache(struct f2fs_sb_info *sbi, block_t blkaddr) { f2fs_truncate_meta_inode_pages(sbi, blkaddr, 1); - f2fs_invalidate_compress_page(sbi, blkaddr); + 
f2fs_invalidate_compress_pages_range(sbi, blkaddr, 1); } #define EFSBADCRC EBADMSG /* Bad CRC detected */ From d217b5cea488c0f644c189f91b636aeefa12deb9 Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Mon, 4 Nov 2024 11:45:42 +0800 Subject: [PATCH 16/32] f2fs: add parameter @len to f2fs_invalidate_internal_cache() New function can process some consecutive blocks at a time. Signed-off-by: Yi Sun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 6 +++--- fs/f2fs/gc.c | 2 +- fs/f2fs/segment.c | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2ec0cfb41260d..35b9455fb8999 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1414,7 +1414,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) return err; if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) - f2fs_invalidate_internal_cache(sbi, old_blkaddr); + f2fs_invalidate_internal_cache(sbi, old_blkaddr, 1); f2fs_update_data_blkaddr(dn, dn->data_blkaddr); return 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b88e3d1fea998..c537f6b5a4130 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4764,10 +4764,10 @@ static inline void f2fs_truncate_meta_inode_pages(struct f2fs_sb_info *sbi, } static inline void f2fs_invalidate_internal_cache(struct f2fs_sb_info *sbi, - block_t blkaddr) + block_t blkaddr, unsigned int len) { - f2fs_truncate_meta_inode_pages(sbi, blkaddr, 1); - f2fs_invalidate_compress_pages_range(sbi, blkaddr, 1); + f2fs_truncate_meta_inode_pages(sbi, blkaddr, len); + f2fs_invalidate_compress_pages_range(sbi, blkaddr, len); } #define EFSBADCRC EBADMSG /* Bad CRC detected */ diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 8029369bb71df..faf9fa1c804d7 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1415,7 +1415,7 @@ static int move_data_block(struct inode *inode, block_t bidx, page_address(mpage), PAGE_SIZE); f2fs_put_page(mpage, 1); - f2fs_invalidate_internal_cache(fio.sbi, fio.old_blkaddr); + f2fs_invalidate_internal_cache(fio.sbi, fio.old_blkaddr, 1); set_page_dirty(fio.encrypted_page); if (clear_page_dirty_for_io(fio.encrypted_page)) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index eade36c5ef138..86e547f008f90 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2537,7 +2537,7 @@ void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) if (addr == NEW_ADDR || addr == COMPRESS_ADDR) return; - f2fs_invalidate_internal_cache(sbi, addr); + f2fs_invalidate_internal_cache(sbi, addr, 1); /* add it into sit main buffer */ down_write(&sit_i->sentry_lock); @@ -3857,7 +3857,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) goto out; } if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) - f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr); + f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr, 1); /* writeout dirty page into bdev */ f2fs_submit_page_write(fio); @@ -4049,7 +4049,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, update_sit_entry(sbi, new_blkaddr, 1); } if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) { - f2fs_invalidate_internal_cache(sbi, old_blkaddr); + f2fs_invalidate_internal_cache(sbi, old_blkaddr, 1); if (!from_gc) update_segment_mtime(sbi, old_blkaddr, 0); update_sit_entry(sbi, old_blkaddr, -1); From 91b587ba79e1b68bb718d12b0758dbcdab4e9cb7 Mon Sep 17 00:00:00 2001 From: Daniel Lee Date: Fri, 20 Dec 2024 15:41:31 -0800 Subject: [PATCH 17/32] f2fs: Introduce linear search for dentries This patch 
addresses an issue where some files in case-insensitive directories become inaccessible due to changes in how the kernel function, utf8_casefold(), generates case-folded strings from the commit 5c26d2f1d3f5 ("unicode: Don't special case ignorable code points"). F2FS uses these case-folded names to calculate hash values for locating dentries and stores them on disk. Since utf8_casefold() can produce different output across kernel versions, stored hash values and newly calculated hash values may differ. This results in affected files no longer being found via the hash-based lookup. To resolve this, the patch introduces a linear search fallback. If the initial hash-based search fails, F2FS will sequentially scan the directory entries. Fixes: 5c26d2f1d3f5 ("unicode: Don't special case ignorable code points") Link: https://bugzilla.kernel.org/show_bug.cgi?id=219586 Signed-off-by: Daniel Lee Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 53 ++++++++++++++++++++++++++++++++++-------------- fs/f2fs/f2fs.h | 6 ++++-- fs/f2fs/inline.c | 5 +++-- 3 files changed, 45 insertions(+), 19 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 47a5c806cf162..54dd52de7269d 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -175,7 +175,8 @@ static unsigned long dir_block_index(unsigned int level, static struct f2fs_dir_entry *find_in_block(struct inode *dir, struct page *dentry_page, const struct f2fs_filename *fname, - int *max_slots) + int *max_slots, + bool use_hash) { struct f2fs_dentry_block *dentry_blk; struct f2fs_dentry_ptr d; @@ -183,7 +184,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page); make_dentry_ptr_block(dir, &d, dentry_blk); - return f2fs_find_target_dentry(&d, fname, max_slots); + return f2fs_find_target_dentry(&d, fname, max_slots, use_hash); } static inline int f2fs_match_name(const struct inode *dir, @@ -208,7 +209,8 @@ static inline int f2fs_match_name(const struct inode *dir, } struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, - const struct f2fs_filename *fname, int *max_slots) + const struct f2fs_filename *fname, int *max_slots, + bool use_hash) { struct f2fs_dir_entry *de; unsigned long bit_pos = 0; @@ -231,7 +233,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, continue; } - if (de->hash_code == fname->hash) { + if (!use_hash || de->hash_code == fname->hash) { res = f2fs_match_name(d->inode, fname, d->filename[bit_pos], le16_to_cpu(de->name_len)); @@ -258,11 +260,12 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, static struct f2fs_dir_entry *find_in_level(struct inode *dir, unsigned int level, const struct f2fs_filename *fname, - struct page **res_page) + struct page **res_page, + bool use_hash) { int s = GET_DENTRY_SLOTS(fname->disk_name.len); unsigned int nbucket, nblock; - unsigned int bidx, end_block; + unsigned int bidx, end_block, bucket_no; struct page *dentry_page; struct f2fs_dir_entry *de = NULL; pgoff_t next_pgofs; @@ -272,8 +275,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); nblock = bucket_blocks(level); + bucket_no = use_hash ? 
le32_to_cpu(fname->hash) % nbucket : 0; + +start_find_bucket: bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, - le32_to_cpu(fname->hash) % nbucket); + bucket_no); end_block = bidx + nblock; while (bidx < end_block) { @@ -290,7 +296,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, } } - de = find_in_block(dir, dentry_page, fname, &max_slots); + de = find_in_block(dir, dentry_page, fname, &max_slots, use_hash); if (IS_ERR(de)) { *res_page = ERR_CAST(de); de = NULL; @@ -307,12 +313,18 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, bidx++; } - if (!de && room && F2FS_I(dir)->chash != fname->hash) { - F2FS_I(dir)->chash = fname->hash; - F2FS_I(dir)->clevel = level; - } + if (de) + return de; - return de; + if (likely(use_hash)) { + if (room && F2FS_I(dir)->chash != fname->hash) { + F2FS_I(dir)->chash = fname->hash; + F2FS_I(dir)->clevel = level; + } + } else if (++bucket_no < nbucket) { + goto start_find_bucket; + } + return NULL; } struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, @@ -323,11 +335,15 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, struct f2fs_dir_entry *de = NULL; unsigned int max_depth; unsigned int level; + bool use_hash = true; *res_page = NULL; +#if IS_ENABLED(CONFIG_UNICODE) +start_find_entry: +#endif if (f2fs_has_inline_dentry(dir)) { - de = f2fs_find_in_inline_dir(dir, fname, res_page); + de = f2fs_find_in_inline_dir(dir, fname, res_page, use_hash); goto out; } @@ -343,11 +359,18 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, } for (level = 0; level < max_depth; level++) { - de = find_in_level(dir, level, fname, res_page); + de = find_in_level(dir, level, fname, res_page, use_hash); if (de || IS_ERR(*res_page)) break; } + out: +#if IS_ENABLED(CONFIG_UNICODE) + if (IS_CASEFOLDED(dir) && !de && use_hash) { + use_hash = false; + goto start_find_entry; + } +#endif /* This is to increase the speed of f2fs_create */ if (!de) F2FS_I(dir)->task = current; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c537f6b5a4130..f265c26d00382 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3588,7 +3588,8 @@ int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry, struct f2fs_filename *fname); void f2fs_free_filename(struct f2fs_filename *fname); struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, - const struct f2fs_filename *fname, int *max_slots); + const struct f2fs_filename *fname, int *max_slots, + bool use_hash); int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, unsigned int start_pos, struct fscrypt_str *fstr); void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, @@ -4224,7 +4225,8 @@ int f2fs_write_inline_data(struct inode *inode, struct folio *folio); int f2fs_recover_inline_data(struct inode *inode, struct page *npage); struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, const struct f2fs_filename *fname, - struct page **res_page); + struct page **res_page, + bool use_hash); int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, struct page *ipage); int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index cbd2a0d348045..3e3c35d4c98bc 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -352,7 +352,8 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage) struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, const struct f2fs_filename *fname, - struct page **res_page) + 
struct page **res_page, + bool use_hash) { struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); struct f2fs_dir_entry *de; @@ -369,7 +370,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, inline_dentry = inline_data_addr(dir, ipage); make_dentry_ptr_inline(dir, &d, inline_dentry); - de = f2fs_find_target_dentry(&d, fname, NULL); + de = f2fs_find_target_dentry(&d, fname, NULL, use_hash); unlock_page(ipage); if (IS_ERR(de)) { *res_page = ERR_CAST(de); From cf5817ce66f6c75452027af243689da780dbddb4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 7 Jan 2025 02:26:34 +0000 Subject: [PATCH 18/32] f2fs: don't call block truncation for aliased file This patch should avoid the below warning which does not corrupt the metadata tho. [ 51.508120][ T253] F2FS-fs (dm-59): access invalid blkaddr:36 [ 51.508156][ T253] __f2fs_is_valid_blkaddr+0x330/0x384 [ 51.508162][ T253] f2fs_is_valid_blkaddr_raw+0x10/0x24 [ 51.508163][ T253] f2fs_truncate_data_blocks_range+0x1ec/0x438 [ 51.508177][ T253] f2fs_remove_inode_page+0x8c/0x148 [ 51.508194][ T253] f2fs_evict_inode+0x230/0x76c Fixes: 128d333f0dff ("f2fs: introduce device aliasing file") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c04ee1a7ce57e..00f340c91fcbe 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1275,8 +1275,9 @@ int f2fs_remove_inode_page(struct inode *inode) } /* remove potential inline_data blocks */ - if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode)) + if (!IS_DEVICE_ALIASING(inode) && + (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) f2fs_truncate_data_blocks_range(&dn, 1); /* 0 is possible, after f2fs_new_inode() has failed */ From 66baee2b886d72ab6be11a08d4c7897f9612e25b Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Mon, 23 Dec 2024 16:10:41 +0800 Subject: [PATCH 19/32] f2fs: introduce update_sit_entry_for_release/alloc() No logical changes, just for cleanliness. Signed-off-by: Yi Sun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 162 ++++++++++++++++++++++++++-------------------- 1 file changed, 93 insertions(+), 69 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 86e547f008f90..bc761d9b889a3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2426,16 +2426,103 @@ static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr, SIT_I(sbi)->max_mtime = ctime; } -static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) +static int update_sit_entry_for_release(struct f2fs_sb_info *sbi, struct seg_entry *se, + block_t blkaddr, unsigned int offset, int del) +{ + bool exist; +#ifdef CONFIG_F2FS_CHECK_FS + bool mir_exist; +#endif + + exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map); +#ifdef CONFIG_F2FS_CHECK_FS + mir_exist = f2fs_test_and_clear_bit(offset, + se->cur_valid_map_mir); + if (unlikely(exist != mir_exist)) { + f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d", + blkaddr, exist); + f2fs_bug_on(sbi, 1); + } +#endif + if (unlikely(!exist)) { + f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u", blkaddr); + f2fs_bug_on(sbi, 1); + se->valid_blocks++; + del = 0; + } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + /* + * If checkpoints are off, we must not reuse data that + * was used in the previous checkpoint. 
If it was used + * before, we must track that to know how much space we + * really have. + */ + if (f2fs_test_bit(offset, se->ckpt_valid_map)) { + spin_lock(&sbi->stat_lock); + sbi->unusable_block_count++; + spin_unlock(&sbi->stat_lock); + } + } + + if (f2fs_block_unit_discard(sbi) && + f2fs_test_and_clear_bit(offset, se->discard_map)) + sbi->discard_blks++; + + if (!f2fs_test_bit(offset, se->ckpt_valid_map)) + se->ckpt_valid_blocks += del; + + return del; +} + +static int update_sit_entry_for_alloc(struct f2fs_sb_info *sbi, struct seg_entry *se, + block_t blkaddr, unsigned int offset, int del) { - struct seg_entry *se; - unsigned int segno, offset; - long int new_vblocks; bool exist; #ifdef CONFIG_F2FS_CHECK_FS bool mir_exist; #endif + exist = f2fs_test_and_set_bit(offset, se->cur_valid_map); +#ifdef CONFIG_F2FS_CHECK_FS + mir_exist = f2fs_test_and_set_bit(offset, + se->cur_valid_map_mir); + if (unlikely(exist != mir_exist)) { + f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d", + blkaddr, exist); + f2fs_bug_on(sbi, 1); + } +#endif + if (unlikely(exist)) { + f2fs_err(sbi, "Bitmap was wrongly set, blk:%u", blkaddr); + f2fs_bug_on(sbi, 1); + se->valid_blocks--; + del = 0; + } + + if (f2fs_block_unit_discard(sbi) && + !f2fs_test_and_set_bit(offset, se->discard_map)) + sbi->discard_blks--; + + /* + * SSR should never reuse block which is checkpointed + * or newly invalidated. + */ + if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { + if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) + se->ckpt_valid_blocks++; + } + + if (!f2fs_test_bit(offset, se->ckpt_valid_map)) + se->ckpt_valid_blocks += del; + + return del; +} + +static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) +{ + struct seg_entry *se; + unsigned int segno, offset; + long int new_vblocks; + segno = GET_SEGNO(sbi, blkaddr); if (segno == NULL_SEGNO) return; @@ -2451,73 +2538,10 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) /* Update valid block bitmap */ if (del > 0) { - exist = f2fs_test_and_set_bit(offset, se->cur_valid_map); -#ifdef CONFIG_F2FS_CHECK_FS - mir_exist = f2fs_test_and_set_bit(offset, - se->cur_valid_map_mir); - if (unlikely(exist != mir_exist)) { - f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d", - blkaddr, exist); - f2fs_bug_on(sbi, 1); - } -#endif - if (unlikely(exist)) { - f2fs_err(sbi, "Bitmap was wrongly set, blk:%u", - blkaddr); - f2fs_bug_on(sbi, 1); - se->valid_blocks--; - del = 0; - } - - if (f2fs_block_unit_discard(sbi) && - !f2fs_test_and_set_bit(offset, se->discard_map)) - sbi->discard_blks--; - - /* - * SSR should never reuse block which is checkpointed - * or newly invalidated. 
- */ - if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { - if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) - se->ckpt_valid_blocks++; - } + del = update_sit_entry_for_alloc(sbi, se, blkaddr, offset, del); } else { - exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map); -#ifdef CONFIG_F2FS_CHECK_FS - mir_exist = f2fs_test_and_clear_bit(offset, - se->cur_valid_map_mir); - if (unlikely(exist != mir_exist)) { - f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d", - blkaddr, exist); - f2fs_bug_on(sbi, 1); - } -#endif - if (unlikely(!exist)) { - f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u", - blkaddr); - f2fs_bug_on(sbi, 1); - se->valid_blocks++; - del = 0; - } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { - /* - * If checkpoints are off, we must not reuse data that - * was used in the previous checkpoint. If it was used - * before, we must track that to know how much space we - * really have. - */ - if (f2fs_test_bit(offset, se->ckpt_valid_map)) { - spin_lock(&sbi->stat_lock); - sbi->unusable_block_count++; - spin_unlock(&sbi->stat_lock); - } - } - - if (f2fs_block_unit_discard(sbi) && - f2fs_test_and_clear_bit(offset, se->discard_map)) - sbi->discard_blks++; + del = update_sit_entry_for_release(sbi, se, blkaddr, offset, del); } - if (!f2fs_test_bit(offset, se->ckpt_valid_map)) - se->ckpt_valid_blocks += del; __mark_sit_entry_dirty(sbi, segno); From 81ffbd224e5f926bf8df01d6107db9c8779f7d57 Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Mon, 23 Dec 2024 16:10:42 +0800 Subject: [PATCH 20/32] f2fs: update_sit_entry_for_release() supports consecutive blocks. This function can process some consecutive blocks at a time. When using update_sit_entry() to release consecutive blocks, ensure that the consecutive blocks belong to the same segment. Because after update_sit_entry_for_realese(), @segno is still in use in update_sit_entry(). Signed-off-by: Yi Sun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 75 ++++++++++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 30 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index bc761d9b889a3..67d2859831e46 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2426,6 +2426,10 @@ static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr, SIT_I(sbi)->max_mtime = ctime; } +/* + * NOTE: when updating multiple blocks at the same time, please ensure + * that the consecutive input blocks belong to the same segment. 
+ */ static int update_sit_entry_for_release(struct f2fs_sb_info *sbi, struct seg_entry *se, block_t blkaddr, unsigned int offset, int del) { @@ -2433,42 +2437,48 @@ static int update_sit_entry_for_release(struct f2fs_sb_info *sbi, struct seg_ent #ifdef CONFIG_F2FS_CHECK_FS bool mir_exist; #endif + int i; + int del_count = -del; + + f2fs_bug_on(sbi, GET_SEGNO(sbi, blkaddr) != GET_SEGNO(sbi, blkaddr + del_count - 1)); - exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map); + for (i = 0; i < del_count; i++) { + exist = f2fs_test_and_clear_bit(offset + i, se->cur_valid_map); #ifdef CONFIG_F2FS_CHECK_FS - mir_exist = f2fs_test_and_clear_bit(offset, - se->cur_valid_map_mir); - if (unlikely(exist != mir_exist)) { - f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d", - blkaddr, exist); - f2fs_bug_on(sbi, 1); - } + mir_exist = f2fs_test_and_clear_bit(offset + i, + se->cur_valid_map_mir); + if (unlikely(exist != mir_exist)) { + f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d", + blkaddr + i, exist); + f2fs_bug_on(sbi, 1); + } #endif - if (unlikely(!exist)) { - f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u", blkaddr); - f2fs_bug_on(sbi, 1); - se->valid_blocks++; - del = 0; - } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { - /* - * If checkpoints are off, we must not reuse data that - * was used in the previous checkpoint. If it was used - * before, we must track that to know how much space we - * really have. - */ - if (f2fs_test_bit(offset, se->ckpt_valid_map)) { - spin_lock(&sbi->stat_lock); - sbi->unusable_block_count++; - spin_unlock(&sbi->stat_lock); + if (unlikely(!exist)) { + f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u", blkaddr + i); + f2fs_bug_on(sbi, 1); + se->valid_blocks++; + del += 1; + } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + /* + * If checkpoints are off, we must not reuse data that + * was used in the previous checkpoint. If it was used + * before, we must track that to know how much space we + * really have. + */ + if (f2fs_test_bit(offset + i, se->ckpt_valid_map)) { + spin_lock(&sbi->stat_lock); + sbi->unusable_block_count++; + spin_unlock(&sbi->stat_lock); + } } - } - if (f2fs_block_unit_discard(sbi) && - f2fs_test_and_clear_bit(offset, se->discard_map)) - sbi->discard_blks++; + if (f2fs_block_unit_discard(sbi) && + f2fs_test_and_clear_bit(offset + i, se->discard_map)) + sbi->discard_blks++; - if (!f2fs_test_bit(offset, se->ckpt_valid_map)) - se->ckpt_valid_blocks += del; + if (!f2fs_test_bit(offset + i, se->ckpt_valid_map)) + se->ckpt_valid_blocks -= 1; + } return del; } @@ -2517,6 +2527,11 @@ static int update_sit_entry_for_alloc(struct f2fs_sb_info *sbi, struct seg_entry return del; } +/* + * If releasing blocks, this function supports updating multiple consecutive blocks + * at one time, but please note that these consecutive blocks need to belong to the + * same segment. + */ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) { struct seg_entry *se; From e53c568f4603e997426712146dce0bc194c1db12 Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Mon, 23 Dec 2024 16:10:43 +0800 Subject: [PATCH 21/32] f2fs: add parameter @len to f2fs_invalidate_blocks() New function can process some consecutive blocks at a time. Function f2fs_invalidate_blocks()->down_write() and up_write() are very time-consuming, so if f2fs_invalidate_blocks() can process consecutive blocks at one time, it will save a lot of time. 
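A simplified, userspace-runnable sketch of the per-segment splitting the new f2fs_invalidate_blocks() performs (the real code uses GET_SEGNO(), GET_BLKOFF_FROM_SEG0() and START_BLOCK(); BLKS_PER_SEG below is an assumed stand-in, not the kernel macro):

    #include <stdio.h>

    #define BLKS_PER_SEG 512    /* illustrative value only */

    /* walk [addr, addr + len), never letting one chunk cross a segment */
    static void invalidate_range(unsigned int addr, unsigned int len)
    {
        unsigned int end = addr + len;

        while (addr < end) {
            unsigned int segno = addr / BLKS_PER_SEG;
            unsigned int seg_end = (segno + 1) * BLKS_PER_SEG;
            unsigned int cnt = (end < seg_end ? end : seg_end) - addr;

            /* the kernel updates mtime/SIT entries with this per-segment cnt */
            printf("segment %u: blocks %u..%u (%u blocks)\n",
                   segno, addr, addr + cnt - 1, cnt);
            addr += cnt;
        }
    }

    int main(void)
    {
        invalidate_range(1000, 600);    /* crosses a segment boundary */
        return 0;
    }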
Signed-off-by: Yi Sun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 4 ++-- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/file.c | 8 ++++---- fs/f2fs/node.c | 4 ++-- fs/f2fs/segment.c | 32 +++++++++++++++++++++++++------- 5 files changed, 35 insertions(+), 16 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index be3e6f4d33a2d..c5e42eec8ac9a 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1380,7 +1380,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, if (blkaddr == COMPRESS_ADDR) fio.compr_blocks++; if (__is_valid_data_blkaddr(blkaddr)) - f2fs_invalidate_blocks(sbi, blkaddr); + f2fs_invalidate_blocks(sbi, blkaddr, 1); f2fs_update_data_blkaddr(&dn, COMPRESS_ADDR); goto unlock_continue; } @@ -1390,7 +1390,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, if (i > cc->valid_nr_cpages) { if (__is_valid_data_blkaddr(blkaddr)) { - f2fs_invalidate_blocks(sbi, blkaddr); + f2fs_invalidate_blocks(sbi, blkaddr, 1); f2fs_update_data_blkaddr(&dn, NEW_ADDR); } goto unlock_continue; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f265c26d00382..4bfe162eefd36 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3724,7 +3724,8 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino); int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi); int f2fs_flush_device_cache(struct f2fs_sb_info *sbi); void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free); -void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); +void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr, + unsigned int len); bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); int f2fs_start_discard_thread(struct f2fs_sb_info *sbi); void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index aa9679b3d8e4f..81764b10840b2 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -652,7 +652,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) valid_blocks++; } - f2fs_invalidate_blocks(sbi, blkaddr); + f2fs_invalidate_blocks(sbi, blkaddr, 1); if (!released || blkaddr != COMPRESS_ADDR) nr_free++; @@ -750,7 +750,7 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) unsigned int i; for (i = 0; i < ei.len; i++) - f2fs_invalidate_blocks(sbi, ei.blk + i); + f2fs_invalidate_blocks(sbi, ei.blk + i, 1); dec_valid_block_count(sbi, inode, ei.len); f2fs_update_time(sbi, REQ_TIME); @@ -1323,7 +1323,7 @@ static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr, ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA); if (ret) { dec_valid_block_count(sbi, inode, 1); - f2fs_invalidate_blocks(sbi, *blkaddr); + f2fs_invalidate_blocks(sbi, *blkaddr, 1); } else { f2fs_update_data_blkaddr(&dn, *blkaddr); } @@ -1575,7 +1575,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, break; } - f2fs_invalidate_blocks(sbi, dn->data_blkaddr); + f2fs_invalidate_blocks(sbi, dn->data_blkaddr, 1); f2fs_set_data_blkaddr(dn, NEW_ADDR); } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 00f340c91fcbe..f88392fc4ba95 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -917,7 +917,7 @@ static int truncate_node(struct dnode_of_data *dn) } /* Deallocate node address */ - f2fs_invalidate_blocks(sbi, ni.blk_addr); + f2fs_invalidate_blocks(sbi, ni.blk_addr, 1); dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino); set_node_addr(sbi, &ni, NULL_ADDR, false); @@ -2765,7 +2765,7 @@ int 
f2fs_recover_xattr_data(struct inode *inode, struct page *page) if (err) return err; - f2fs_invalidate_blocks(sbi, ni.blk_addr); + f2fs_invalidate_blocks(sbi, ni.blk_addr, 1); dec_valid_node_count(sbi, inode, false); set_node_addr(sbi, &ni, NULL_ADDR, false); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 67d2859831e46..813254dcc00e7 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -245,7 +245,7 @@ static int __replace_atomic_write_block(struct inode *inode, pgoff_t index, if (!__is_valid_data_blkaddr(new_addr)) { if (new_addr == NULL_ADDR) dec_valid_block_count(sbi, inode, 1); - f2fs_invalidate_blocks(sbi, dn.data_blkaddr); + f2fs_invalidate_blocks(sbi, dn.data_blkaddr, 1); f2fs_update_data_blkaddr(&dn, new_addr); } else { f2fs_replace_block(sbi, &dn, dn.data_blkaddr, @@ -2567,25 +2567,43 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) get_sec_entry(sbi, segno)->valid_blocks += del; } -void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) +void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr, + unsigned int len) { unsigned int segno = GET_SEGNO(sbi, addr); struct sit_info *sit_i = SIT_I(sbi); + block_t addr_start = addr, addr_end = addr + len - 1; + unsigned int seg_num = GET_SEGNO(sbi, addr_end) - segno + 1; + unsigned int i = 1, max_blocks = sbi->blocks_per_seg, cnt; f2fs_bug_on(sbi, addr == NULL_ADDR); if (addr == NEW_ADDR || addr == COMPRESS_ADDR) return; - f2fs_invalidate_internal_cache(sbi, addr, 1); + f2fs_invalidate_internal_cache(sbi, addr, len); /* add it into sit main buffer */ down_write(&sit_i->sentry_lock); - update_segment_mtime(sbi, addr, 0); - update_sit_entry(sbi, addr, -1); + if (seg_num == 1) + cnt = len; + else + cnt = max_blocks - GET_BLKOFF_FROM_SEG0(sbi, addr); - /* add it into dirty seglist */ - locate_dirty_segment(sbi, segno); + do { + update_segment_mtime(sbi, addr_start, 0); + update_sit_entry(sbi, addr_start, -cnt); + + /* add it into dirty seglist */ + locate_dirty_segment(sbi, segno); + + /* update @addr_start and @cnt and @segno */ + addr_start = START_BLOCK(sbi, ++segno); + if (++i == seg_num) + cnt = GET_BLKOFF_FROM_SEG0(sbi, addr_end) + 1; + else + cnt = max_blocks; + } while (i <= seg_num); up_write(&sit_i->sentry_lock); } From c84c2424932d18cf3888adfe9bd16f95dc5d9fd4 Mon Sep 17 00:00:00 2001 From: zangyangyang1 Date: Mon, 13 Jan 2025 11:05:18 +0800 Subject: [PATCH 22/32] f2fs: fix using wrong 'submitted' value in f2fs_write_cache_pages When f2fs_write_single_data_page fails, f2fs_write_cache_pages will use the last 'submitted' value incorrectly, which will cause 'nwritten' and 'wbc->nr_to_write' calculation errors Signed-off-by: zangyangyang1 Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 1 + fs/f2fs/data.c | 1 + 2 files changed, 2 insertions(+) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index c5e42eec8ac9a..985690d81a82c 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1551,6 +1551,7 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc, if (!clear_page_dirty_for_io(cc->rpages[i])) goto continue_unlock; + submitted = 0; ret = f2fs_write_single_data_page(page_folio(cc->rpages[i]), &submitted, NULL, NULL, wbc, io_type, diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 35b9455fb8999..e9582e06403b6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3154,6 +3154,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping, continue; } #endif + submitted = 0; ret = f2fs_write_single_data_page(folio, &submitted, 
&bio, &last_block, wbc, io_type, 0, true); From 120ac1dc322f402544423582234f441d98ea4a6e Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Wed, 15 Jan 2025 13:39:43 +0800 Subject: [PATCH 23/32] f2fs: Optimize f2fs_truncate_data_blocks_range() Function f2fs_invalidate_blocks() can process consecutive blocks at a time, so f2fs_truncate_data_blocks_range() is optimized to use the new functionality of f2fs_invalidate_blocks(). Add two variables @blkstart and @blklen, @blkstart records the first address of the consecutive blocks, and @blkstart records the number of consecutive blocks. Signed-off-by: Yi Sun Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 81764b10840b2..3e06fea9795c5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -621,8 +621,11 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) int cluster_index = 0, valid_blocks = 0; int cluster_size = F2FS_I(dn->inode)->i_cluster_size; bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks); + block_t blkstart; + int blklen = 0; addr = get_dnode_addr(dn->inode, dn->node_page) + ofs; + blkstart = le32_to_cpu(*addr); /* Assumption: truncation starts with cluster */ for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) { @@ -638,26 +641,44 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) } if (blkaddr == NULL_ADDR) - continue; + goto next; f2fs_set_data_blkaddr(dn, NULL_ADDR); if (__is_valid_data_blkaddr(blkaddr)) { if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE)) - continue; + goto next; if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr, DATA_GENERIC_ENHANCE)) - continue; + goto next; if (compressed_cluster) valid_blocks++; } - f2fs_invalidate_blocks(sbi, blkaddr, 1); + if (blkstart + blklen == blkaddr) { + blklen++; + } else { + f2fs_invalidate_blocks(sbi, blkstart, blklen); + blkstart = blkaddr; + blklen = 1; + } if (!released || blkaddr != COMPRESS_ADDR) nr_free++; + + continue; + +next: + if (blklen) + f2fs_invalidate_blocks(sbi, blkstart, blklen); + + blkstart = le32_to_cpu(*(addr + 1)); + blklen = 0; } + if (blklen) + f2fs_invalidate_blocks(sbi, blkstart, blklen); + if (compressed_cluster) f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false); From 4811fee8283f3f7416dd66e2303a55f6761187fc Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 8 Jan 2025 18:18:32 +0000 Subject: [PATCH 24/32] f2fs: remove blk_finish_plug Let's remove unclear blk_finish_plug. 
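Returning to PATCH 23/32 above: the blkstart/blklen pair simply accumulates runs of consecutive block addresses so that f2fs_invalidate_blocks() is called once per run rather than once per block. A simplified, userspace-runnable sketch of that batching (flush_run() stands in for f2fs_invalidate_blocks(); the address list is made up for illustration):

    #include <stdio.h>

    static void flush_run(unsigned int start, unsigned int len)
    {
        if (len)
            printf("invalidate %u block(s) starting at %u\n", len, start);
    }

    int main(void)
    {
        /* 0 plays the role of NULL_ADDR here: a hole that breaks the run */
        unsigned int addrs[] = { 100, 101, 102, 0, 200, 201, 0, 300 };
        unsigned int start = 0, len = 0;

        for (unsigned int i = 0; i < sizeof(addrs) / sizeof(addrs[0]); i++) {
            unsigned int blk = addrs[i];

            if (blk && len && blk == start + len) {
                len++;                  /* extends the current run */
            } else {
                flush_run(start, len);  /* run broken: flush what we have */
                start = blk;
                len = blk ? 1 : 0;
            }
        }
        flush_run(start, len);          /* flush the trailing run */
        return 0;
    }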
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e9582e06403b6..008ecd8fd386a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -511,10 +511,6 @@ static void f2fs_submit_write_bio(struct f2fs_sb_info *sbi, struct bio *bio, enum page_type type) { WARN_ON_ONCE(is_read_io(bio_op(bio))); - - if (f2fs_lfs_mode(sbi) && current->plug && PAGE_TYPE_ON_MAIN(type)) - blk_finish_plug(current->plug); - trace_f2fs_submit_write_bio(sbi->sb, type, bio); iostat_update_submit_ctx(bio, type); submit_bio(bio); From 5c1768b6725049e1fcfc841924d65f2872413000 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 14 Jan 2025 20:34:10 +0800 Subject: [PATCH 25/32] f2fs: fix to do sanity check correctly on i_inline_xattr_size syzbot reported an out-of-range access issue as below: UBSAN: array-index-out-of-bounds in fs/f2fs/f2fs.h:3292:19 index 18446744073709550491 is out of range for type '__le32[923]' (aka 'unsigned int[923]') CPU: 0 UID: 0 PID: 5338 Comm: syz.0.0 Not tainted 6.12.0-syzkaller-10689-g7af08b57bcb9 #0 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120 ubsan_epilogue lib/ubsan.c:231 [inline] __ubsan_handle_out_of_bounds+0x121/0x150 lib/ubsan.c:429 read_inline_xattr+0x273/0x280 lookup_all_xattrs fs/f2fs/xattr.c:341 [inline] f2fs_getxattr+0x57b/0x13b0 fs/f2fs/xattr.c:533 vfs_getxattr_alloc+0x472/0x5c0 fs/xattr.c:393 ima_read_xattr+0x38/0x60 security/integrity/ima/ima_appraise.c:229 process_measurement+0x117a/0x1fb0 security/integrity/ima/ima_main.c:353 ima_file_check+0xd9/0x120 security/integrity/ima/ima_main.c:572 security_file_post_open+0xb9/0x280 security/security.c:3121 do_open fs/namei.c:3830 [inline] path_openat+0x2ccd/0x3590 fs/namei.c:3987 do_file_open_root+0x3a7/0x720 fs/namei.c:4039 file_open_root+0x247/0x2a0 fs/open.c:1382 do_handle_open+0x85b/0x9d0 fs/fhandle.c:414 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f index: 18446744073709550491 (decimal, unsigned long long) = 0xfffffffffffffb9b (hexadecimal) = -1125 (decimal, long long) UBSAN detects that inline_xattr_addr() tries to access .i_addr[-1125]. w/ below testcase, it can reproduce this bug easily: - mkfs.f2fs -f -O extra_attr,flexible_inline_xattr /dev/sdb - mount -o inline_xattr_size=512 /dev/sdb /mnt/f2fs - touch /mnt/f2fs/file - umount /mnt/f2fs - inject.f2fs --node --mb i_inline --nid 4 --val 0x1 /dev/sdb - inject.f2fs --node --mb i_inline_xattr_size --nid 4 --val 2048 /dev/sdb - mount /dev/sdb /mnt/f2fs - getfattr /mnt/f2fs/file The root cause is if metadata of filesystem and inode were fuzzed as below: - extra_attr feature is enabled - flexible_inline_xattr feature is enabled - ri.i_inline_xattr_size = 2048 - F2FS_EXTRA_ATTR bit in ri.i_inline was not set sanity_check_inode() will skip doing sanity check on fi->i_inline_xattr_size, result in using invalid inline_xattr_size later incorrectly, fix it. Meanwhile, let's fix to check lower boundary for .i_inline_xattr_size w/ MIN_INLINE_XATTR_SIZE like we did in parse_options(). 
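To illustrate the arithmetic behind the splat, here is a simplified model of the offset calculation (the helper name is hypothetical and the real inline_xattr_addr() also accounts for extra attributes):

/* Simplified model: the inline xattr area is carved from the tail of
 * i_addr[923], so its start index is roughly 923 - i_inline_xattr_size.
 * A sane size gives a valid index; the fuzzed size of 2048 gives
 * 923 - 2048 = -1125, which matches the out-of-range index
 * (0xfffffffffffffb9b as unsigned) in the UBSAN report above.
 */
#define DEF_ADDRS_PER_INODE	923	/* __le32 i_addr[923] */

static long inline_xattr_start_index(unsigned int inline_xattr_size)
{
	return (long)DEF_ADDRS_PER_INODE - (long)inline_xattr_size;
}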
There is a related issue reported by syzbot; Qasim Ijaz has analyzed and fixed
it in a very similar way [1]. As discussed, we all agree that it is better to
do the sanity check in sanity_check_inode(), so let's fix these two related
bugs w/ the current patch. Including the commit message from Qasim's patch
below, thanks a lot for his contribution.

"In f2fs_getxattr(), the function lookup_all_xattrs() allocates a 12-byte
(base_size) buffer for an inline extended attribute. However, when
__find_inline_xattr() calls __find_xattr(), it uses the macro
"list_for_each_xattr(entry, addr)", which starts by calling
XATTR_FIRST_ENTRY(addr). This skips a 24-byte struct f2fs_xattr_header at the
beginning of the buffer, causing an immediate out-of-bounds read in a 12-byte
allocation. The subsequent !IS_XATTR_LAST_ENTRY(entry) check then dereferences
memory outside the allocated region, triggering the slab-out-of-bounds read.
This patch prevents the out-of-bounds read by adding a check to bail out early
if inline_size is too small and does not account for the header plus the
4-byte value that IS_XATTR_LAST_ENTRY reads."

[1]: https://lore.kernel.org/linux-f2fs-devel/Z32y1rfBY9Qb5ZjM@qasdev.system/

Fixes: 6afc662e68b5 ("f2fs: support flexible inline xattr size")
Reported-by: syzbot+69f5379a1717a0b982a1@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/linux-f2fs-devel/674f4e7d.050a0220.17bd51.004f.GAE@google.com
Reported-by: syzbot
Closes: https://syzkaller.appspot.com/bug?extid=f5e74075e096e757bdbf
Tested-by: syzbot
Tested-by: Qasim Ijaz
Signed-off-by: Chao Yu
Signed-off-by: Jaegeuk Kim
---
fs/f2fs/inode.c | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 282fd320bdb35..7de33da8b3ead 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -302,15 +302,6 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
F2FS_TOTAL_EXTRA_ATTR_SIZE);
return false;
}
- if (f2fs_sb_has_flexible_inline_xattr(sbi) &&
- f2fs_has_inline_xattr(inode) &&
- (!fi->i_inline_xattr_size ||
- fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) {
- f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %lu",
- __func__, inode->i_ino, fi->i_inline_xattr_size,
- MAX_INLINE_XATTR_SIZE);
- return false;
- }
if (f2fs_sb_has_compression(sbi) &&
fi->i_flags & F2FS_COMPR_FL &&
F2FS_FITS_IN_INODE(ri, fi->i_extra_isize,
@@ -320,6 +311,16 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
}
}
+ if (f2fs_sb_has_flexible_inline_xattr(sbi) &&
+ f2fs_has_inline_xattr(inode) &&
+ (fi->i_inline_xattr_size < MIN_INLINE_XATTR_SIZE ||
+ fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) {
+ f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, min: %lu, max: %lu",
+ __func__, inode->i_ino, fi->i_inline_xattr_size,
+ MIN_INLINE_XATTR_SIZE, MAX_INLINE_XATTR_SIZE);
+ return false;
+ }
+
if (!f2fs_sb_has_extra_attr(sbi)) {
if (f2fs_sb_has_project_quota(sbi)) {
f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.",

From e02938613eb206ebf788e2d3d4fccf534e4ea12e Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim
Date: Fri, 17 Jan 2025 21:38:22 +0000
Subject: [PATCH 26/32] f2fs: avoid trying to get invalid block address

In f2fs_new_inode(), if we fail to get a new inode, we call iput(), followed
by f2fs_evict_inode(). If the inode is not marked as bad, it'll try to call
f2fs_remove_inode_page() which tries to read the inode block given the node
id.
But there's no block address allocated yet, which opens a window to access a
wrong block address if the block device has garbage data in the NAT table. We
need to make sure the NAT table has zero data for all the unallocated node
ids, but it is also better to avoid taking this unnecessary path at all.

Let's mark the failed inode as bad.

Fixes: 0abd675e97e6 ("f2fs: support plain user/group quota")
Reviewed-by: Chao Yu
Signed-off-by: Jaegeuk Kim
---
fs/f2fs/namei.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 57d46e1439ded..a278c7da81778 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -341,6 +341,7 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap,
trace_f2fs_new_inode(inode, err);
dquot_drop(inode);
inode->i_flags |= S_NOQUOTA;
+ make_bad_inode(inode);
if (nid_free)
set_inode_flag(inode, FI_FREE_NID);
clear_nlink(inode);

From a68905d48a4efe5ca45bb4ceab5cd7d6ac47c300 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor
Date: Mon, 20 Jan 2025 05:59:44 -0700
Subject: [PATCH 27/32] f2fs: Fix format specifier in sanity_check_inode()

When building for 32-bit platforms, for which 'size_t' is 'unsigned int',
there is a warning due to an incorrect format specifier:

fs/f2fs/inode.c:320:6: error: format specifies type 'unsigned long' but the argument has type 'unsigned int' [-Werror,-Wformat]
318 | f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, min: %lu, max: %lu",
    |                                                                                 ~~~
    |                                                                                 %u
319 | __func__, inode->i_ino, fi->i_inline_xattr_size,
320 | MIN_INLINE_XATTR_SIZE, MAX_INLINE_XATTR_SIZE);
    | ^~~~~~~~~~~~~~~~~~~~~
fs/f2fs/f2fs.h:1855:46: note: expanded from macro 'f2fs_warn'
1855 | f2fs_printk(sbi, false, KERN_WARNING fmt, ##__VA_ARGS__)
     |                                      ~~~   ^~~~~~~~~~~
fs/f2fs/xattr.h:86:31: note: expanded from macro 'MIN_INLINE_XATTR_SIZE'
86 | #define MIN_INLINE_XATTR_SIZE (sizeof(struct f2fs_xattr_header) / sizeof(__le32))
   |                               ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Use the format specifier for 'size_t', '%zu', to resolve the warning.

Fixes: 5c1768b67250 ("f2fs: fix to do sanity check correctly on i_inline_xattr_size")
Signed-off-by: Nathan Chancellor
Reviewed-by: Chao Yu
Signed-off-by: Jaegeuk Kim
---
fs/f2fs/inode.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 7de33da8b3ead..3dd25f64d6f1e 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -315,7 +315,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
f2fs_has_inline_xattr(inode) &&
(fi->i_inline_xattr_size < MIN_INLINE_XATTR_SIZE ||
fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) {
- f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, min: %lu, max: %lu",
+ f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, min: %zu, max: %lu",
__func__, inode->i_ino, fi->i_inline_xattr_size,
MIN_INLINE_XATTR_SIZE, MAX_INLINE_XATTR_SIZE);
return false;

From 207764e5d6f19de483d7f0e43243d1a1fce4fb32 Mon Sep 17 00:00:00 2001
From: Chao Yu
Date: Mon, 20 Jan 2025 19:19:40 +0800
Subject: [PATCH 28/32] f2fs: fix to avoid return invalid mtime from f2fs_get_section_mtime()

syzbot reported a f2fs bug as below:

------------[ cut here ]------------
kernel BUG at fs/f2fs/gc.c:373!
CPU: 0 UID: 0 PID: 5316 Comm: syz.0.0 Not tainted 6.13.0-rc3-syzkaller-00044-gaef25be35d23 #0 RIP: 0010:get_cb_cost fs/f2fs/gc.c:373 [inline] RIP: 0010:get_gc_cost fs/f2fs/gc.c:406 [inline] RIP: 0010:f2fs_get_victim+0x68b1/0x6aa0 fs/f2fs/gc.c:912 Call Trace: __get_victim fs/f2fs/gc.c:1707 [inline] f2fs_gc+0xc89/0x2f60 fs/f2fs/gc.c:1915 f2fs_ioc_gc fs/f2fs/file.c:2624 [inline] __f2fs_ioctl+0x4cc9/0xb8b0 fs/f2fs/file.c:4482 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:906 [inline] __se_sys_ioctl+0xf5/0x170 fs/ioctl.c:892 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f w/ below testcase, it can reproduce directly: - dd if=/dev/zero of=/tmp/file bs=1M count=64 - mkfs.f2fs /tmp/file - mount -t f2fs -o loop,mode=fragment:block /tmp/file /mnt/f2fs - echo 0 > /sys/fs/f2fs/loop0/min_ssr_sections - dd if=/dev/zero of=/mnt/f2fs/file bs=1M count=5 - umount /mnt/f2fs - for((i=4096;i<16384;i+=512)) do inject.f2fs --sit 0 --blk $i --mb mtime --val -1 /tmp/file; done - mount -o loop /tmp/file /mnt/f2fs - f2fs_io gc 0 /mnt/f2fs/file static unsigned int get_cb_cost() { ... mtime = f2fs_get_section_mtime(sbi, segno); f2fs_bug_on(sbi, mtime == INVALID_MTIME); ... } The root cause is: mtime in f2fs_sit_entry can be fuzzed to INVALID_MTIME, then it will trigger BUG_ON in get_cb_cost() during GC. Let's change behavior of f2fs_get_section_mtime() as below for fix: - return INVALID_MTIME only if total valid blocks is zero. - return INVALID_MTIME - 1 if average mtime calculated is INVALID_MTIME. Fixes: b19ee7272208 ("f2fs: introduce f2fs_get_section_mtime") Reported-by: syzbot+b9972806adbe20a910eb@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-f2fs-devel/6768c82e.050a0220.226966.0035.GAE@google.com Cc: liuderong Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 813254dcc00e7..b3a82a8cdc5fd 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -5549,8 +5549,10 @@ unsigned long long f2fs_get_section_mtime(struct f2fs_sb_info *sbi, secno = GET_SEC_FROM_SEG(sbi, segno); start = GET_SEG_FROM_SEC(sbi, secno); - if (!__is_large_section(sbi)) - return get_seg_entry(sbi, start + i)->mtime; + if (!__is_large_section(sbi)) { + mtime = get_seg_entry(sbi, start + i)->mtime; + goto out; + } for (i = 0; i < usable_segs_per_sec; i++) { /* for large section, only check the mtime of valid segments */ @@ -5563,7 +5565,11 @@ unsigned long long f2fs_get_section_mtime(struct f2fs_sb_info *sbi, if (total_valid_blocks == 0) return INVALID_MTIME; - return div_u64(mtime, total_valid_blocks); + mtime = div_u64(mtime, total_valid_blocks); +out: + if (unlikely(mtime == INVALID_MTIME)) + mtime -= 1; + return mtime; } /* From f6370a360d46e4cf10f5dde3c857747994fe1fd5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 20 Jan 2025 19:19:41 +0800 Subject: [PATCH 29/32] f2fs: procfs: show mtime in segment_bits Show mtime in segment_bits for debug. 
cat /proc/fs//f2fs/loop0/segment_bits format: segment_type|valid_blocks|bitmaps|mtime segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN) 0 3|1 | 04 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00| ffffffffffffffff 1 4|3 | 00 d0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00| ffffffffffffffff 2 5|0 | 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00| ffffffffffffffff 3 0|1 | 40 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00| ffffffffffffffff Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/sysfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 6b99dc49f7761..d15c68b28952b 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -1472,7 +1472,7 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq, le32_to_cpu(sbi->raw_super->segment_count_main); int i, j; - seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n" + seq_puts(seq, "format: segment_type|valid_blocks|bitmaps|mtime\n" "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n"); for (i = 0; i < total_segs; i++) { @@ -1482,6 +1482,7 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq, seq_printf(seq, "%d|%-3u|", se->type, se->valid_blocks); for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++) seq_printf(seq, " %.2x", se->cur_valid_map[j]); + seq_printf(seq, "| %llx", se->mtime); seq_putc(seq, '\n'); } return 0; From 6d4008dc4a8e0949afe1d9ff27c93fe68672ea06 Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Mon, 13 Jan 2025 09:47:02 +0800 Subject: [PATCH 30/32] f2fs: Clean up the loop outside of f2fs_invalidate_blocks() Now f2fs_invalidate_blocks() supports a continuous range of addresses, so the for loop can be omitted. Signed-off-by: Yi Sun Signed-off-by: Zhiguo Niu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3e06fea9795c5..f92a9fba9991b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -768,10 +768,8 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) if (IS_DEVICE_ALIASING(inode)) { struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; struct extent_info ei = et->largest; - unsigned int i; - for (i = 0; i < ei.len; i++) - f2fs_invalidate_blocks(sbi, ei.blk + i, 1); + f2fs_invalidate_blocks(sbi, ei.blk, ei.len); dec_valid_block_count(sbi, inode, ei.len); f2fs_update_time(sbi, REQ_TIME); From edf3c0860060f8171d60dc5a6c28cb6702559f32 Mon Sep 17 00:00:00 2001 From: Zhiguo Niu Date: Tue, 21 Jan 2025 10:15:41 +0800 Subject: [PATCH 31/32] f2fs: fix to avoid changing 'check only' behaior of recovery The following two 'check only recovery' processes are very dependent on the return value of f2fs_recover_fsync_data, especially when the return value is greater than 0. 1. when device has readonly mode, shown as commit 23738e74472f ("f2fs: fix to restrict mount condition on readonly block device") 2. 
mount option NORECOVERY or DISABLE_ROLL_FORWARD is set, shown as
commit 6781eabba1bd ("f2fs: give -EINVAL for norecovery and rw mount")

However, commit c426d99127b1 ("f2fs: Check write pointer consistency of open
zones") will change the return value unexpectedly, thereby changing the
caller's behavior. This patch lets f2fs_recover_fsync_data() return the
correct value, and skips f2fs_check_and_fix_write_pointer() when the device
is read-only.

Fixes: c426d99127b1 ("f2fs: Check write pointer consistency of open zones")
Signed-off-by: Zhiguo Niu
Reviewed-by: Chao Yu
Signed-off-by: Jaegeuk Kim
---
fs/f2fs/recovery.c | 4 +---
fs/f2fs/segment.c | 3 ++-
2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index f35be2c48e3c0..69a2027e3ebcf 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -899,10 +899,8 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
* and the f2fs is not read only, check and fix zoned block devices'
* write pointer consistency.
*/
- if (!err) {
+ if (!err)
err = f2fs_check_and_fix_write_pointer(sbi);
- ret = err;
- }
if (!err)
clear_sbi_flag(sbi, SBI_POR_DOING);

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 813254dcc00e7..dc1b47f9269a6 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -5462,7 +5462,8 @@ int f2fs_check_and_fix_write_pointer(struct f2fs_sb_info *sbi)
{
int ret;
- if (!f2fs_sb_has_blkzoned(sbi) || f2fs_readonly(sbi->sb))
+ if (!f2fs_sb_has_blkzoned(sbi) || f2fs_readonly(sbi->sb) ||
+ f2fs_hw_is_readonly(sbi))
return 0;
f2fs_notice(sbi, "Checking entire write pointers");

From 03511e936916873bf880e6678c98d5fb59c19742 Mon Sep 17 00:00:00 2001
From: Jianan Huang
Date: Fri, 24 Jan 2025 13:57:51 +0800
Subject: [PATCH 32/32] f2fs: fix inconsistent dirty state of atomic file

When testing the atomic write fix patches, the f2fs_bug_on was triggered as
below:

------------[ cut here ]------------
kernel BUG at fs/f2fs/inode.c:935!
Oops: invalid opcode: 0000 [#1] PREEMPT SMP PTI
CPU: 3 UID: 0 PID: 257 Comm: bash Not tainted 6.13.0-rc1-00033-gc283a70d3497 #5
RIP: 0010:f2fs_evict_inode+0x50f/0x520
Call Trace:
 ? __die_body+0x65/0xb0
 ? die+0x9f/0xc0
 ? do_trap+0xa1/0x170
 ? f2fs_evict_inode+0x50f/0x520
 ? f2fs_evict_inode+0x50f/0x520
 ? handle_invalid_op+0x65/0x80
 ? f2fs_evict_inode+0x50f/0x520
 ? exc_invalid_op+0x39/0x50
 ? asm_exc_invalid_op+0x1a/0x20
 ? __pfx_f2fs_get_dquots+0x10/0x10
 ? f2fs_evict_inode+0x50f/0x520
 ? f2fs_evict_inode+0x2e5/0x520
 evict+0x186/0x2f0
 prune_icache_sb+0x75/0xb0
 super_cache_scan+0x1a8/0x200
 do_shrink_slab+0x163/0x320
 shrink_slab+0x2fc/0x470
 drop_slab+0x82/0xf0
 drop_caches_sysctl_handler+0x4e/0xb0
 proc_sys_call_handler+0x183/0x280
 vfs_write+0x36d/0x450
 ksys_write+0x68/0xd0
 do_syscall_64+0xc8/0x1a0
 ? arch_exit_to_user_mode_prepare+0x11/0x60
 ? irqentry_exit_to_user_mode+0x7e/0xa0

The root cause is: f2fs uses FI_ATOMIC_DIRTIED to indicate dirty atomic files
during commit. If the inode is dirtied during commit, such as by
f2fs_i_pino_write, the vfs inode keeps clean and the f2fs inode is set to
FI_DIRTY_INODE. The FI_DIRTY_INODE flag can't be cleared by write_inode later
due to the clean vfs inode. Finally, f2fs_bug_on is triggered due to this
inconsistent state when the inode is evicted.
To reproduce this situation: - fd = open("/mnt/test.db", O_WRONLY) - ioctl(fd, F2FS_IOC_START_ATOMIC_WRITE) - mv /mnt/test.db /mnt/test1.db - ioctl(fd, F2FS_IOC_COMMIT_ATOMIC_WRITE) - echo 3 > /proc/sys/vm/drop_caches To fix this problem, clear FI_DIRTY_INODE after commit, then f2fs_mark_inode_dirty_sync will ensure a consistent dirty state. Fixes: fccaa81de87e ("f2fs: prevent atomic file from being dirtied before commit") Signed-off-by: Yunlei He Signed-off-by: Jianan Huang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index dc1b47f9269a6..c282e8a0a2ec1 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -201,6 +201,12 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) clear_inode_flag(inode, FI_ATOMIC_FILE); if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) { clear_inode_flag(inode, FI_ATOMIC_DIRTIED); + /* + * The vfs inode keeps clean during commit, but the f2fs inode + * doesn't. So clear the dirty state after commit and let + * f2fs_mark_inode_dirty_sync ensure a consistent dirty state. + */ + f2fs_inode_synced(inode); f2fs_mark_inode_dirty_sync(inode, true); } stat_dec_atomic_inode(inode);
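For reference, the reproduction steps above correspond roughly to the following user-space sketch (illustrative only; error handling and the final drop_caches write are elided, and the paths are just examples):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>

#define F2FS_IOCTL_MAGIC		0xf5
#define F2FS_IOC_START_ATOMIC_WRITE	_IO(F2FS_IOCTL_MAGIC, 1)
#define F2FS_IOC_COMMIT_ATOMIC_WRITE	_IO(F2FS_IOCTL_MAGIC, 2)

int main(void)
{
	int fd = open("/mnt/test.db", O_WRONLY);

	ioctl(fd, F2FS_IOC_START_ATOMIC_WRITE);
	/* the rename dirties i_pino while the atomic write is pending */
	rename("/mnt/test.db", "/mnt/test1.db");
	ioctl(fd, F2FS_IOC_COMMIT_ATOMIC_WRITE);
	/* then: echo 3 > /proc/sys/vm/drop_caches to force eviction */
	return 0;
}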