f2fs: fix race of pending_pages in decompression
I found that f2fs_free_dic() was invoked at the wrong time, while
f2fs_verify_bio() still needed the dic info, which triggered the
kernel panic below. It was caused by a race condition on the
pending_pages value between the decompression and verity logic, when
the same compression cluster had been split across different bios.
Because of the split bios, f2fs_verify_bio() ended up decreasing the
pending_pages value before it was reset to nr_cpages by
f2fs_decompress_pages(), causing the kernel panic.
[ 4416.564763] Unable to handle kernel NULL pointer dereference
at virtual address 0000000000000000
...
[ 4416.896016] Workqueue: fsverity_read_queue f2fs_verity_work
[ 4416.908515] pc : fsverity_verify_page+0x20/0x78
[ 4416.913721] lr : f2fs_verify_bio+0x11c/0x29c
[ 4416.913722] sp : ffffffc019533cd0
[ 4416.913723] x29: ffffffc019533cd0 x28: 0000000000000402
[ 4416.913724] x27: 0000000000000001 x26: 0000000000000100
[ 4416.913726] x25: 0000000000000001 x24: 0000000000000004
[ 4416.913727] x23: 0000000000001000 x22: 0000000000000000
[ 4416.913728] x21: 0000000000000000 x20: ffffffff2076f9c0
[ 4416.913729] x19: ffffffff2076f9c0 x18: ffffff8a32380c30
[ 4416.913731] x17: ffffffc01f966d97 x16: 0000000000000298
[ 4416.913732] x15: 0000000000000000 x14: 0000000000000000
[ 4416.913733] x13: f074faec89ffffff x12: 0000000000000000
[ 4416.913734] x11: 0000000000001000 x10: 0000000000001000
[ 4416.929176] x9 : ffffffff20d1f5c7 x8 : 0000000000000000
[ 4416.929178] x7 : 626d7464ff286b6b x6 : ffffffc019533ade
[ 4416.929179] x5 : 000000008049000e x4 : ffffffff2793e9e0
[ 4416.929180] x3 : 000000008049000e x2 : ffffff89ecfa74d0
[ 4416.929181] x1 : 0000000000000c40 x0 : ffffffff2076f9c0
[ 4416.929184] Call trace:
[ 4416.929187] fsverity_verify_page+0x20/0x78
[ 4416.929189] f2fs_verify_bio+0x11c/0x29c
[ 4416.929192] f2fs_verity_work+0x58/0x84
[ 4417.050667] process_one_work+0x270/0x47c
[ 4417.055354] worker_thread+0x27c/0x4d8
[ 4417.059784] kthread+0x13c/0x320
[ 4417.063693] ret_from_fork+0x10/0x18
Chao pointed out that this can happen due to the race condition below.
Thread A f2fs_post_read_wq fsverity_wq
- f2fs_read_multi_pages()
- f2fs_alloc_dic
- dic->pending_pages = 2
- submit_bio()
- submit_bio()
- f2fs_post_read_work() handle first bio
- f2fs_decompress_work()
- __read_end_io()
- f2fs_decompress_pages()
- dic->pending_pages--
- enqueue f2fs_verity_work()
- f2fs_verity_work() handle first bio
- f2fs_verify_bio()
- dic->pending_pages--
- f2fs_post_read_work() handle second bio
- f2fs_decompress_work()
- enqueue f2fs_verity_work()
- f2fs_verify_pages()
- f2fs_free_dic()
- f2fs_verity_work() handle second bio
- f2fs_verify_bio()
- use-after-free on dic
Signed-off-by: Daeho Jeong <daehojeong@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
This commit is contained in:
@@ -792,8 +792,6 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
|
|||||||
if (cops->destroy_decompress_ctx)
|
if (cops->destroy_decompress_ctx)
|
||||||
cops->destroy_decompress_ctx(dic);
|
cops->destroy_decompress_ctx(dic);
|
||||||
out_free_dic:
|
out_free_dic:
|
||||||
if (verity)
|
|
||||||
atomic_set(&dic->pending_pages, dic->nr_cpages);
|
|
||||||
if (!verity)
|
if (!verity)
|
||||||
f2fs_decompress_end_io(dic->rpages, dic->cluster_size,
|
f2fs_decompress_end_io(dic->rpages, dic->cluster_size,
|
||||||
ret, false);
|
ret, false);
|
||||||
|
|||||||
@@ -200,7 +200,7 @@ static void f2fs_verify_bio(struct bio *bio)
|
|||||||
dic = (struct decompress_io_ctx *)page_private(page);
|
dic = (struct decompress_io_ctx *)page_private(page);
|
||||||
|
|
||||||
if (dic) {
|
if (dic) {
|
||||||
if (atomic_dec_return(&dic->pending_pages))
|
if (atomic_dec_return(&dic->verity_pages))
|
||||||
continue;
|
continue;
|
||||||
f2fs_verify_pages(dic->rpages,
|
f2fs_verify_pages(dic->rpages,
|
||||||
dic->cluster_size);
|
dic->cluster_size);
|
||||||
@@ -984,7 +984,8 @@ static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
|
|||||||
|
|
||||||
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
|
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
|
||||||
unsigned nr_pages, unsigned op_flag,
|
unsigned nr_pages, unsigned op_flag,
|
||||||
pgoff_t first_idx, bool for_write)
|
pgoff_t first_idx, bool for_write,
|
||||||
|
bool for_verity)
|
||||||
{
|
{
|
||||||
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
||||||
struct bio *bio;
|
struct bio *bio;
|
||||||
@@ -1003,7 +1004,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
|
|||||||
post_read_steps |= 1 << STEP_DECRYPT;
|
post_read_steps |= 1 << STEP_DECRYPT;
|
||||||
if (f2fs_compressed_file(inode))
|
if (f2fs_compressed_file(inode))
|
||||||
post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ;
|
post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ;
|
||||||
if (f2fs_need_verity(inode, first_idx))
|
if (for_verity && f2fs_need_verity(inode, first_idx))
|
||||||
post_read_steps |= 1 << STEP_VERITY;
|
post_read_steps |= 1 << STEP_VERITY;
|
||||||
|
|
||||||
if (post_read_steps) {
|
if (post_read_steps) {
|
||||||
@@ -1033,7 +1034,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page,
|
|||||||
struct bio *bio;
|
struct bio *bio;
|
||||||
|
|
||||||
bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
|
bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
|
||||||
page->index, for_write);
|
page->index, for_write, true);
|
||||||
if (IS_ERR(bio))
|
if (IS_ERR(bio))
|
||||||
return PTR_ERR(bio);
|
return PTR_ERR(bio);
|
||||||
|
|
||||||
@@ -2070,7 +2071,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page,
|
|||||||
if (bio == NULL) {
|
if (bio == NULL) {
|
||||||
bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
|
bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
|
||||||
is_readahead ? REQ_RAHEAD : 0, page->index,
|
is_readahead ? REQ_RAHEAD : 0, page->index,
|
||||||
false);
|
false, true);
|
||||||
if (IS_ERR(bio)) {
|
if (IS_ERR(bio)) {
|
||||||
ret = PTR_ERR(bio);
|
ret = PTR_ERR(bio);
|
||||||
bio = NULL;
|
bio = NULL;
|
||||||
@@ -2116,6 +2117,8 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
|
|||||||
sector_t last_block_in_file;
|
sector_t last_block_in_file;
|
||||||
const unsigned blocksize = blks_to_bytes(inode, 1);
|
const unsigned blocksize = blks_to_bytes(inode, 1);
|
||||||
struct decompress_io_ctx *dic = NULL;
|
struct decompress_io_ctx *dic = NULL;
|
||||||
|
struct bio_post_read_ctx *ctx;
|
||||||
|
bool for_verity = false;
|
||||||
int i;
|
int i;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
@@ -2181,10 +2184,29 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
|
|||||||
goto out_put_dnode;
|
goto out_put_dnode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* It's possible to enable fsverity on the fly when handling a cluster,
|
||||||
|
* which requires complicated error handling. Instead of adding more
|
||||||
|
* complexity, let's give a rule where end_io post-processes fsverity
|
||||||
|
* per cluster. In order to do that, we need to submit bio, if previous
|
||||||
|
* bio sets a different post-process policy.
|
||||||
|
*/
|
||||||
|
if (fsverity_active(cc->inode)) {
|
||||||
|
atomic_set(&dic->verity_pages, cc->nr_cpages);
|
||||||
|
for_verity = true;
|
||||||
|
|
||||||
|
if (bio) {
|
||||||
|
ctx = bio->bi_private;
|
||||||
|
if (!(ctx->enabled_steps & (1 << STEP_VERITY))) {
|
||||||
|
__submit_bio(sbi, bio, DATA);
|
||||||
|
bio = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (i = 0; i < dic->nr_cpages; i++) {
|
for (i = 0; i < dic->nr_cpages; i++) {
|
||||||
struct page *page = dic->cpages[i];
|
struct page *page = dic->cpages[i];
|
||||||
block_t blkaddr;
|
block_t blkaddr;
|
||||||
struct bio_post_read_ctx *ctx;
|
|
||||||
|
|
||||||
blkaddr = data_blkaddr(dn.inode, dn.node_page,
|
blkaddr = data_blkaddr(dn.inode, dn.node_page,
|
||||||
dn.ofs_in_node + i + 1);
|
dn.ofs_in_node + i + 1);
|
||||||
@@ -2199,17 +2221,31 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
|
|||||||
if (!bio) {
|
if (!bio) {
|
||||||
bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
|
bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
|
||||||
is_readahead ? REQ_RAHEAD : 0,
|
is_readahead ? REQ_RAHEAD : 0,
|
||||||
page->index, for_write);
|
page->index, for_write, for_verity);
|
||||||
if (IS_ERR(bio)) {
|
if (IS_ERR(bio)) {
|
||||||
|
unsigned int remained = dic->nr_cpages - i;
|
||||||
|
bool release = false;
|
||||||
|
|
||||||
ret = PTR_ERR(bio);
|
ret = PTR_ERR(bio);
|
||||||
dic->failed = true;
|
dic->failed = true;
|
||||||
if (!atomic_sub_return(dic->nr_cpages - i,
|
|
||||||
&dic->pending_pages)) {
|
if (for_verity) {
|
||||||
|
if (!atomic_sub_return(remained,
|
||||||
|
&dic->verity_pages))
|
||||||
|
release = true;
|
||||||
|
} else {
|
||||||
|
if (!atomic_sub_return(remained,
|
||||||
|
&dic->pending_pages))
|
||||||
|
release = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (release) {
|
||||||
f2fs_decompress_end_io(dic->rpages,
|
f2fs_decompress_end_io(dic->rpages,
|
||||||
cc->cluster_size, true,
|
cc->cluster_size, true,
|
||||||
false);
|
false);
|
||||||
f2fs_free_dic(dic);
|
f2fs_free_dic(dic);
|
||||||
}
|
}
|
||||||
|
|
||||||
f2fs_put_dnode(&dn);
|
f2fs_put_dnode(&dn);
|
||||||
*bio_ret = NULL;
|
*bio_ret = NULL;
|
||||||
return ret;
|
return ret;
|
||||||
|
|||||||
@@ -1358,6 +1358,7 @@ struct decompress_io_ctx {
|
|||||||
size_t rlen; /* valid data length in rbuf */
|
size_t rlen; /* valid data length in rbuf */
|
||||||
size_t clen; /* valid data length in cbuf */
|
size_t clen; /* valid data length in cbuf */
|
||||||
atomic_t pending_pages; /* in-flight compressed page count */
|
atomic_t pending_pages; /* in-flight compressed page count */
|
||||||
|
atomic_t verity_pages; /* in-flight page count for verity */
|
||||||
bool failed; /* indicate IO error during decompression */
|
bool failed; /* indicate IO error during decompression */
|
||||||
void *private; /* payload buffer for specified decompression algorithm */
|
void *private; /* payload buffer for specified decompression algorithm */
|
||||||
void *private2; /* extra payload buffer */
|
void *private2; /* extra payload buffer */
|
||||||
|
|||||||
Reference in New Issue
Block a user