Merge remote-tracking branch 'qcom_sm8250/lineage-20' into lineage-22.2

Change-Id: Ib75fa5f45c643fe313df6e95a5b3af92593b1962
Sebastiano Barezzi
2025-07-29 23:07:57 +02:00
86 changed files with 12865 additions and 6227 deletions

View File

@@ -415,9 +415,9 @@ EXPORT_SYMBOL(fscrypt_fname_disk_to_usr);
* directory's encryption key, then @iname is the plaintext, so we encrypt it to
* get the disk_name.
*
* Else, for keyless @lookup operations, @iname is the presented ciphertext, so
* we decode it to get the fscrypt_nokey_name. Non-@lookup operations will be
* impossible in this case, so we fail them with ENOKEY.
* Else, for keyless @lookup operations, @iname should be a no-key name, so we
* decode it to get the struct fscrypt_nokey_name. Non-@lookup operations will
* be impossible in this case, so we fail them with ENOKEY.
*
* If successful, fscrypt_free_filename() must be called later to clean up.
*
@@ -461,7 +461,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
}
if (!lookup)
return -ENOKEY;
fname->is_ciphertext_name = true;
fname->is_nokey_name = true;
/*
* We don't have the key and we are doing a lookup; decode the
@@ -571,17 +571,17 @@ int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
/*
* Plaintext names are always valid, since fscrypt doesn't support
* reverting to ciphertext names without evicting the directory's inode
* reverting to no-key names without evicting the directory's inode
* -- which implies eviction of the dentries in the directory.
*/
if (!(dentry->d_flags & DCACHE_ENCRYPTED_NAME))
if (!(dentry->d_flags & DCACHE_NOKEY_NAME))
return 1;
/*
* Ciphertext name; valid if the directory's key is still unavailable.
* No-key name; valid if the directory's key is still unavailable.
*
* Although fscrypt forbids rename() on ciphertext names, we still must
* use dget_parent() here rather than use ->d_parent directly. That's
* Although fscrypt forbids rename() on no-key names, we still must use
* dget_parent() here rather than use ->d_parent directly. That's
* because a corrupted fs image may contain directory hard links, which
* the VFS handles by moving the directory's dentry tree in the dcache
* each time ->lookup() finds the directory and it already has a dentry
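The two hunks above are part of the rename from fscrypt's "ciphertext name" concept to "no-key name". A minimal sketch of the contract the updated comment describes, with a hypothetical caller (example_setup_filename is illustrative, not part of the commit):

	/*
	 * Sketch: what fscrypt_setup_filename() promises on a keyless
	 * directory, per the comment above.
	 */
	static int example_setup_filename(struct inode *dir,
					  const struct qstr *iname, int lookup)
	{
		struct fscrypt_name fname;
		int err = fscrypt_setup_filename(dir, iname, lookup, &fname);

		if (err == -ENOKEY)
			return err;	/* non-lookup op without the key: impossible */
		if (err)
			return err;
		if (fname.is_nokey_name) {
			/*
			 * Keyless lookup: @iname held a base64-encoded no-key
			 * name and was decoded rather than encrypted; the
			 * caller then flags the dentry DCACHE_NOKEY_NAME so
			 * fscrypt_d_revalidate() can drop it once the
			 * directory's key appears.
			 */
		}
		fscrypt_free_filename(&fname);
		return 0;
	}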

View File

@@ -113,9 +113,9 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry,
if (err && err != -ENOENT)
return err;
if (fname->is_ciphertext_name) {
if (fname->is_nokey_name) {
spin_lock(&dentry->d_lock);
dentry->d_flags |= DCACHE_ENCRYPTED_NAME;
dentry->d_flags |= DCACHE_NOKEY_NAME;
spin_unlock(&dentry->d_lock);
}
return err;
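For reference, a hedged sketch of how the flag set here is consumed at revalidation time, simplified from fscrypt_d_revalidate() in the previous file (example_d_revalidate is illustrative only; the dget_parent() step matches the caution in the comment above):

	static int example_d_revalidate(struct dentry *dentry)
	{
		struct dentry *dir;
		int valid;

		if (!(dentry->d_flags & DCACHE_NOKEY_NAME))
			return 1;	/* plaintext names never go stale */

		/* Use dget_parent(), not ->d_parent; see the comment above. */
		dir = dget_parent(dentry);
		valid = !fscrypt_has_encryption_key(d_inode(dir));
		dput(dir);
		return valid;	/* 0 => dentry is dropped once the key is added */
	}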

View File

@@ -3146,10 +3146,6 @@ static inline void ext4_unlock_group(struct super_block *sb,
/* dir.c */
extern const struct file_operations ext4_dir_operations;
#ifdef CONFIG_UNICODE
extern const struct dentry_operations ext4_dentry_ops;
#endif
/* file.c */
extern const struct inode_operations ext4_file_inode_operations;
extern const struct file_operations ext4_file_operations;

View File

@@ -12,7 +12,6 @@
#include "ext4.h"
#include "xattr.h"
#include "truncate.h"
#include <trace/events/android_fs.h>
#define EXT4_XATTR_SYSTEM_DATA "data"
#define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS))
@@ -518,17 +517,6 @@ int ext4_readpage_inline(struct inode *inode, struct page *page)
return -EAGAIN;
}
if (trace_android_fs_dataread_start_enabled()) {
char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
path = android_fstrace_get_pathname(pathbuf,
MAX_TRACE_PATHBUF_LEN,
inode);
trace_android_fs_dataread_start(inode, page_offset(page),
PAGE_SIZE, current->pid,
path, current->comm);
}
/*
* Current inline data can only exist in the 1st page,
* So for all the other pages, just set them uptodate.
@@ -540,8 +528,6 @@ int ext4_readpage_inline(struct inode *inode, struct page *page)
SetPageUptodate(page);
}
trace_android_fs_dataread_end(inode, page_offset(page), PAGE_SIZE);
up_read(&EXT4_I(inode)->xattr_sem);
unlock_page(page);

View File

@@ -47,7 +47,6 @@
#include "truncate.h"
#include <trace/events/ext4.h>
#include <trace/events/android_fs.h>
#define MPAGE_DA_EXTENT_TAIL 0x01
@@ -1271,16 +1270,6 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
if (trace_android_fs_datawrite_start_enabled()) {
char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
path = android_fstrace_get_pathname(pathbuf,
MAX_TRACE_PATHBUF_LEN,
inode);
trace_android_fs_datawrite_start(inode, pos, len,
current->pid, path,
current->comm);
}
trace_ext4_write_begin(inode, pos, len, flags);
/*
* Reserve one block more for addition to orphan list in case
@@ -1430,7 +1419,6 @@ static int ext4_write_end(struct file *file,
int inline_data = ext4_has_inline_data(inode);
bool verity = ext4_verity_in_progress(inode);
trace_android_fs_datawrite_end(inode, pos, len);
trace_ext4_write_end(inode, pos, len, copied);
if (inline_data &&
ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
@@ -1542,7 +1530,6 @@ static int ext4_journalled_write_end(struct file *file,
int inline_data = ext4_has_inline_data(inode);
bool verity = ext4_verity_in_progress(inode);
trace_android_fs_datawrite_end(inode, pos, len);
trace_ext4_journalled_write_end(inode, pos, len, copied);
from = pos & (PAGE_SIZE - 1);
to = from + len;
@@ -3110,16 +3097,6 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
len, flags, pagep, fsdata);
}
*fsdata = (void *)0;
if (trace_android_fs_datawrite_start_enabled()) {
char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
path = android_fstrace_get_pathname(pathbuf,
MAX_TRACE_PATHBUF_LEN,
inode);
trace_android_fs_datawrite_start(inode, pos, len,
current->pid,
path, current->comm);
}
trace_ext4_da_write_begin(inode, pos, len, flags);
if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
@@ -3238,7 +3215,6 @@ static int ext4_da_write_end(struct file *file,
return ext4_write_end(file, mapping, pos,
len, copied, page, fsdata);
trace_android_fs_datawrite_end(inode, pos, len);
trace_ext4_da_write_end(inode, pos, len, copied);
start = pos & (PAGE_SIZE - 1);
end = start + copied - 1;
@@ -3962,7 +3938,6 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
size_t count = iov_iter_count(iter);
loff_t offset = iocb->ki_pos;
ssize_t ret;
int rw = iov_iter_rw(iter);
if (!fscrypt_dio_supported(iocb, iter))
return 0;
@@ -3980,28 +3955,6 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
if (ext4_has_inline_data(inode))
return 0;
if (trace_android_fs_dataread_start_enabled() &&
(rw == READ)) {
char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
path = android_fstrace_get_pathname(pathbuf,
MAX_TRACE_PATHBUF_LEN,
inode);
trace_android_fs_dataread_start(inode, offset, count,
current->pid, path,
current->comm);
}
if (trace_android_fs_datawrite_start_enabled() &&
(rw == WRITE)) {
char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
path = android_fstrace_get_pathname(pathbuf,
MAX_TRACE_PATHBUF_LEN,
inode);
trace_android_fs_datawrite_start(inode, offset, count,
current->pid, path,
current->comm);
}
trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
if (iov_iter_rw(iter) == READ)
ret = ext4_direct_IO_read(iocb, iter);
@@ -4009,13 +3962,6 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
ret = ext4_direct_IO_write(iocb, iter);
trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret);
if (trace_android_fs_dataread_start_enabled() &&
(rw == READ))
trace_android_fs_dataread_end(inode, offset, count);
if (trace_android_fs_datawrite_start_enabled() &&
(rw == WRITE))
trace_android_fs_datawrite_end(inode, offset, count);
return ret;
}

View File

@@ -1242,6 +1242,12 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return -EOPNOTSUPP;
return fsverity_ioctl_measure(filp, (void __user *)arg);
case FS_IOC_READ_VERITY_METADATA:
if (!ext4_has_feature_verity(sb))
return -EOPNOTSUPP;
return fsverity_ioctl_read_metadata(filp,
(const void __user *)arg);
default:
return -ENOTTY;
}
@@ -1314,6 +1320,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case FS_IOC_MEASURE_VERITY:
case EXT4_IOC_FSGETXATTR:
case EXT4_IOC_FSSETXATTR:
case FS_IOC_READ_VERITY_METADATA:
break;
default:
return -ENOIOCTLCMD;
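The new case wires FS_IOC_READ_VERITY_METADATA through to fsverity_ioctl_read_metadata(), and the compat table entry lets 32-bit userspace reach it as well. A userspace sketch of the call, assuming UAPI headers that ship the ioctl and a file with verity already enabled:

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/fsverity.h>

	int main(int argc, char **argv)
	{
		struct fsverity_read_metadata_arg arg;
		char buf[4096];
		int fd, ret;

		if (argc != 2) {
			fprintf(stderr, "usage: %s <verity-file>\n", argv[0]);
			return 1;
		}
		fd = open(argv[1], O_RDONLY);
		if (fd < 0) {
			perror("open");
			return 1;
		}

		memset(&arg, 0, sizeof(arg));
		arg.metadata_type = FS_VERITY_METADATA_TYPE_DESCRIPTOR;
		arg.offset = 0;
		arg.length = sizeof(buf);
		arg.buf_ptr = (uintptr_t)buf;

		/* Returns the number of bytes read; EOPNOTSUPP surfaces in
		 * errno when the filesystem lacks the verity feature, per the
		 * hunk above. */
		ret = ioctl(fd, FS_IOC_READ_VERITY_METADATA, &arg);
		if (ret < 0) {
			perror("FS_IOC_READ_VERITY_METADATA");
			return 1;
		}
		printf("read %d bytes of fs-verity descriptor\n", ret);
		return 0;
	}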

View File

@@ -1397,7 +1397,7 @@ static int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
/* Handle invalid character sequence as either an error
* or as an opaque byte sequence.
*/
if (sb_has_enc_strict_mode(sb))
if (sb_has_strict_encoding(sb))
ret = -EINVAL;
else if (name->len != entry.len)
ret = 1;
@@ -1731,7 +1731,7 @@ static struct buffer_head *ext4_lookup_entry(struct inode *dir,
struct buffer_head *bh;
err = ext4_fname_prepare_lookup(dir, dentry, &fname);
generic_set_encrypted_ci_d_ops(dir, dentry);
generic_set_encrypted_ci_d_ops(dentry);
if (err == -ENOENT)
return NULL;
if (err)
@@ -2393,7 +2393,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
return -EINVAL;
#ifdef CONFIG_UNICODE
if (sb_has_enc_strict_mode(sb) && IS_CASEFOLDED(dir) &&
if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) &&
sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name))
return -EINVAL;
#endif

View File

@@ -46,7 +46,6 @@
#include <linux/cleancache.h>
#include "ext4.h"
#include <trace/events/android_fs.h>
#define NUM_PREALLOC_POST_READ_CTXS 128
@@ -147,17 +146,6 @@ static bool bio_post_read_required(struct bio *bio)
return bio->bi_private && !bio->bi_status;
}
static void
ext4_trace_read_completion(struct bio *bio)
{
struct page *first_page = bio->bi_io_vec[0].bv_page;
if (first_page != NULL)
trace_android_fs_dataread_end(first_page->mapping->host,
page_offset(first_page),
bio->bi_iter.bi_size);
}
/*
* I/O completion handler for multipage BIOs.
*
@@ -172,9 +160,6 @@ ext4_trace_read_completion(struct bio *bio)
*/
static void mpage_end_io(struct bio *bio)
{
if (trace_android_fs_dataread_start_enabled())
ext4_trace_read_completion(bio);
if (bio_post_read_required(bio)) {
struct bio_post_read_ctx *ctx = bio->bi_private;
@@ -224,30 +209,6 @@ static inline loff_t ext4_readpage_limit(struct inode *inode)
return i_size_read(inode);
}
static void
ext4_submit_bio_read(struct bio *bio)
{
if (trace_android_fs_dataread_start_enabled()) {
struct page *first_page = bio->bi_io_vec[0].bv_page;
if (first_page != NULL) {
char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
path = android_fstrace_get_pathname(pathbuf,
MAX_TRACE_PATHBUF_LEN,
first_page->mapping->host);
trace_android_fs_dataread_start(
first_page->mapping->host,
page_offset(first_page),
bio->bi_iter.bi_size,
current->pid,
path,
current->comm);
}
}
submit_bio(bio);
}
int ext4_mpage_readpages(struct address_space *mapping,
struct list_head *pages, struct page *page,
unsigned nr_pages, bool is_readahead)
@@ -395,7 +356,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
if (bio && (last_block_in_bio != blocks[0] - 1 ||
!fscrypt_mergeable_bio(bio, inode, next_block))) {
submit_and_realloc:
ext4_submit_bio_read(bio);
submit_bio(bio);
bio = NULL;
}
if (bio == NULL) {
@@ -428,14 +389,14 @@ int ext4_mpage_readpages(struct address_space *mapping,
if (((map.m_flags & EXT4_MAP_BOUNDARY) &&
(relative_block == map.m_len)) ||
(first_hole != blocks_per_page)) {
ext4_submit_bio_read(bio);
submit_bio(bio);
bio = NULL;
} else
last_block_in_bio = blocks[blocks_per_page - 1];
goto next_page;
confused:
if (bio) {
ext4_submit_bio_read(bio);
submit_bio(bio);
bio = NULL;
}
if (!PageUptodate(page))
@@ -448,7 +409,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
}
BUG_ON(pages && !list_empty(pages));
if (bio)
ext4_submit_bio_read(bio);
submit_bio(bio);
return 0;
}

View File

@@ -6,6 +6,13 @@ config F2FS_FS
select CRYPTO_CRC32
select F2FS_FS_XATTR if FS_ENCRYPTION
select FS_ENCRYPTION_ALGS if FS_ENCRYPTION
select LZ4_COMPRESS if F2FS_FS_LZ4
select LZ4_DECOMPRESS if F2FS_FS_LZ4
select LZ4HC_COMPRESS if F2FS_FS_LZ4HC
select LZO_COMPRESS if F2FS_FS_LZO
select LZO_DECOMPRESS if F2FS_FS_LZO
select ZSTD_COMPRESS if F2FS_FS_ZSTD
select ZSTD_DECOMPRESS if F2FS_FS_ZSTD
help
F2FS is based on Log-structured File System (LFS), which supports
versatile "flash-friendly" features. The design has been focused on
@@ -84,16 +91,6 @@ config F2FS_FS_ENCRYPTION
FS_ENCRYPTION. Use CONFIG_FS_ENCRYPTION=y in new config
files.
config F2FS_IO_TRACE
bool "F2FS IO tracer"
depends on F2FS_FS
depends on FUNCTION_TRACER
help
F2FS IO trace is based on a function trace, which gathers process
information and block IO patterns in the filesystem level.
If unsure, say N.
config F2FS_FAULT_INJECTION
bool "F2FS fault injection facility"
depends on F2FS_FS
@@ -112,26 +109,51 @@ config F2FS_FS_COMPRESSION
config F2FS_FS_LZO
bool "LZO compression support"
depends on F2FS_FS_COMPRESSION
select LZO_COMPRESS
select LZO_DECOMPRESS
default y
help
Support LZO compress algorithm, if unsure, say Y.
config F2FS_FS_LZORLE
bool "LZO-RLE compression support"
depends on F2FS_FS_LZO
default y
help
Support LZO-RLE compress algorithm, if unsure, say Y.
config F2FS_FS_LZ4
bool "LZ4 compression support"
depends on F2FS_FS_COMPRESSION
select LZ4_COMPRESS
select LZ4_DECOMPRESS
default y
help
Support LZ4 compress algorithm, if unsure, say Y.
config F2FS_FS_LZ4HC
bool "LZ4HC compression support"
depends on F2FS_FS_LZ4
default y
help
Support LZ4HC compress algorithm, LZ4HC has compatible on-disk
layout with LZ4, if unsure, say Y.
config F2FS_FS_ZSTD
bool "ZSTD compression support"
depends on F2FS_FS_COMPRESSION
select ZSTD_COMPRESS
select ZSTD_DECOMPRESS
default y
help
Support ZSTD compress algorithm, if unsure, say Y.
config F2FS_IOSTAT
bool "F2FS IO statistics information"
depends on F2FS_FS
default y
help
Support getting IO statistics through sysfs and printing out periodic
IO statistics tracepoint events. You have to turn on "iostat_enable"
sysfs node to enable this feature.
config F2FS_UNFAIR_RWSEM
bool "F2FS unfair rw_semaphore"
depends on F2FS_FS && BLK_CGROUP
help
Use unfair rw_semaphore, if system configured IO priority by block
cgroup.
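The new F2FS_IOSTAT entry replaces the removed F2FS_IO_TRACE tracer; as its help text notes, statistics only flow once the "iostat_enable" sysfs node is switched on. A small userspace sketch (the sda device name is an assumption; check /sys/fs/f2fs/ for the directory matching the actual f2fs block device):

	#include <stdio.h>

	int main(void)
	{
		/* Hypothetical device name; substitute the real one. */
		FILE *f = fopen("/sys/fs/f2fs/sda/iostat_enable", "w");

		if (!f) {
			perror("fopen");
			return 1;
		}
		fputs("1\n", f);	/* enable periodic iostat tracepoints */
		fclose(f);
		return 0;
	}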

View File

@@ -7,6 +7,6 @@ f2fs-y += shrinker.o extent_cache.o sysfs.o
f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o
f2fs-$(CONFIG_FS_VERITY) += verity.o
f2fs-$(CONFIG_F2FS_FS_COMPRESSION) += compress.o
f2fs-$(CONFIG_F2FS_IOSTAT) += iostat.o

View File

@@ -29,6 +29,7 @@ static inline size_t f2fs_acl_size(int count)
static inline int f2fs_acl_count(size_t size)
{
ssize_t s;
size -= sizeof(struct f2fs_acl_header);
s = size - 4 * sizeof(struct f2fs_acl_entry_short);
if (s < 0) {
@@ -160,7 +161,7 @@ static void *f2fs_acl_to_disk(struct f2fs_sb_info *sbi,
return (void *)f2fs_acl;
fail:
kvfree(f2fs_acl);
kfree(f2fs_acl);
return ERR_PTR(-EINVAL);
}
@@ -190,7 +191,7 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type,
acl = NULL;
else
acl = ERR_PTR(retval);
kvfree(value);
kfree(value);
return acl;
}
@@ -261,7 +262,7 @@ static int __f2fs_set_acl(struct inode *inode, int type,
error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0);
kvfree(value);
kfree(value);
if (!error)
set_cached_acl(inode, type, acl);
@@ -405,7 +406,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
struct page *dpage)
{
struct posix_acl *default_acl = NULL, *acl = NULL;
int error = 0;
int error;
error = f2fs_acl_create(dir, &inode->i_mode, &default_acl, &acl, dpage);
if (error)

View File

@@ -13,22 +13,29 @@
#include <linux/f2fs_fs.h>
#include <linux/pagevec.h>
#include <linux/swap.h>
#include <linux/kthread.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "trace.h"
#include "iostat.h"
#include <trace/events/f2fs.h>
#define DEFAULT_CHECKPOINT_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 3))
static struct kmem_cache *ino_entry_slab;
struct kmem_cache *f2fs_inode_entry_slab;
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
unsigned char reason)
{
f2fs_build_fault_attr(sbi, 0, 0);
set_ckpt_flags(sbi, CP_ERROR_FLAG);
if (!end_io)
if (!end_io) {
f2fs_flush_merged_writes(sbi);
f2fs_handle_stop(sbi, reason);
}
}
/*
@@ -37,7 +44,7 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
struct address_space *mapping = META_MAPPING(sbi);
struct page *page = NULL;
struct page *page;
repeat:
page = f2fs_grab_cache_page(mapping, index, false);
if (!page) {
@@ -95,6 +102,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
}
if (unlikely(!PageUptodate(page))) {
f2fs_handle_page_eio(sbi, page->index, META);
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
}
@@ -107,7 +115,7 @@ struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
return __get_meta_page(sbi, index, true);
}
struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index)
struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index)
{
struct page *page;
int count = 0;
@@ -118,7 +126,7 @@ struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index)
if (PTR_ERR(page) == -EIO &&
++count <= DEFAULT_RETRY_IO_COUNT)
goto retry;
f2fs_stop_checkpoint(sbi, false);
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_META_PAGE);
}
return page;
}
@@ -136,7 +144,7 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
unsigned int segno, offset;
bool exist;
if (type != DATA_GENERIC_ENHANCE && type != DATA_GENERIC_ENHANCE_READ)
if (type == DATA_GENERIC)
return true;
segno = GET_SEGNO(sbi, blkaddr);
@@ -144,11 +152,18 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
se = get_seg_entry(sbi, segno);
exist = f2fs_test_bit(offset, se->cur_valid_map);
if (exist && type == DATA_GENERIC_ENHANCE_UPDATE) {
f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d",
blkaddr, exist);
set_sbi_flag(sbi, SBI_NEED_FSCK);
return exist;
}
if (!exist && type == DATA_GENERIC_ENHANCE) {
f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d",
blkaddr, exist);
set_sbi_flag(sbi, SBI_NEED_FSCK);
WARN_ON(1);
dump_stack();
}
return exist;
}
@@ -156,6 +171,11 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
if (time_to_inject(sbi, FAULT_BLKADDR)) {
f2fs_show_injection_info(sbi, FAULT_BLKADDR);
return false;
}
switch (type) {
case META_NAT:
break;
@@ -181,12 +201,13 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
case DATA_GENERIC:
case DATA_GENERIC_ENHANCE:
case DATA_GENERIC_ENHANCE_READ:
case DATA_GENERIC_ENHANCE_UPDATE:
if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
blkaddr < MAIN_BLKADDR(sbi))) {
f2fs_warn(sbi, "access invalid blkaddr:%u",
blkaddr);
set_sbi_flag(sbi, SBI_NEED_FSCK);
WARN_ON(1);
dump_stack();
return false;
} else {
return __is_bitmap_valid(sbi, blkaddr, type);
@@ -279,18 +300,22 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
return blkno - start;
}
void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index,
unsigned int ra_blocks)
{
struct page *page;
bool readahead = false;
if (ra_blocks == RECOVERY_MIN_RA_BLOCKS)
return;
page = find_get_page(META_MAPPING(sbi), index);
if (!page || !PageUptodate(page))
readahead = true;
f2fs_put_page(page, 0);
if (readahead)
f2fs_ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true);
f2fs_ra_meta_pages(sbi, index, ra_blocks, META_POR, true);
}
static int __f2fs_write_meta_page(struct page *page,
@@ -355,13 +380,13 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
goto skip_write;
/* if locked failed, cp will flush dirty pages instead */
if (!down_write_trylock(&sbi->cp_global_sem))
if (!f2fs_down_write_trylock(&sbi->cp_global_sem))
goto skip_write;
trace_f2fs_writepages(mapping->host, wbc, META);
diff = nr_pages_to_write(sbi, META, wbc);
written = f2fs_sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO);
up_write(&sbi->cp_global_sem);
f2fs_up_write(&sbi->cp_global_sem);
wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
return 0;
@@ -446,11 +471,9 @@ static int f2fs_set_meta_page_dirty(struct page *page)
if (!PageUptodate(page))
SetPageUptodate(page);
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
if (__set_page_dirty_nobuffers(page)) {
inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
f2fs_set_page_private(page, 0);
f2fs_trace_pid(page);
set_page_private_reference(page);
return 1;
}
return 0;
@@ -471,16 +494,29 @@ static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino,
unsigned int devidx, int type)
{
struct inode_management *im = &sbi->im[type];
struct ino_entry *e, *tmp;
struct ino_entry *e = NULL, *new = NULL;
tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);
if (type == FLUSH_INO) {
rcu_read_lock();
e = radix_tree_lookup(&im->ino_root, ino);
rcu_read_unlock();
}
retry:
if (!e)
new = f2fs_kmem_cache_alloc(ino_entry_slab,
GFP_NOFS, true, NULL);
radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
spin_lock(&im->ino_lock);
e = radix_tree_lookup(&im->ino_root, ino);
if (!e) {
e = tmp;
if (!new) {
spin_unlock(&im->ino_lock);
goto retry;
}
e = new;
if (unlikely(radix_tree_insert(&im->ino_root, ino, e)))
f2fs_bug_on(sbi, 1);
@@ -498,8 +534,8 @@ static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino,
spin_unlock(&im->ino_lock);
radix_tree_preload_end();
if (e != tmp)
kmem_cache_free(ino_entry_slab, tmp);
if (new && e != new)
kmem_cache_free(ino_entry_slab, new);
}
static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
@@ -532,7 +568,7 @@ void f2fs_remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
__remove_ino_entry(sbi, ino, type);
}
/* mode should be APPEND_INO or UPDATE_INO */
/* mode should be APPEND_INO, UPDATE_INO or TRANS_DIR_INO */
bool f2fs_exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
{
struct inode_management *im = &sbi->im[mode];
@@ -645,7 +681,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
return PTR_ERR(inode);
}
err = dquot_initialize(inode);
err = f2fs_dquot_initialize(inode);
if (err) {
iput(inode);
goto err_out;
@@ -656,7 +692,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
/* truncate all the data during iput */
iput(inode);
err = f2fs_get_node_info(sbi, ino, &ni);
err = f2fs_get_node_info(sbi, ino, &ni, false);
if (err)
goto err_out;
@@ -697,9 +733,6 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
}
#ifdef CONFIG_QUOTA
/* Needed for iput() to work correctly and not trash data */
sbi->sb->s_flags |= SB_ACTIVE;
/*
* Turn on quotas which were not enabled for read-only mounts if
* filesystem has quota feature, so that they are updated correctly.
@@ -725,6 +758,7 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
orphan_blk = (struct f2fs_orphan_block *)page_address(page);
for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
err = recover_orphan_inode(sbi, ino);
if (err) {
f2fs_put_page(page, 1);
@@ -858,6 +892,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
struct page *cp_page_1 = NULL, *cp_page_2 = NULL;
struct f2fs_checkpoint *cp_block = NULL;
unsigned long long cur_version = 0, pre_version = 0;
unsigned int cp_blocks;
int err;
err = get_checkpoint_version(sbi, cp_addr, &cp_block,
@@ -865,15 +900,16 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
if (err)
return NULL;
if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
sbi->blocks_per_seg) {
cp_blocks = le32_to_cpu(cp_block->cp_pack_total_block_count);
if (cp_blocks > sbi->blocks_per_seg || cp_blocks <= F2FS_CP_PACKS) {
f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u",
le32_to_cpu(cp_block->cp_pack_total_block_count));
goto invalid_cp;
}
pre_version = *version;
cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
cp_addr += cp_blocks - 1;
err = get_checkpoint_version(sbi, cp_addr, &cp_block,
&cp_page_2, version);
if (err)
@@ -990,9 +1026,7 @@ static void __add_dirty_inode(struct inode *inode, enum inode_type type)
return;
set_inode_flag(inode, flag);
if (!f2fs_is_volatile_file(inode))
list_add_tail(&F2FS_I(inode)->dirty_list,
&sbi->inode_list[type]);
list_add_tail(&F2FS_I(inode)->dirty_list, &sbi->inode_list[type]);
stat_inc_dirty_inode(sbi, type);
}
@@ -1023,8 +1057,7 @@ void f2fs_update_dirty_page(struct inode *inode, struct page *page)
inode_inc_dirty_pages(inode);
spin_unlock(&sbi->inode_lock[type]);
f2fs_set_page_private(page, 0);
f2fs_trace_pid(page);
set_page_private_reference(page);
}
void f2fs_remove_dirty_inode(struct inode *inode)
@@ -1044,7 +1077,8 @@ void f2fs_remove_dirty_inode(struct inode *inode)
spin_unlock(&sbi->inode_lock[type]);
}
int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type,
bool from_cp)
{
struct list_head *head;
struct inode *inode;
@@ -1079,11 +1113,15 @@ int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
if (inode) {
unsigned long cur_ino = inode->i_ino;
F2FS_I(inode)->cp_task = current;
if (from_cp)
F2FS_I(inode)->cp_task = current;
F2FS_I(inode)->wb_task = current;
filemap_fdatawrite(inode->i_mapping);
F2FS_I(inode)->cp_task = NULL;
F2FS_I(inode)->wb_task = NULL;
if (from_cp)
F2FS_I(inode)->cp_task = NULL;
iput(inode);
/* We need to give cpu to another writers. */
@@ -1154,7 +1192,8 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi)
if (!is_journalled_quota(sbi))
return false;
down_write(&sbi->quota_sem);
if (!f2fs_down_write_trylock(&sbi->quota_sem))
return true;
if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) {
ret = false;
} else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) {
@@ -1165,7 +1204,7 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi)
} else if (get_pages(sbi, F2FS_DIRTY_QDATA)) {
ret = true;
}
up_write(&sbi->quota_sem);
f2fs_up_write(&sbi->quota_sem);
return ret;
}
@@ -1211,7 +1250,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
/* write all the dirty dentry pages */
if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
f2fs_unlock_all(sbi);
err = f2fs_sync_dirty_inodes(sbi, DIR_INODE);
err = f2fs_sync_dirty_inodes(sbi, DIR_INODE, true);
if (err)
return err;
cond_resched();
@@ -1222,10 +1261,10 @@ static int block_operations(struct f2fs_sb_info *sbi)
* POR: we should ensure that there are no dirty node pages
* until finishing nat/sit flush. inode->i_blocks can be updated.
*/
down_write(&sbi->node_change);
f2fs_down_write(&sbi->node_change);
if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
up_write(&sbi->node_change);
f2fs_up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
err = f2fs_sync_inode_meta(sbi);
if (err)
@@ -1235,15 +1274,15 @@ static int block_operations(struct f2fs_sb_info *sbi)
}
retry_flush_nodes:
down_write(&sbi->node_write);
f2fs_down_write(&sbi->node_write);
if (get_pages(sbi, F2FS_DIRTY_NODES)) {
up_write(&sbi->node_write);
f2fs_up_write(&sbi->node_write);
atomic_inc(&sbi->wb_sync_req[NODE]);
err = f2fs_sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO);
atomic_dec(&sbi->wb_sync_req[NODE]);
if (err) {
up_write(&sbi->node_change);
f2fs_up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
return err;
}
@@ -1256,13 +1295,13 @@ static int block_operations(struct f2fs_sb_info *sbi)
* dirty node blocks and some checkpoint values by block allocation.
*/
__prepare_cp_block(sbi);
up_write(&sbi->node_change);
f2fs_up_write(&sbi->node_change);
return err;
}
static void unblock_operations(struct f2fs_sb_info *sbi)
{
up_write(&sbi->node_write);
f2fs_up_write(&sbi->node_write);
f2fs_unlock_all(sbi);
}
@@ -1296,12 +1335,20 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
unsigned long flags;
spin_lock_irqsave(&sbi->cp_lock, flags);
if (cpc->reason & CP_UMOUNT) {
if (le32_to_cpu(ckpt->cp_pack_total_block_count) +
NM_I(sbi)->nat_bits_blocks > sbi->blocks_per_seg) {
clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
f2fs_notice(sbi, "Disable nat_bits due to no space");
} else if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG) &&
f2fs_nat_bitmap_enabled(sbi)) {
f2fs_enable_nat_bits(sbi);
set_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
f2fs_notice(sbi, "Rebuild and enable nat_bits");
}
}
if ((cpc->reason & CP_UMOUNT) &&
le32_to_cpu(ckpt->cp_pack_total_block_count) >
sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
disable_nat_bits(sbi, false);
spin_lock_irqsave(&sbi->cp_lock, flags);
if (cpc->reason & CP_TRIMMED)
__set_ckpt_flags(ckpt, CP_TRIMMED_FLAG);
@@ -1393,6 +1440,26 @@ static void commit_checkpoint(struct f2fs_sb_info *sbi,
f2fs_submit_merged_write(sbi, META_FLUSH);
}
static inline u64 get_sectors_written(struct block_device *bdev)
{
return (u64)part_stat_read(bdev->bd_part, sectors[STAT_WRITE]);
}
u64 f2fs_get_sectors_written(struct f2fs_sb_info *sbi)
{
if (f2fs_is_multi_device(sbi)) {
u64 sectors = 0;
int i;
for (i = 0; i < sbi->s_ndevs; i++)
sectors += get_sectors_written(FDEV(i).bdev);
return sectors;
}
return get_sectors_written(sbi->sb->s_bdev);
}
static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
@@ -1403,7 +1470,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
__u32 crc32 = 0;
int i;
int cp_payload_blks = __cp_payload(sbi);
struct super_block *sb = sbi->sb;
struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
u64 kbytes_written;
int err;
@@ -1435,7 +1501,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
}
/* 2 cp + n data seg summary + orphan inode blocks */
/* 2 cp + n data seg summary + orphan inode blocks */
data_sum_blocks = f2fs_npages_for_summary_flush(sbi, false);
spin_lock_irqsave(&sbi->cp_lock, flags);
if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
@@ -1449,7 +1515,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
orphan_blocks);
if (__remain_node_summaries(cpc->reason))
ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
cp_payload_blks + data_sum_blocks +
orphan_blocks + NR_CURSEG_NODE_TYPE);
else
@@ -1472,7 +1538,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
start_blk = __start_cp_next_addr(sbi);
/* write nat bits */
if (enabled_nat_bits(sbi, cpc)) {
if ((cpc->reason & CP_UMOUNT) &&
is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
__u64 cp_ver = cur_cp_version(ckpt);
block_t blk;
@@ -1502,9 +1569,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* Record write statistics in the hot node summary */
kbytes_written = sbi->kbytes_written;
if (sb->s_bdev->bd_part)
kbytes_written += BD_PART_WRITTEN(sbi);
kbytes_written += (f2fs_get_sectors_written(sbi) -
sbi->sectors_written_start) >> 1;
seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written);
if (__remain_node_summaries(cpc->reason)) {
@@ -1515,6 +1581,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* update user_block_counts */
sbi->last_valid_block_count = sbi->total_valid_block_count;
percpu_counter_set(&sbi->alloc_valid_block_count, 0);
percpu_counter_set(&sbi->rf_node_block_count, 0);
/* Here, we have one bio having CP pack except cp pack 2 page */
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
@@ -1539,9 +1606,10 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/*
* invalidate intermediate page cache borrowed from meta inode which are
* used for migration of encrypted or verity inode's blocks.
* used for migration of encrypted, verity or compressed inode's blocks.
*/
if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi))
if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi) ||
f2fs_sb_has_compression(sbi))
invalidate_mapping_pages(META_MAPPING(sbi),
MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);
@@ -1587,7 +1655,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_warn(sbi, "Start checkpoint disabled!");
}
if (cpc->reason != CP_RESIZE)
down_write(&sbi->cp_global_sem);
f2fs_down_write(&sbi->cp_global_sem);
if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
@@ -1615,7 +1683,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
goto out;
}
if (NM_I(sbi)->dirty_nat_cnt == 0 &&
if (NM_I(sbi)->nat_cnt[DIRTY_NAT] == 0 &&
SIT_I(sbi)->dirty_sentries == 0 &&
prefree_segments(sbi) == 0) {
f2fs_flush_sit_entries(sbi, cpc);
@@ -1635,16 +1703,27 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* write cached NAT/SIT entries to NAT/SIT area */
err = f2fs_flush_nat_entries(sbi, cpc);
if (err)
if (err) {
f2fs_err(sbi, "f2fs_flush_nat_entries failed err:%d, stop checkpoint", err);
f2fs_bug_on(sbi, !f2fs_cp_error(sbi));
goto stop;
}
f2fs_flush_sit_entries(sbi, cpc);
/* save inmem log status */
f2fs_save_inmem_curseg(sbi);
err = do_checkpoint(sbi, cpc);
if (err)
if (err) {
f2fs_err(sbi, "do_checkpoint failed err:%d, stop checkpoint", err);
f2fs_bug_on(sbi, !f2fs_cp_error(sbi));
f2fs_release_discard_addrs(sbi);
else
} else {
f2fs_clear_prefree_segments(sbi, cpc);
}
f2fs_restore_inmem_curseg(sbi);
stop:
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
@@ -1657,7 +1736,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
out:
if (cpc->reason != CP_RESIZE)
up_write(&sbi->cp_global_sem);
f2fs_up_write(&sbi->cp_global_sem);
return err;
}
@@ -1675,7 +1754,7 @@ void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi)
}
sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
NR_CURSEG_TYPE - __cp_payload(sbi)) *
NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) *
F2FS_ORPHANS_PER_BLOCK;
}
@@ -1699,3 +1778,192 @@ void f2fs_destroy_checkpoint_caches(void)
kmem_cache_destroy(ino_entry_slab);
kmem_cache_destroy(f2fs_inode_entry_slab);
}
static int __write_checkpoint_sync(struct f2fs_sb_info *sbi)
{
struct cp_control cpc = { .reason = CP_SYNC, };
int err;
f2fs_down_write(&sbi->gc_lock);
err = f2fs_write_checkpoint(sbi, &cpc);
f2fs_up_write(&sbi->gc_lock);
return err;
}
static void __checkpoint_and_complete_reqs(struct f2fs_sb_info *sbi)
{
struct ckpt_req_control *cprc = &sbi->cprc_info;
struct ckpt_req *req, *next;
struct llist_node *dispatch_list;
u64 sum_diff = 0, diff, count = 0;
int ret;
dispatch_list = llist_del_all(&cprc->issue_list);
if (!dispatch_list)
return;
dispatch_list = llist_reverse_order(dispatch_list);
ret = __write_checkpoint_sync(sbi);
atomic_inc(&cprc->issued_ckpt);
llist_for_each_entry_safe(req, next, dispatch_list, llnode) {
diff = (u64)ktime_ms_delta(ktime_get(), req->queue_time);
req->ret = ret;
complete(&req->wait);
sum_diff += diff;
count++;
}
atomic_sub(count, &cprc->queued_ckpt);
atomic_add(count, &cprc->total_ckpt);
spin_lock(&cprc->stat_lock);
cprc->cur_time = (unsigned int)div64_u64(sum_diff, count);
if (cprc->peak_time < cprc->cur_time)
cprc->peak_time = cprc->cur_time;
spin_unlock(&cprc->stat_lock);
}
static int issue_checkpoint_thread(void *data)
{
struct f2fs_sb_info *sbi = data;
struct ckpt_req_control *cprc = &sbi->cprc_info;
wait_queue_head_t *q = &cprc->ckpt_wait_queue;
repeat:
if (kthread_should_stop())
return 0;
if (!llist_empty(&cprc->issue_list))
__checkpoint_and_complete_reqs(sbi);
wait_event_interruptible(*q,
kthread_should_stop() || !llist_empty(&cprc->issue_list));
goto repeat;
}
static void flush_remained_ckpt_reqs(struct f2fs_sb_info *sbi,
struct ckpt_req *wait_req)
{
struct ckpt_req_control *cprc = &sbi->cprc_info;
if (!llist_empty(&cprc->issue_list)) {
__checkpoint_and_complete_reqs(sbi);
} else {
/* already dispatched by issue_checkpoint_thread */
if (wait_req)
wait_for_completion(&wait_req->wait);
}
}
static void init_ckpt_req(struct ckpt_req *req)
{
memset(req, 0, sizeof(struct ckpt_req));
init_completion(&req->wait);
req->queue_time = ktime_get();
}
int f2fs_issue_checkpoint(struct f2fs_sb_info *sbi)
{
struct ckpt_req_control *cprc = &sbi->cprc_info;
struct ckpt_req req;
struct cp_control cpc;
cpc.reason = __get_cp_reason(sbi);
if (!test_opt(sbi, MERGE_CHECKPOINT) || cpc.reason != CP_SYNC) {
int ret;
f2fs_down_write(&sbi->gc_lock);
ret = f2fs_write_checkpoint(sbi, &cpc);
f2fs_up_write(&sbi->gc_lock);
return ret;
}
if (!cprc->f2fs_issue_ckpt)
return __write_checkpoint_sync(sbi);
init_ckpt_req(&req);
llist_add(&req.llnode, &cprc->issue_list);
atomic_inc(&cprc->queued_ckpt);
/*
* update issue_list before we wake up issue_checkpoint thread,
* this smp_mb() pairs with another barrier in ___wait_event(),
* see more details in comments of waitqueue_active().
*/
smp_mb();
if (waitqueue_active(&cprc->ckpt_wait_queue))
wake_up(&cprc->ckpt_wait_queue);
if (cprc->f2fs_issue_ckpt)
wait_for_completion(&req.wait);
else
flush_remained_ckpt_reqs(sbi, &req);
return req.ret;
}
int f2fs_start_ckpt_thread(struct f2fs_sb_info *sbi)
{
dev_t dev = sbi->sb->s_bdev->bd_dev;
struct ckpt_req_control *cprc = &sbi->cprc_info;
if (cprc->f2fs_issue_ckpt)
return 0;
cprc->f2fs_issue_ckpt = kthread_run(issue_checkpoint_thread, sbi,
"f2fs_ckpt-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(cprc->f2fs_issue_ckpt)) {
int err = PTR_ERR(cprc->f2fs_issue_ckpt);
cprc->f2fs_issue_ckpt = NULL;
return err;
}
set_task_ioprio(cprc->f2fs_issue_ckpt, cprc->ckpt_thread_ioprio);
return 0;
}
void f2fs_stop_ckpt_thread(struct f2fs_sb_info *sbi)
{
struct ckpt_req_control *cprc = &sbi->cprc_info;
struct task_struct *ckpt_task;
if (!cprc->f2fs_issue_ckpt)
return;
ckpt_task = cprc->f2fs_issue_ckpt;
cprc->f2fs_issue_ckpt = NULL;
kthread_stop(ckpt_task);
f2fs_flush_ckpt_thread(sbi);
}
void f2fs_flush_ckpt_thread(struct f2fs_sb_info *sbi)
{
struct ckpt_req_control *cprc = &sbi->cprc_info;
flush_remained_ckpt_reqs(sbi, NULL);
/* Let's wait for the previous dispatched checkpoint. */
while (atomic_read(&cprc->queued_ckpt))
io_schedule_timeout(DEFAULT_IO_TIMEOUT);
}
void f2fs_init_ckpt_req_control(struct f2fs_sb_info *sbi)
{
struct ckpt_req_control *cprc = &sbi->cprc_info;
atomic_set(&cprc->issued_ckpt, 0);
atomic_set(&cprc->total_ckpt, 0);
atomic_set(&cprc->queued_ckpt, 0);
cprc->ckpt_thread_ioprio = DEFAULT_CHECKPOINT_IOPRIO;
init_waitqueue_head(&cprc->ckpt_wait_queue);
init_llist_head(&cprc->issue_list);
spin_lock_init(&cprc->stat_lock);
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -21,7 +21,7 @@
#include "gc.h"
static LIST_HEAD(f2fs_stat_list);
static DEFINE_MUTEX(f2fs_stat_mutex);
static DEFINE_RAW_SPINLOCK(f2fs_stat_lock);
#ifdef CONFIG_DEBUG_FS
static struct dentry *f2fs_debugfs_root;
#endif
@@ -39,7 +39,7 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi)
bimodal = 0;
total_vblocks = 0;
blks_per_sec = BLKS_PER_SEC(sbi);
blks_per_sec = CAP_BLKS_PER_SEC(sbi);
hblks_per_sec = blks_per_sec / 2;
for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
vblocks = get_valid_blocks(sbi, segno, true);
@@ -72,15 +72,26 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->main_area_zones = si->main_area_sections /
le32_to_cpu(raw_super->secs_per_zone);
/* validation check of the segment numbers */
/* general extent cache stats */
for (i = 0; i < NR_EXTENT_CACHES; i++) {
struct extent_tree_info *eti = &sbi->extent_tree[i];
si->hit_cached[i] = atomic64_read(&sbi->read_hit_cached[i]);
si->hit_rbtree[i] = atomic64_read(&sbi->read_hit_rbtree[i]);
si->total_ext[i] = atomic64_read(&sbi->total_hit_ext[i]);
si->hit_total[i] = si->hit_cached[i] + si->hit_rbtree[i];
si->ext_tree[i] = atomic_read(&eti->total_ext_tree);
si->zombie_tree[i] = atomic_read(&eti->total_zombie_tree);
si->ext_node[i] = atomic_read(&eti->total_ext_node);
}
/* read extent_cache only */
si->hit_largest = atomic64_read(&sbi->read_hit_largest);
si->hit_cached = atomic64_read(&sbi->read_hit_cached);
si->hit_rbtree = atomic64_read(&sbi->read_hit_rbtree);
si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree;
si->total_ext = atomic64_read(&sbi->total_hit_ext);
si->ext_tree = atomic_read(&sbi->total_ext_tree);
si->zombie_tree = atomic_read(&sbi->total_zombie_tree);
si->ext_node = atomic_read(&sbi->total_ext_node);
si->hit_total[EX_READ] += si->hit_largest;
/* block age extent_cache only */
si->allocated_data_blocks = atomic64_read(&sbi->allocated_data_blocks);
/* validation check of the segment numbers */
si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
@@ -91,11 +102,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
si->nquota_files = sbi->nquota_files;
si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
si->aw_cnt = sbi->atomic_files;
si->vw_cnt = atomic_read(&sbi->vw_cnt);
si->aw_cnt = atomic_read(&sbi->atomic_files);
si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt);
si->nr_dio_read = get_pages(sbi, F2FS_DIO_READ);
si->nr_dio_write = get_pages(sbi, F2FS_DIO_WRITE);
si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
@@ -120,6 +128,13 @@ static void update_general_status(struct f2fs_sb_info *sbi)
atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt);
si->undiscard_blks = SM_I(sbi)->dcc_info->undiscard_blks;
}
si->nr_issued_ckpt = atomic_read(&sbi->cprc_info.issued_ckpt);
si->nr_total_ckpt = atomic_read(&sbi->cprc_info.total_ckpt);
si->nr_queued_ckpt = atomic_read(&sbi->cprc_info.queued_ckpt);
spin_lock(&sbi->cprc_info.stat_lock);
si->cur_ckpt_time = sbi->cprc_info.cur_time;
si->peak_ckpt_time = sbi->cprc_info.peak_time;
spin_unlock(&sbi->cprc_info.stat_lock);
si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
si->rsvd_segs = reserved_segments(sbi);
si->overp_segs = overprovision_segments(sbi);
@@ -131,7 +146,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->inline_inode = atomic_read(&sbi->inline_inode);
si->inline_dir = atomic_read(&sbi->inline_dir);
si->compr_inode = atomic_read(&sbi->compr_inode);
si->compr_blocks = atomic_read(&sbi->compr_blocks);
si->swapfile_inode = atomic_read(&sbi->swapfile_inode);
si->compr_blocks = atomic64_read(&sbi->compr_blocks);
si->append = sbi->im[APPEND_INO].ino_num;
si->update = sbi->im[UPDATE_INO].ino_num;
si->orphans = sbi->im[ORPHAN_INO].ino_num;
@@ -145,8 +161,14 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->node_pages = NODE_MAPPING(sbi)->nrpages;
if (sbi->meta_inode)
si->meta_pages = META_MAPPING(sbi)->nrpages;
si->nats = NM_I(sbi)->nat_cnt;
si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
#ifdef CONFIG_F2FS_FS_COMPRESSION
if (sbi->compress_inode) {
si->compress_pages = COMPRESS_MAPPING(sbi)->nrpages;
si->compress_page_hit = atomic_read(&sbi->compress_page_hit);
}
#endif
si->nats = NM_I(sbi)->nat_cnt[TOTAL_NAT];
si->dirty_nats = NM_I(sbi)->nat_cnt[DIRTY_NAT];
si->sits = MAIN_SEGS(sbi);
si->dirty_sits = SIT_I(sbi)->dirty_sentries;
si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID];
@@ -154,8 +176,6 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID];
si->io_skip_bggc = sbi->io_skip_bggc;
si->other_skip_bggc = sbi->other_skip_bggc;
si->skipped_atomic_files[BG_GC] = sbi->skipped_atomic_files[BG_GC];
si->skipped_atomic_files[FG_GC] = sbi->skipped_atomic_files[FG_GC];
si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
/ 2;
@@ -164,8 +184,9 @@ static void update_general_status(struct f2fs_sb_info *sbi)
* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
/ 2;
si->util_invalid = 50 - si->util_free - si->util_valid;
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
struct curseg_info *curseg = CURSEG_I(sbi, i);
si->curseg[i] = curseg->segno;
si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]);
@@ -174,6 +195,26 @@ static void update_general_status(struct f2fs_sb_info *sbi)
for (i = META_CP; i < META_MAX; i++)
si->meta_count[i] = atomic_read(&sbi->meta_count[i]);
for (i = 0; i < NO_CHECK_TYPE; i++) {
si->dirty_seg[i] = 0;
si->full_seg[i] = 0;
si->valid_blks[i] = 0;
}
for (i = 0; i < MAIN_SEGS(sbi); i++) {
int blks = get_seg_entry(sbi, i)->valid_blocks;
int type = get_seg_entry(sbi, i)->type;
if (!blks)
continue;
if (blks == sbi->blocks_per_seg)
si->full_seg[type]++;
else
si->dirty_seg[type]++;
si->valid_blks[type] += blks;
}
for (i = 0; i < 2; i++) {
si->segment_count[i] = sbi->segment_count[i];
si->block_count[i] = sbi->block_count[i];
@@ -258,43 +299,82 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID] +
NM_I(sbi)->nid_cnt[PREALLOC_NID]) *
sizeof(struct free_nid);
si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry);
si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
sizeof(struct nat_entry_set);
si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages);
si->cache_mem += NM_I(sbi)->nat_cnt[TOTAL_NAT] *
sizeof(struct nat_entry);
si->cache_mem += NM_I(sbi)->nat_cnt[DIRTY_NAT] *
sizeof(struct nat_entry_set);
for (i = 0; i < MAX_INO_ENTRY; i++)
si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
si->cache_mem += atomic_read(&sbi->total_ext_tree) *
for (i = 0; i < NR_EXTENT_CACHES; i++) {
struct extent_tree_info *eti = &sbi->extent_tree[i];
si->ext_mem[i] = atomic_read(&eti->total_ext_tree) *
sizeof(struct extent_tree);
si->cache_mem += atomic_read(&sbi->total_ext_node) *
si->ext_mem[i] += atomic_read(&eti->total_ext_node) *
sizeof(struct extent_node);
si->cache_mem += si->ext_mem[i];
}
si->page_mem = 0;
if (sbi->node_inode) {
unsigned npages = NODE_MAPPING(sbi)->nrpages;
unsigned long npages = NODE_MAPPING(sbi)->nrpages;
si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
}
if (sbi->meta_inode) {
unsigned npages = META_MAPPING(sbi)->nrpages;
unsigned long npages = META_MAPPING(sbi)->nrpages;
si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
}
#ifdef CONFIG_F2FS_FS_COMPRESSION
if (sbi->compress_inode) {
unsigned long npages = COMPRESS_MAPPING(sbi)->nrpages;
si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
}
#endif
}
static char *s_flag[] = {
[SBI_IS_DIRTY] = " fs_dirty",
[SBI_IS_CLOSE] = " closing",
[SBI_NEED_FSCK] = " need_fsck",
[SBI_POR_DOING] = " recovering",
[SBI_NEED_SB_WRITE] = " sb_dirty",
[SBI_NEED_CP] = " need_cp",
[SBI_IS_SHUTDOWN] = " shutdown",
[SBI_IS_RECOVERED] = " recovered",
[SBI_CP_DISABLED] = " cp_disabled",
[SBI_CP_DISABLED_QUICK] = " cp_disabled_quick",
[SBI_QUOTA_NEED_FLUSH] = " quota_need_flush",
[SBI_QUOTA_SKIP_FLUSH] = " quota_skip_flush",
[SBI_QUOTA_NEED_REPAIR] = " quota_need_repair",
[SBI_IS_RESIZEFS] = " resizefs",
[SBI_IS_FREEZING] = " freezefs",
};
static int stat_show(struct seq_file *s, void *v)
{
struct f2fs_stat_info *si;
int i = 0;
int j;
int i = 0, j = 0;
unsigned long flags;
mutex_lock(&f2fs_stat_mutex);
raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
list_for_each_entry(si, &f2fs_stat_list, stat_list) {
update_general_status(si->sbi);
seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n",
si->sbi->sb->s_bdev, i++,
f2fs_readonly(si->sbi->sb) ? "RO": "RW",
f2fs_readonly(si->sbi->sb) ? "RO" : "RW",
is_set_ckpt_flags(si->sbi, CP_DISABLED_FLAG) ?
"Disabled": (f2fs_cp_error(si->sbi) ? "Error": "Good"));
"Disabled" : (f2fs_cp_error(si->sbi) ? "Error" : "Good"));
if (si->sbi->s_flag) {
seq_puts(s, "[SBI:");
for_each_set_bit(j, &si->sbi->s_flag, 32)
seq_puts(s, s_flag[j]);
seq_puts(s, "]\n");
}
seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
si->sit_area_segs, si->nat_area_segs);
seq_printf(s, "[SSA: %d] [MAIN: %d",
@@ -322,37 +402,67 @@ static int stat_show(struct seq_file *s, void *v)
si->inline_inode);
seq_printf(s, " - Inline_dentry Inode: %u\n",
si->inline_dir);
seq_printf(s, " - Compressed Inode: %u, Blocks: %u\n",
seq_printf(s, " - Compressed Inode: %u, Blocks: %llu\n",
si->compr_inode, si->compr_blocks);
seq_printf(s, " - Swapfile Inode: %u\n",
si->swapfile_inode);
seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n",
si->orphans, si->append, si->update);
seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
si->main_area_segs, si->main_area_sections,
si->main_area_zones);
seq_printf(s, " - COLD data: %d, %d, %d\n",
seq_printf(s, " TYPE %8s %8s %8s %10s %10s %10s\n",
"segno", "secno", "zoneno", "dirty_seg", "full_seg", "valid_blk");
seq_printf(s, " - COLD data: %8d %8d %8d %10u %10u %10u\n",
si->curseg[CURSEG_COLD_DATA],
si->cursec[CURSEG_COLD_DATA],
si->curzone[CURSEG_COLD_DATA]);
seq_printf(s, " - WARM data: %d, %d, %d\n",
si->curzone[CURSEG_COLD_DATA],
si->dirty_seg[CURSEG_COLD_DATA],
si->full_seg[CURSEG_COLD_DATA],
si->valid_blks[CURSEG_COLD_DATA]);
seq_printf(s, " - WARM data: %8d %8d %8d %10u %10u %10u\n",
si->curseg[CURSEG_WARM_DATA],
si->cursec[CURSEG_WARM_DATA],
si->curzone[CURSEG_WARM_DATA]);
seq_printf(s, " - HOT data: %d, %d, %d\n",
si->curzone[CURSEG_WARM_DATA],
si->dirty_seg[CURSEG_WARM_DATA],
si->full_seg[CURSEG_WARM_DATA],
si->valid_blks[CURSEG_WARM_DATA]);
seq_printf(s, " - HOT data: %8d %8d %8d %10u %10u %10u\n",
si->curseg[CURSEG_HOT_DATA],
si->cursec[CURSEG_HOT_DATA],
si->curzone[CURSEG_HOT_DATA]);
seq_printf(s, " - Dir dnode: %d, %d, %d\n",
si->curzone[CURSEG_HOT_DATA],
si->dirty_seg[CURSEG_HOT_DATA],
si->full_seg[CURSEG_HOT_DATA],
si->valid_blks[CURSEG_HOT_DATA]);
seq_printf(s, " - Dir dnode: %8d %8d %8d %10u %10u %10u\n",
si->curseg[CURSEG_HOT_NODE],
si->cursec[CURSEG_HOT_NODE],
si->curzone[CURSEG_HOT_NODE]);
seq_printf(s, " - File dnode: %d, %d, %d\n",
si->curzone[CURSEG_HOT_NODE],
si->dirty_seg[CURSEG_HOT_NODE],
si->full_seg[CURSEG_HOT_NODE],
si->valid_blks[CURSEG_HOT_NODE]);
seq_printf(s, " - File dnode: %8d %8d %8d %10u %10u %10u\n",
si->curseg[CURSEG_WARM_NODE],
si->cursec[CURSEG_WARM_NODE],
si->curzone[CURSEG_WARM_NODE]);
seq_printf(s, " - Indir nodes: %d, %d, %d\n",
si->curzone[CURSEG_WARM_NODE],
si->dirty_seg[CURSEG_WARM_NODE],
si->full_seg[CURSEG_WARM_NODE],
si->valid_blks[CURSEG_WARM_NODE]);
seq_printf(s, " - Indir nodes: %8d %8d %8d %10u %10u %10u\n",
si->curseg[CURSEG_COLD_NODE],
si->cursec[CURSEG_COLD_NODE],
si->curzone[CURSEG_COLD_NODE]);
si->curzone[CURSEG_COLD_NODE],
si->dirty_seg[CURSEG_COLD_NODE],
si->full_seg[CURSEG_COLD_NODE],
si->valid_blks[CURSEG_COLD_NODE]);
seq_printf(s, " - Pinned file: %8d %8d %8d\n",
si->curseg[CURSEG_COLD_DATA_PINNED],
si->cursec[CURSEG_COLD_DATA_PINNED],
si->curzone[CURSEG_COLD_DATA_PINNED]);
seq_printf(s, " - ATGC data: %8d %8d %8d\n",
si->curseg[CURSEG_ALL_DATA_ATGC],
si->cursec[CURSEG_ALL_DATA_ATGC],
si->curzone[CURSEG_ALL_DATA_ATGC]);
seq_printf(s, "\n - Valid: %d\n - Dirty: %d\n",
si->main_area_segs - si->dirty_count -
si->prefree_count - si->free_segs,
@@ -368,50 +478,79 @@ static int stat_show(struct seq_file *s, void *v)
si->meta_count[META_NAT]);
seq_printf(s, " - ssa blocks : %u\n",
si->meta_count[META_SSA]);
seq_puts(s, "CP merge:\n");
seq_printf(s, " - Queued : %4d\n", si->nr_queued_ckpt);
seq_printf(s, " - Issued : %4d\n", si->nr_issued_ckpt);
seq_printf(s, " - Total : %4d\n", si->nr_total_ckpt);
seq_printf(s, " - Cur time : %4d(ms)\n", si->cur_ckpt_time);
seq_printf(s, " - Peak time : %4d(ms)\n", si->peak_ckpt_time);
seq_printf(s, "GC calls: %d (BG: %d)\n",
si->call_count, si->bg_gc);
seq_printf(s, " - data segments : %d (%d)\n",
si->data_segs, si->bg_data_segs);
seq_printf(s, " - node segments : %d (%d)\n",
si->node_segs, si->bg_node_segs);
seq_puts(s, " - Reclaimed segs :\n");
seq_printf(s, " - Normal : %d\n", si->sbi->gc_reclaimed_segs[GC_NORMAL]);
seq_printf(s, " - Idle CB : %d\n", si->sbi->gc_reclaimed_segs[GC_IDLE_CB]);
seq_printf(s, " - Idle Greedy : %d\n",
si->sbi->gc_reclaimed_segs[GC_IDLE_GREEDY]);
seq_printf(s, " - Idle AT : %d\n", si->sbi->gc_reclaimed_segs[GC_IDLE_AT]);
seq_printf(s, " - Urgent High : %d\n",
si->sbi->gc_reclaimed_segs[GC_URGENT_HIGH]);
seq_printf(s, " - Urgent Mid : %d\n", si->sbi->gc_reclaimed_segs[GC_URGENT_MID]);
seq_printf(s, " - Urgent Low : %d\n", si->sbi->gc_reclaimed_segs[GC_URGENT_LOW]);
seq_printf(s, "Try to move %d blocks (BG: %d)\n", si->tot_blks,
si->bg_data_blks + si->bg_node_blks);
seq_printf(s, " - data blocks : %d (%d)\n", si->data_blks,
si->bg_data_blks);
seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks,
si->bg_node_blks);
seq_printf(s, "Skipped : atomic write %llu (%llu)\n",
si->skipped_atomic_files[BG_GC] +
si->skipped_atomic_files[FG_GC],
si->skipped_atomic_files[BG_GC]);
seq_printf(s, "BG skip : IO: %u, Other: %u\n",
si->io_skip_bggc, si->other_skip_bggc);
seq_puts(s, "\nExtent Cache:\n");
seq_puts(s, "\nExtent Cache (Read):\n");
seq_printf(s, " - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n",
si->hit_largest, si->hit_cached,
si->hit_rbtree);
si->hit_largest, si->hit_cached[EX_READ],
si->hit_rbtree[EX_READ]);
seq_printf(s, " - Hit Ratio: %llu%% (%llu / %llu)\n",
!si->total_ext ? 0 :
div64_u64(si->hit_total * 100, si->total_ext),
si->hit_total, si->total_ext);
!si->total_ext[EX_READ] ? 0 :
div64_u64(si->hit_total[EX_READ] * 100,
si->total_ext[EX_READ]),
si->hit_total[EX_READ], si->total_ext[EX_READ]);
seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
si->ext_tree, si->zombie_tree, si->ext_node);
si->ext_tree[EX_READ], si->zombie_tree[EX_READ],
si->ext_node[EX_READ]);
seq_puts(s, "\nExtent Cache (Block Age):\n");
seq_printf(s, " - Allocated Data Blocks: %llu\n",
si->allocated_data_blocks);
seq_printf(s, " - Hit Count: L1:%llu L2:%llu\n",
si->hit_cached[EX_BLOCK_AGE],
si->hit_rbtree[EX_BLOCK_AGE]);
seq_printf(s, " - Hit Ratio: %llu%% (%llu / %llu)\n",
!si->total_ext[EX_BLOCK_AGE] ? 0 :
div64_u64(si->hit_total[EX_BLOCK_AGE] * 100,
si->total_ext[EX_BLOCK_AGE]),
si->hit_total[EX_BLOCK_AGE],
si->total_ext[EX_BLOCK_AGE]);
seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
si->ext_tree[EX_BLOCK_AGE],
si->zombie_tree[EX_BLOCK_AGE],
si->ext_node[EX_BLOCK_AGE]);
seq_puts(s, "\nBalancing F2FS Async:\n");
seq_printf(s, " - DIO (R: %4d, W: %4d)\n",
si->nr_dio_read, si->nr_dio_write);
seq_printf(s, " - IO_R (Data: %4d, Node: %4d, Meta: %4d\n",
si->nr_rd_data, si->nr_rd_node, si->nr_rd_meta);
seq_printf(s, " - IO_W (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), "
"Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n",
seq_printf(s, " - IO_W (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), ",
si->nr_wb_cp_data, si->nr_wb_data,
si->nr_flushing, si->nr_flushed,
si->flush_list_empty,
si->flush_list_empty);
seq_printf(s, "Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n",
si->nr_discarding, si->nr_discarded,
si->nr_discard_cmd, si->undiscard_blks);
seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d), "
"volatile IO: %4d (Max. %4d)\n",
si->inmem_pages, si->aw_cnt, si->max_aw_cnt,
si->vw_cnt, si->max_vw_cnt);
seq_printf(s, " - atomic IO: %4d (Max. %4d)\n",
si->aw_cnt, si->max_aw_cnt);
seq_printf(s, " - compress: %4d, hit:%8d\n", si->compress_pages, si->compress_page_hit);
seq_printf(s, " - nodes: %4d in %4d\n",
si->ndirty_node, si->node_pages);
seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n",
@@ -424,6 +563,9 @@ static int stat_show(struct seq_file *s, void *v)
si->ndirty_meta, si->meta_pages);
seq_printf(s, " - imeta: %4d\n",
si->ndirty_imeta);
seq_printf(s, " - fsync mark: %4lld\n",
percpu_counter_sum_positive(
&si->sbi->rf_node_block_count));
seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n",
si->dirty_nats, si->nats, si->dirty_sits, si->sits);
seq_printf(s, " - free_nids: %9d/%9d\n - alloc_nids: %9d\n",
@@ -460,12 +602,16 @@ static int stat_show(struct seq_file *s, void *v)
(si->base_mem + si->cache_mem + si->page_mem) >> 10);
seq_printf(s, " - static: %llu KB\n",
si->base_mem >> 10);
seq_printf(s, " - cached: %llu KB\n",
seq_printf(s, " - cached all: %llu KB\n",
si->cache_mem >> 10);
seq_printf(s, " - read extent cache: %llu KB\n",
si->ext_mem[EX_READ] >> 10);
seq_printf(s, " - block age extent cache: %llu KB\n",
si->ext_mem[EX_BLOCK_AGE] >> 10);
seq_printf(s, " - paged : %llu KB\n",
si->page_mem >> 10);
}
mutex_unlock(&f2fs_stat_mutex);
raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);
return 0;
}
@@ -476,6 +622,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_stat_info *si;
unsigned long flags;
int i;
si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL);
@@ -493,27 +640,32 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
si->sbi = sbi;
sbi->stat_info = si;
atomic64_set(&sbi->total_hit_ext, 0);
atomic64_set(&sbi->read_hit_rbtree, 0);
/* general extent cache stats */
for (i = 0; i < NR_EXTENT_CACHES; i++) {
atomic64_set(&sbi->total_hit_ext[i], 0);
atomic64_set(&sbi->read_hit_rbtree[i], 0);
atomic64_set(&sbi->read_hit_cached[i], 0);
}
/* read extent_cache only */
atomic64_set(&sbi->read_hit_largest, 0);
atomic64_set(&sbi->read_hit_cached, 0);
atomic_set(&sbi->inline_xattr, 0);
atomic_set(&sbi->inline_inode, 0);
atomic_set(&sbi->inline_dir, 0);
atomic_set(&sbi->compr_inode, 0);
atomic_set(&sbi->compr_blocks, 0);
atomic64_set(&sbi->compr_blocks, 0);
atomic_set(&sbi->swapfile_inode, 0);
atomic_set(&sbi->atomic_files, 0);
atomic_set(&sbi->inplace_count, 0);
for (i = META_CP; i < META_MAX; i++)
atomic_set(&sbi->meta_count[i], 0);
atomic_set(&sbi->vw_cnt, 0);
atomic_set(&sbi->max_aw_cnt, 0);
atomic_set(&sbi->max_vw_cnt, 0);
mutex_lock(&f2fs_stat_mutex);
raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
list_add_tail(&si->stat_list, &f2fs_stat_list);
mutex_unlock(&f2fs_stat_mutex);
raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);
return 0;
}
@@ -521,12 +673,13 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
{
struct f2fs_stat_info *si = F2FS_STAT(sbi);
unsigned long flags;
mutex_lock(&f2fs_stat_mutex);
raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
list_del(&si->stat_list);
mutex_unlock(&f2fs_stat_mutex);
raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags);
kvfree(si);
kfree(si);
}
void __init f2fs_create_root_stats(void)
@@ -534,7 +687,7 @@ void __init f2fs_create_root_stats(void)
#ifdef CONFIG_DEBUG_FS
f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, NULL,
debugfs_create_file("status", 0444, f2fs_debugfs_root, NULL,
&stat_fops);
#endif
}


@@ -16,6 +16,10 @@
#include "xattr.h"
#include <trace/events/f2fs.h>
#ifdef CONFIG_UNICODE
extern struct kmem_cache *f2fs_cf_name_slab;
#endif
static unsigned long dir_blocks(struct inode *inode)
{
return ((unsigned long long) (i_size_read(inode) + PAGE_SIZE - 1))
@@ -76,21 +80,22 @@ int f2fs_init_casefolded_name(const struct inode *dir,
struct f2fs_filename *fname)
{
#ifdef CONFIG_UNICODE
struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct super_block *sb = dir->i_sb;
if (IS_CASEFOLDED(dir)) {
fname->cf_name.name = f2fs_kmalloc(sbi, F2FS_NAME_LEN,
GFP_NOFS);
if (IS_CASEFOLDED(dir) &&
!is_dot_dotdot(fname->usr_fname->name, fname->usr_fname->len)) {
fname->cf_name.name = f2fs_kmem_cache_alloc(f2fs_cf_name_slab,
GFP_NOFS, false, F2FS_SB(sb));
if (!fname->cf_name.name)
return -ENOMEM;
fname->cf_name.len = utf8_casefold(sbi->sb->s_encoding,
fname->cf_name.len = utf8_casefold(sb->s_encoding,
fname->usr_fname,
fname->cf_name.name,
F2FS_NAME_LEN);
if ((int)fname->cf_name.len <= 0) {
kfree(fname->cf_name.name);
kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name);
fname->cf_name.name = NULL;
if (sb_has_enc_strict_mode(dir->i_sb))
if (sb_has_strict_encoding(sb))
return -EINVAL;
/* fall back to treating name as opaque byte sequence */
}
@@ -112,7 +117,7 @@ static int __f2fs_setup_filename(const struct inode *dir,
#ifdef CONFIG_FS_ENCRYPTION
fname->crypto_buf = crypt_name->crypto_buf;
#endif
if (crypt_name->is_ciphertext_name) {
if (crypt_name->is_nokey_name) {
/* hash was decoded from the no-key name */
fname->hash = cpu_to_le32(crypt_name->hash);
} else {
@@ -171,8 +176,10 @@ void f2fs_free_filename(struct f2fs_filename *fname)
fname->crypto_buf.name = NULL;
#endif
#ifdef CONFIG_UNICODE
kfree(fname->cf_name.name);
fname->cf_name.name = NULL;
if (fname->cf_name.name) {
kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name);
fname->cf_name.name = NULL;
}
#endif
}
@@ -192,29 +199,25 @@ static unsigned long dir_block_index(unsigned int level,
static struct f2fs_dir_entry *find_in_block(struct inode *dir,
struct page *dentry_page,
const struct f2fs_filename *fname,
int *max_slots,
struct page **res_page)
int *max_slots)
{
struct f2fs_dentry_block *dentry_blk;
struct f2fs_dir_entry *de;
struct f2fs_dentry_ptr d;
dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
make_dentry_ptr_block(dir, &d, dentry_blk);
de = f2fs_find_target_dentry(&d, fname, max_slots);
if (de)
*res_page = dentry_page;
return de;
return f2fs_find_target_dentry(&d, fname, max_slots);
}
#ifdef CONFIG_UNICODE
/*
* Test whether a case-insensitive directory entry matches the filename
* being searched for.
*
* Returns 1 for a match, 0 for no match, and -errno on an error.
*/
static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr *name,
static int f2fs_match_ci_name(const struct inode *dir, const struct qstr *name,
const u8 *de_name, u32 de_name_len)
{
const struct super_block *sb = dir->i_sb;
@@ -228,11 +231,11 @@ static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr *name,
FSTR_INIT((u8 *)de_name, de_name_len);
if (WARN_ON_ONCE(!fscrypt_has_encryption_key(dir)))
return false;
return -EINVAL;
decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
if (!decrypted_name.name)
return false;
return -ENOMEM;
res = fscrypt_fname_disk_to_usr(dir, 0, 0, &encrypted_name,
&decrypted_name);
if (res < 0)
@@ -242,23 +245,24 @@ static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr *name,
}
res = utf8_strncasecmp_folded(um, name, &entry);
if (res < 0) {
/*
* In strict mode, ignore invalid names. In non-strict mode,
* fall back to treating them as opaque byte sequences.
*/
if (sb_has_enc_strict_mode(sb) || name->len != entry.len)
res = 1;
else
res = memcmp(name->name, entry.name, name->len);
/*
* In strict mode, ignore invalid names. In non-strict mode,
* fall back to treating them as opaque byte sequences.
*/
if (res < 0 && !sb_has_strict_encoding(sb)) {
res = name->len == entry.len &&
memcmp(name->name, entry.name, name->len) == 0;
} else {
/* utf8_strncasecmp_folded returns 0 on match */
res = (res == 0);
}
out:
kfree(decrypted_name.name);
return res == 0;
return res;
}
#endif /* CONFIG_UNICODE */
static inline bool f2fs_match_name(const struct inode *dir,
static inline int f2fs_match_name(const struct inode *dir,
const struct f2fs_filename *fname,
const u8 *de_name, u32 de_name_len)
{
@@ -285,6 +289,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
struct f2fs_dir_entry *de;
unsigned long bit_pos = 0;
int max_len = 0;
int res = 0;
if (max_slots)
*max_slots = 0;
@@ -302,10 +307,15 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
continue;
}
if (de->hash_code == fname->hash &&
f2fs_match_name(d->inode, fname, d->filename[bit_pos],
le16_to_cpu(de->name_len)))
goto found;
if (de->hash_code == fname->hash) {
res = f2fs_match_name(d->inode, fname,
d->filename[bit_pos],
le16_to_cpu(de->name_len));
if (res < 0)
return ERR_PTR(res);
if (res)
goto found;
}
if (max_slots && max_len > *max_slots)
*max_slots = max_len;
@@ -331,6 +341,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
unsigned int bidx, end_block;
struct page *dentry_page;
struct f2fs_dir_entry *de = NULL;
pgoff_t next_pgofs;
bool room = false;
int max_slots;
@@ -341,12 +352,13 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
le32_to_cpu(fname->hash) % nbucket);
end_block = bidx + nblock;
for (; bidx < end_block; bidx++) {
while (bidx < end_block) {
/* no need to allocate new dentry pages to all the indices */
dentry_page = f2fs_find_data_page(dir, bidx);
dentry_page = f2fs_find_data_page(dir, bidx, &next_pgofs);
if (IS_ERR(dentry_page)) {
if (PTR_ERR(dentry_page) == -ENOENT) {
room = true;
bidx = next_pgofs;
continue;
} else {
*res_page = dentry_page;
@@ -354,14 +366,21 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
}
}
de = find_in_block(dir, dentry_page, fname, &max_slots,
res_page);
if (de)
de = find_in_block(dir, dentry_page, fname, &max_slots);
if (IS_ERR(de)) {
*res_page = ERR_CAST(de);
de = NULL;
break;
} else if (de) {
*res_page = dentry_page;
break;
}
if (max_slots >= s)
room = true;
f2fs_put_page(dentry_page, 0);
bidx++;
}
if (!de && room && F2FS_I(dir)->chash != fname->hash) {
@@ -465,6 +484,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
struct page *page, struct inode *inode)
{
enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA;
lock_page(page);
f2fs_wait_on_page_writeback(page, type, true, true);
de->ino = cpu_to_le32(inode->i_ino);
@@ -754,7 +774,7 @@ int f2fs_add_regular_entry(struct inode *dir, const struct f2fs_filename *fname,
f2fs_wait_on_page_writeback(dentry_page, DATA, true, true);
if (inode) {
down_write(&F2FS_I(inode)->i_sem);
f2fs_down_write(&F2FS_I(inode)->i_sem);
page = f2fs_init_inode_metadata(inode, dir, fname, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
@@ -781,7 +801,7 @@ int f2fs_add_regular_entry(struct inode *dir, const struct f2fs_filename *fname,
f2fs_update_parent_metadata(dir, inode, current_depth);
fail:
if (inode)
up_write(&F2FS_I(inode)->i_sem);
f2fs_up_write(&F2FS_I(inode)->i_sem);
f2fs_put_page(dentry_page, 1);
@@ -819,7 +839,7 @@ int f2fs_do_add_link(struct inode *dir, const struct qstr *name,
return err;
/*
* An immature stakable filesystem shows a race condition between lookup
* An immature stackable filesystem shows a race condition between lookup
* and create. If we have same task when doing lookup and create, it's
* definitely fine as expected by VFS normally. Otherwise, let's just
* verify on-disk dentry one more time, which guarantees filesystem
@@ -846,7 +866,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
struct page *page;
int err = 0;
down_write(&F2FS_I(inode)->i_sem);
f2fs_down_write(&F2FS_I(inode)->i_sem);
page = f2fs_init_inode_metadata(inode, dir, NULL, NULL);
if (IS_ERR(page)) {
err = PTR_ERR(page);
@@ -857,7 +877,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
clear_inode_flag(inode, FI_NEW_INODE);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
fail:
up_write(&F2FS_I(inode)->i_sem);
f2fs_up_write(&F2FS_I(inode)->i_sem);
return err;
}
@@ -865,7 +885,7 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
down_write(&F2FS_I(inode)->i_sem);
f2fs_down_write(&F2FS_I(inode)->i_sem);
if (S_ISDIR(inode->i_mode))
f2fs_i_links_write(dir, false);
@@ -876,7 +896,7 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode)
f2fs_i_links_write(inode, false);
f2fs_i_size_write(inode, 0);
}
up_write(&F2FS_I(inode)->i_sem);
f2fs_up_write(&F2FS_I(inode)->i_sem);
if (inode->i_nlink == 0)
f2fs_add_orphan_inode(inode);
@@ -922,11 +942,15 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
!f2fs_truncate_hole(dir, page->index, page->index + 1)) {
f2fs_clear_radix_tree_dirty_tag(page);
clear_page_dirty_for_io(page);
f2fs_clear_page_private(page);
ClearPageUptodate(page);
clear_cold_data(page);
clear_page_private_gcing(page);
inode_dec_dirty_pages(dir);
f2fs_remove_dirty_inode(dir);
detach_page_private(page);
set_page_private(page, 0);
}
f2fs_put_page(page, 1);
@@ -939,7 +963,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
bool f2fs_empty_dir(struct inode *dir)
{
unsigned long bidx;
unsigned long bidx = 0;
struct page *dentry_page;
unsigned int bit_pos;
struct f2fs_dentry_block *dentry_blk;
@@ -948,13 +972,17 @@ bool f2fs_empty_dir(struct inode *dir)
if (f2fs_has_inline_dentry(dir))
return f2fs_empty_inline_dir(dir);
for (bidx = 0; bidx < nblock; bidx++) {
dentry_page = f2fs_get_lock_data_page(dir, bidx, false);
while (bidx < nblock) {
pgoff_t next_pgofs;
dentry_page = f2fs_find_data_page(dir, bidx, &next_pgofs);
if (IS_ERR(dentry_page)) {
if (PTR_ERR(dentry_page) == -ENOENT)
if (PTR_ERR(dentry_page) == -ENOENT) {
bidx = next_pgofs;
continue;
else
} else {
return false;
}
}
dentry_blk = page_address(dentry_page);
@@ -966,10 +994,12 @@ bool f2fs_empty_dir(struct inode *dir)
NR_DENTRY_IN_BLOCK,
bit_pos);
f2fs_put_page(dentry_page, 1);
f2fs_put_page(dentry_page, 0);
if (bit_pos < NR_DENTRY_IN_BLOCK)
return false;
bidx++;
}
return true;
}
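find_in_level(), f2fs_empty_dir() and f2fs_readdir() in this patch all switch to the same hole-skipping loop: when f2fs_find_data_page() returns -ENOENT it also reports the next allocated page offset, so the scan jumps over the hole instead of probing every index. A self-contained sketch of that pattern (find_page() is a hypothetical stand-in, not the f2fs API):

#include <stdio.h>

#define MY_ENOENT 2     /* stand-in errno */

/* Hypothetical lookup: returns 0 if pgofs is allocated, -MY_ENOENT for a
 * hole, in which case the next allocated offset is stored in *next_pgofs. */
static int find_page(const int *alloc, int n, int pgofs, int *next_pgofs)
{
        if (pgofs < n && alloc[pgofs])
                return 0;
        for (*next_pgofs = pgofs + 1;
             *next_pgofs < n && !alloc[*next_pgofs]; (*next_pgofs)++)
                ;
        return -MY_ENOENT;
}

int main(void)
{
        const int alloc[] = { 1, 0, 0, 0, 1, 1 };   /* pages 1-3 are a hole */
        int n = 6, bidx = 0, next;

        while (bidx < n) {
                if (find_page(alloc, n, bidx, &next) == -MY_ENOENT) {
                        bidx = next;    /* skip the whole hole at once */
                        continue;
                }
                printf("visit page %d\n", bidx);        /* visits 0, 4, 5 */
                bidx++;
        }
        return 0;
}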
@@ -983,7 +1013,8 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
struct fscrypt_str de_name = FSTR_INIT(NULL, 0);
struct f2fs_sb_info *sbi = F2FS_I_SB(d->inode);
struct blk_plug plug;
bool readdir_ra = sbi->readdir_ra == 1;
bool readdir_ra = sbi->readdir_ra;
bool found_valid_dirent = false;
int err = 0;
bit_pos = ((unsigned long)ctx->pos % d->max);
@@ -998,13 +1029,15 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
de = &d->dentry[bit_pos];
if (de->name_len == 0) {
if (found_valid_dirent || !bit_pos) {
printk_ratelimited(
"%sF2FS-fs (%s): invalid namelen(0), ino:%u, run fsck to fix.",
KERN_WARNING, sbi->sb->s_id,
le32_to_cpu(de->ino));
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
bit_pos++;
ctx->pos = start_pos + bit_pos;
printk_ratelimited(
"%sF2FS-fs (%s): invalid namelen(0), ino:%u, run fsck to fix.",
KERN_WARNING, sbi->sb->s_id,
le32_to_cpu(de->ino));
set_sbi_flag(sbi, SBI_NEED_FSCK);
continue;
}
@@ -1021,6 +1054,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
__func__, le16_to_cpu(de->name_len));
set_sbi_flag(sbi, SBI_NEED_FSCK);
err = -EFSCORRUPTED;
f2fs_handle_error(sbi, ERROR_CORRUPTED_DIRENT);
goto out;
}
@@ -1047,6 +1081,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
f2fs_ra_node_page(sbi, le32_to_cpu(de->ino));
ctx->pos = start_pos + bit_pos;
found_valid_dirent = true;
}
out:
if (readdir_ra)
@@ -1082,7 +1117,8 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
goto out_free;
}
for (; n < npages; n++, ctx->pos = n * NR_DENTRY_IN_BLOCK) {
for (; n < npages; ctx->pos = n * NR_DENTRY_IN_BLOCK) {
pgoff_t next_pgofs;
/* allow readdir() to be interrupted */
if (fatal_signal_pending(current)) {
@@ -1096,11 +1132,12 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
page_cache_sync_readahead(inode->i_mapping, ra, file, n,
min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES));
dentry_page = f2fs_find_data_page(inode, n);
dentry_page = f2fs_find_data_page(inode, n, &next_pgofs);
if (IS_ERR(dentry_page)) {
err = PTR_ERR(dentry_page);
if (err == -ENOENT) {
err = 0;
n = next_pgofs;
continue;
} else {
goto out_free;
@@ -1119,6 +1156,8 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
}
f2fs_put_page(dentry_page, 0);
n++;
}
out_free:
fscrypt_fname_free_buffer(&fstr);

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -14,6 +14,14 @@
#define DEF_GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */
#define DEF_GC_THREAD_MAX_SLEEP_TIME 60000
#define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */
/* choose candidates from sections which have an age of more than 7 days */
#define DEF_GC_THREAD_AGE_THRESHOLD (60 * 60 * 24 * 7)
#define DEF_GC_THREAD_CANDIDATE_RATIO 20 /* select 20% oldest sections as candidates */
#define DEF_GC_THREAD_MAX_CANDIDATE_COUNT 10 /* select at most 10 sections as candidates */
#define DEF_GC_THREAD_AGE_WEIGHT 60 /* age weight */
#define DEFAULT_ACCURACY_CLASS 10000 /* accuracy class */
#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */
#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */
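For scale, DEF_GC_THREAD_AGE_THRESHOLD is 60 * 60 * 24 * 7 = 604800 seconds, i.e. one week. The ratio and count knobs presumably bound the candidate set both relatively and absolutely; a sketch of that bound under the defaults (the min() combination is an assumption, not taken from the patch):

#include <stdio.h>

#define DEF_GC_THREAD_CANDIDATE_RATIO           20      /* % of oldest sections */
#define DEF_GC_THREAD_MAX_CANDIDATE_COUNT       10      /* absolute cap */

/* Assumed combination: take 20% of dirty sections, capped at 10. */
static unsigned int candidates(unsigned int dirty_sections)
{
        unsigned int by_ratio = dirty_sections *
                                DEF_GC_THREAD_CANDIDATE_RATIO / 100;

        return by_ratio < DEF_GC_THREAD_MAX_CANDIDATE_COUNT ?
                by_ratio : DEF_GC_THREAD_MAX_CANDIDATE_COUNT;
}

int main(void)
{
        /* 30 dirty sections -> 6 candidates; 200 -> capped at 10 */
        printf("%u %u\n", candidates(30), candidates(200));
        return 0;
}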
@@ -34,6 +42,12 @@ struct f2fs_gc_kthread {
/* for changing gc mode */
unsigned int gc_wake;
/* for GC_MERGE mount option */
wait_queue_head_t fggc_wq; /*
* caller of f2fs_balance_fs()
* will wait on this wait queue.
*/
};
struct gc_inode_list {
@@ -41,27 +55,78 @@ struct gc_inode_list {
struct radix_tree_root iroot;
};
struct victim_info {
unsigned long long mtime; /* mtime of section */
unsigned int segno; /* section No. */
};
struct victim_entry {
struct rb_node rb_node; /* rb node located in rb-tree */
union {
struct {
unsigned long long mtime; /* mtime of section */
unsigned int segno; /* segment No. */
};
struct victim_info vi; /* victim info */
};
struct list_head list;
} __packed;
/*
* inline functions
*/
/*
* On a Zoned device zone-capacity can be less than zone-size and if
* zone-capacity is not aligned to f2fs segment size(2MB), then the segment
* starting just before zone-capacity has some blocks spanning across the
* zone-capacity, these blocks are not usable.
* Such spanning segments can be in free list so calculate the sum of usable
* blocks in currently free segments including normal and spanning segments.
*/
static inline block_t free_segs_blk_count_zoned(struct f2fs_sb_info *sbi)
{
block_t free_seg_blks = 0;
struct free_segmap_info *free_i = FREE_I(sbi);
int j;
spin_lock(&free_i->segmap_lock);
for (j = 0; j < MAIN_SEGS(sbi); j++)
if (!test_bit(j, free_i->free_segmap))
free_seg_blks += f2fs_usable_blks_in_seg(sbi, j);
spin_unlock(&free_i->segmap_lock);
return free_seg_blks;
}
static inline block_t free_segs_blk_count(struct f2fs_sb_info *sbi)
{
if (f2fs_sb_has_blkzoned(sbi))
return free_segs_blk_count_zoned(sbi);
return free_segments(sbi) << sbi->log_blocks_per_seg;
}
static inline block_t free_user_blocks(struct f2fs_sb_info *sbi)
{
if (free_segments(sbi) < overprovision_segments(sbi))
block_t free_blks, ovp_blks;
free_blks = free_segs_blk_count(sbi);
ovp_blks = overprovision_segments(sbi) << sbi->log_blocks_per_seg;
if (free_blks < ovp_blks)
return 0;
else
return (free_segments(sbi) - overprovision_segments(sbi))
<< sbi->log_blocks_per_seg;
return free_blks - ovp_blks;
}
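A worked example of the reworked free_user_blocks(): usable free blocks are now summed per segment (so zoned segments spanning zone-capacity only count their usable part), and the subtraction clamps at zero rather than underflowing. Minimal sketch with made-up numbers:

#include <stdio.h>

typedef unsigned int block_t;

/* free_blks: usable blocks in free segments; ovp_blks: overprovisioned */
static block_t free_user_blocks(block_t free_blks, block_t ovp_blks)
{
        return free_blks < ovp_blks ? 0 : free_blks - ovp_blks;
}

int main(void)
{
        /* e.g. 1000 usable free blocks against a 1200-block OP area */
        printf("%u\n", free_user_blocks(1000, 1200));   /* 0, no underflow */
        printf("%u\n", free_user_blocks(5000, 1200));   /* 3800 */
        return 0;
}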
static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info *sbi)
static inline block_t limit_invalid_user_blocks(block_t user_block_count)
{
return (long)(sbi->user_block_count * LIMIT_INVALID_BLOCK) / 100;
return (long)(user_block_count * LIMIT_INVALID_BLOCK) / 100;
}
static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi)
static inline block_t limit_free_user_blocks(block_t reclaimable_user_blocks)
{
block_t reclaimable_user_blocks = sbi->user_block_count -
written_block_count(sbi);
return (long)(reclaimable_user_blocks * LIMIT_FREE_BLOCK) / 100;
}
@@ -96,15 +161,16 @@ static inline void decrease_sleep_time(struct f2fs_gc_kthread *gc_th,
static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
{
block_t invalid_user_blocks = sbi->user_block_count -
written_block_count(sbi);
block_t user_block_count = sbi->user_block_count;
block_t invalid_user_blocks = user_block_count -
written_block_count(sbi);
/*
* Background GC is triggered with the following conditions.
* 1. There are a number of invalid blocks.
* 2. There is not enough free space.
*/
if (invalid_user_blocks > limit_invalid_user_blocks(sbi) &&
free_user_blocks(sbi) < limit_free_user_blocks(sbi))
return true;
return false;
return (invalid_user_blocks >
limit_invalid_user_blocks(user_block_count) &&
free_user_blocks(sbi) <
limit_free_user_blocks(invalid_user_blocks));
}
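Plugging hypothetical numbers into the rewritten trigger: with user_block_count = 10000 and 5000 written blocks, invalid_user_blocks = 5000 exceeds limit_invalid_user_blocks(10000) = 4000, so background GC fires only if free space is also below limit_free_user_blocks(5000) = 2000. A compact standalone version of the two-condition check (sbi fields flattened into parameters):

#include <stdio.h>
#include <stdbool.h>

typedef unsigned int block_t;

#define LIMIT_INVALID_BLOCK     40      /* % of total user space */
#define LIMIT_FREE_BLOCK        40      /* % of reclaimable space */

static bool has_enough_invalid_blocks(block_t user_blocks,
                                      block_t written_blocks,
                                      block_t free_blocks)
{
        block_t invalid = user_blocks - written_blocks;

        return invalid > user_blocks * LIMIT_INVALID_BLOCK / 100 &&
               free_blocks < invalid * LIMIT_FREE_BLOCK / 100;
}

int main(void)
{
        /* 10000 total, 5000 written -> 5000 invalid; thresholds 4000/2000 */
        printf("%d\n", has_enough_invalid_blocks(10000, 5000, 1500)); /* 1 */
        printf("%d\n", has_enough_invalid_blocks(10000, 5000, 2500)); /* 0 */
        return 0;
}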


@@ -92,7 +92,7 @@ static u32 TEA_hash_name(const u8 *p, size_t len)
/*
* Compute @fname->hash. For all directories, @fname->disk_name must be set.
* For casefolded directories, @fname->usr_fname must be set, and also
* @fname->cf_name if the filename is valid Unicode.
* @fname->cf_name if the filename is valid Unicode and is not "." or "..".
*/
void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname)
{
@@ -111,10 +111,11 @@ void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname)
/*
* If the casefolded name is provided, hash it instead of the
* on-disk name. If the casefolded name is *not* provided, that
* should only be because the name wasn't valid Unicode, so fall
* back to treating the name as an opaque byte sequence. Note
* that to handle encrypted directories, the fallback must use
* usr_fname (plaintext) rather than disk_name (ciphertext).
* should only be because the name wasn't valid Unicode or was
* "." or "..", so fall back to treating the name as an opaque
* byte sequence. Note that to handle encrypted directories,
* the fallback must use usr_fname (plaintext) rather than
* disk_name (ciphertext).
*/
WARN_ON_ONCE(!fname->usr_fname->name);
if (fname->cf_name.name) {


@@ -11,23 +11,42 @@
#include "f2fs.h"
#include "node.h"
#include <trace/events/android_fs.h>
#include <trace/events/f2fs.h>
bool f2fs_may_inline_data(struct inode *inode)
static bool support_inline_data(struct inode *inode)
{
if (f2fs_is_atomic_file(inode))
return false;
if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))
return false;
if (i_size_read(inode) > MAX_INLINE_DATA(inode))
return false;
return true;
}
if (f2fs_post_read_required(inode))
bool f2fs_may_inline_data(struct inode *inode)
{
if (!support_inline_data(inode))
return false;
return true;
return !f2fs_post_read_required(inode);
}
bool f2fs_sanity_check_inline_data(struct inode *inode)
{
if (!f2fs_has_inline_data(inode))
return false;
if (!support_inline_data(inode))
return true;
/*
* used by sanity_check_inode(), when disk layout fields have not
* been synchronized to inmem fields.
*/
return (S_ISREG(inode->i_mode) &&
(file_is_encrypt(inode) || file_is_verity(inode) ||
(F2FS_I(inode)->i_flags & F2FS_COMPR_FL)));
}
bool f2fs_may_inline_dentry(struct inode *inode)
@@ -85,29 +104,14 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
{
struct page *ipage;
if (trace_android_fs_dataread_start_enabled()) {
char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
path = android_fstrace_get_pathname(pathbuf,
MAX_TRACE_PATHBUF_LEN,
inode);
trace_android_fs_dataread_start(inode, page_offset(page),
PAGE_SIZE, current->pid,
path, current->comm);
}
ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
if (IS_ERR(ipage)) {
trace_android_fs_dataread_end(inode, page_offset(page),
PAGE_SIZE);
unlock_page(page);
return PTR_ERR(ipage);
}
if (!f2fs_has_inline_data(inode)) {
f2fs_put_page(ipage, 1);
trace_android_fs_dataread_end(inode, page_offset(page),
PAGE_SIZE);
return -EAGAIN;
}
@@ -119,8 +123,6 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
if (!PageUptodate(page))
SetPageUptodate(page);
f2fs_put_page(ipage, 1);
trace_android_fs_dataread_end(inode, page_offset(page),
PAGE_SIZE);
unlock_page(page);
return 0;
}
@@ -147,7 +149,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
if (err)
return err;
err = f2fs_get_node_info(fio.sbi, dn->nid, &ni);
err = f2fs_get_node_info(fio.sbi, dn->nid, &ni, false);
if (err) {
f2fs_truncate_data_blocks_range(dn, 1);
f2fs_put_dnode(dn);
@@ -161,6 +163,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
set_sbi_flag(fio.sbi, SBI_NEED_FSCK);
f2fs_warn(fio.sbi, "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.",
__func__, dn->inode->i_ino, dn->data_blkaddr);
f2fs_handle_error(fio.sbi, ERROR_INVALID_BLKADDR);
return -EFSCORRUPTED;
}
@@ -189,7 +192,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
/* clear inline data and flag after data writeback */
f2fs_truncate_inline_inode(dn->inode, dn->inode_page, 0);
clear_inline_node(dn->inode_page);
clear_page_private_inline(dn->inode_page);
clear_out:
stat_dec_inline_inode(dn->inode);
clear_inode_flag(dn->inode, FI_INLINE_DATA);
@@ -204,10 +207,11 @@ int f2fs_convert_inline_inode(struct inode *inode)
struct page *ipage, *page;
int err = 0;
if (!f2fs_has_inline_data(inode))
if (!f2fs_has_inline_data(inode) ||
f2fs_hw_is_readonly(sbi) || f2fs_readonly(sbi->sb))
return 0;
err = dquot_initialize(inode);
err = f2fs_dquot_initialize(inode);
if (err)
return err;
@@ -270,7 +274,7 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
set_inode_flag(inode, FI_APPEND_WRITE);
set_inode_flag(inode, FI_DATA_EXIST);
clear_inline_node(dn.inode_page);
clear_page_private_inline(dn.inode_page);
f2fs_put_dnode(&dn);
return 0;
}
@@ -287,7 +291,7 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage)
* [prev.] [next] of inline_data flag
* o o -> recover inline_data
* o x -> remove inline_data, and then recover data blocks
* x o -> remove inline_data, and then recover inline_data
* x o -> remove data blocks, and then recover inline_data
* x x -> recover data blocks
*/
if (IS_INODE(npage))
@@ -319,6 +323,7 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage)
if (IS_ERR(ipage))
return PTR_ERR(ipage);
f2fs_truncate_inline_inode(inode, ipage, 0);
stat_dec_inline_inode(inode);
clear_inode_flag(inode, FI_INLINE_DATA);
f2fs_put_page(ipage, 1);
} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
@@ -327,6 +332,7 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage)
ret = f2fs_truncate_blocks(inode, 0, false);
if (ret)
return ret;
stat_inc_inline_inode(inode);
goto process_inline;
}
return 0;
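The corrected state table above maps prev/next inline_data flags onto four recovery actions, with the fixed "x o" row now removing data blocks before recovering inline data. A sketch encoding the table directly:

#include <stdio.h>
#include <stdbool.h>

/* Four recovery actions from the [prev.][next] inline_data flag table. */
static const char *recover_action(bool prev_inline, bool next_inline)
{
        if (prev_inline && next_inline)
                return "recover inline_data";
        if (prev_inline && !next_inline)
                return "remove inline_data, then recover data blocks";
        if (!prev_inline && next_inline)
                return "remove data blocks, then recover inline_data";
        return "recover data blocks";
}

int main(void)
{
        /* the corrected "x o" row */
        printf("%s\n", recover_action(false, true));
        return 0;
}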
@@ -353,6 +359,10 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
make_dentry_ptr_inline(dir, &d, inline_dentry);
de = f2fs_find_target_dentry(&d, fname, NULL);
unlock_page(ipage);
if (IS_ERR(de)) {
*res_page = ERR_CAST(de);
de = NULL;
}
if (de)
*res_page = ipage;
else
@@ -409,6 +419,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK);
f2fs_warn(F2FS_P_SB(page), "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.",
__func__, dir->i_ino, dn.data_blkaddr);
f2fs_handle_error(F2FS_P_SB(page), ERROR_INVALID_BLKADDR);
err = -EFSCORRUPTED;
goto out;
}
@@ -544,7 +555,7 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
!f2fs_has_inline_xattr(dir))
F2FS_I(dir)->i_inline_xattr_size = 0;
kvfree(backup_dentry);
kfree(backup_dentry);
return 0;
recover:
lock_page(ipage);
@@ -555,7 +566,7 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
set_page_dirty(ipage);
f2fs_put_page(ipage, 1);
kvfree(backup_dentry);
kfree(backup_dentry);
return err;
}
@@ -637,7 +648,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
}
if (inode) {
down_write(&F2FS_I(inode)->i_sem);
f2fs_down_write(&F2FS_I(inode)->i_sem);
page = f2fs_init_inode_metadata(inode, dir, fname, ipage);
if (IS_ERR(page)) {
err = PTR_ERR(page);
@@ -666,7 +677,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
f2fs_update_parent_metadata(dir, inode, 0);
fail:
if (inode)
up_write(&F2FS_I(inode)->i_sem);
f2fs_up_write(&F2FS_I(inode)->i_sem);
out:
f2fs_put_page(ipage, 1);
return err;
@@ -794,7 +805,7 @@ int f2fs_inline_data_fiemap(struct inode *inode,
ilen = start + len;
ilen -= start;
err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni, false);
if (err)
goto out;
@@ -802,6 +813,7 @@ int f2fs_inline_data_fiemap(struct inode *inode,
byteaddr += (char *)inline_data_addr(inode, ipage) -
(char *)F2FS_INODE(ipage);
err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags);
trace_f2fs_fiemap(inode, start, byteaddr, ilen, flags, err);
out:
f2fs_put_page(ipage, 1);
return err;


@@ -18,6 +18,10 @@
#include <trace/events/f2fs.h>
#ifdef CONFIG_F2FS_FS_COMPRESSION
extern const struct address_space_operations f2fs_compress_aops;
#endif
void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync)
{
if (is_inode_flag_set(inode, FI_NEW_INODE))
@@ -80,8 +84,10 @@ static int __written_first_block(struct f2fs_sb_info *sbi,
if (!__is_valid_data_blkaddr(addr))
return 1;
if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC_ENHANCE))
if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC_ENHANCE)) {
f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
return -EFSCORRUPTED;
}
return 0;
}
@@ -259,8 +265,8 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
return false;
}
if (F2FS_I(inode)->extent_tree) {
struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest;
if (fi->extent_tree[EX_READ]) {
struct extent_info *ei = &fi->extent_tree[EX_READ]->largest;
if (ei->len &&
(!f2fs_is_valid_blkaddr(sbi, ei->blk,
@@ -275,8 +281,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
}
}
if (f2fs_has_inline_data(inode) &&
(!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))) {
if (f2fs_sanity_check_inline_data(inode)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix",
__func__, inode->i_ino, inode->i_mode);
@@ -290,11 +295,19 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
return false;
}
if ((fi->i_flags & F2FS_CASEFOLD_FL) && !f2fs_sb_has_casefold(sbi)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx) has casefold flag, but casefold feature is off",
__func__, inode->i_ino);
return false;
}
if (f2fs_has_extra_attr(inode) && f2fs_sb_has_compression(sbi) &&
fi->i_flags & F2FS_COMPR_FL &&
F2FS_FITS_IN_INODE(ri, fi->i_extra_isize,
i_log_cluster_size)) {
if (ri->i_compress_algorithm >= COMPRESS_MAX) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported "
"compress algorithm: %u, run fsck to fix",
__func__, inode->i_ino,
@@ -303,6 +316,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
}
if (le64_to_cpu(ri->i_compr_blocks) >
SECTOR_TO_BLOCK(inode->i_blocks)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx) has inconsistent "
"i_compr_blocks:%llu, i_blocks:%lu, run fsck to fix",
__func__, inode->i_ino,
@@ -312,6 +326,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
}
if (ri->i_log_cluster_size < MIN_COMPRESS_LOG_SIZE ||
ri->i_log_cluster_size > MAX_COMPRESS_LOG_SIZE) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported "
"log cluster size: %u, run fsck to fix",
__func__, inode->i_ino,
@@ -323,6 +338,16 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
return true;
}
static void init_idisk_time(struct inode *inode)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
fi->i_disk_time[0] = inode->i_atime;
fi->i_disk_time[1] = inode->i_ctime;
fi->i_disk_time[2] = inode->i_mtime;
fi->i_disk_time[3] = fi->i_crtime;
}
static int do_read_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -370,9 +395,6 @@ static int do_read_inode(struct inode *inode)
fi->i_pino = le32_to_cpu(ri->i_pino);
fi->i_dir_level = ri->i_dir_level;
if (f2fs_init_extent_tree(inode, &ri->i_ext))
set_page_dirty(node_page);
get_inline_info(inode, ri);
fi->i_extra_isize = f2fs_has_extra_attr(inode) ?
@@ -396,6 +418,7 @@ static int do_read_inode(struct inode *inode)
if (!sanity_check_inode(inode, node_page)) {
f2fs_put_page(node_page, 1);
f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE);
return -EFSCORRUPTED;
}
@@ -405,6 +428,7 @@ static int do_read_inode(struct inode *inode)
/* try to recover cold bit for non-dir inode */
if (!S_ISDIR(inode->i_mode) && !is_cold_node(node_page)) {
f2fs_wait_on_page_writeback(node_page, NODE, true, true);
set_cold_node(node_page, false);
set_page_dirty(node_page);
}
@@ -445,29 +469,39 @@ static int do_read_inode(struct inode *inode)
(fi->i_flags & F2FS_COMPR_FL)) {
if (F2FS_FITS_IN_INODE(ri, fi->i_extra_isize,
i_log_cluster_size)) {
fi->i_compr_blocks = le64_to_cpu(ri->i_compr_blocks);
atomic_set(&fi->i_compr_blocks,
le64_to_cpu(ri->i_compr_blocks));
fi->i_compress_algorithm = ri->i_compress_algorithm;
fi->i_log_cluster_size = ri->i_log_cluster_size;
fi->i_compress_flag = le16_to_cpu(ri->i_compress_flag);
fi->i_cluster_size = 1 << fi->i_log_cluster_size;
set_inode_flag(inode, FI_COMPRESSED_FILE);
}
}
F2FS_I(inode)->i_disk_time[0] = inode->i_atime;
F2FS_I(inode)->i_disk_time[1] = inode->i_ctime;
F2FS_I(inode)->i_disk_time[2] = inode->i_mtime;
F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime;
init_idisk_time(inode);
/* Need all the flag bits */
f2fs_init_read_extent_tree(inode, node_page);
f2fs_init_age_extent_tree(inode);
f2fs_put_page(node_page, 1);
stat_inc_inline_xattr(inode);
stat_inc_inline_inode(inode);
stat_inc_inline_dir(inode);
stat_inc_compr_inode(inode);
stat_add_compr_blocks(inode, F2FS_I(inode)->i_compr_blocks);
stat_add_compr_blocks(inode, atomic_read(&fi->i_compr_blocks));
return 0;
}
static bool is_meta_ino(struct f2fs_sb_info *sbi, unsigned int ino)
{
return ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi) ||
ino == F2FS_COMPRESS_INO(sbi);
}
struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -479,10 +513,21 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
return ERR_PTR(-ENOMEM);
if (!(inode->i_state & I_NEW)) {
if (is_meta_ino(sbi, ino)) {
f2fs_err(sbi, "inaccessible inode: %lu, run fsck to repair", ino);
set_sbi_flag(sbi, SBI_NEED_FSCK);
ret = -EFSCORRUPTED;
trace_f2fs_iget_exit(inode, ret);
iput(inode);
f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE);
return ERR_PTR(ret);
}
trace_f2fs_iget(inode);
return inode;
}
if (ino == F2FS_NODE_INO(sbi) || ino == F2FS_META_INO(sbi))
if (is_meta_ino(sbi, ino))
goto make_now;
ret = do_read_inode(inode);
@@ -495,6 +540,17 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
} else if (ino == F2FS_META_INO(sbi)) {
inode->i_mapping->a_ops = &f2fs_meta_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
} else if (ino == F2FS_COMPRESS_INO(sbi)) {
#ifdef CONFIG_F2FS_FS_COMPRESSION
inode->i_mapping->a_ops = &f2fs_compress_aops;
/*
* generic_error_remove_page only truncates pages of regular
* inode
*/
inode->i_mode |= S_IFREG;
#endif
mapping_set_gfp_mask(inode->i_mapping,
GFP_NOFS | __GFP_HIGHMEM | __GFP_MOVABLE);
} else if (S_ISREG(inode->i_mode)) {
inode->i_op = &f2fs_file_inode_operations;
inode->i_fop = &f2fs_file_operations;
@@ -520,6 +576,15 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
goto bad_inode;
}
f2fs_set_inode_flags(inode);
if (file_should_truncate(inode) &&
!is_sbi_flag_set(sbi, SBI_POR_DOING)) {
ret = f2fs_truncate(inode);
if (ret)
goto bad_inode;
file_dont_truncate(inode);
}
unlock_new_inode(inode);
trace_f2fs_iget(inode);
return inode;
@@ -548,7 +613,7 @@ struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino)
void f2fs_update_inode(struct inode *inode, struct page *node_page)
{
struct f2fs_inode *ri;
struct extent_tree *et = F2FS_I(inode)->extent_tree;
struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
f2fs_wait_on_page_writeback(node_page, NODE, true, true);
set_page_dirty(node_page);
@@ -562,12 +627,15 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
ri->i_uid = cpu_to_le32(i_uid_read(inode));
ri->i_gid = cpu_to_le32(i_gid_read(inode));
ri->i_links = cpu_to_le32(inode->i_nlink);
ri->i_size = cpu_to_le64(i_size_read(inode));
ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(inode->i_blocks) + 1);
if (!f2fs_is_atomic_file(inode) ||
is_inode_flag_set(inode, FI_ATOMIC_COMMITTED))
ri->i_size = cpu_to_le64(i_size_read(inode));
if (et) {
read_lock(&et->lock);
set_raw_extent(&et->largest, &ri->i_ext);
set_raw_read_extent(&et->largest, &ri->i_ext);
read_unlock(&et->lock);
} else {
memset(&ri->i_ext, 0, sizeof(ri->i_ext));
@@ -622,9 +690,12 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
i_log_cluster_size)) {
ri->i_compr_blocks =
cpu_to_le64(F2FS_I(inode)->i_compr_blocks);
cpu_to_le64(atomic_read(
&F2FS_I(inode)->i_compr_blocks));
ri->i_compress_algorithm =
F2FS_I(inode)->i_compress_algorithm;
ri->i_compress_flag =
cpu_to_le16(F2FS_I(inode)->i_compress_flag);
ri->i_log_cluster_size =
F2FS_I(inode)->i_log_cluster_size;
}
@@ -634,13 +705,9 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
/* deleted inode */
if (inode->i_nlink == 0)
clear_inline_node(node_page);
F2FS_I(inode)->i_disk_time[0] = inode->i_atime;
F2FS_I(inode)->i_disk_time[1] = inode->i_ctime;
F2FS_I(inode)->i_disk_time[2] = inode->i_mtime;
F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime;
clear_page_private_inline(node_page);
init_idisk_time(inode);
#ifdef CONFIG_F2FS_CHECK_FS
f2fs_inode_chksum_set(F2FS_I_SB(inode), node_page);
#endif
@@ -654,11 +721,13 @@ void f2fs_update_inode_page(struct inode *inode)
node_page = f2fs_get_node_page(sbi, inode->i_ino);
if (IS_ERR(node_page)) {
int err = PTR_ERR(node_page);
if (err == -ENOMEM) {
cond_resched();
goto retry;
} else if (err != -ENOENT) {
f2fs_stop_checkpoint(sbi, false);
f2fs_stop_checkpoint(sbi, false,
STOP_CP_REASON_UPDATE_INODE);
}
return;
}
@@ -686,7 +755,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
/*
* We need to balance fs here to prevent from producing dirty node pages
* during the urgent cleaning time when runing out of free sections.
* during the urgent cleaning time when running out of free sections.
*/
f2fs_update_inode_page(inode);
if (wbc && wbc->nr_to_write)
@@ -703,15 +772,18 @@ void f2fs_evict_inode(struct inode *inode)
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
int err = 0;
/* some remained atomic pages should discarded */
if (f2fs_is_atomic_file(inode))
f2fs_drop_inmem_pages(inode);
f2fs_abort_atomic_write(inode, true);
trace_f2fs_evict_inode(inode);
truncate_inode_pages_final(&inode->i_data);
if ((inode->i_nlink || is_bad_inode(inode)) &&
test_opt(sbi, COMPRESS_CACHE) && f2fs_compressed_file(inode))
f2fs_invalidate_compress_pages(sbi, inode->i_ino);
if (inode->i_ino == F2FS_NODE_INO(sbi) ||
inode->i_ino == F2FS_META_INO(sbi))
inode->i_ino == F2FS_META_INO(sbi) ||
inode->i_ino == F2FS_COMPRESS_INO(sbi))
goto out_clear;
f2fs_bug_on(sbi, get_dirty_pages(inode));
@@ -722,7 +794,7 @@ void f2fs_evict_inode(struct inode *inode)
if (inode->i_nlink || is_bad_inode(inode))
goto no_delete;
err = dquot_initialize(inode);
err = f2fs_dquot_initialize(inode);
if (err) {
err = 0;
set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
@@ -732,7 +804,8 @@ void f2fs_evict_inode(struct inode *inode)
f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO);
sb_start_intwrite(inode->i_sb);
if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING))
sb_start_intwrite(inode->i_sb);
set_inode_flag(inode, FI_NO_ALLOC);
i_size_write(inode, 0);
retry:
@@ -748,8 +821,22 @@ void f2fs_evict_inode(struct inode *inode)
f2fs_lock_op(sbi);
err = f2fs_remove_inode_page(inode);
f2fs_unlock_op(sbi);
if (err == -ENOENT)
if (err == -ENOENT) {
err = 0;
/*
* in a fuzzed image, another node may have the same
* block address as the inode's; if it was truncated
* previously, truncation of the inode node will fail.
*/
if (is_inode_flag_set(inode, FI_DIRTY_INODE)) {
f2fs_warn(F2FS_I_SB(inode),
"f2fs_evict_inode: inconsistent node id, ino:%lu",
inode->i_ino);
f2fs_inode_synced(inode);
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
}
}
/* give more chances, if ENOMEM case */
@@ -763,7 +850,8 @@ void f2fs_evict_inode(struct inode *inode)
if (dquot_initialize_needed(inode))
set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
}
sb_end_intwrite(inode->i_sb);
if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING))
sb_end_intwrite(inode->i_sb);
no_delete:
dquot_drop(inode);
@@ -771,7 +859,8 @@ void f2fs_evict_inode(struct inode *inode)
stat_dec_inline_dir(inode);
stat_dec_inline_inode(inode);
stat_dec_compr_inode(inode);
stat_sub_compr_blocks(inode, F2FS_I(inode)->i_compr_blocks);
stat_sub_compr_blocks(inode,
atomic_read(&F2FS_I(inode)->i_compr_blocks));
if (likely(!f2fs_cp_error(sbi) &&
!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
@@ -835,9 +924,10 @@ void f2fs_handle_failed_inode(struct inode *inode)
* so we can prevent losing this orphan when encountering checkpoint
* and a following sudden power-off.
*/
err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
set_inode_flag(inode, FI_FREE_NID);
f2fs_warn(sbi, "May loss orphan inode, run fsck to fix.");
goto out;
}

fs/f2fs/iostat.c Normal file

@@ -0,0 +1,287 @@
// SPDX-License-Identifier: GPL-2.0
/*
* f2fs iostat support
*
* Copyright 2021 Google LLC
* Author: Daeho Jeong <daehojeong@google.com>
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/seq_file.h>
#include "f2fs.h"
#include "iostat.h"
#include <trace/events/f2fs.h>
#define NUM_PREALLOC_IOSTAT_CTXS 128
static struct kmem_cache *bio_iostat_ctx_cache;
static mempool_t *bio_iostat_ctx_pool;
int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
time64_t now = ktime_get_real_seconds();
if (!sbi->iostat_enable)
return 0;
seq_printf(seq, "time: %-16llu\n", now);
/* print app write IOs */
seq_puts(seq, "[WRITE]\n");
seq_printf(seq, "app buffered: %-16llu\n",
sbi->rw_iostat[APP_BUFFERED_IO]);
seq_printf(seq, "app direct: %-16llu\n",
sbi->rw_iostat[APP_DIRECT_IO]);
seq_printf(seq, "app mapped: %-16llu\n",
sbi->rw_iostat[APP_MAPPED_IO]);
/* print fs write IOs */
seq_printf(seq, "fs data: %-16llu\n",
sbi->rw_iostat[FS_DATA_IO]);
seq_printf(seq, "fs node: %-16llu\n",
sbi->rw_iostat[FS_NODE_IO]);
seq_printf(seq, "fs meta: %-16llu\n",
sbi->rw_iostat[FS_META_IO]);
seq_printf(seq, "fs gc data: %-16llu\n",
sbi->rw_iostat[FS_GC_DATA_IO]);
seq_printf(seq, "fs gc node: %-16llu\n",
sbi->rw_iostat[FS_GC_NODE_IO]);
seq_printf(seq, "fs cp data: %-16llu\n",
sbi->rw_iostat[FS_CP_DATA_IO]);
seq_printf(seq, "fs cp node: %-16llu\n",
sbi->rw_iostat[FS_CP_NODE_IO]);
seq_printf(seq, "fs cp meta: %-16llu\n",
sbi->rw_iostat[FS_CP_META_IO]);
/* print app read IOs */
seq_puts(seq, "[READ]\n");
seq_printf(seq, "app buffered: %-16llu\n",
sbi->rw_iostat[APP_BUFFERED_READ_IO]);
seq_printf(seq, "app direct: %-16llu\n",
sbi->rw_iostat[APP_DIRECT_READ_IO]);
seq_printf(seq, "app mapped: %-16llu\n",
sbi->rw_iostat[APP_MAPPED_READ_IO]);
/* print fs read IOs */
seq_printf(seq, "fs data: %-16llu\n",
sbi->rw_iostat[FS_DATA_READ_IO]);
seq_printf(seq, "fs gc data: %-16llu\n",
sbi->rw_iostat[FS_GDATA_READ_IO]);
seq_printf(seq, "fs compr_data: %-16llu\n",
sbi->rw_iostat[FS_CDATA_READ_IO]);
seq_printf(seq, "fs node: %-16llu\n",
sbi->rw_iostat[FS_NODE_READ_IO]);
seq_printf(seq, "fs meta: %-16llu\n",
sbi->rw_iostat[FS_META_READ_IO]);
/* print other IOs */
seq_puts(seq, "[OTHER]\n");
seq_printf(seq, "fs discard: %-16llu\n",
sbi->rw_iostat[FS_DISCARD]);
return 0;
}
static inline void __record_iostat_latency(struct f2fs_sb_info *sbi)
{
int io, idx = 0;
unsigned int cnt;
struct f2fs_iostat_latency iostat_lat[MAX_IO_TYPE][NR_PAGE_TYPE];
struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
spin_lock_irq(&sbi->iostat_lat_lock);
for (idx = 0; idx < MAX_IO_TYPE; idx++) {
for (io = 0; io < NR_PAGE_TYPE; io++) {
cnt = io_lat->bio_cnt[idx][io];
iostat_lat[idx][io].peak_lat =
jiffies_to_msecs(io_lat->peak_lat[idx][io]);
iostat_lat[idx][io].cnt = cnt;
iostat_lat[idx][io].avg_lat = cnt ?
jiffies_to_msecs(io_lat->sum_lat[idx][io]) / cnt : 0;
io_lat->sum_lat[idx][io] = 0;
io_lat->peak_lat[idx][io] = 0;
io_lat->bio_cnt[idx][io] = 0;
}
}
spin_unlock_irq(&sbi->iostat_lat_lock);
trace_f2fs_iostat_latency(sbi, iostat_lat);
}
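The latency report itself is simple bookkeeping: latencies accumulate in jiffies per (io-type, page-type) bucket, and each flush emits peak and sum/count average before zeroing the window. A userspace sketch of one bucket's flush (jiffies_to_msecs() replaced by an assumed 1 ms tick):

#include <stdio.h>

struct lat_bucket {
        unsigned long sum_lat;  /* summed latency, in ticks */
        unsigned long peak_lat; /* worst single latency, in ticks */
        unsigned int bio_cnt;   /* completed bios in this window */
};

/* Flush one bucket: report peak/avg in ms (assuming 1 tick == 1 ms as a
 * stand-in for jiffies_to_msecs), guarding the zero-count case, then reset. */
static void flush_bucket(struct lat_bucket *b)
{
        unsigned long avg = b->bio_cnt ? b->sum_lat / b->bio_cnt : 0;

        printf("peak=%lums avg=%lums cnt=%u\n", b->peak_lat, avg, b->bio_cnt);
        b->sum_lat = b->peak_lat = 0;
        b->bio_cnt = 0;
}

int main(void)
{
        struct lat_bucket b = { .sum_lat = 90, .peak_lat = 40, .bio_cnt = 3 };

        flush_bucket(&b);       /* peak=40ms avg=30ms cnt=3 */
        return 0;
}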
static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi)
{
unsigned long long iostat_diff[NR_IO_TYPE];
int i;
if (time_is_after_jiffies(sbi->iostat_next_period))
return;
/* Need double check under the lock */
spin_lock(&sbi->iostat_lock);
if (time_is_after_jiffies(sbi->iostat_next_period)) {
spin_unlock(&sbi->iostat_lock);
return;
}
sbi->iostat_next_period = jiffies +
msecs_to_jiffies(sbi->iostat_period_ms);
for (i = 0; i < NR_IO_TYPE; i++) {
iostat_diff[i] = sbi->rw_iostat[i] -
sbi->prev_rw_iostat[i];
sbi->prev_rw_iostat[i] = sbi->rw_iostat[i];
}
spin_unlock(&sbi->iostat_lock);
trace_f2fs_iostat(sbi, iostat_diff);
__record_iostat_latency(sbi);
}
void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
{
struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
int i;
spin_lock(&sbi->iostat_lock);
for (i = 0; i < NR_IO_TYPE; i++) {
sbi->rw_iostat[i] = 0;
sbi->prev_rw_iostat[i] = 0;
}
spin_unlock(&sbi->iostat_lock);
spin_lock_irq(&sbi->iostat_lat_lock);
memset(io_lat, 0, sizeof(struct iostat_lat_info));
spin_unlock_irq(&sbi->iostat_lat_lock);
}
void f2fs_update_iostat(struct f2fs_sb_info *sbi,
enum iostat_type type, unsigned long long io_bytes)
{
if (!sbi->iostat_enable)
return;
spin_lock(&sbi->iostat_lock);
sbi->rw_iostat[type] += io_bytes;
if (type == APP_WRITE_IO || type == APP_DIRECT_IO)
sbi->rw_iostat[APP_BUFFERED_IO] =
sbi->rw_iostat[APP_WRITE_IO] -
sbi->rw_iostat[APP_DIRECT_IO];
if (type == APP_READ_IO || type == APP_DIRECT_READ_IO)
sbi->rw_iostat[APP_BUFFERED_READ_IO] =
sbi->rw_iostat[APP_READ_IO] -
sbi->rw_iostat[APP_DIRECT_READ_IO];
spin_unlock(&sbi->iostat_lock);
f2fs_record_iostat(sbi);
}
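Note that the buffered counters are derived rather than sampled: buffered = total - direct, recomputed under iostat_lock whenever either input changes. A trivial illustration with hypothetical byte counts:

#include <stdio.h>

int main(void)
{
        unsigned long long app_write_io = 8192;   /* all app write bytes */
        unsigned long long app_direct_io = 4096;  /* O_DIRECT write bytes */

        /* APP_BUFFERED_IO is maintained as the difference of the two */
        unsigned long long app_buffered_io = app_write_io - app_direct_io;

        printf("buffered: %llu\n", app_buffered_io);    /* 4096 */
        return 0;
}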
static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
int rw, bool is_sync)
{
unsigned long ts_diff;
unsigned int iotype = iostat_ctx->type;
unsigned long flags;
struct f2fs_sb_info *sbi = iostat_ctx->sbi;
struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
int idx;
if (!sbi->iostat_enable)
return;
ts_diff = jiffies - iostat_ctx->submit_ts;
if (iotype >= META_FLUSH)
iotype = META;
if (rw == 0) {
idx = READ_IO;
} else {
if (is_sync)
idx = WRITE_SYNC_IO;
else
idx = WRITE_ASYNC_IO;
}
spin_lock_irqsave(&sbi->iostat_lat_lock, flags);
io_lat->sum_lat[idx][iotype] += ts_diff;
io_lat->bio_cnt[idx][iotype]++;
if (ts_diff > io_lat->peak_lat[idx][iotype])
io_lat->peak_lat[idx][iotype] = ts_diff;
spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
}
void iostat_update_and_unbind_ctx(struct bio *bio, int rw)
{
struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
bool is_sync = bio->bi_opf & REQ_SYNC;
if (rw == 0)
bio->bi_private = iostat_ctx->post_read_ctx;
else
bio->bi_private = iostat_ctx->sbi;
__update_iostat_latency(iostat_ctx, rw, is_sync);
mempool_free(iostat_ctx, bio_iostat_ctx_pool);
}
void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
struct bio *bio, struct bio_post_read_ctx *ctx)
{
struct bio_iostat_ctx *iostat_ctx;
/* Due to the mempool, this never fails. */
iostat_ctx = mempool_alloc(bio_iostat_ctx_pool, GFP_NOFS);
iostat_ctx->sbi = sbi;
iostat_ctx->submit_ts = 0;
iostat_ctx->type = 0;
iostat_ctx->post_read_ctx = ctx;
bio->bi_private = iostat_ctx;
}
int __init f2fs_init_iostat_processing(void)
{
bio_iostat_ctx_cache =
kmem_cache_create("f2fs_bio_iostat_ctx",
sizeof(struct bio_iostat_ctx), 0, 0, NULL);
if (!bio_iostat_ctx_cache)
goto fail;
bio_iostat_ctx_pool =
mempool_create_slab_pool(NUM_PREALLOC_IOSTAT_CTXS,
bio_iostat_ctx_cache);
if (!bio_iostat_ctx_pool)
goto fail_free_cache;
return 0;
fail_free_cache:
kmem_cache_destroy(bio_iostat_ctx_cache);
fail:
return -ENOMEM;
}
void f2fs_destroy_iostat_processing(void)
{
mempool_destroy(bio_iostat_ctx_pool);
kmem_cache_destroy(bio_iostat_ctx_cache);
}
int f2fs_init_iostat(struct f2fs_sb_info *sbi)
{
/* init iostat info */
spin_lock_init(&sbi->iostat_lock);
spin_lock_init(&sbi->iostat_lat_lock);
sbi->iostat_enable = false;
sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS;
sbi->iostat_io_lat = f2fs_kzalloc(sbi, sizeof(struct iostat_lat_info),
GFP_KERNEL);
if (!sbi->iostat_io_lat)
return -ENOMEM;
return 0;
}
void f2fs_destroy_iostat(struct f2fs_sb_info *sbi)
{
kfree(sbi->iostat_io_lat);
}

fs/f2fs/iostat.h Normal file

@@ -0,0 +1,84 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright 2021 Google LLC
* Author: Daeho Jeong <daehojeong@google.com>
*/
#ifndef __F2FS_IOSTAT_H__
#define __F2FS_IOSTAT_H__
struct bio_post_read_ctx;
#ifdef CONFIG_F2FS_IOSTAT
#define DEFAULT_IOSTAT_PERIOD_MS 3000
#define MIN_IOSTAT_PERIOD_MS 100
/* maximum period of iostat tracing is 1 day */
#define MAX_IOSTAT_PERIOD_MS 8640000
enum {
READ_IO,
WRITE_SYNC_IO,
WRITE_ASYNC_IO,
MAX_IO_TYPE,
};
struct iostat_lat_info {
unsigned long sum_lat[MAX_IO_TYPE][NR_PAGE_TYPE]; /* sum of io latencies */
unsigned long peak_lat[MAX_IO_TYPE][NR_PAGE_TYPE]; /* peak io latency */
unsigned int bio_cnt[MAX_IO_TYPE][NR_PAGE_TYPE]; /* bio count */
};
extern int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
void *offset);
extern void f2fs_reset_iostat(struct f2fs_sb_info *sbi);
extern void f2fs_update_iostat(struct f2fs_sb_info *sbi,
enum iostat_type type, unsigned long long io_bytes);
struct bio_iostat_ctx {
struct f2fs_sb_info *sbi;
unsigned long submit_ts;
enum page_type type;
struct bio_post_read_ctx *post_read_ctx;
};
static inline void iostat_update_submit_ctx(struct bio *bio,
enum page_type type)
{
struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
iostat_ctx->submit_ts = jiffies;
iostat_ctx->type = type;
}
static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
{
struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
return iostat_ctx->post_read_ctx;
}
extern void iostat_update_and_unbind_ctx(struct bio *bio, int rw);
extern void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
struct bio *bio, struct bio_post_read_ctx *ctx);
extern int f2fs_init_iostat_processing(void);
extern void f2fs_destroy_iostat_processing(void);
extern int f2fs_init_iostat(struct f2fs_sb_info *sbi);
extern void f2fs_destroy_iostat(struct f2fs_sb_info *sbi);
#else
static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
enum iostat_type type, unsigned long long io_bytes) {}
static inline void iostat_update_and_unbind_ctx(struct bio *bio, int rw) {}
static inline void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
struct bio *bio, struct bio_post_read_ctx *ctx) {}
static inline void iostat_update_submit_ctx(struct bio *bio,
enum page_type type) {}
static inline struct bio_post_read_ctx *get_post_read_ctx(struct bio *bio)
{
return bio->bi_private;
}
static inline int f2fs_init_iostat_processing(void) { return 0; }
static inline void f2fs_destroy_iostat_processing(void) {}
static inline int f2fs_init_iostat(struct f2fs_sb_info *sbi) { return 0; }
static inline void f2fs_destroy_iostat(struct f2fs_sb_info *sbi) {}
#endif
#endif /* __F2FS_IOSTAT_H__ */
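The subtle part of this interface is the bi_private juggling: at allocation the iostat context displaces whatever bi_private held (the post-read context for reads, the sbi for writes), and iostat_update_and_unbind_ctx() restores it at completion before freeing the wrapper. A generic sketch of that wrap/unwrap lifecycle (plain malloc instead of the mempool):

#include <stdio.h>
#include <stdlib.h>

struct iostat_ctx {
        void *inner;            /* the pointer we displaced */
        unsigned long submit_ts;
};

/* Wrap: displace *slot with a context that remembers the old value. */
static void bind_ctx(void **slot, unsigned long now)
{
        struct iostat_ctx *ctx = malloc(sizeof(*ctx));

        ctx->inner = *slot;
        ctx->submit_ts = now;
        *slot = ctx;
}

/* Unwrap: account the latency, restore the displaced pointer, free. */
static void unbind_ctx(void **slot, unsigned long now)
{
        struct iostat_ctx *ctx = *slot;

        printf("latency: %lu ticks\n", now - ctx->submit_ts);
        *slot = ctx->inner;
        free(ctx);
}

int main(void)
{
        void *bi_private = "post_read_ctx";     /* whatever was there */

        bind_ctx(&bi_private, 100);
        unbind_ctx(&bi_private, 130);           /* latency: 30 ticks */
        printf("restored: %s\n", (char *)bi_private);
        return 0;
}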


@@ -22,133 +22,8 @@
#include "acl.h"
#include <trace/events/f2fs.h>
static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
nid_t ino;
struct inode *inode;
bool nid_free = false;
int xattr_size = 0;
int err;
inode = new_inode(dir->i_sb);
if (!inode)
return ERR_PTR(-ENOMEM);
f2fs_lock_op(sbi);
if (!f2fs_alloc_nid(sbi, &ino)) {
f2fs_unlock_op(sbi);
err = -ENOSPC;
goto fail;
}
f2fs_unlock_op(sbi);
nid_free = true;
inode_init_owner(inode, dir, mode);
inode->i_ino = ino;
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
F2FS_I(inode)->i_crtime = inode->i_mtime;
inode->i_generation = prandom_u32();
if (S_ISDIR(inode->i_mode))
F2FS_I(inode)->i_current_depth = 1;
err = insert_inode_locked(inode);
if (err) {
err = -EINVAL;
goto fail;
}
if (f2fs_sb_has_project_quota(sbi) &&
(F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL))
F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid;
else
F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns,
F2FS_DEF_PROJID);
err = dquot_initialize(inode);
if (err)
goto fail_drop;
set_inode_flag(inode, FI_NEW_INODE);
if (f2fs_may_encrypt(dir, inode))
f2fs_set_encrypted_inode(inode);
if (f2fs_sb_has_extra_attr(sbi)) {
set_inode_flag(inode, FI_EXTRA_ATTR);
F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE;
}
if (test_opt(sbi, INLINE_XATTR))
set_inode_flag(inode, FI_INLINE_XATTR);
if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode))
set_inode_flag(inode, FI_INLINE_DATA);
if (f2fs_may_inline_dentry(inode))
set_inode_flag(inode, FI_INLINE_DENTRY);
if (f2fs_sb_has_flexible_inline_xattr(sbi)) {
f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode));
if (f2fs_has_inline_xattr(inode))
xattr_size = F2FS_OPTION(sbi).inline_xattr_size;
/* Otherwise, will be 0 */
} else if (f2fs_has_inline_xattr(inode) ||
f2fs_has_inline_dentry(inode)) {
xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
}
F2FS_I(inode)->i_inline_xattr_size = xattr_size;
f2fs_init_extent_tree(inode, NULL);
stat_inc_inline_xattr(inode);
stat_inc_inline_inode(inode);
stat_inc_inline_dir(inode);
F2FS_I(inode)->i_flags =
f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED);
if (S_ISDIR(inode->i_mode))
F2FS_I(inode)->i_flags |= F2FS_INDEX_FL;
if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL)
set_inode_flag(inode, FI_PROJ_INHERIT);
if (f2fs_sb_has_compression(sbi)) {
/* Inherit the compression flag in directory */
if ((F2FS_I(dir)->i_flags & F2FS_COMPR_FL) &&
f2fs_may_compress(inode))
set_compress_context(inode);
}
f2fs_set_inode_flags(inode);
trace_f2fs_new_inode(inode, 0);
return inode;
fail:
trace_f2fs_new_inode(inode, err);
make_bad_inode(inode);
if (nid_free)
set_inode_flag(inode, FI_FREE_NID);
iput(inode);
return ERR_PTR(err);
fail_drop:
trace_f2fs_new_inode(inode, err);
dquot_drop(inode);
inode->i_flags |= S_NOQUOTA;
if (nid_free)
set_inode_flag(inode, FI_FREE_NID);
clear_nlink(inode);
unlock_new_inode(inode);
iput(inode);
return ERR_PTR(err);
}
static inline int is_extension_exist(const unsigned char *s, const char *sub)
static inline int is_extension_exist(const unsigned char *s, const char *sub,
bool tmp_ext)
{
size_t slen = strlen(s);
size_t sublen = strlen(sub);
@@ -164,6 +39,13 @@ static inline int is_extension_exist(const unsigned char *s, const char *sub)
if (slen < sublen + 2)
return 0;
if (!tmp_ext) {
/* file has no temp extension */
if (s[slen - sublen - 1] != '.')
return 0;
return !strncasecmp(s + slen - sublen, sub, sublen);
}
for (i = 1; i < slen - sublen; i++) {
if (s[i] != '.')
continue;
@@ -174,36 +56,6 @@ static inline int is_extension_exist(const unsigned char *s, const char *sub)
return 0;
}
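With the new tmp_ext flag the helper has two modes: a strict trailing-".ext" match used for the compression lists, and the older anywhere-after-a-dot scan kept for temperature extensions so that names like "video.mp4.tmp" still match "mp4". A simplified standalone version of the strict branch:

#include <stdio.h>
#include <string.h>
#include <strings.h>    /* strncasecmp */

/* Strict mode (tmp_ext == false): the name must literally end in ".sub". */
static int ends_with_ext(const char *s, const char *sub)
{
        size_t slen = strlen(s), sublen = strlen(sub);

        if (slen < sublen + 2 || s[slen - sublen - 1] != '.')
                return 0;
        return !strncasecmp(s + slen - sublen, sub, sublen);
}

int main(void)
{
        /* strict match: 1 */
        printf("%d\n", ends_with_ext("video.mp4", "mp4"));
        /* 0 here; only the loose tmp_ext scan would match this */
        printf("%d\n", ends_with_ext("video.mp4.tmp", "mp4"));
        return 0;
}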
/*
* Set file's temperature for hot/cold data separation
*/
static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode,
const unsigned char *name)
{
__u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list;
int i, cold_count, hot_count;
down_read(&sbi->sb_lock);
cold_count = le32_to_cpu(sbi->raw_super->extension_count);
hot_count = sbi->raw_super->hot_ext_count;
for (i = 0; i < cold_count + hot_count; i++) {
if (is_extension_exist(name, extlist[i]))
break;
}
up_read(&sbi->sb_lock);
if (i == cold_count + hot_count)
return;
if (i < cold_count)
file_set_cold(inode);
else
file_set_hot(inode);
}
int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
bool hot, bool set)
{
@@ -270,45 +122,211 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
return 0;
}
static void set_compress_inode(struct f2fs_sb_info *sbi, struct inode *inode,
const unsigned char *name)
static void set_compress_new_inode(struct f2fs_sb_info *sbi, struct inode *dir,
struct inode *inode, const unsigned char *name)
{
__u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list;
unsigned char (*ext)[F2FS_EXTENSION_LEN];
unsigned int ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt;
unsigned char (*noext)[F2FS_EXTENSION_LEN] =
F2FS_OPTION(sbi).noextensions;
unsigned char (*ext)[F2FS_EXTENSION_LEN] = F2FS_OPTION(sbi).extensions;
unsigned char ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt;
unsigned char noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt;
int i, cold_count, hot_count;
if (!f2fs_sb_has_compression(sbi) ||
is_inode_flag_set(inode, FI_COMPRESSED_FILE) ||
F2FS_I(inode)->i_flags & F2FS_NOCOMP_FL ||
!f2fs_may_compress(inode))
if (!f2fs_sb_has_compression(sbi))
return;
down_read(&sbi->sb_lock);
if (S_ISDIR(inode->i_mode))
goto inherit_comp;
/* This name comes only from normal files. */
if (!name)
return;
/* Don't compress hot files. */
f2fs_down_read(&sbi->sb_lock);
cold_count = le32_to_cpu(sbi->raw_super->extension_count);
hot_count = sbi->raw_super->hot_ext_count;
for (i = cold_count; i < cold_count + hot_count; i++)
if (is_extension_exist(name, extlist[i], false))
break;
f2fs_up_read(&sbi->sb_lock);
if (i < (cold_count + hot_count))
return;
for (i = cold_count; i < cold_count + hot_count; i++) {
if (is_extension_exist(name, extlist[i])) {
up_read(&sbi->sb_lock);
/* Don't compress unallowed extension. */
for (i = 0; i < noext_cnt; i++)
if (is_extension_exist(name, noext[i], false))
return;
/* Compress wanting extension. */
for (i = 0; i < ext_cnt; i++) {
if (is_extension_exist(name, ext[i], false)) {
set_compress_context(inode);
return;
}
}
up_read(&sbi->sb_lock);
ext = F2FS_OPTION(sbi).extensions;
for (i = 0; i < ext_cnt; i++) {
if (!is_extension_exist(name, ext[i]))
continue;
inherit_comp:
/* Inherit the {no-}compression flag in directory */
if (F2FS_I(dir)->i_flags & F2FS_NOCOMP_FL) {
F2FS_I(inode)->i_flags |= F2FS_NOCOMP_FL;
f2fs_mark_inode_dirty_sync(inode, true);
} else if (F2FS_I(dir)->i_flags & F2FS_COMPR_FL) {
set_compress_context(inode);
return;
}
}
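The lookup order in set_compress_new_inode() is deliberate: hot-file extensions veto compression, then the nocompress list, then the compress list, and only then (or for directories) does the {no-}compression flag inherit from the parent. A condensed sketch of that precedence (list contents and the strstr() match are illustrative stand-ins):

#include <stdio.h>
#include <string.h>

static int in_list(const char *name, const char *const *list, int n)
{
        for (int i = 0; i < n; i++)
                if (strstr(name, list[i]))      /* crude stand-in match */
                        return 1;
        return 0;
}

static const char *compress_decision(const char *name)
{
        static const char *const hot[]   = { ".db" };
        static const char *const noext[] = { ".jpg" };
        static const char *const ext[]   = { ".log" };

        if (in_list(name, hot, 1))
                return "hot file: never compress";
        if (in_list(name, noext, 1))
                return "nocompress extension: skip";
        if (in_list(name, ext, 1))
                return "compress extension: set_compress_context()";
        return "no match: maybe inherit from directory";
}

int main(void)
{
        printf("%s\n", compress_decision("app.log"));
        printf("%s\n", compress_decision("cache.db"));
        return 0;
}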
/*
* Set file's temperature for hot/cold data separation
*/
static void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode,
const unsigned char *name)
{
__u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list;
int i, cold_count, hot_count;
f2fs_down_read(&sbi->sb_lock);
cold_count = le32_to_cpu(sbi->raw_super->extension_count);
hot_count = sbi->raw_super->hot_ext_count;
for (i = 0; i < cold_count + hot_count; i++)
if (is_extension_exist(name, extlist[i], true))
break;
f2fs_up_read(&sbi->sb_lock);
if (i == cold_count + hot_count)
return;
if (i < cold_count)
file_set_cold(inode);
else
file_set_hot(inode);
}
static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode,
const char *name)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
nid_t ino;
struct inode *inode;
bool nid_free = false;
int xattr_size = 0;
int err;
inode = new_inode(dir->i_sb);
if (!inode)
return ERR_PTR(-ENOMEM);
if (!f2fs_alloc_nid(sbi, &ino)) {
err = -ENOSPC;
goto fail;
}
nid_free = true;
inode_init_owner(inode, dir, mode);
inode->i_ino = ino;
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
F2FS_I(inode)->i_crtime = inode->i_mtime;
inode->i_generation = prandom_u32();
if (S_ISDIR(inode->i_mode))
F2FS_I(inode)->i_current_depth = 1;
err = insert_inode_locked(inode);
if (err) {
err = -EINVAL;
goto fail;
}
if (f2fs_sb_has_project_quota(sbi) &&
(F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL))
F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid;
else
F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns,
F2FS_DEF_PROJID);
err = f2fs_dquot_initialize(inode);
if (err)
goto fail_drop;
set_inode_flag(inode, FI_NEW_INODE);
if (f2fs_may_encrypt(dir, inode))
f2fs_set_encrypted_inode(inode);
if (f2fs_sb_has_extra_attr(sbi)) {
set_inode_flag(inode, FI_EXTRA_ATTR);
F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE;
}
if (test_opt(sbi, INLINE_XATTR))
set_inode_flag(inode, FI_INLINE_XATTR);
if (f2fs_may_inline_dentry(inode))
set_inode_flag(inode, FI_INLINE_DENTRY);
if (f2fs_sb_has_flexible_inline_xattr(sbi)) {
f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode));
if (f2fs_has_inline_xattr(inode))
xattr_size = F2FS_OPTION(sbi).inline_xattr_size;
/* Otherwise, will be 0 */
} else if (f2fs_has_inline_xattr(inode) ||
f2fs_has_inline_dentry(inode)) {
xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
}
F2FS_I(inode)->i_inline_xattr_size = xattr_size;
F2FS_I(inode)->i_flags =
f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED);
if (S_ISDIR(inode->i_mode))
F2FS_I(inode)->i_flags |= F2FS_INDEX_FL;
if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL)
set_inode_flag(inode, FI_PROJ_INHERIT);
/* Check compression first. */
set_compress_new_inode(sbi, dir, inode, name);
/* Should enable inline_data after compression set */
if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode))
set_inode_flag(inode, FI_INLINE_DATA);
if (name && !test_opt(sbi, DISABLE_EXT_IDENTIFY))
set_file_temperature(sbi, inode, name);
stat_inc_inline_xattr(inode);
stat_inc_inline_inode(inode);
stat_inc_inline_dir(inode);
f2fs_set_inode_flags(inode);
f2fs_init_extent_tree(inode);
trace_f2fs_new_inode(inode, 0);
return inode;
fail:
trace_f2fs_new_inode(inode, err);
make_bad_inode(inode);
if (nid_free)
set_inode_flag(inode, FI_FREE_NID);
iput(inode);
return ERR_PTR(err);
fail_drop:
trace_f2fs_new_inode(inode, err);
dquot_drop(inode);
inode->i_flags |= S_NOQUOTA;
if (nid_free)
set_inode_flag(inode, FI_FREE_NID);
clear_nlink(inode);
unlock_new_inode(inode);
iput(inode);
return ERR_PTR(err);
}
static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
bool excl)
{
@@ -322,19 +340,14 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
if (!f2fs_is_checkpoint_ready(sbi))
return -ENOSPC;
err = dquot_initialize(dir);
err = f2fs_dquot_initialize(dir);
if (err)
return err;
inode = f2fs_new_inode(dir, mode);
inode = f2fs_new_inode(dir, mode, dentry->d_name.name);
if (IS_ERR(inode))
return PTR_ERR(inode);
if (!test_opt(sbi, DISABLE_EXT_IDENTIFY))
set_file_temperature(sbi, inode, dentry->d_name.name);
set_compress_inode(sbi, inode, dentry->d_name.name);
inode->i_op = &f2fs_file_inode_operations;
inode->i_fop = &f2fs_file_operations;
inode->i_mapping->a_ops = &f2fs_dblock_aops;
@@ -381,7 +394,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
F2FS_I(old_dentry->d_inode)->i_projid)))
return -EXDEV;
err = dquot_initialize(dir);
err = f2fs_dquot_initialize(dir);
if (err)
return err;
@@ -414,6 +427,7 @@ struct dentry *f2fs_get_parent(struct dentry *child)
struct qstr dotdot = QSTR_INIT("..", 2);
struct page *page;
unsigned long ino = f2fs_inode_by_name(d_inode(child), &dotdot, &page);
if (!ino) {
if (IS_ERR(page))
return ERR_CAST(page);
@@ -437,7 +451,14 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino)
return 0;
}
err = dquot_initialize(dir);
if (!S_ISDIR(dir->i_mode)) {
f2fs_err(sbi, "inconsistent inode status, skip recovering inline_dots inode (ino:%lu, i_mode:%u, pino:%u)",
dir->i_ino, dir->i_mode, pino);
set_sbi_flag(sbi, SBI_NEED_FSCK);
return -ENOTDIR;
}
err = f2fs_dquot_initialize(dir);
if (err)
return err;
@@ -492,7 +513,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
}
err = f2fs_prepare_lookup(dir, dentry, &fname);
generic_set_encrypted_ci_d_ops(dir, dentry);
generic_set_encrypted_ci_d_ops(dentry);
if (err == -ENOENT)
goto out_splice;
if (err)
@@ -570,15 +591,17 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
trace_f2fs_unlink_enter(dir, dentry);
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
goto fail;
}
err = dquot_initialize(dir);
err = f2fs_dquot_initialize(dir);
if (err)
return err;
err = dquot_initialize(inode);
goto fail;
err = f2fs_dquot_initialize(inode);
if (err)
return err;
goto fail;
de = f2fs_find_entry(dir, &dentry->d_name, &page);
if (!de) {
@@ -597,18 +620,18 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
goto fail;
}
f2fs_delete_entry(de, page, dir, inode);
f2fs_unlock_op(sbi);
#ifdef CONFIG_UNICODE
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want to avoid
* invalidating the dentries here, alongside returning the
* negative dentries at f2fs_lookup(), when it is better
* supported by the VFS for the CI case.
*/
if (IS_CASEFOLDED(dir))
d_invalidate(dentry);
#endif
f2fs_unlock_op(sbi);
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
fail:
@@ -621,6 +644,7 @@ static const char *f2fs_get_link(struct dentry *dentry,
struct delayed_call *done)
{
const char *link = page_get_link(dentry, inode, done);
if (!IS_ERR(link) && !*link) {
/* this is broken symlink case */
do_delayed_call(done);
@@ -649,11 +673,11 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
if (err)
return err;
err = dquot_initialize(dir);
err = f2fs_dquot_initialize(dir);
if (err)
return err;
inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO, NULL);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -706,7 +730,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
f2fs_handle_failed_inode(inode);
out_free_encrypted_link:
if (disk_link.name != (unsigned char *)symname)
kvfree(disk_link.name);
kfree(disk_link.name);
return err;
}
@@ -719,11 +743,11 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
err = dquot_initialize(dir);
err = f2fs_dquot_initialize(dir);
if (err)
return err;
inode = f2fs_new_inode(dir, S_IFDIR | mode);
inode = f2fs_new_inode(dir, S_IFDIR | mode, NULL);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -758,6 +782,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
if (f2fs_empty_dir(inode))
return f2fs_unlink(dir, dentry);
return -ENOTEMPTY;
@@ -775,11 +800,11 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
if (!f2fs_is_checkpoint_ready(sbi))
return -ENOSPC;
err = dquot_initialize(dir);
err = f2fs_dquot_initialize(dir);
if (err)
return err;
inode = f2fs_new_inode(dir, mode);
inode = f2fs_new_inode(dir, mode, NULL);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -806,22 +831,23 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
return err;
}
static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
umode_t mode, struct inode **whiteout)
static int __f2fs_tmpfile(struct inode *dir,
struct dentry *dentry, umode_t mode, bool is_whiteout,
struct inode **new_inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct inode *inode;
int err;
err = dquot_initialize(dir);
err = f2fs_dquot_initialize(dir);
if (err)
return err;
inode = f2fs_new_inode(dir, mode);
inode = f2fs_new_inode(dir, mode, NULL);
if (IS_ERR(inode))
return PTR_ERR(inode);
if (whiteout) {
if (is_whiteout) {
init_special_inode(inode, inode->i_mode, WHITEOUT_DEV);
inode->i_op = &f2fs_special_inode_operations;
} else {
@@ -846,21 +872,25 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
f2fs_add_orphan_inode(inode);
f2fs_alloc_nid_done(sbi, inode->i_ino);
if (whiteout) {
if (is_whiteout) {
f2fs_i_links_write(inode, false);
spin_lock(&inode->i_lock);
inode->i_state |= I_LINKABLE;
spin_unlock(&inode->i_lock);
*whiteout = inode;
} else {
d_tmpfile(dentry, inode);
if (dentry)
d_tmpfile(dentry, inode);
else
f2fs_i_links_write(inode, false);
}
/* link_count was changed by d_tmpfile as well. */
f2fs_unlock_op(sbi);
unlock_new_inode(inode);
if (new_inode)
*new_inode = inode;
f2fs_balance_fs(sbi, true);
return 0;
@@ -880,7 +910,7 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
if (!f2fs_is_checkpoint_ready(sbi))
return -ENOSPC;
return __f2fs_tmpfile(dir, dentry, mode, NULL);
return __f2fs_tmpfile(dir, dentry, mode, false, NULL);
}
static int f2fs_create_whiteout(struct inode *dir, struct inode **whiteout)
@@ -888,7 +918,14 @@ static int f2fs_create_whiteout(struct inode *dir, struct inode **whiteout)
if (unlikely(f2fs_cp_error(F2FS_I_SB(dir))))
return -EIO;
return __f2fs_tmpfile(dir, NULL, S_IFCHR | WHITEOUT_MODE, whiteout);
return __f2fs_tmpfile(dir, NULL,
S_IFCHR | WHITEOUT_MODE, true, whiteout);
}
int f2fs_get_tmpfile(struct inode *dir,
struct inode **new_inode)
{
return __f2fs_tmpfile(dir, NULL, S_IFREG, false, new_inode);
}
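For context, the whiteout variant above is what backs RENAME_WHITEOUT from userspace (overlayfs is the usual caller). A minimal trigger, assuming glibc 2.28+ for the renameat2() wrapper:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>

int main(void)
{
	/* Leaves a whiteout (S_IFCHR, dev 0) at "old" after the rename. */
	if (renameat2(AT_FDCWD, "old", AT_FDCWD, "new", RENAME_WHITEOUT))
		perror("renameat2");
	return 0;
}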
static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
@@ -936,16 +973,16 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
return err;
}
err = dquot_initialize(old_dir);
err = f2fs_dquot_initialize(old_dir);
if (err)
goto out;
err = dquot_initialize(new_dir);
err = f2fs_dquot_initialize(new_dir);
if (err)
goto out;
if (new_inode) {
err = dquot_initialize(new_inode);
err = f2fs_dquot_initialize(new_inode);
if (err)
goto out;
}
@@ -994,11 +1031,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_page = NULL;
new_inode->i_ctime = current_time(new_inode);
down_write(&F2FS_I(new_inode)->i_sem);
f2fs_down_write(&F2FS_I(new_inode)->i_sem);
if (old_dir_entry)
f2fs_i_links_write(new_inode, false);
f2fs_i_links_write(new_inode, false);
up_write(&F2FS_I(new_inode)->i_sem);
f2fs_up_write(&F2FS_I(new_inode)->i_sem);
if (!new_inode->i_nlink)
f2fs_add_orphan_inode(new_inode);
@@ -1019,13 +1056,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_i_links_write(new_dir, true);
}
down_write(&F2FS_I(old_inode)->i_sem);
f2fs_down_write(&F2FS_I(old_inode)->i_sem);
if (!old_dir_entry || whiteout)
file_lost_pino(old_inode);
else
/* adjust dir's i_pino to pass fsck check */
f2fs_i_pino_write(old_inode, new_dir->i_ino);
up_write(&F2FS_I(old_inode)->i_sem);
f2fs_up_write(&F2FS_I(old_inode)->i_sem);
old_inode->i_ctime = current_time(old_inode);
f2fs_mark_inode_dirty_sync(old_inode, false);
@@ -1078,8 +1115,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
out_old:
f2fs_put_page(old_page, 0);
out:
if (whiteout)
iput(whiteout);
iput(whiteout);
return err;
}
@@ -1109,11 +1145,11 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
F2FS_I(new_dentry->d_inode)->i_projid)))
return -EXDEV;
err = dquot_initialize(old_dir);
err = f2fs_dquot_initialize(old_dir);
if (err)
goto out;
err = dquot_initialize(new_dir);
err = f2fs_dquot_initialize(new_dir);
if (err)
goto out;
@@ -1185,38 +1221,38 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
/* update directory entry info of old dir inode */
f2fs_set_link(old_dir, old_entry, old_page, new_inode);
down_write(&F2FS_I(old_inode)->i_sem);
f2fs_down_write(&F2FS_I(old_inode)->i_sem);
if (!old_dir_entry)
file_lost_pino(old_inode);
else
/* adjust dir's i_pino to pass fsck check */
f2fs_i_pino_write(old_inode, new_dir->i_ino);
up_write(&F2FS_I(old_inode)->i_sem);
f2fs_up_write(&F2FS_I(old_inode)->i_sem);
old_dir->i_ctime = current_time(old_dir);
if (old_nlink) {
down_write(&F2FS_I(old_dir)->i_sem);
f2fs_down_write(&F2FS_I(old_dir)->i_sem);
f2fs_i_links_write(old_dir, old_nlink > 0);
up_write(&F2FS_I(old_dir)->i_sem);
f2fs_up_write(&F2FS_I(old_dir)->i_sem);
}
f2fs_mark_inode_dirty_sync(old_dir, false);
/* update directory entry info of new dir inode */
f2fs_set_link(new_dir, new_entry, new_page, old_inode);
down_write(&F2FS_I(new_inode)->i_sem);
f2fs_down_write(&F2FS_I(new_inode)->i_sem);
if (!new_dir_entry)
file_lost_pino(new_inode);
else
/* adjust dir's i_pino to pass fsck check */
f2fs_i_pino_write(new_inode, old_dir->i_ino);
up_write(&F2FS_I(new_inode)->i_sem);
f2fs_up_write(&F2FS_I(new_inode)->i_sem);
new_dir->i_ctime = current_time(new_dir);
if (new_nlink) {
down_write(&F2FS_I(new_dir)->i_sem);
f2fs_down_write(&F2FS_I(new_dir)->i_sem);
f2fs_i_links_write(new_dir, new_nlink > 0);
up_write(&F2FS_I(new_dir)->i_sem);
f2fs_up_write(&F2FS_I(new_dir)->i_sem);
}
f2fs_mark_inode_dirty_sync(new_dir, false);
@@ -1303,7 +1339,7 @@ static int f2fs_encrypted_symlink_getattr(const struct path *path,
}
const struct inode_operations f2fs_encrypted_symlink_inode_operations = {
.get_link = f2fs_encrypted_get_link,
.getattr = f2fs_encrypted_symlink_getattr,
.setattr = f2fs_setattr,
.listxattr = f2fs_listxattr,
@@ -1329,7 +1365,7 @@ const struct inode_operations f2fs_dir_inode_operations = {
};
const struct inode_operations f2fs_symlink_inode_operations = {
.get_link = f2fs_get_link,
.getattr = f2fs_getattr,
.setattr = f2fs_setattr,
.listxattr = f2fs_listxattr,
@@ -1337,7 +1373,7 @@ const struct inode_operations f2fs_symlink_inode_operations = {
const struct inode_operations f2fs_special_inode_operations = {
.getattr = f2fs_getattr,
.setattr = f2fs_setattr,
.get_acl = f2fs_get_acl,
.set_acl = f2fs_set_acl,
.listxattr = f2fs_listxattr,

File diff suppressed because it is too large


@@ -31,6 +31,9 @@
/* control total # of nats */
#define DEF_NAT_CACHE_THRESHOLD 100000
/* control total # of node writes used for roll-forward recovery */
#define DEF_RF_NODE_BLOCKS 0
/* vector size for gang look-up from nat cache that consists of radix tree */
#define NATVEC_SIZE 64
#define SETVEC_SIZE 32
@@ -38,6 +41,9 @@
/* return value for read_node_page */
#define LOCKED_PAGE 1
/* check pinned file's alignment status of physical blocks */
#define FILE_NOT_ALIGNED 1
/* For flag in struct node_info */
enum {
IS_CHECKPOINTED, /* is it checkpointed before? */
@@ -126,18 +132,13 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne,
static inline bool excess_dirty_nats(struct f2fs_sb_info *sbi)
{
return NM_I(sbi)->dirty_nat_cnt >= NM_I(sbi)->max_nid *
return NM_I(sbi)->nat_cnt[DIRTY_NAT] >= NM_I(sbi)->max_nid *
NM_I(sbi)->dirty_nats_ratio / 100;
}
static inline bool excess_cached_nats(struct f2fs_sb_info *sbi)
{
return NM_I(sbi)->nat_cnt >= DEF_NAT_CACHE_THRESHOLD;
}
static inline bool excess_dirty_nodes(struct f2fs_sb_info *sbi)
{
return get_pages(sbi, F2FS_DIRTY_NODES) >= sbi->blocks_per_seg * 8;
return NM_I(sbi)->nat_cnt[TOTAL_NAT] >= DEF_NAT_CACHE_THRESHOLD;
}
enum mem_type {
@@ -145,9 +146,10 @@ enum mem_type {
NAT_ENTRIES, /* indicates the cached nat entry */
DIRTY_DENTS, /* indicates dirty dentry pages */
INO_ENTRIES, /* indicates inode entries */
EXTENT_CACHE, /* indicates extent cache */
INMEM_PAGES, /* indicates inmemory pages */
READ_EXTENT_CACHE, /* indicates read extent cache */
AGE_EXTENT_CACHE, /* indicates age extent cache */
DISCARD_CACHE, /* indicates memory of cached discard cmds */
COMPRESS_PAGE, /* indicates memory of cached compressed pages */
BASE_CHECK, /* check kernel status */
};
@@ -389,20 +391,6 @@ static inline nid_t get_nid(struct page *p, int off, bool i)
* - Mark cold node blocks in their node footer
* - Mark cold data pages in page cache
*/
static inline int is_cold_data(struct page *page)
{
return PageChecked(page);
}
static inline void set_cold_data(struct page *page)
{
SetPageChecked(page);
}
static inline void clear_cold_data(struct page *page)
{
ClearPageChecked(page);
}
static inline int is_node(struct page *page, int type)
{
@@ -414,21 +402,6 @@ static inline int is_node(struct page *page, int type)
#define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT)
#define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT)
static inline int is_inline_node(struct page *page)
{
return PageChecked(page);
}
static inline void set_inline_node(struct page *page)
{
SetPageChecked(page);
}
static inline void clear_inline_node(struct page *page)
{
ClearPageChecked(page);
}
static inline void set_cold_node(struct page *page, bool is_dir)
{
struct f2fs_node *rn = F2FS_NODE(page);


@@ -45,12 +45,20 @@
static struct kmem_cache *fsync_entry_slab;
#ifdef CONFIG_UNICODE
extern struct kmem_cache *f2fs_cf_name_slab;
#endif
bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi)
{
s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count);
if (sbi->last_valid_block_count + nalloc > sbi->user_block_count)
return false;
if (NM_I(sbi)->max_rf_node_blocks &&
percpu_counter_sum_positive(&sbi->rf_node_block_count) >=
NM_I(sbi)->max_rf_node_blocks)
return false;
return true;
}
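With DEF_RF_NODE_BLOCKS at 0 the second check is disabled by default; when a limit is configured, fsync-logged node writes are capped so recovery time stays bounded. A standalone model of the predicate, with assumed numbers:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Model of f2fs_space_for_roll_forward(); all values are assumptions. */
static bool space_for_roll_forward(uint64_t last_valid, int64_t nalloc,
				   uint64_t user_blocks,
				   uint64_t rf_node_blocks, uint64_t max_rf)
{
	if (last_valid + nalloc > user_blocks)
		return false;		/* would overflow the volume */
	if (max_rf && rf_node_blocks >= max_rf)
		return false;		/* roll-forward log already at its cap */
	return true;
}

int main(void)
{
	/* 0 (the default) disables the cap entirely. */
	printf("%d\n", space_for_roll_forward(1000, 50, 2000, 99999, 0));
	/* A configured cap of 4096 node blocks throttles further fsyncs. */
	printf("%d\n", space_for_roll_forward(1000, 50, 2000, 4096, 4096));
	return 0;
}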
@@ -77,7 +85,7 @@ static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
if (IS_ERR(inode))
return ERR_CAST(inode);
err = dquot_initialize(inode);
err = f2fs_dquot_initialize(inode);
if (err)
goto err_out;
@@ -87,7 +95,8 @@ static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
goto err_out;
}
entry = f2fs_kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
entry = f2fs_kmem_cache_alloc(fsync_entry_slab,
GFP_F2FS_ZERO, true, NULL);
entry->inode = inode;
list_add_tail(&entry->list, head);
@@ -145,7 +154,7 @@ static int init_recovered_filename(const struct inode *dir,
f2fs_hash_filename(dir, fname);
#ifdef CONFIG_UNICODE
/* Case-sensitive match is fine for recovery */
kfree(fname->cf_name.name);
kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name);
fname->cf_name.name = NULL;
#endif
} else {
@@ -198,7 +207,7 @@ static int recover_dentry(struct inode *inode, struct page *ipage,
goto out_put;
}
err = dquot_initialize(einode);
err = f2fs_dquot_initialize(einode);
if (err) {
iput(einode);
goto out_put;
@@ -337,6 +346,19 @@ static int recover_inode(struct inode *inode, struct page *page)
return 0;
}
static unsigned int adjust_por_ra_blocks(struct f2fs_sb_info *sbi,
unsigned int ra_blocks, unsigned int blkaddr,
unsigned int next_blkaddr)
{
if (blkaddr + 1 == next_blkaddr)
ra_blocks = min_t(unsigned int, RECOVERY_MAX_RA_BLOCKS,
ra_blocks * 2);
else if (next_blkaddr % sbi->blocks_per_seg)
ra_blocks = max_t(unsigned int, RECOVERY_MIN_RA_BLOCKS,
ra_blocks / 2);
return ra_blocks;
}
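The readahead window therefore doubles while the fsync chain is physically contiguous and halves when the next node lands elsewhere inside a segment. A standalone model of that behavior; the MIN/MAX constants and segment size here are assumptions (upstream ties the max to the bio vector limit).

#include <stdio.h>

#define RA_MIN 1
#define RA_MAX 256
#define BLKS_PER_SEG 512

static unsigned adjust(unsigned ra, unsigned blk, unsigned next)
{
	if (blk + 1 == next)
		ra = ra * 2 > RA_MAX ? RA_MAX : ra * 2;	/* contiguous chain */
	else if (next % BLKS_PER_SEG)
		ra = ra / 2 < RA_MIN ? RA_MIN : ra / 2;	/* seam inside a segment */
	return ra;
}

int main(void)
{
	unsigned ra = RA_MAX;			/* recovery starts at the cap */

	ra = adjust(ra, 4096, 4097);	/* stays at 256: already at the cap */
	ra = adjust(ra, 4097, 4200);	/* 128: jump within the segment */
	ra = adjust(ra, 4200, 4201);	/* 256: contiguous again, doubled */
	printf("%u\n", ra);
	return 0;
}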
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
bool check_only)
{
@@ -344,6 +366,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
struct page *page = NULL;
block_t blkaddr;
unsigned int loop_cnt = 0;
unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS;
unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg -
valid_user_blocks(sbi);
int err = 0;
@@ -418,11 +441,14 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
break;
}
ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr,
next_blkaddr_of_node(page));
/* check next segment */
blkaddr = next_blkaddr_of_node(page);
f2fs_put_page(page, 1);
f2fs_ra_meta_pages_cond(sbi, blkaddr);
f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks);
}
return err;
}
@@ -447,7 +473,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
struct dnode_of_data tdn = *dn;
nid_t ino, nid;
struct inode *inode;
unsigned int offset;
unsigned int offset, ofs_in_node, max_addrs;
block_t bidx;
int i;
@@ -458,6 +484,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
/* Get the previous summary */
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
struct curseg_info *curseg = CURSEG_I(sbi, i);
if (curseg->segno == segno) {
sum = curseg->sum_blk->entries[blkoff];
goto got_it;
@@ -473,15 +500,25 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
got_it:
/* Use the locked dnode page and inode */
nid = le32_to_cpu(sum.nid);
ofs_in_node = le16_to_cpu(sum.ofs_in_node);
max_addrs = ADDRS_PER_PAGE(dn->node_page, dn->inode);
if (ofs_in_node >= max_addrs) {
f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%lu, nid:%u, max:%u",
ofs_in_node, dn->inode->i_ino, nid, max_addrs);
f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUMMARY);
return -EFSCORRUPTED;
}
if (dn->inode->i_ino == nid) {
tdn.nid = nid;
if (!dn->inode_page_locked)
lock_page(dn->inode_page);
tdn.node_page = dn->inode_page;
tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
tdn.ofs_in_node = ofs_in_node;
goto truncate_out;
} else if (dn->nid == nid) {
tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
tdn.ofs_in_node = ofs_in_node;
goto truncate_out;
}
@@ -502,7 +539,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
if (IS_ERR(inode))
return PTR_ERR(inode);
ret = dquot_initialize(inode);
ret = f2fs_dquot_initialize(inode);
if (ret) {
iput(inode);
return ret;
@@ -589,7 +626,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true);
err = f2fs_get_node_info(sbi, dn.nid, &ni);
err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
if (err)
goto err;
@@ -600,6 +637,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
inode->i_ino, ofs_of_node(dn.node_page),
ofs_of_node(page));
err = -EFSCORRUPTED;
f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER);
goto err;
}
@@ -612,12 +650,14 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
if (__is_valid_data_blkaddr(src) &&
!f2fs_is_valid_blkaddr(sbi, src, META_POR)) {
err = -EFSCORRUPTED;
f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
goto err;
}
if (__is_valid_data_blkaddr(dest) &&
!f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
err = -EFSCORRUPTED;
f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
goto err;
}
@@ -682,6 +722,16 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
goto err;
}
if (f2fs_is_valid_blkaddr(sbi, dest,
DATA_GENERIC_ENHANCE_UPDATE)) {
f2fs_err(sbi, "Inconsistent dest blkaddr:%u, ino:%lu, ofs:%u",
dest, inode->i_ino, dn.ofs_in_node);
err = -EFSCORRUPTED;
f2fs_handle_error(sbi,
ERROR_INVALID_BLKADDR);
goto err;
}
/* write dummy data page */
f2fs_replace_block(sbi, &dn, src, dest,
ni.version, false, false);
@@ -709,6 +759,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
struct page *page = NULL;
int err = 0;
block_t blkaddr;
unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS;
/* get node pages in the current segment */
curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
@@ -720,8 +771,6 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
break;
f2fs_ra_meta_pages_cond(sbi, blkaddr);
page = f2fs_get_tmp_page(sbi, blkaddr);
if (IS_ERR(page)) {
err = PTR_ERR(page);
@@ -764,12 +813,17 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
if (entry->blkaddr == blkaddr)
list_move_tail(&entry->list, tmp_inode_list);
next:
ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr,
next_blkaddr_of_node(page));
/* check next segment */
blkaddr = next_blkaddr_of_node(page);
f2fs_put_page(page, 1);
f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks);
}
if (!err)
f2fs_allocate_new_segments(sbi, NO_CHECK_TYPE);
f2fs_allocate_new_segments(sbi);
return err;
}
@@ -791,25 +845,16 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
}
#ifdef CONFIG_QUOTA
/* Needed for iput() to work correctly and not trash data */
sbi->sb->s_flags |= SB_ACTIVE;
/* Turn on quotas so that they are updated correctly */
quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);
#endif
fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
sizeof(struct fsync_inode_entry));
if (!fsync_entry_slab) {
err = -ENOMEM;
goto out;
}
INIT_LIST_HEAD(&inode_list);
INIT_LIST_HEAD(&tmp_inode_list);
INIT_LIST_HEAD(&dir_list);
/* prevent checkpoint */
down_write(&sbi->cp_global_sem);
f2fs_down_write(&sbi->cp_global_sem);
/* step #1: find fsynced inode numbers */
err = find_fsync_dnodes(sbi, &inode_list, check_only);
@@ -827,10 +872,8 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
err = recover_data(sbi, &inode_list, &tmp_inode_list, &dir_list);
if (!err)
f2fs_bug_on(sbi, !list_empty(&inode_list));
else {
/* restore s_flags to let iput() trash data */
sbi->sb->s_flags = s_flags;
}
else
f2fs_bug_on(sbi, sbi->sb->s_flags & SB_ACTIVE);
skip:
destroy_fsync_dnodes(&inode_list, err);
destroy_fsync_dnodes(&tmp_inode_list, err);
@@ -845,8 +888,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
} else {
clear_sbi_flag(sbi, SBI_POR_DOING);
}
up_write(&sbi->cp_global_sem);
f2fs_up_write(&sbi->cp_global_sem);
/* let's drop all the directory inodes for clean checkpoint */
destroy_fsync_dnodes(&dir_list, err);
@@ -862,8 +904,6 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
}
}
kmem_cache_destroy(fsync_entry_slab);
out:
#ifdef CONFIG_QUOTA
/* Turn quotas off */
if (quota_enabled)
@@ -871,5 +911,17 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
#endif
sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */
return ret ? ret : err;
}
int __init f2fs_create_recovery_cache(void)
{
fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
sizeof(struct fsync_inode_entry));
return fsync_entry_slab ? 0 : -ENOMEM;
}
void f2fs_destroy_recovery_cache(void)
{
kmem_cache_destroy(fsync_entry_slab);
}

File diff suppressed because it is too large


@@ -16,13 +16,21 @@
#define DEF_MAX_RECLAIM_PREFREE_SEGMENTS 4096 /* 8GB in maximum */
#define F2FS_MIN_SEGMENTS 9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */
#define F2FS_MIN_META_SEGMENTS 8 /* SB + 2 (CP + SIT + NAT) + SSA */
/* L: Logical segment # in volume, R: Relative segment # in main area */
#define GET_L2R_SEGNO(free_i, segno) ((segno) - (free_i)->start_segno)
#define GET_R2L_SEGNO(free_i, segno) ((segno) + (free_i)->start_segno)
#define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA)
#define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE)
#define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
#define SE_PAGETYPE(se) ((IS_NODESEG((se)->type) ? NODE : DATA))
static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
unsigned short seg_type)
{
f2fs_bug_on(sbi, seg_type >= NR_PERSISTENT_LOG);
}
#define IS_HOT(t) ((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA)
#define IS_WARM(t) ((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA)
@@ -34,7 +42,9 @@
((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno))
#define IS_CURSEC(sbi, secno) \
(((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \
@@ -48,7 +58,11 @@
((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \
(sbi)->segs_per_sec) || \
((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
(sbi)->segs_per_sec)) \
(sbi)->segs_per_sec) || \
((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno / \
(sbi)->segs_per_sec) || \
((secno) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno / \
(sbi)->segs_per_sec))
#define MAIN_BLKADDR(sbi) \
(SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \
@@ -87,6 +101,9 @@
GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
#define BLKS_PER_SEC(sbi) \
((sbi)->segs_per_sec * (sbi)->blocks_per_seg)
#define CAP_BLKS_PER_SEC(sbi) \
((sbi)->segs_per_sec * (sbi)->blocks_per_seg - \
(sbi)->unusable_blocks_per_sec)
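As a quick sanity check on the new macro: with segs_per_sec = 2, blocks_per_seg = 512 and unusable_blocks_per_sec = 100 (a zoned device whose zone capacity is below its zone size), BLKS_PER_SEC() is 1024 but CAP_BLKS_PER_SEC() is 924; the has_not_enough_free_secs() hunk further down divides dirty counts by the capped figure so reservations track usable space rather than raw size.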
#define GET_SEC_FROM_SEG(sbi, segno) \
(((segno) == -1) ? -1: (segno) / (sbi)->segs_per_sec)
#define GET_SEG_FROM_SEC(sbi, secno) \
@@ -129,23 +146,28 @@ enum {
};
/*
* In the victim_sel_policy->alloc_mode, there are two block allocation modes.
* In the victim_sel_policy->alloc_mode, there are three block allocation modes.
* LFS writes data sequentially with cleaning operations.
* SSR (Slack Space Recycle) reuses obsolete space without cleaning operations.
* AT_SSR (Age Threshold based Slack Space Recycle) merges fragments into
* fragmented segment which has similar aging degree.
*/
enum {
LFS = 0,
SSR
SSR,
AT_SSR,
};
/*
* In the victim_sel_policy->gc_mode, there are two gc, aka cleaning, modes.
* In the victim_sel_policy->gc_mode, there are three gc, aka cleaning, modes.
* GC_CB is based on cost-benefit algorithm.
* GC_GREEDY is based on greedy algorithm.
* GC_AT is based on age-threshold algorithm.
*/
enum {
GC_CB = 0,
GC_GREEDY,
GC_AT,
ALLOC_NEXT,
FLUSH_DEVICE,
MAX_GC_POLICY,
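The comment above distinguishes the three victim policies; for the cost-benefit one, here is a standalone sketch of the classic cost function, modeled on f2fs's get_cb_cost(). The exact scaling is an assumption, and lower cost means a better victim, so the return value is inverted.

#include <limits.h>
#include <stdio.h>

static unsigned int cb_cost(unsigned int valid, unsigned int capacity,
			    unsigned long long mtime,
			    unsigned long long min_mtime,
			    unsigned long long max_mtime)
{
	unsigned int u = valid * 100 / capacity;	/* utilization, percent */
	unsigned long long age = 100;

	if (max_mtime != min_mtime)	/* oldest section gets age 100 */
		age = 100 - 100 * (mtime - min_mtime) / (max_mtime - min_mtime);

	return UINT_MAX - (unsigned int)((100 * (100 - u) * age) / (100 + u));
}

int main(void)
{
	/* An old, mostly-invalid section costs less than a hot, full one. */
	printf("%u\n", cb_cost(50, 512, 1000, 1000, 2000));
	printf("%u\n", cb_cost(500, 512, 2000, 1000, 2000));
	return 0;
}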
@@ -154,24 +176,28 @@ enum {
/*
* BG_GC means the background cleaning job.
* FG_GC means the on-demand cleaning job.
* FORCE_FG_GC means on-demand cleaning job in background.
*/
enum {
BG_GC = 0,
FG_GC,
FORCE_FG_GC,
};
/* for a function parameter to select a victim segment */
struct victim_sel_policy {
int alloc_mode; /* LFS or SSR */
int gc_mode; /* GC_CB or GC_GREEDY */
unsigned long *dirty_segmap; /* dirty segment bitmap */
unsigned int max_search; /* maximum # of segments to search */
unsigned long *dirty_bitmap; /* dirty segment/section bitmap */
unsigned int max_search; /*
* maximum # of segments/sections
* to search
*/
unsigned int offset; /* last scanned bitmap offset */
unsigned int ofs_unit; /* bitmap search unit */
unsigned int min_cost; /* minimum cost */
unsigned long long oldest_age; /* oldest age of segments having the same min cost */
unsigned int min_segno; /* segment # having min. cost */
unsigned long long age; /* mtime of GCed section*/
unsigned long long age_threshold;/* age threshold */
};
struct seg_entry {
@@ -184,7 +210,7 @@ struct seg_entry {
unsigned char *cur_valid_map_mir; /* mirror of current valid bitmap */
#endif
/*
* # of valid blocks and the validity bitmap stored in the last
* checkpoint pack. This information is used by the SSR mode.
*/
unsigned char *ckpt_valid_map; /* validity bitmap of blocks last cp */
@@ -196,21 +222,15 @@ struct sec_entry {
unsigned int valid_blocks; /* # of valid blocks in a section */
};
struct segment_allocation {
void (*allocate_segment)(struct f2fs_sb_info *, int, bool);
};
#define MAX_SKIP_GC_COUNT 16
struct inmem_pages {
struct revoke_entry {
struct list_head list;
struct page *page;
block_t old_addr; /* for revoking when fail to commit */
pgoff_t index;
};
struct sit_info {
const struct segment_allocation *s_ops;
block_t sit_base_addr; /* start block address of SIT area */
block_t sit_blocks; /* # of blocks used by SIT area */
block_t written_valid_blocks; /* # of valid blocks in main area */
@@ -237,6 +257,8 @@ struct sit_info {
unsigned long long mounted_time; /* mount time */
unsigned long long min_mtime; /* min. modification time */
unsigned long long max_mtime; /* max. modification time */
unsigned long long dirty_min_mtime; /* rerange candidates in GC_AT */
unsigned long long dirty_max_mtime; /* rerange candidates in GC_AT */
unsigned int last_victim[MAX_GC_POLICY]; /* last victim segment # */
};
@@ -266,15 +288,19 @@ enum dirty_type {
struct dirty_seglist_info {
const struct victim_selection *v_ops; /* victim selection operation */
unsigned long *dirty_segmap[NR_DIRTY_TYPE];
unsigned long *dirty_secmap;
struct mutex seglist_lock; /* lock for segment bitmaps */
int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */
unsigned long *victim_secmap; /* background GC victims */
unsigned long *pinned_secmap; /* pinned victims from foreground GC */
unsigned int pinned_secmap_cnt; /* count of victims which has pinned data */
bool enable_pin_section; /* enable pinning section */
};
/* victim selection function for cleaning and SSR */
struct victim_selection {
int (*get_victim)(struct f2fs_sb_info *, unsigned int *,
int, int, char);
int, int, char, unsigned long long);
};
/* for active log information */
@@ -284,10 +310,13 @@ struct curseg_info {
struct rw_semaphore journal_rwsem; /* protect journal area */
struct f2fs_journal *journal; /* cached journal info */
unsigned char alloc_type; /* current allocation type */
unsigned short seg_type; /* segment type like CURSEG_XXX_TYPE */
unsigned int segno; /* current segment number */
unsigned short next_blkoff; /* next block offset to write */
unsigned int zone; /* current zone number */
unsigned int next_segno; /* preallocated segment */
int fragment_remained_chunk; /* remained block size in a chunk for block fragmentation mode */
bool inited; /* indicate inmem log is inited */
};
struct sit_entry_set {
@@ -301,8 +330,6 @@ struct sit_entry_set {
*/
static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
{
if (type == CURSEG_COLD_DATA_PINNED)
type = CURSEG_COLD_DATA;
return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
}
@@ -334,8 +361,20 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi,
}
static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi,
unsigned int segno)
unsigned int segno, bool use_section)
{
if (use_section && __is_large_section(sbi)) {
unsigned int start_segno = START_SEGNO(segno);
unsigned int blocks = 0;
int i;
for (i = 0; i < sbi->segs_per_sec; i++, start_segno++) {
struct seg_entry *se = get_seg_entry(sbi, start_segno);
blocks += se->ckpt_valid_blocks;
}
return blocks;
}
return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
}
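For example, in a 4-segment section whose segments hold 100, 0, 512 and 7 checkpointed valid blocks, the use_section form returns 619, while the per-segment form reports only the first segment's 100; callers that reason about whole sections must therefore pass use_section = true on __is_large_section() filesystems.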
@@ -407,6 +446,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
unsigned int next;
unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno);
spin_lock(&free_i->segmap_lock);
clear_bit(segno, free_i->free_segmap);
@@ -414,7 +454,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
next = find_next_bit(free_i->free_segmap,
start_segno + sbi->segs_per_sec, start_segno);
if (next >= start_segno + sbi->segs_per_sec) {
if (next >= start_segno + usable_segs) {
clear_bit(secno, free_i->free_secmap);
free_i->free_sections++;
}
@@ -434,22 +474,23 @@ static inline void __set_inuse(struct f2fs_sb_info *sbi,
}
static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
unsigned int segno)
unsigned int segno, bool inmem)
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
unsigned int next;
unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno);
spin_lock(&free_i->segmap_lock);
if (test_and_clear_bit(segno, free_i->free_segmap)) {
free_i->free_segments++;
if (IS_CURSEC(sbi, secno))
if (!inmem && IS_CURSEC(sbi, secno))
goto skip_free;
next = find_next_bit(free_i->free_segmap,
start_segno + sbi->segs_per_sec, start_segno);
if (next >= start_segno + sbi->segs_per_sec) {
if (next >= start_segno + usable_segs) {
if (test_and_clear_bit(secno, free_i->free_secmap))
free_i->free_sections++;
}
@@ -496,9 +537,10 @@ static inline unsigned int free_segments(struct f2fs_sb_info *sbi)
return FREE_I(sbi)->free_segments;
}
static inline int reserved_segments(struct f2fs_sb_info *sbi)
static inline unsigned int reserved_segments(struct f2fs_sb_info *sbi)
{
return SM_I(sbi)->reserved_segments;
return SM_I(sbi)->reserved_segments +
SM_I(sbi)->additional_reserved_segments;
}
static inline unsigned int free_sections(struct f2fs_sb_info *sbi)
@@ -528,7 +570,7 @@ static inline int overprovision_segments(struct f2fs_sb_info *sbi)
static inline int reserved_sections(struct f2fs_sb_info *sbi)
{
return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi));
return GET_SEC_FROM_SEG(sbi, reserved_segments(sbi));
}
static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
@@ -541,8 +583,8 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
/* check current node segment */
for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) {
segno = CURSEG_I(sbi, i)->segno;
left_blocks = sbi->blocks_per_seg -
get_seg_entry(sbi, segno)->ckpt_valid_blocks;
left_blocks = f2fs_usable_blks_in_seg(sbi, segno) -
get_seg_entry(sbi, segno)->ckpt_valid_blocks;
if (node_blocks > left_blocks)
return false;
@@ -550,7 +592,7 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
/* check current data segment */
segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno;
left_blocks = sbi->blocks_per_seg -
left_blocks = f2fs_usable_blks_in_seg(sbi, segno) -
get_seg_entry(sbi, segno)->ckpt_valid_blocks;
if (dent_blocks > left_blocks)
return false;
@@ -564,10 +606,10 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
get_pages(sbi, F2FS_DIRTY_DENTS) +
get_pages(sbi, F2FS_DIRTY_IMETA);
unsigned int total_dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS);
unsigned int node_secs = total_node_blocks / BLKS_PER_SEC(sbi);
unsigned int dent_secs = total_dent_blocks / BLKS_PER_SEC(sbi);
unsigned int node_blocks = total_node_blocks % BLKS_PER_SEC(sbi);
unsigned int dent_blocks = total_dent_blocks % BLKS_PER_SEC(sbi);
unsigned int node_secs = total_node_blocks / CAP_BLKS_PER_SEC(sbi);
unsigned int dent_secs = total_dent_blocks / CAP_BLKS_PER_SEC(sbi);
unsigned int node_blocks = total_node_blocks % CAP_BLKS_PER_SEC(sbi);
unsigned int dent_blocks = total_dent_blocks % CAP_BLKS_PER_SEC(sbi);
unsigned int free, need_lower, need_upper;
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
@@ -618,7 +660,9 @@ static inline int utilization(struct f2fs_sb_info *sbi)
* pages over min_fsync_blocks. (=default option)
* F2FS_IPU_ASYNC - do IPU given by asynchronous write requests.
* F2FS_IPU_NOCACHE - disable IPU bio cache.
* F2FS_IPUT_DISABLE - disable IPU. (=default option in LFS mode)
* F2FS_IPU_HONOR_OPU_WRITE - use OPU write prior to IPU write if inode has
* FI_OPU_WRITE flag.
* F2FS_IPU_DISABLE - disable IPU. (=default option in LFS mode)
*/
#define DEF_MIN_IPU_UTIL 70
#define DEF_MIN_FSYNC_BLOCKS 8
@@ -634,6 +678,7 @@ enum {
F2FS_IPU_FSYNC,
F2FS_IPU_ASYNC,
F2FS_IPU_NOCACHE,
F2FS_IPU_HONOR_OPU_WRITE,
};
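The IPU modes above act as bit positions in a single policy word (the same encoding the ipu_policy sysfs knob uses). A minimal model follows; the first four enumerators are assumed from upstream, since this hunk only shows the tail of the enum.

#include <stdbool.h>
#include <stdio.h>

enum { F2FS_IPU_FORCE, F2FS_IPU_SSR, F2FS_IPU_UTIL, F2FS_IPU_SSR_UTIL,
       F2FS_IPU_FSYNC, F2FS_IPU_ASYNC, F2FS_IPU_NOCACHE,
       F2FS_IPU_HONOR_OPU_WRITE };

static bool ipu_enabled(unsigned policy, int mode)
{
	return policy & (1u << mode);
}

int main(void)
{
	unsigned policy = (1u << F2FS_IPU_FSYNC) |
			  (1u << F2FS_IPU_HONOR_OPU_WRITE);

	printf("%d\n", ipu_enabled(policy, F2FS_IPU_FSYNC));	/* 1 */
	printf("%d\n", ipu_enabled(policy, F2FS_IPU_ASYNC));	/* 0 */
	return 0;
}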
static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi,
@@ -681,35 +726,43 @@ static inline int check_block_count(struct f2fs_sb_info *sbi,
bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false;
int valid_blocks = 0;
int cur_pos = 0, next_pos;
unsigned int usable_blks_per_seg = f2fs_usable_blks_in_seg(sbi, segno);
/* check bitmap with valid block count */
do {
if (is_valid) {
next_pos = find_next_zero_bit_le(&raw_sit->valid_map,
sbi->blocks_per_seg,
usable_blks_per_seg,
cur_pos);
valid_blocks += next_pos - cur_pos;
} else
next_pos = find_next_bit_le(&raw_sit->valid_map,
sbi->blocks_per_seg,
usable_blks_per_seg,
cur_pos);
cur_pos = next_pos;
is_valid = !is_valid;
} while (cur_pos < sbi->blocks_per_seg);
} while (cur_pos < usable_blks_per_seg);
if (unlikely(GET_SIT_VBLOCKS(raw_sit) != valid_blocks)) {
f2fs_err(sbi, "Mismatch valid blocks %d vs. %d",
GET_SIT_VBLOCKS(raw_sit), valid_blocks);
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_handle_error(sbi, ERROR_INCONSISTENT_SIT);
return -EFSCORRUPTED;
}
if (usable_blks_per_seg < sbi->blocks_per_seg)
f2fs_bug_on(sbi, find_next_bit_le(&raw_sit->valid_map,
sbi->blocks_per_seg,
usable_blks_per_seg) != sbi->blocks_per_seg);
/* check segment usage, and check boundary of a given segment number */
if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
if (unlikely(GET_SIT_VBLOCKS(raw_sit) > usable_blks_per_seg
|| segno > TOTAL_SEGS(sbi) - 1)) {
f2fs_err(sbi, "Wrong valid blocks %d or segno %u",
GET_SIT_VBLOCKS(raw_sit), segno);
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_handle_error(sbi, ERROR_INCONSISTENT_SIT);
return -EFSCORRUPTED;
}
return 0;
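The alternating walk in check_block_count() is easier to see in isolation. Below is a standalone model that hops between runs of set and clear bits, with a plain bool array standing in for the little-endian bitmap helpers.

#include <stdbool.h>
#include <stdio.h>

#define NBLKS 16

static int next_with_val(const bool *map, int size, int from, bool val)
{
	while (from < size && map[from] != val)
		from++;
	return from;
}

static int count_valid(const bool *map, int usable)
{
	bool is_valid = map[0];
	int cur = 0, next, valid = 0;

	do {
		/* Jump to the end of the current run of equal bits. */
		next = next_with_val(map, usable, cur, !is_valid);
		if (is_valid)
			valid += next - cur;
		cur = next;
		is_valid = !is_valid;
	} while (cur < usable);

	return valid;
}

int main(void)
{
	bool map[NBLKS] = { 1, 1, 0, 0, 1, 0, 1, 1, 1 };

	printf("%d\n", count_valid(map, NBLKS)); /* 6 */
	return 0;
}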


@@ -18,9 +18,7 @@ static unsigned int shrinker_run_no;
static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi)
{
long count = NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt;
return count > 0 ? count : 0;
return NM_I(sbi)->nat_cnt[RECLAIMABLE_NAT];
}
static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
@@ -30,10 +28,13 @@ static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
return count > 0 ? count : 0;
}
static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)
static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi,
enum extent_type type)
{
return atomic_read(&sbi->total_zombie_tree) +
atomic_read(&sbi->total_ext_node);
struct extent_tree_info *eti = &sbi->extent_tree[type];
return atomic_read(&eti->total_zombie_tree) +
atomic_read(&eti->total_ext_node);
}
unsigned long f2fs_shrink_count(struct shrinker *shrink,
@@ -55,8 +56,11 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink,
}
spin_unlock(&f2fs_list_lock);
/* count extent cache entries */
count += __count_extent_cache(sbi);
/* count read extent cache entries */
count += __count_extent_cache(sbi, EX_READ);
/* count block age extent cache entries */
count += __count_extent_cache(sbi, EX_BLOCK_AGE);
/* count clean nat cache entries */
count += __count_nat_entries(sbi);
@@ -102,7 +106,10 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink,
sbi->shrinker_run_no = run_no;
/* shrink extent cache entries */
freed += f2fs_shrink_extent_tree(sbi, nr >> 1);
freed += f2fs_shrink_age_extent_tree(sbi, nr >> 2);
/* shrink read extent cache entries */
freed += f2fs_shrink_read_extent_tree(sbi, nr >> 2);
/* shrink clean nat cache entries */
if (freed < nr)
@@ -132,7 +139,9 @@ void f2fs_join_shrinker(struct f2fs_sb_info *sbi)
void f2fs_leave_shrinker(struct f2fs_sb_info *sbi)
{
f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi));
f2fs_shrink_read_extent_tree(sbi, __count_extent_cache(sbi, EX_READ));
f2fs_shrink_age_extent_tree(sbi,
__count_extent_cache(sbi, EX_BLOCK_AGE));
spin_lock(&f2fs_list_lock);
list_del_init(&sbi->s_list);

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -1,165 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* f2fs IO tracer
*
* Copyright (c) 2014 Motorola Mobility
* Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org>
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/sched.h>
#include <linux/radix-tree.h>
#include "f2fs.h"
#include "trace.h"
static RADIX_TREE(pids, GFP_ATOMIC);
static spinlock_t pids_lock;
static struct last_io_info last_io;
static inline void __print_last_io(void)
{
if (!last_io.len)
return;
trace_printk("%3x:%3x %4x %-16s %2x %5x %5x %12x %4x\n",
last_io.major, last_io.minor,
last_io.pid, "----------------",
last_io.type,
last_io.fio.op, last_io.fio.op_flags,
last_io.fio.new_blkaddr,
last_io.len);
memset(&last_io, 0, sizeof(last_io));
}
static int __file_type(struct inode *inode, pid_t pid)
{
if (f2fs_is_atomic_file(inode))
return __ATOMIC_FILE;
else if (f2fs_is_volatile_file(inode))
return __VOLATILE_FILE;
else if (S_ISDIR(inode->i_mode))
return __DIR_FILE;
else if (inode->i_ino == F2FS_NODE_INO(F2FS_I_SB(inode)))
return __NODE_FILE;
else if (inode->i_ino == F2FS_META_INO(F2FS_I_SB(inode)))
return __META_FILE;
else if (pid)
return __NORMAL_FILE;
else
return __MISC_FILE;
}
void f2fs_trace_pid(struct page *page)
{
struct inode *inode = page->mapping->host;
pid_t pid = task_pid_nr(current);
void *p;
set_page_private(page, (unsigned long)pid);
retry:
if (radix_tree_preload(GFP_NOFS))
return;
spin_lock(&pids_lock);
p = radix_tree_lookup(&pids, pid);
if (p == current)
goto out;
if (p)
radix_tree_delete(&pids, pid);
if (radix_tree_insert(&pids, pid, current)) {
spin_unlock(&pids_lock);
radix_tree_preload_end();
cond_resched();
goto retry;
}
trace_printk("%3x:%3x %4x %-16s\n",
MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
pid, current->comm);
out:
spin_unlock(&pids_lock);
radix_tree_preload_end();
}
void f2fs_trace_ios(struct f2fs_io_info *fio, int flush)
{
struct inode *inode;
pid_t pid;
int major, minor;
if (flush) {
__print_last_io();
return;
}
inode = fio->page->mapping->host;
pid = page_private(fio->page);
major = MAJOR(inode->i_sb->s_dev);
minor = MINOR(inode->i_sb->s_dev);
if (last_io.major == major && last_io.minor == minor &&
last_io.pid == pid &&
last_io.type == __file_type(inode, pid) &&
last_io.fio.op == fio->op &&
last_io.fio.op_flags == fio->op_flags &&
last_io.fio.new_blkaddr + last_io.len ==
fio->new_blkaddr) {
last_io.len++;
return;
}
__print_last_io();
last_io.major = major;
last_io.minor = minor;
last_io.pid = pid;
last_io.type = __file_type(inode, pid);
last_io.fio = *fio;
last_io.len = 1;
return;
}
void f2fs_build_trace_ios(void)
{
spin_lock_init(&pids_lock);
}
#define PIDVEC_SIZE 128
static unsigned int gang_lookup_pids(pid_t *results, unsigned long first_index,
unsigned int max_items)
{
struct radix_tree_iter iter;
void **slot;
unsigned int ret = 0;
if (unlikely(!max_items))
return 0;
radix_tree_for_each_slot(slot, &pids, &iter, first_index) {
results[ret] = iter.index;
if (++ret == max_items)
break;
}
return ret;
}
void f2fs_destroy_trace_ios(void)
{
pid_t pid[PIDVEC_SIZE];
pid_t next_pid = 0;
unsigned int found;
spin_lock(&pids_lock);
while ((found = gang_lookup_pids(pid, next_pid, PIDVEC_SIZE))) {
unsigned idx;
next_pid = pid[found - 1] + 1;
for (idx = 0; idx < found; idx++)
radix_tree_delete(&pids, pid[idx]);
}
spin_unlock(&pids_lock);
}


@@ -1,43 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* f2fs IO tracer
*
* Copyright (c) 2014 Motorola Mobility
* Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org>
*/
#ifndef __F2FS_TRACE_H__
#define __F2FS_TRACE_H__
#ifdef CONFIG_F2FS_IO_TRACE
#include <trace/events/f2fs.h>
enum file_type {
__NORMAL_FILE,
__DIR_FILE,
__NODE_FILE,
__META_FILE,
__ATOMIC_FILE,
__VOLATILE_FILE,
__MISC_FILE,
};
struct last_io_info {
int major, minor;
pid_t pid;
enum file_type type;
struct f2fs_io_info fio;
block_t len;
};
extern void f2fs_trace_pid(struct page *);
extern void f2fs_trace_ios(struct f2fs_io_info *, int);
extern void f2fs_build_trace_ios(void);
extern void f2fs_destroy_trace_ios(void);
#else
#define f2fs_trace_pid(p)
#define f2fs_trace_ios(i, n)
#define f2fs_build_trace_ios()
#define f2fs_destroy_trace_ios()
#endif
#endif /* __F2FS_TRACE_H__ */


@@ -29,6 +29,8 @@
#include "f2fs.h"
#include "xattr.h"
#define F2FS_VERIFY_VER (1)
static inline loff_t f2fs_verity_metadata_pos(const struct inode *inode)
{
return round_up(inode->i_size, 65536);
@@ -126,7 +128,7 @@ static int f2fs_begin_enable_verity(struct file *filp)
if (f2fs_verity_in_progress(inode))
return -EBUSY;
if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
if (f2fs_is_atomic_file(inode))
return -EOPNOTSUPP;
/*
@@ -134,7 +136,7 @@ static int f2fs_begin_enable_verity(struct file *filp)
* here and not rely on ->open() doing it. This must be done before
* evicting the inline data.
*/
err = dquot_initialize(inode);
err = f2fs_dquot_initialize(inode);
if (err)
return err;
@@ -150,40 +152,73 @@ static int f2fs_end_enable_verity(struct file *filp, const void *desc,
size_t desc_size, u64 merkle_tree_size)
{
struct inode *inode = file_inode(filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
u64 desc_pos = f2fs_verity_metadata_pos(inode) + merkle_tree_size;
struct fsverity_descriptor_location dloc = {
.version = cpu_to_le32(1),
.version = cpu_to_le32(F2FS_VERIFY_VER),
.size = cpu_to_le32(desc_size),
.pos = cpu_to_le64(desc_pos),
};
int err = 0;
int err = 0, err2 = 0;
if (desc != NULL) {
/* Succeeded; write the verity descriptor. */
err = pagecache_write(inode, desc, desc_size, desc_pos);
/*
* If an error already occurred (which fs/verity/ signals by passing
* desc == NULL), then only clean-up is needed.
*/
if (desc == NULL)
goto cleanup;
/* Write all pages before clearing FI_VERITY_IN_PROGRESS. */
if (!err)
err = filemap_write_and_wait(inode->i_mapping);
}
/* Append the verity descriptor. */
err = pagecache_write(inode, desc, desc_size, desc_pos);
if (err)
goto cleanup;
/* If we failed, truncate anything we wrote past i_size. */
if (desc == NULL || err)
f2fs_truncate(inode);
/*
* Write all pages (both data and verity metadata). Note that this must
* happen before clearing FI_VERITY_IN_PROGRESS; otherwise pages beyond
* i_size won't be written properly. For crash consistency, this also
* must happen before the verity inode flag gets persisted.
*/
err = filemap_write_and_wait(inode->i_mapping);
if (err)
goto cleanup;
/* Set the verity xattr. */
err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_VERITY,
F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc),
NULL, XATTR_CREATE);
if (err)
goto cleanup;
/* Finally, set the verity inode flag. */
file_set_verity(inode);
f2fs_set_inode_flags(inode);
f2fs_mark_inode_dirty_sync(inode, true);
clear_inode_flag(inode, FI_VERITY_IN_PROGRESS);
return 0;
if (desc != NULL && !err) {
err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_VERITY,
F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc),
NULL, XATTR_CREATE);
if (!err) {
file_set_verity(inode);
f2fs_set_inode_flags(inode);
f2fs_mark_inode_dirty_sync(inode, true);
}
cleanup:
/*
* Verity failed to be enabled, so clean up by truncating any verity
* metadata that was written beyond i_size (both from cache and from
* disk) and clearing FI_VERITY_IN_PROGRESS.
*
* Taking i_gc_rwsem[WRITE] is needed to stop f2fs garbage collection
* from re-instantiating cached pages we are truncating (since unlike
* normal file accesses, garbage collection isn't limited by i_size).
*/
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
truncate_inode_pages(inode->i_mapping, inode->i_size);
err2 = f2fs_truncate(inode);
if (err2) {
f2fs_err(sbi, "Truncating verity metadata failed (errno=%d)",
err2);
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
return err;
f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
clear_inode_flag(inode, FI_VERITY_IN_PROGRESS);
return err ?: err2;
}
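Both the success and cleanup paths above run at the tail of FS_IOC_ENABLE_VERITY. A minimal userspace trigger, as a sketch: it assumes a kernel with fs-verity support and an f2fs filesystem created with the verity feature enabled.

#include <fcntl.h>
#include <linux/fsverity.h>
#include <stdio.h>
#include <sys/ioctl.h>

int main(void)
{
	struct fsverity_enable_arg arg = {
		.version = 1,
		.hash_algorithm = FS_VERITY_HASH_ALG_SHA256,
		.block_size = 4096,
	};
	/* The file must not be open for writing when verity is enabled. */
	int fd = open("file", O_RDONLY);

	if (fd < 0 || ioctl(fd, FS_IOC_ENABLE_VERITY, &arg))
		perror("enable verity");
	return 0;
}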
static int f2fs_get_verity_descriptor(struct inode *inode, void *buf,
@@ -199,7 +234,7 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf,
F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc), NULL);
if (res < 0 && res != -ERANGE)
return res;
if (res != sizeof(dloc) || dloc.version != cpu_to_le32(1)) {
if (res != sizeof(dloc) || dloc.version != cpu_to_le32(F2FS_VERIFY_VER)) {
f2fs_warn(F2FS_I_SB(inode), "unknown verity xattr format");
return -EINVAL;
}
@@ -210,6 +245,8 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf,
if (pos + size < pos || pos + size > inode->i_sb->s_maxbytes ||
pos < f2fs_verity_metadata_pos(inode) || size > INT_MAX) {
f2fs_warn(F2FS_I_SB(inode), "invalid verity xattr");
f2fs_handle_error(F2FS_I_SB(inode),
ERROR_CORRUPTED_VERITY_XATTR);
return -EFSCORRUPTED;
}
if (buf_size) {


@@ -27,7 +27,8 @@ static void *xattr_alloc(struct f2fs_sb_info *sbi, int size, bool *is_inline)
{
if (likely(size == sbi->inline_xattr_slab_size)) {
*is_inline = true;
return kmem_cache_zalloc(sbi->inline_xattr_slab, GFP_NOFS);
return f2fs_kmem_cache_alloc(sbi->inline_xattr_slab,
GFP_F2FS_ZERO, false, sbi);
}
*is_inline = false;
return f2fs_kzalloc(sbi, size, GFP_NOFS);
@@ -39,7 +40,7 @@ static void xattr_free(struct f2fs_sb_info *sbi, void *xattr_addr,
if (is_inline)
kmem_cache_free(sbi->inline_xattr_slab, xattr_addr);
else
kvfree(xattr_addr);
kfree(xattr_addr);
}
static int f2fs_xattr_generic_get(const struct xattr_handler *handler,
@@ -175,8 +176,8 @@ const struct xattr_handler f2fs_xattr_trusted_handler = {
const struct xattr_handler f2fs_xattr_advise_handler = {
.name = F2FS_SYSTEM_ADVISE_NAME,
.flags = F2FS_XATTR_INDEX_ADVISE,
.get = f2fs_xattr_advise_get,
.set = f2fs_xattr_advise_set,
};
const struct xattr_handler f2fs_xattr_security_handler = {
@@ -223,15 +224,18 @@ static inline const struct xattr_handler *f2fs_xattr_handler(int index)
}
static struct f2fs_xattr_entry *__find_xattr(void *base_addr,
void *last_base_addr, int index,
size_t len, const char *name)
void *last_base_addr, void **last_addr,
int index, size_t len, const char *name)
{
struct f2fs_xattr_entry *entry;
list_for_each_xattr(entry, base_addr) {
if ((void *)(entry) + sizeof(__u32) > last_base_addr ||
(void *)XATTR_NEXT_ENTRY(entry) > last_base_addr)
(void *)XATTR_NEXT_ENTRY(entry) > last_base_addr) {
if (last_addr)
*last_addr = entry;
return NULL;
}
if (entry->e_name_index != index)
continue;
@@ -251,19 +255,9 @@ static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode,
unsigned int inline_size = inline_xattr_size(inode);
void *max_addr = base_addr + inline_size;
list_for_each_xattr(entry, base_addr) {
if ((void *)entry + sizeof(__u32) > max_addr ||
(void *)XATTR_NEXT_ENTRY(entry) > max_addr) {
*last_addr = entry;
return NULL;
}
if (entry->e_name_index != index)
continue;
if (entry->e_name_len != len)
continue;
if (!memcmp(entry->e_name, name, len))
break;
}
entry = __find_xattr(base_addr, max_addr, last_addr, index, len, name);
if (!entry)
return NULL;
/* inline xattr header or entry across max inline xattr size */
if (IS_XATTR_LAST_ENTRY(entry) &&
@@ -327,7 +321,7 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
void *last_addr = NULL;
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
unsigned int inline_size = inline_xattr_size(inode);
int err = 0;
int err;
if (!xnid && !inline_size)
return -ENODATA;
@@ -365,12 +359,14 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
else
cur_addr = txattr_addr;
*xe = __find_xattr(cur_addr, last_txattr_addr, index, len, name);
*xe = __find_xattr(cur_addr, last_txattr_addr, NULL, index, len, name);
if (!*xe) {
f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr",
inode->i_ino);
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
err = -EFSCORRUPTED;
f2fs_handle_error(F2FS_I_SB(inode),
ERROR_CORRUPTED_XATTR);
goto out;
}
check:
@@ -425,7 +421,7 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage,
*base_addr = txattr_addr;
return 0;
fail:
kvfree(txattr_addr);
kfree(txattr_addr);
return err;
}
@@ -486,6 +482,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
f2fs_wait_on_page_writeback(xpage, NODE, true, true);
} else {
struct dnode_of_data dn;
set_new_dnode(&dn, inode, NULL, NULL, new_nid);
xpage = f2fs_new_node_page(&dn, XATTR_NODE_OFFSET);
if (IS_ERR(xpage)) {
@@ -515,7 +512,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
void *buffer, size_t buffer_size, struct page *ipage)
{
struct f2fs_xattr_entry *entry = NULL;
int error = 0;
int error;
unsigned int size, len;
void *base_addr = NULL;
int base_size;
@@ -528,10 +525,10 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
if (len > F2FS_NAME_LEN)
return -ERANGE;
down_read(&F2FS_I(inode)->i_xattr_sem);
f2fs_down_read(&F2FS_I(inode)->i_xattr_sem);
error = lookup_all_xattrs(inode, ipage, index, len, name,
&entry, &base_addr, &base_size, &is_inline);
up_read(&F2FS_I(inode)->i_xattr_sem);
f2fs_up_read(&F2FS_I(inode)->i_xattr_sem);
if (error)
return error;
@@ -562,12 +559,12 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
struct inode *inode = d_inode(dentry);
struct f2fs_xattr_entry *entry;
void *base_addr, *last_base_addr;
int error = 0;
int error;
size_t rest = buffer_size;
down_read(&F2FS_I(inode)->i_xattr_sem);
f2fs_down_read(&F2FS_I(inode)->i_xattr_sem);
error = read_all_xattrs(inode, NULL, &base_addr);
up_read(&F2FS_I(inode)->i_xattr_sem);
f2fs_up_read(&F2FS_I(inode)->i_xattr_sem);
if (error)
return error;
@@ -586,6 +583,8 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
inode->i_ino);
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
error = -EFSCORRUPTED;
f2fs_handle_error(F2FS_I_SB(inode),
ERROR_CORRUPTED_XATTR);
goto cleanup;
}
@@ -610,7 +609,7 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
}
error = buffer_size - rest;
cleanup:
kvfree(base_addr);
kfree(base_addr);
return error;
}
@@ -633,7 +632,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
int found, newsize;
size_t len;
__u32 new_hsize;
int error = 0;
int error;
if (name == NULL)
return -EINVAL;
@@ -656,12 +655,14 @@ static int __f2fs_setxattr(struct inode *inode, int index,
last_base_addr = (void *)base_addr + XATTR_SIZE(inode);
/* find entry with wanted name. */
here = __find_xattr(base_addr, last_base_addr, index, len, name);
here = __find_xattr(base_addr, last_base_addr, NULL, index, len, name);
if (!here) {
f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr",
inode->i_ino);
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
error = -EFSCORRUPTED;
f2fs_handle_error(F2FS_I_SB(inode),
ERROR_CORRUPTED_XATTR);
goto exit;
}
@@ -684,8 +685,12 @@ static int __f2fs_setxattr(struct inode *inode, int index,
while (!IS_XATTR_LAST_ENTRY(last)) {
if ((void *)(last) + sizeof(__u32) > last_base_addr ||
(void *)XATTR_NEXT_ENTRY(last) > last_base_addr) {
f2fs_err(F2FS_I_SB(inode), "inode (%lu) has invalid last xattr entry, entry_size: %zu",
inode->i_ino, ENTRY_SIZE(last));
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
error = -EFSCORRUPTED;
f2fs_handle_error(F2FS_I_SB(inode),
ERROR_CORRUPTED_XATTR);
goto exit;
}
last = XATTR_NEXT_ENTRY(last);
@@ -776,7 +781,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
inode->i_ctime = current_time(inode);
f2fs_mark_inode_dirty_sync(inode, true);
exit:
kvfree(base_addr);
kfree(base_addr);
return error;
}
@@ -792,7 +797,7 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
if (!f2fs_is_checkpoint_ready(sbi))
return -ENOSPC;
err = dquot_initialize(inode);
err = f2fs_dquot_initialize(inode);
if (err)
return err;
@@ -803,9 +808,9 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
down_write(&F2FS_I(inode)->i_xattr_sem);
f2fs_down_write(&F2FS_I(inode)->i_xattr_sem);
err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags);
up_write(&F2FS_I(inode)->i_xattr_sem);
f2fs_up_write(&F2FS_I(inode)->i_xattr_sem);
f2fs_unlock_op(sbi);
f2fs_update_time(sbi, REQ_TIME);

View File

@@ -197,6 +197,7 @@ static struct file *alloc_file(const struct path *path, int flags,
file->f_inode = path->dentry->d_inode;
file->f_mapping = path->dentry->d_inode->i_mapping;
file->f_wb_err = filemap_sample_wb_err(file->f_mapping);
file->f_sb_err = file_sample_sb_err(file);
if ((file->f_mode & FMODE_READ) &&
likely(fop->read || fop->read_iter))
file->f_mode |= FMODE_CAN_READ;

View File

@@ -22,7 +22,6 @@
#include <linux/swap.h>
#include <linux/splice.h>
#include <linux/sched.h>
#include <linux/freezer.h>
MODULE_ALIAS_MISCDEV(FUSE_MINOR);
MODULE_ALIAS("devname:fuse");
@@ -488,9 +487,7 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
* Either request is already in userspace, or it was forced.
* Wait it out.
*/
while (!test_bit(FR_FINISHED, &req->flags))
wait_event_freezable(req->waitq,
test_bit(FR_FINISHED, &req->flags));
wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags));
}
static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)

View File

@@ -1587,25 +1587,31 @@ void iput(struct inode *inode)
}
EXPORT_SYMBOL(iput);
#ifdef CONFIG_BLOCK
/**
* bmap - find a block number in a file
* @inode: inode of file
* @block: block to find
* @inode: inode owning the block number being requested
* @block: pointer containing the block to find
*
* Returns the block number on the device holding the inode that
* is the disk block number for the block of the file requested.
* That is, asked for block 4 of inode 1 the function will return the
* disk block relative to the disk start that holds that block of the
* file.
* Replaces the value in *block with the block number on the device holding
* corresponding to the requested block number in the file.
* That is, asked for block 4 of inode 1 the function will replace the
* 4 in *block, with disk block relative to the disk start that holds that
* block of the file.
*
* Returns -EINVAL in case of error, 0 otherwise. If mapping falls into a
* hole, returns 0 and *block is also set to 0.
*/
sector_t bmap(struct inode *inode, sector_t block)
int bmap(struct inode *inode, sector_t *block)
{
sector_t res = 0;
if (inode->i_mapping->a_ops->bmap)
res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block);
return res;
if (!inode->i_mapping->a_ops->bmap)
return -EINVAL;
*block = inode->i_mapping->a_ops->bmap(inode->i_mapping, *block);
return 0;
}
EXPORT_SYMBOL(bmap);
#endif
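The new interface separates "no ->bmap operation" (-EINVAL) from "mapping falls in a hole" (*block set to 0). A minimal sketch of a hypothetical in-kernel caller following the new convention (the jbd2 conversion below is the real in-tree example):

static int example_map_block(struct inode *inode, sector_t file_block,
                             sector_t *disk_block)
{
    sector_t block = file_block;
    int err;

    err = bmap(inode, &block);  /* -EINVAL if the fs has no ->bmap */
    if (err)
        return err;
    if (!block)                 /* hole: bmap() set *block to 0 */
        return -ENODATA;
    *disk_block = block;
    return 0;
}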
/*
* With relative atime, only update atime if the previous atime is
@@ -1898,6 +1904,26 @@ int file_update_time(struct file *file)
}
EXPORT_SYMBOL(file_update_time);
/* Caller must hold the file's inode lock */
int file_modified(struct file *file)
{
int err;
/*
* Clear the security bits if the process is not being run by root.
* This keeps people from modifying setuid and setgid binaries.
*/
err = file_remove_privs(file);
if (err)
return err;
if (unlikely(file->f_mode & FMODE_NOCMTIME))
return 0;
return file_update_time(file);
}
EXPORT_SYMBOL(file_modified);
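file_modified() bundles the two pieces of bookkeeping a write path owes before modifying file data. A hypothetical write-prologue sketch, assuming the caller honors the documented contract (inode lock held):

static ssize_t example_write_prologue(struct kiocb *iocb, struct iov_iter *from)
{
    struct file *file = iocb->ki_filp;
    ssize_t count;
    int err;

    inode_lock(file_inode(file));
    count = generic_write_checks(iocb, from);
    if (count <= 0)
        goto out_unlock;
    /* strip setuid/setgid bits and update c/mtime in one call */
    err = file_modified(file);
    if (err)
        count = err;
    /* ...on success the actual write would follow here... */
out_unlock:
    inode_unlock(file_inode(file));
    return count;
}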
int inode_needs_sync(struct inode *inode)
{
if (IS_SYNC(inode))

View File

@@ -819,18 +819,23 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
{
int err = 0;
unsigned long long ret;
sector_t block = 0;
if (journal->j_inode) {
ret = bmap(journal->j_inode, blocknr);
if (ret)
*retp = ret;
else {
block = blocknr;
ret = bmap(journal->j_inode, &block);
if (ret || !block) {
printk(KERN_ALERT "%s: journal block not found "
"at offset %lu on %s\n",
__func__, blocknr, journal->j_devname);
err = -EIO;
__journal_abort_soft(journal, err);
} else {
*retp = block;
}
} else {
*retp = blocknr; /* +journal->j_blk_offset */
}
@@ -1257,11 +1262,14 @@ journal_t *jbd2_journal_init_dev(struct block_device *bdev,
journal_t *jbd2_journal_init_inode(struct inode *inode)
{
journal_t *journal;
sector_t blocknr;
char *p;
unsigned long long blocknr;
int err = 0;
blocknr = bmap(inode, 0);
if (!blocknr) {
blocknr = 0;
err = bmap(inode, &blocknr);
if (err || !blocknr) {
pr_err("%s: Cannot locate journal superblock\n",
__func__);
return NULL;

View File

@@ -1282,27 +1282,38 @@ bool is_empty_dir_inode(struct inode *inode)
}
#ifdef CONFIG_UNICODE
bool needs_casefold(const struct inode *dir)
/*
* Determine if the name of a dentry should be casefolded.
*
* Return: if names will need casefolding
*/
static bool needs_casefold(const struct inode *dir)
{
return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding &&
(!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir));
return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding;
}
EXPORT_SYMBOL(needs_casefold);
int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
const char *str, const struct qstr *name)
/**
* generic_ci_d_compare - generic d_compare implementation for casefolding filesystems
* @dentry: dentry whose name we are checking against
* @len: len of name of dentry
* @str: str pointer to name of dentry
* @name: Name to compare against
*
* Return: 0 if names match, 1 if mismatch, or -ERRNO
*/
static int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
const char *str, const struct qstr *name)
{
const struct dentry *parent = READ_ONCE(dentry->d_parent);
const struct inode *inode = READ_ONCE(parent->d_inode);
const struct inode *dir = READ_ONCE(parent->d_inode);
const struct super_block *sb = dentry->d_sb;
const struct unicode_map *um = sb->s_encoding;
struct qstr entry = QSTR_INIT(str, len);
struct qstr qstr = QSTR_INIT(str, len);
char strbuf[DNAME_INLINE_LEN];
int ret;
if (!inode || !needs_casefold(inode))
if (!dir || !needs_casefold(dir))
goto fallback;
/*
* If the dentry name is stored in-line, then it may be concurrently
* modified by a rename. If this happens, the VFS will eventually retry
@@ -1313,47 +1324,44 @@ int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
if (len <= DNAME_INLINE_LEN - 1) {
memcpy(strbuf, str, len);
strbuf[len] = 0;
entry.name = strbuf;
qstr.name = strbuf;
/* prevent compiler from optimizing out the temporary buffer */
barrier();
}
ret = utf8_strncasecmp(um, name, &entry);
ret = utf8_strncasecmp(um, name, &qstr);
if (ret >= 0)
return ret;
if (sb_has_enc_strict_mode(sb))
if (sb_has_strict_encoding(sb))
return -EINVAL;
fallback:
if (len != name->len)
return 1;
return !!memcmp(str, name->name, len);
}
EXPORT_SYMBOL(generic_ci_d_compare);
int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
/**
* generic_ci_d_hash - generic d_hash implementation for casefolding filesystems
* @dentry: dentry of the parent directory
* @str: qstr of name whose hash we should fill in
*
* Return: 0 if hash was successful or unchanged, and -EINVAL on error
*/
static int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
{
const struct inode *inode = READ_ONCE(dentry->d_inode);
const struct inode *dir = READ_ONCE(dentry->d_inode);
struct super_block *sb = dentry->d_sb;
const struct unicode_map *um = sb->s_encoding;
int ret = 0;
if (!inode || !needs_casefold(inode))
if (!dir || !needs_casefold(dir))
return 0;
ret = utf8_casefold_hash(um, dentry, str);
if (ret < 0)
goto err;
if (ret < 0 && sb_has_strict_encoding(sb))
return -EINVAL;
return 0;
err:
if (sb_has_enc_strict_mode(sb))
ret = -EINVAL;
else
ret = 0;
return ret;
}
EXPORT_SYMBOL(generic_ci_d_hash);
static const struct dentry_operations generic_ci_dentry_ops = {
.d_hash = generic_ci_d_hash,
@@ -1367,7 +1375,7 @@ static const struct dentry_operations generic_encrypted_dentry_ops = {
};
#endif
#if IS_ENABLED(CONFIG_UNICODE) && IS_ENABLED(CONFIG_FS_ENCRYPTION)
#if defined(CONFIG_FS_ENCRYPTION) && defined(CONFIG_UNICODE)
static const struct dentry_operations generic_encrypted_ci_dentry_ops = {
.d_hash = generic_ci_d_hash,
.d_compare = generic_ci_d_compare,
@@ -1377,28 +1385,48 @@ static const struct dentry_operations generic_encrypted_ci_dentry_ops = {
/**
* generic_set_encrypted_ci_d_ops - helper for setting d_ops for given dentry
* @dir: parent of dentry whose ops to set
* @dentry: detnry to set ops on
* @dentry: dentry to set ops on
*
* This function sets the dentry ops for the given dentry to handle both
* casefolding and encryption of the dentry name.
* Casefolded directories need d_hash and d_compare set, so that the dentries
* contained in them are handled case-insensitively. Note that these operations
* are needed on the parent directory rather than on the dentries in it, and
* while the casefolding flag can be toggled on and off on an empty directory,
* dentry_operations can't be changed later. As a result, if the filesystem has
* casefolding support enabled at all, we have to give all dentries the
* casefolding operations even if their inode doesn't have the casefolding flag
* currently (and thus the casefolding ops would be no-ops for now).
*
* Encryption works differently in that the only dentry operation it needs is
* d_revalidate, which it only needs on dentries that have the no-key name flag.
* The no-key flag can't be set "later", so we don't have to worry about that.
*
* Finally, to maximize compatibility with overlayfs (which isn't compatible
* with certain dentry operations) and to avoid taking an unnecessary
* performance hit, we use custom dentry_operations for each possible
* combination rather than always installing all operations.
*/
void generic_set_encrypted_ci_d_ops(struct inode *dir, struct dentry *dentry)
void generic_set_encrypted_ci_d_ops(struct dentry *dentry)
{
#ifdef CONFIG_FS_ENCRYPTION
if (dentry->d_flags & DCACHE_ENCRYPTED_NAME) {
#ifdef CONFIG_UNICODE
if (dir->i_sb->s_encoding) {
d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops);
return;
}
bool needs_encrypt_ops = dentry->d_flags & DCACHE_NOKEY_NAME;
#endif
#ifdef CONFIG_UNICODE
bool needs_ci_ops = dentry->d_sb->s_encoding;
#endif
#if defined(CONFIG_FS_ENCRYPTION) && defined(CONFIG_UNICODE)
if (needs_encrypt_ops && needs_ci_ops) {
d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops);
return;
}
#endif
#ifdef CONFIG_FS_ENCRYPTION
if (needs_encrypt_ops) {
d_set_d_op(dentry, &generic_encrypted_dentry_ops);
return;
}
#endif
#ifdef CONFIG_UNICODE
if (dir->i_sb->s_encoding) {
if (needs_ci_ops) {
d_set_d_op(dentry, &generic_ci_dentry_ops);
return;
}
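Filesystems are expected to call the helper once per ->lookup(), before they know whether the lookup will succeed. A sketch mirroring the ubifs_lookup() conversion later in this diff (filesystem-specific search abbreviated):

static struct dentry *example_lookup(struct inode *dir, struct dentry *dentry,
                                     unsigned int flags)
{
    struct fscrypt_name nm;
    int err;

    err = fscrypt_prepare_lookup(dir, dentry, &nm);
    /* install d_ops even on error, while d_flags are still stable */
    generic_set_encrypted_ci_d_ops(dentry);
    if (err == -ENOENT)
        return d_splice_alias(NULL, dentry);
    if (err)
        return ERR_PTR(err);
    /* ...filesystem-specific directory search would follow here... */
    return NULL;
}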

View File

@@ -32,16 +32,6 @@
#include <linux/cleancache.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
#include <trace/events/android_fs.h>
EXPORT_TRACEPOINT_SYMBOL(android_fs_datawrite_start);
EXPORT_TRACEPOINT_SYMBOL(android_fs_datawrite_end);
EXPORT_TRACEPOINT_SYMBOL(android_fs_dataread_start);
EXPORT_TRACEPOINT_SYMBOL(android_fs_dataread_end);
EXPORT_TRACEPOINT_SYMBOL(android_fs_fsync_start);
EXPORT_TRACEPOINT_SYMBOL(android_fs_fsync_end);
/*
* I/O completion handler for multipage BIOs.
*
@@ -59,16 +49,6 @@ static void mpage_end_io(struct bio *bio)
struct bio_vec *bv;
int i;
if (trace_android_fs_dataread_end_enabled() &&
(bio_data_dir(bio) == READ)) {
struct page *first_page = bio->bi_io_vec[0].bv_page;
if (first_page != NULL)
trace_android_fs_dataread_end(first_page->mapping->host,
page_offset(first_page),
bio->bi_iter.bi_size);
}
bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
page_endio(page, bio_op(bio),
@@ -80,24 +60,6 @@ static void mpage_end_io(struct bio *bio)
static struct bio *mpage_bio_submit(int op, int op_flags, struct bio *bio)
{
if (trace_android_fs_dataread_start_enabled() && (op == REQ_OP_READ)) {
struct page *first_page = bio->bi_io_vec[0].bv_page;
if (first_page != NULL) {
char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
path = android_fstrace_get_pathname(pathbuf,
MAX_TRACE_PATHBUF_LEN,
first_page->mapping->host);
trace_android_fs_dataread_start(
first_page->mapping->host,
page_offset(first_page),
bio->bi_iter.bi_size,
current->pid,
path,
current->comm);
}
}
bio->bi_end_io = mpage_end_io;
bio_set_op_attrs(bio, op, op_flags);
guard_bio_eod(op, bio);

View File

@@ -764,9 +764,8 @@ static int do_dentry_open(struct file *f,
path_get(&f->f_path);
f->f_inode = inode;
f->f_mapping = inode->i_mapping;
/* Ensure that we skip any errors that predate opening of the file */
f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
f->f_sb_err = file_sample_sb_err(f);
if (unlikely(f->f_flags & O_PATH)) {
f->f_mode = FMODE_PATH | FMODE_OPENED;

View File

@@ -162,7 +162,7 @@ SYSCALL_DEFINE1(syncfs, int, fd)
{
struct fd f = fdget(fd);
struct super_block *sb;
int ret;
int ret, ret2;
if (!f.file)
return -EBADF;
@@ -172,8 +172,10 @@ SYSCALL_DEFINE1(syncfs, int, fd)
ret = sync_filesystem(sb);
up_read(&sb->s_umount);
ret2 = errseq_check_and_advance(&sb->s_wb_err, &f.file->f_sb_err);
fdput(f);
return ret;
return ret ? ret : ret2;
}
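From userspace the change is visible in syncfs(2)'s return value: writeback errors recorded on the filesystem since the file description was opened now surface as -1 with errno set, instead of a silent 0. A minimal check, assuming glibc's syncfs() wrapper (exposed under _GNU_SOURCE):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    int fd = open(argc > 1 ? argv[1] : ".", O_RDONLY);

    if (fd < 0) {
        perror("open");
        return 1;
    }
    if (syncfs(fd) != 0) {
        perror("syncfs");   /* e.g. EIO after a prior writeback error */
        return 1;
    }
    close(fd);
    return 0;
}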
/**

View File

@@ -222,7 +222,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino);
err = fscrypt_prepare_lookup(dir, dentry, &nm);
ubifs_set_d_ops(dir, dentry);
generic_set_encrypted_ci_d_ops(dentry);
if (err == -ENOENT)
return d_splice_alias(NULL, dentry);
if (err)
@@ -1708,19 +1708,3 @@ const struct file_operations ubifs_dir_operations = {
.compat_ioctl = ubifs_compat_ioctl,
#endif
};
#ifdef CONFIG_FS_ENCRYPTION
static const struct dentry_operations ubifs_encrypted_dentry_ops = {
.d_revalidate = fscrypt_d_revalidate,
};
#endif
static void ubifs_set_d_ops(struct inode *dir, struct dentry *dentry)
{
#ifdef CONFIG_FS_ENCRYPTION
if (dentry->d_flags & DCACHE_ENCRYPTED_NAME) {
d_set_d_op(dentry, &ubifs_encrypted_dentry_ops);
return;
}
#endif
}

View File

@@ -138,7 +138,7 @@ int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
while ((c = utf8byte(&cur))) {
if (c < 0)
return c;
return -EINVAL;
hash = partial_name_hash((unsigned char)c, hash);
}
str->hash = end_name_hash(hash);

View File

@@ -5,6 +5,7 @@ obj-$(CONFIG_FS_VERITY) += enable.o \
init.o \
measure.o \
open.o \
read_metadata.o \
verify.o
obj-$(CONFIG_FS_VERITY_BUILTIN_SIGNATURES) += signature.o

View File

@@ -122,12 +122,17 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
const u8 *salt, size_t salt_size);
struct fsverity_info *fsverity_create_info(const struct inode *inode,
void *desc, size_t desc_size);
struct fsverity_descriptor *desc,
size_t desc_size);
void fsverity_set_info(struct inode *inode, struct fsverity_info *vi);
void fsverity_free_info(struct fsverity_info *vi);
int fsverity_get_descriptor(struct inode *inode,
struct fsverity_descriptor **desc_ret,
size_t *desc_size_ret);
int __init fsverity_init_info_cache(void);
void __init fsverity_exit_info_cache(void);
@@ -135,15 +140,13 @@ void __init fsverity_exit_info_cache(void);
#ifdef CONFIG_FS_VERITY_BUILTIN_SIGNATURES
int fsverity_verify_signature(const struct fsverity_info *vi,
const struct fsverity_descriptor *desc,
size_t desc_size);
const u8 *signature, size_t sig_size);
int __init fsverity_init_signature(void);
#else /* !CONFIG_FS_VERITY_BUILTIN_SIGNATURES */
static inline int
fsverity_verify_signature(const struct fsverity_info *vi,
const struct fsverity_descriptor *desc,
size_t desc_size)
const u8 *signature, size_t sig_size)
{
return 0;
}

View File

@@ -142,45 +142,17 @@ static int compute_file_digest(struct fsverity_hash_alg *hash_alg,
}
/*
* Validate the given fsverity_descriptor and create a new fsverity_info from
* it. The signature (if present) is also checked.
* Create a new fsverity_info from the given fsverity_descriptor (with optional
* appended signature), and check the signature if present. The
* fsverity_descriptor must have already undergone basic validation.
*/
struct fsverity_info *fsverity_create_info(const struct inode *inode,
void *_desc, size_t desc_size)
struct fsverity_descriptor *desc,
size_t desc_size)
{
struct fsverity_descriptor *desc = _desc;
struct fsverity_info *vi;
int err;
if (desc_size < sizeof(*desc)) {
fsverity_err(inode, "Unrecognized descriptor size: %zu bytes",
desc_size);
return ERR_PTR(-EINVAL);
}
if (desc->version != 1) {
fsverity_err(inode, "Unrecognized descriptor version: %u",
desc->version);
return ERR_PTR(-EINVAL);
}
if (memchr_inv(desc->__reserved, 0, sizeof(desc->__reserved))) {
fsverity_err(inode, "Reserved bits set in descriptor");
return ERR_PTR(-EINVAL);
}
if (desc->salt_size > sizeof(desc->salt)) {
fsverity_err(inode, "Invalid salt_size: %u", desc->salt_size);
return ERR_PTR(-EINVAL);
}
if (le64_to_cpu(desc->data_size) != inode->i_size) {
fsverity_err(inode,
"Wrong data_size: %llu (desc) != %lld (inode)",
le64_to_cpu(desc->data_size), inode->i_size);
return ERR_PTR(-EINVAL);
}
vi = kmem_cache_zalloc(fsverity_info_cachep, GFP_KERNEL);
if (!vi)
return ERR_PTR(-ENOMEM);
@@ -209,7 +181,8 @@ struct fsverity_info *fsverity_create_info(const struct inode *inode,
vi->tree_params.hash_alg->name,
vi->tree_params.digest_size, vi->file_digest);
err = fsverity_verify_signature(vi, desc, desc_size);
err = fsverity_verify_signature(vi, desc->signature,
le32_to_cpu(desc->sig_size));
out:
if (err) {
fsverity_free_info(vi);
@@ -221,11 +194,20 @@ struct fsverity_info *fsverity_create_info(const struct inode *inode,
void fsverity_set_info(struct inode *inode, struct fsverity_info *vi)
{
/*
* Multiple processes may race to set ->i_verity_info, so use cmpxchg.
* This pairs with the READ_ONCE() in fsverity_get_info().
* Multiple tasks may race to set ->i_verity_info, so use
* cmpxchg_release(). This pairs with the smp_load_acquire() in
* fsverity_get_info(). I.e., here we publish ->i_verity_info with a
* RELEASE barrier so that other tasks can ACQUIRE it.
*/
if (cmpxchg(&inode->i_verity_info, NULL, vi) != NULL)
if (cmpxchg_release(&inode->i_verity_info, NULL, vi) != NULL) {
/* Lost the race, so free the fsverity_info we allocated. */
fsverity_free_info(vi);
/*
* Afterwards, the caller may access ->i_verity_info directly,
* so make sure to ACQUIRE the winning fsverity_info.
*/
(void)fsverity_get_info(inode);
}
}
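For reference, the reader side that the comment above pairs with; per that comment, fsverity_get_info() (in the fs-verity header, sketched here) loads the pointer with ACQUIRE semantics:

static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
{
    /* pairs with the cmpxchg_release() in fsverity_set_info() */
    return smp_load_acquire(&inode->i_verity_info);
}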
void fsverity_free_info(struct fsverity_info *vi)
@@ -236,15 +218,57 @@ void fsverity_free_info(struct fsverity_info *vi)
kmem_cache_free(fsverity_info_cachep, vi);
}
/* Ensure the inode has an ->i_verity_info */
static int ensure_verity_info(struct inode *inode)
static bool validate_fsverity_descriptor(struct inode *inode,
const struct fsverity_descriptor *desc,
size_t desc_size)
{
struct fsverity_info *vi = fsverity_get_info(inode);
struct fsverity_descriptor *desc;
int res;
if (desc_size < sizeof(*desc)) {
fsverity_err(inode, "Unrecognized descriptor size: %zu bytes",
desc_size);
return false;
}
if (vi)
return 0;
if (desc->version != 1) {
fsverity_err(inode, "Unrecognized descriptor version: %u",
desc->version);
return false;
}
if (memchr_inv(desc->__reserved, 0, sizeof(desc->__reserved))) {
fsverity_err(inode, "Reserved bits set in descriptor");
return false;
}
if (desc->salt_size > sizeof(desc->salt)) {
fsverity_err(inode, "Invalid salt_size: %u", desc->salt_size);
return false;
}
if (le64_to_cpu(desc->data_size) != inode->i_size) {
fsverity_err(inode,
"Wrong data_size: %llu (desc) != %lld (inode)",
le64_to_cpu(desc->data_size), inode->i_size);
return false;
}
if (le32_to_cpu(desc->sig_size) > desc_size - sizeof(*desc)) {
fsverity_err(inode, "Signature overflows verity descriptor");
return false;
}
return true;
}
/*
* Read the inode's fsverity_descriptor (with optional appended signature) from
* the filesystem, and do basic validation of it.
*/
int fsverity_get_descriptor(struct inode *inode,
struct fsverity_descriptor **desc_ret,
size_t *desc_size_ret)
{
int res;
struct fsverity_descriptor *desc;
res = inode->i_sb->s_vop->get_verity_descriptor(inode, NULL, 0);
if (res < 0) {
@@ -263,20 +287,46 @@ static int ensure_verity_info(struct inode *inode)
res = inode->i_sb->s_vop->get_verity_descriptor(inode, desc, res);
if (res < 0) {
fsverity_err(inode, "Error %d reading verity descriptor", res);
goto out_free_desc;
kfree(desc);
return res;
}
vi = fsverity_create_info(inode, desc, res);
if (!validate_fsverity_descriptor(inode, desc, res)) {
kfree(desc);
return -EINVAL;
}
*desc_ret = desc;
*desc_size_ret = res;
return 0;
}
/* Ensure the inode has an ->i_verity_info */
static int ensure_verity_info(struct inode *inode)
{
struct fsverity_info *vi = fsverity_get_info(inode);
struct fsverity_descriptor *desc;
size_t desc_size;
int err;
if (vi)
return 0;
err = fsverity_get_descriptor(inode, &desc, &desc_size);
if (err)
return err;
vi = fsverity_create_info(inode, desc, desc_size);
if (IS_ERR(vi)) {
res = PTR_ERR(vi);
err = PTR_ERR(vi);
goto out_free_desc;
}
fsverity_set_info(inode, vi);
res = 0;
err = 0;
out_free_desc:
kfree(desc);
return res;
return err;
}
/**

fs/verity/read_metadata.c (new file)
View File

@@ -0,0 +1,195 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Ioctl to read verity metadata
*
* Copyright 2021 Google LLC
*/
#include "fsverity_private.h"
#include <linux/backing-dev.h>
#include <linux/highmem.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>
static int fsverity_read_merkle_tree(struct inode *inode,
const struct fsverity_info *vi,
void __user *buf, u64 offset, int length)
{
const struct fsverity_operations *vops = inode->i_sb->s_vop;
u64 end_offset;
unsigned int offs_in_page;
pgoff_t index, last_index;
int retval = 0;
int err = 0;
end_offset = min(offset + length, vi->tree_params.tree_size);
if (offset >= end_offset)
return 0;
offs_in_page = offset_in_page(offset);
last_index = (end_offset - 1) >> PAGE_SHIFT;
/*
* Iterate through each Merkle tree page in the requested range and copy
* the requested portion to userspace. Note that the Merkle tree block
* size isn't important here, as we are returning a byte stream; i.e.,
* we can just work with pages even if the tree block size != PAGE_SIZE.
*/
for (index = offset >> PAGE_SHIFT; index <= last_index; index++) {
unsigned long num_ra_pages =
min_t(unsigned long, last_index - index + 1,
inode->i_sb->s_bdi->io_pages);
unsigned int bytes_to_copy = min_t(u64, end_offset - offset,
PAGE_SIZE - offs_in_page);
struct page *page;
const void *virt;
page = vops->read_merkle_tree_page(inode, index, num_ra_pages);
if (IS_ERR(page)) {
err = PTR_ERR(page);
fsverity_err(inode,
"Error %d reading Merkle tree page %lu",
err, index);
break;
}
virt = kmap(page);
if (copy_to_user(buf, virt + offs_in_page, bytes_to_copy)) {
kunmap(page);
put_page(page);
err = -EFAULT;
break;
}
kunmap(page);
put_page(page);
retval += bytes_to_copy;
buf += bytes_to_copy;
offset += bytes_to_copy;
if (fatal_signal_pending(current)) {
err = -EINTR;
break;
}
cond_resched();
offs_in_page = 0;
}
return retval ? retval : err;
}
/* Copy the requested portion of the buffer to userspace. */
static int fsverity_read_buffer(void __user *dst, u64 offset, int length,
const void *src, size_t src_length)
{
if (offset >= src_length)
return 0;
src += offset;
src_length -= offset;
length = min_t(size_t, length, src_length);
if (copy_to_user(dst, src, length))
return -EFAULT;
return length;
}
static int fsverity_read_descriptor(struct inode *inode,
void __user *buf, u64 offset, int length)
{
struct fsverity_descriptor *desc;
size_t desc_size;
int res;
res = fsverity_get_descriptor(inode, &desc, &desc_size);
if (res)
return res;
/* don't include the signature */
desc_size = offsetof(struct fsverity_descriptor, signature);
desc->sig_size = 0;
res = fsverity_read_buffer(buf, offset, length, desc, desc_size);
kfree(desc);
return res;
}
static int fsverity_read_signature(struct inode *inode,
void __user *buf, u64 offset, int length)
{
struct fsverity_descriptor *desc;
size_t desc_size;
int res;
res = fsverity_get_descriptor(inode, &desc, &desc_size);
if (res)
return res;
if (desc->sig_size == 0) {
res = -ENODATA;
goto out;
}
/*
* Include only the signature. Note that fsverity_get_descriptor()
* already verified that sig_size is in-bounds.
*/
res = fsverity_read_buffer(buf, offset, length, desc->signature,
le32_to_cpu(desc->sig_size));
out:
kfree(desc);
return res;
}
/**
* fsverity_ioctl_read_metadata() - read verity metadata from a file
* @filp: file to read the metadata from
* @uarg: user pointer to fsverity_read_metadata_arg
*
* Return: length read on success, 0 on EOF, -errno on failure
*/
int fsverity_ioctl_read_metadata(struct file *filp, const void __user *uarg)
{
struct inode *inode = file_inode(filp);
const struct fsverity_info *vi;
struct fsverity_read_metadata_arg arg;
int length;
void __user *buf;
vi = fsverity_get_info(inode);
if (!vi)
return -ENODATA; /* not a verity file */
/*
* Note that we don't have to explicitly check that the file is open for
* reading, since verity files can only be opened for reading.
*/
if (copy_from_user(&arg, uarg, sizeof(arg)))
return -EFAULT;
if (arg.__reserved)
return -EINVAL;
/* offset + length must not overflow. */
if (arg.offset + arg.length < arg.offset)
return -EINVAL;
/* Ensure that the return value will fit in INT_MAX. */
length = min_t(u64, arg.length, INT_MAX);
buf = u64_to_user_ptr(arg.buf_ptr);
switch (arg.metadata_type) {
case FS_VERITY_METADATA_TYPE_MERKLE_TREE:
return fsverity_read_merkle_tree(inode, vi, buf, arg.offset,
length);
case FS_VERITY_METADATA_TYPE_DESCRIPTOR:
return fsverity_read_descriptor(inode, buf, arg.offset, length);
case FS_VERITY_METADATA_TYPE_SIGNATURE:
return fsverity_read_signature(inode, buf, arg.offset, length);
default:
return -EINVAL;
}
}
EXPORT_SYMBOL_GPL(fsverity_ioctl_read_metadata);
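A minimal userspace sketch of driving the new ioctl, assuming UAPI headers that ship FS_IOC_READ_VERITY_METADATA and struct fsverity_read_metadata_arg (both added alongside this code):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fsverity.h>

int main(int argc, char **argv)
{
    struct fsverity_read_metadata_arg arg = {0};
    char buf[4096];
    int fd, n;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <verity-file>\n", argv[0]);
        return 1;
    }
    fd = open(argv[1], O_RDONLY);
    if (fd < 0) {
        perror("open");
        return 1;
    }
    arg.metadata_type = FS_VERITY_METADATA_TYPE_DESCRIPTOR;
    arg.offset = 0;
    arg.length = sizeof(buf);
    arg.buf_ptr = (uintptr_t)buf;

    n = ioctl(fd, FS_IOC_READ_VERITY_METADATA, &arg);
    if (n < 0)
        perror("FS_IOC_READ_VERITY_METADATA");
    else
        printf("read %d bytes of verity descriptor\n", n);
    close(fd);
    return n < 0;
}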

View File

@@ -26,6 +26,27 @@ static int fsverity_require_signatures;
*/
static struct key *fsverity_keyring;
/**
* fsverity_verify_signature() - check a verity file's signature
* @vi: the file's fsverity_info
* @signature: the file's built-in signature
* @sig_size: size of signature in bytes, or 0 if no signature
*
* If the file includes a signature of its fs-verity file digest, verify it
* against the certificates in the fs-verity keyring.
*
* Return: 0 on success (signature valid or not required); -errno on failure
*/
int fsverity_verify_signature(const struct fsverity_info *vi,
const u8 *signature, size_t sig_size)
{
unsigned int digest_algorithm =
vi->tree_params.hash_alg - fsverity_hash_algs;
return __fsverity_verify_signature(vi->inode, signature, sig_size,
vi->file_digest, digest_algorithm);
}
/**
* __fsverity_verify_signature() - check a verity file's signature
* @inode: the file's inode
@@ -85,8 +106,7 @@ int __fsverity_verify_signature(const struct inode *inode, const u8 *signature,
memcpy(d->digest, file_digest, hash_alg->digest_size);
err = verify_pkcs7_signature(d, sizeof(*d) + hash_alg->digest_size,
signature, sig_size,
fsverity_keyring,
signature, sig_size, fsverity_keyring,
VERIFYING_UNSPECIFIED_SIGNATURE,
NULL, NULL);
kfree(d);
@@ -111,34 +131,6 @@ int __fsverity_verify_signature(const struct inode *inode, const u8 *signature,
}
EXPORT_SYMBOL_GPL(__fsverity_verify_signature);
/**
* fsverity_verify_signature() - check a verity file's signature
* @vi: the file's fsverity_info
* @desc: the file's fsverity_descriptor
* @desc_size: size of @desc
*
* If the file's fs-verity descriptor includes a signature of the file digest,
* verify it against the certificates in the fs-verity keyring.
*
* Return: 0 on success (signature valid or not required); -errno on failure
*/
int fsverity_verify_signature(const struct fsverity_info *vi,
const struct fsverity_descriptor *desc,
size_t desc_size)
{
const struct inode *inode = vi->inode;
const struct fsverity_hash_alg *hash_alg = vi->tree_params.hash_alg;
const u32 sig_size = le32_to_cpu(desc->sig_size);
if (sig_size > desc_size - sizeof(*desc)) {
fsverity_err(inode, "Signature overflows verity descriptor");
return -EBADMSG;
}
return __fsverity_verify_signature(inode, desc->signature, sig_size,
vi->file_digest, hash_alg - fsverity_hash_algs);
}
#ifdef CONFIG_SYSCTL
static struct ctl_table_header *fsverity_sysctl_header;