Merge branch 'linux-4.19.y' of https://github.com/erofs/erofs-backports into qcom/sm8250

Change-Id: I50298bcd30f6fc282ea5b92597f28fa63ed9e082
This commit is contained in:
Sebastiano Barezzi
2024-02-19 18:49:01 +01:00
42 changed files with 5911 additions and 5397 deletions

View File

@@ -122,8 +122,6 @@ source "drivers/staging/gasket/Kconfig"
source "drivers/staging/axis-fifo/Kconfig"
source "drivers/staging/erofs/Kconfig"
source "drivers/staging/qcacld-3.0/Kconfig"
endif # STAGING

View File

@@ -51,5 +51,4 @@ obj-$(CONFIG_SOC_MT7621) += mt7621-eth/
obj-$(CONFIG_SOC_MT7621) += mt7621-dts/
obj-$(CONFIG_STAGING_GASKET_FRAMEWORK) += gasket/
obj-$(CONFIG_XIL_AXIS_FIFO) += axis-fifo/
obj-$(CONFIG_EROFS_FS) += erofs/
obj-$(CONFIG_QCA_CLD_WLAN) += qcacld-3.0/

View File

@@ -1,141 +0,0 @@
# SPDX-License-Identifier: GPL-2.0
config EROFS_FS
tristate "EROFS filesystem support"
depends on BLOCK
help
EROFS(Enhanced Read-Only File System) is a lightweight
read-only file system with modern designs (eg. page-sized
blocks, inline xattrs/data, etc.) for scenarios which need
high-performance read-only requirements, eg. firmwares in
mobile phone or LIVECDs.
It also provides VLE compression support, focusing on
random read improvements, keeping relatively lower
compression ratios, which is useful for high-performance
devices with limited memory and ROM space.
If unsure, say N.
config EROFS_FS_DEBUG
bool "EROFS debugging feature"
depends on EROFS_FS
help
Print EROFS debugging messages and enable more BUG_ONs
which check the filesystem consistency aggressively.
For daily use, say N.
config EROFS_FS_XATTR
bool "EROFS extended attributes"
depends on EROFS_FS
default y
help
Extended attributes are name:value pairs associated with inodes by
the kernel or by users (see the attr(5) manual page, or visit
<http://acl.bestbits.at/> for details).
If unsure, say N.
config EROFS_FS_POSIX_ACL
bool "EROFS Access Control Lists"
depends on EROFS_FS_XATTR
select FS_POSIX_ACL
default y
help
Posix Access Control Lists (ACLs) support permissions for users and
groups beyond the owner/group/world scheme.
To learn more about Access Control Lists, visit the POSIX ACLs for
Linux website <http://acl.bestbits.at/>.
If you don't know what Access Control Lists are, say N.
config EROFS_FS_SECURITY
bool "EROFS Security Labels"
depends on EROFS_FS_XATTR
help
Security labels provide an access control facility to support Linux
Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO
Linux. This option enables an extended attribute handler for file
security labels in the erofs filesystem, so that it requires enabling
the extended attribute support in advance.
If you are not using a security module, say N.
config EROFS_FS_USE_VM_MAP_RAM
bool "EROFS VM_MAP_RAM Support"
depends on EROFS_FS
help
use vm_map_ram/vm_unmap_ram instead of vmap/vunmap.
If you don't know what these are, say N.
config EROFS_FAULT_INJECTION
bool "EROFS fault injection facility"
depends on EROFS_FS
help
Test EROFS to inject faults such as ENOMEM, EIO, and so on.
If unsure, say N.
config EROFS_FS_ZIP
bool "EROFS Data Compression Support"
depends on EROFS_FS
help
Currently we support VLE Compression only.
Play at your own risk.
If you don't want to use compression feature, say N.
config EROFS_FS_CLUSTER_PAGE_LIMIT
int "EROFS Cluster Pages Hard Limit"
depends on EROFS_FS_ZIP
range 1 256
default "1"
help
Indicates VLE compressed pages hard limit of a
compressed cluster.
For example, if files of an image are compressed
into 8k-unit, the hard limit should not be less
than 2. Otherwise, the image cannot be mounted
correctly on this kernel.
choice
prompt "EROFS VLE Data Decompression mode"
depends on EROFS_FS_ZIP
default EROFS_FS_ZIP_CACHE_BIPOLAR
help
EROFS supports three options for VLE decompression.
"In-place Decompression Only" consumes the minimum memory
with lowest random read.
"Bipolar Cached Decompression" consumes the maximum memory
with highest random read.
If unsure, select "Bipolar Cached Decompression"
config EROFS_FS_ZIP_NO_CACHE
bool "In-place Decompression Only"
help
Read compressed data into page cache and do in-place
decompression directly.
config EROFS_FS_ZIP_CACHE_UNIPOLAR
bool "Unipolar Cached Decompression"
help
For each request, it caches the last compressed page
for further reading.
It still decompresses in place for the rest compressed pages.
config EROFS_FS_ZIP_CACHE_BIPOLAR
bool "Bipolar Cached Decompression"
help
For each request, it caches the both end compressed pages
for further reading.
It still decompresses in place for the rest compressed pages.
Recommended for performance priority.
endchoice

View File

@@ -1,13 +0,0 @@
# SPDX-License-Identifier: GPL-2.0
EROFS_VERSION = "1.0pre1"
ccflags-y += -Wall -DEROFS_VERSION=\"$(EROFS_VERSION)\"
obj-$(CONFIG_EROFS_FS) += erofs.o
# staging requirement: to be self-contained in its own directory
ccflags-y += -I$(src)/include
erofs-objs := super.o inode.o data.o namei.o dir.o utils.o
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
erofs-$(CONFIG_EROFS_FS_ZIP) += unzip_vle.o unzip_lz4.o unzip_vle_lz4.o

View File

@@ -1,276 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 OR Apache-2.0
*
* linux/drivers/staging/erofs/erofs_fs.h
*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* http://www.huawei.com/
* Created by Gao Xiang <gaoxiang25@huawei.com>
*
* This file is dual-licensed; you may select either the GNU General Public
* License version 2 or Apache License, Version 2.0. See the file COPYING
* in the main directory of the Linux distribution for more details.
*/
#ifndef __EROFS_FS_H
#define __EROFS_FS_H
/* Enhanced(Extended) ROM File System */
/* value expected in erofs_super_block.magic (presumably checked at mount time -- confirm) */
#define EROFS_SUPER_MAGIC_V1 0xE0F5E1E2
/* NOTE(review): presumably the byte offset of the super block on the device -- confirm against the mount code */
#define EROFS_SUPER_OFFSET 1024
/*
 * Any bits that aren't in EROFS_ALL_REQUIREMENTS should be
 * incompatible with this kernel version.
 */
#define EROFS_ALL_REQUIREMENTS 0
/*
 * On-disk super block, 128 bytes in total (enforced by the BUILD_BUG_ON in
 * erofs_check_ondisk_layout_definitions()).  The number in front of each
 * field is its byte offset inside the structure; multi-byte fields are
 * little-endian.
 */
struct erofs_super_block {
/* 0 */__le32 magic; /* in the little endian */
/* 4 */__le32 checksum; /* crc32c(super_block) */
/* 8 */__le32 features; /* (aka. feature_compat) */
/* 12 */__u8 blkszbits; /* support block_size == PAGE_SIZE only */
/* 13 */__u8 reserved;
/* 14 */__le16 root_nid;
/* 16 */__le64 inos; /* total valid ino # (== f_files - f_favail) */
/* 24 */__le64 build_time; /* inode v1 time derivation */
/* 32 */__le32 build_time_nsec;
/* 36 */__le32 blocks; /* used for statfs */
/* 40 */__le32 meta_blkaddr;
/* 44 */__le32 xattr_blkaddr;
/* 48 */__u8 uuid[16]; /* 128-bit uuid for volume */
/* 64 */__u8 volume_name[16]; /* volume name */
/* 80 */__le32 requirements; /* (aka. feature_incompat) */
/* 84 */__u8 reserved2[44];
} __packed; /* 128 bytes */
/*
 * Declare <_prefix><_cur>_BIT as an anonymous enum constant that starts right
 * after the previous bit field, i.e. it evaluates to
 * <_prefix><_pre>_BIT + <_prefix><_pre>_BITS.
 */
#define __EROFS_BIT(_prefix, _cur, _pre) enum { \
_prefix ## _cur ## _BIT = _prefix ## _pre ## _BIT + \
_prefix ## _pre ## _BITS }
/*
* erofs inode data mapping:
* 0 - inode plain without inline data A:
* inode, [xattrs], ... | ... | no-holed data
* 1 - inode VLE compression B:
* inode, [xattrs], extents ... | ...
* 2 - inode plain with inline data C:
* inode, [xattrs], last_inline_data, ... | ... | no-holed data
* 3~7 - reserved
*/
/* data mapping modes; the values 0, 1, 2 match the numbering in the comment above */
enum {
EROFS_INODE_LAYOUT_PLAIN,
EROFS_INODE_LAYOUT_COMPRESSION,
EROFS_INODE_LAYOUT_INLINE,
EROFS_INODE_LAYOUT_MAX
};
/*
 * Bit fields packed into i_advise: a 1-bit inode version starting at bit 0,
 * followed by a 3-bit data mapping mode (EROFS_I_DATA_MAPPING_BIT is derived
 * as 0 + 1 = 1 by __EROFS_BIT below).
 */
#define EROFS_I_VERSION_BITS 1
#define EROFS_I_DATA_MAPPING_BITS 3
#define EROFS_I_VERSION_BIT 0
__EROFS_BIT(EROFS_I_, DATA_MAPPING, VERSION);
/* mask of every i_advise bit defined above (evaluates to 0xf) */
#define EROFS_I_ALL \
((1 << (EROFS_I_DATA_MAPPING_BIT + EROFS_I_DATA_MAPPING_BITS)) - 1)
/*
 * 32-byte on-disk inode (size enforced by BUILD_BUG_ON in
 * erofs_check_ondisk_layout_definitions()).  The number in front of each
 * field is its byte offset.  It carries 16-bit uid/gid/nlink and a 32-bit
 * size, and has no timestamp of its own.
 */
struct erofs_inode_v1 {
/* 0 */__le16 i_advise;
/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */
/* 2 */__le16 i_xattr_icount;
/* 4 */__le16 i_mode;
/* 6 */__le16 i_nlink;
/* 8 */__le32 i_size;
/* 12 */__le32 i_reserved;
/* 16 */union {
/* file total compressed blocks for data mapping 1 */
__le32 compressed_blocks;
__le32 raw_blkaddr;
/* for device files, used to indicate old/new device # */
__le32 rdev;
} i_u __packed;
/* 20 */__le32 i_ino; /* only used for 32-bit stat compatibility */
/* 24 */__le16 i_uid;
/* 26 */__le16 i_gid;
/* 28 */__le32 i_checksum;
} __packed;
/* 32 bytes on-disk inode */
#define EROFS_INODE_LAYOUT_V1 0
/* 64 bytes on-disk inode */
#define EROFS_INODE_LAYOUT_V2 1
/*
 * 64-byte on-disk inode (size enforced by BUILD_BUG_ON in
 * erofs_check_ondisk_layout_definitions()).  Compared with erofs_inode_v1 it
 * widens i_size to 64 bits and i_uid/i_gid/i_nlink to 32 bits, and carries
 * its own ctime.  The trailing "N bytes" comments give the running size
 * after the commented field.
 */
struct erofs_inode_v2 {
__le16 i_advise;
/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */
__le16 i_xattr_icount;
__le16 i_mode;
__le16 i_reserved; /* 8 bytes */
__le64 i_size; /* 16 bytes */
union {
/* file total compressed blocks for data mapping 1 */
__le32 compressed_blocks;
__le32 raw_blkaddr;
/* for device files, used to indicate old/new device # */
__le32 rdev;
} i_u __packed;
/* only used for 32-bit stat compatibility */
__le32 i_ino; /* 24 bytes */
__le32 i_uid;
__le32 i_gid;
__le64 i_ctime; /* 32 bytes */
__le32 i_ctime_nsec;
__le32 i_nlink;
__u8 i_reserved2[12];
__le32 i_checksum; /* 64 bytes */
} __packed;
#define EROFS_MAX_SHARED_XATTRS (128)
/* h_shared_count between 129 ... 255 are special # */
#define EROFS_SHARED_XATTR_EXTENT (255)
/*
 * inline xattrs (n == i_xattr_icount):
 * erofs_xattr_ibody_header(1) + (n - 1) * 4 bytes
 *          12 bytes          /                   \
 *                           /                     \
 *                          /-----------------------\
 *                          |  erofs_xattr_entries+ |
 *                          +-----------------------+
 * inline xattrs must start with erofs_xattr_ibody_header;
 * for a read-only fs, there is no need to introduce h_refcount
 */
/*
 * 12-byte header of the inline xattr area (size enforced by BUILD_BUG_ON in
 * erofs_check_ondisk_layout_definitions()); h_shared_xattrs[] is a
 * zero-length trailing array and does not contribute to sizeof().
 */
struct erofs_xattr_ibody_header {
__le32 h_checksum;
__u8 h_shared_count;
__u8 h_reserved[7];
__le32 h_shared_xattrs[0]; /* shared xattr id array */
} __packed;
/* Name indexes (presumably the values stored in erofs_xattr_entry.e_name_index -- confirm in xattr.c) */
#define EROFS_XATTR_INDEX_USER 1
#define EROFS_XATTR_INDEX_POSIX_ACL_ACCESS 2
#define EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT 3
#define EROFS_XATTR_INDEX_TRUSTED 4
#define EROFS_XATTR_INDEX_LUSTRE 5
#define EROFS_XATTR_INDEX_SECURITY 6
/*
 * xattr entry (for both inline & shared xattrs): a 4-byte fixed header
 * followed by the name bytes and then the value bytes.
 */
struct erofs_xattr_entry {
__u8 e_name_len; /* length of name */
__u8 e_name_index; /* attribute name index */
__le16 e_value_size; /* size of attribute value */
/* followed by e_name and e_value */
char e_name[0]; /* attribute name */
} __packed;
/*
 * Size in bytes of the inline xattr area for a little-endian 16-bit
 * i_xattr_icount value: 0 when the count is 0, otherwise the header size
 * plus 4 bytes for each count beyond the first.
 */
#define ondisk_xattr_ibody_size(count) ({\
u32 __count = le16_to_cpu(count); \
((__count) == 0) ? 0 : \
sizeof(struct erofs_xattr_ibody_header) + \
sizeof(__u32) * ((__count) - 1); })
/* round size up to a multiple of sizeof(struct erofs_xattr_entry) */
#define EROFS_XATTR_ALIGN(size) round_up(size, sizeof(struct erofs_xattr_entry))
/* aligned on-disk size of one entry: fixed header + name bytes + value bytes */
#define EROFS_XATTR_ENTRY_SIZE(entry) EROFS_XATTR_ALIGN( \
sizeof(struct erofs_xattr_entry) + \
(entry)->e_name_len + le16_to_cpu((entry)->e_value_size))
/*
 * 16-byte extent header (a checksum plus three reserved words);
 * have to be aligned with 8 bytes on disk
 */
struct erofs_extent_header {
__le32 eh_checksum;
__le32 eh_reserved[3];
} __packed;
/*
* Z_EROFS Variable-sized Logical Extent cluster type:
* 0 - literal (uncompressed) cluster
* 1 - compressed cluster (for the head logical cluster)
* 2 - compressed cluster (for the other logical clusters)
*
* In detail,
* 0 - literal (uncompressed) cluster,
* di_advise = 0
* di_clusterofs = the literal data offset of the cluster
* di_blkaddr = the blkaddr of the literal cluster
*
* 1 - compressed cluster (for the head logical cluster)
* di_advise = 1
* di_clusterofs = the decompressed data offset of the cluster
* di_blkaddr = the blkaddr of the compressed cluster
*
* 2 - compressed cluster (for the other logical clusters)
* di_advise = 2
* di_clusterofs =
* the decompressed data offset in its own head cluster
* di_u.delta[0] = distance to its corresponding head cluster
* di_u.delta[1] = distance to its corresponding tail cluster
* (di_advise could be 0, 1 or 2)
*/
/* width and start bit of the cluster type field (presumably within di_advise -- confirm in unzip_vle.c) */
#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS 2
#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT 0
/*
 * 8-byte on-disk index describing one logical cluster; which member of di_u
 * is meaningful depends on the cluster type described in the comment above.
 */
struct z_erofs_vle_decompressed_index {
__le16 di_advise;
/* where to decompress in the head cluster */
__le16 di_clusterofs;
union {
/* for the head cluster */
__le32 blkaddr;
/*
* for the rest clusters
* eg. for 4k page-sized cluster, maximum 4K*64k = 256M)
* [0] - pointing to the head cluster
* [1] - pointing to the tail cluster
*/
__le16 delta[2];
} di_u __packed; /* 8 bytes */
} __packed;
/* round size up to a multiple of the 8-byte index size */
#define Z_EROFS_VLE_EXTENT_ALIGN(size) round_up(size, \
sizeof(struct z_erofs_vle_decompressed_index))
/*
 * 12-byte on-disk directory entry; the number in each field comment is the
 * field's byte offset.
 * dirents are sorted in alphabetical order, thus we can do binary search
 */
struct erofs_dirent {
__le64 nid; /* 0, node number */
__le16 nameoff; /* 8, start offset of file name */
__u8 file_type; /* 10, file type */
__u8 reserved; /* 11, reserved */
} __packed;
/* file types used in inode_info->flags */
enum {
EROFS_FT_UNKNOWN,
EROFS_FT_REG_FILE,
EROFS_FT_DIR,
EROFS_FT_CHRDEV,
EROFS_FT_BLKDEV,
EROFS_FT_FIFO,
EROFS_FT_SOCK,
EROFS_FT_SYMLINK,
EROFS_FT_MAX
};
/* NOTE(review): presumably the maximum file name length in bytes -- confirm against namei.c/dir.c */
#define EROFS_NAME_LEN 255
/*
 * Compile-time guard for the on-disk format: the build breaks if any of the
 * structures above stops having its expected on-disk size.  The function
 * has no runtime effect.
 */
static inline void erofs_check_ondisk_layout_definitions(void)
{
	/* small fixed-size records */
	BUILD_BUG_ON(sizeof(struct erofs_xattr_entry) != 4);
	BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8);
	BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12);
	BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12);
	BUILD_BUG_ON(sizeof(struct erofs_extent_header) != 16);

	/* inodes */
	BUILD_BUG_ON(sizeof(struct erofs_inode_v1) != 32);
	BUILD_BUG_ON(sizeof(struct erofs_inode_v2) != 64);

	/* super block */
	BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128);
}
#endif

View File

@@ -1,341 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* linux/drivers/staging/erofs/inode.c
*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* http://www.huawei.com/
* Created by Gao Xiang <gaoxiang25@huawei.com>
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of the Linux
* distribution for more details.
*/
#include "xattr.h"
#include <trace/events/erofs.h>
/*
 * read_inode - read and decode the on-disk inode of @inode
 * @inode: VFS inode; EROFS_V(inode)->nid must already be set
 * @ofs:   output, offset inside the returned page just past the on-disk
 *         inode (for a v2 inode split across two pages: the number of inode
 *         bytes that live in the second page)
 *
 * if inode is successfully read, return its inode page (or sometimes
 * the inode payload page if it's an extended inode) in order to fill
 * inline data if possible.
 *
 * The page comes from erofs_get_meta_page() and the caller has to
 * unlock_page() and put_page() it.  On failure an ERR_PTR() is returned and
 * no page reference is left with the caller.
 */
static struct page *read_inode(struct inode *inode, unsigned int *ofs)
{
struct super_block *sb = inode->i_sb;
struct erofs_sb_info *sbi = EROFS_SB(sb);
struct erofs_vnode *vi = EROFS_V(inode);
const erofs_off_t inode_loc = iloc(sbi, vi->nid);
erofs_blk_t blkaddr;
struct page *page;
struct erofs_inode_v1 *v1;
struct erofs_inode_v2 *v2, *copied = NULL;
unsigned int ifmt;
int err;
/* split the byte address into block number and in-block offset */
blkaddr = erofs_blknr(inode_loc);
*ofs = erofs_blkoff(inode_loc);
debugln("%s, reading inode nid %llu at %u of blkaddr %u",
__func__, vi->nid, *ofs, blkaddr);
page = erofs_get_meta_page(sb, blkaddr, false);
if (IS_ERR(page)) {
errln("failed to get inode (nid: %llu) page, err %ld",
vi->nid, PTR_ERR(page));
return page;
}
/* i_advise is the first field of both the v1 and the v2 layout */
v1 = page_address(page) + *ofs;
ifmt = le16_to_cpu(v1->i_advise);
/* reject any i_advise bit this implementation does not know */
if (ifmt & ~EROFS_I_ALL) {
errln("unsupported i_format %u of nid %llu", ifmt, vi->nid);
err = -EOPNOTSUPP;
goto err_out;
}
vi->data_mapping_mode = __inode_data_mapping(ifmt);
if (unlikely(vi->data_mapping_mode >= EROFS_INODE_LAYOUT_MAX)) {
errln("unknown data mapping mode %u of nid %llu",
vi->data_mapping_mode, vi->nid);
err = -EOPNOTSUPP;
goto err_out;
}
switch (__inode_version(ifmt)) {
case EROFS_INODE_LAYOUT_V2:
vi->inode_isize = sizeof(struct erofs_inode_v2);
/* check if the inode crosses a page boundary */
if (*ofs + vi->inode_isize <= PAGE_SIZE) {
*ofs += vi->inode_isize;
v2 = (struct erofs_inode_v2 *)v1;
} else {
/* number of inode bytes available in the first page */
const unsigned int gotten = PAGE_SIZE - *ofs;
/* assemble the split inode in a temporary buffer */
copied = kmalloc(vi->inode_isize, GFP_NOFS);
if (!copied) {
err = -ENOMEM;
goto err_out;
}
memcpy(copied, v1, gotten);
/* done with the first page; fetch the block that follows it */
unlock_page(page);
put_page(page);
page = erofs_get_meta_page(sb, blkaddr + 1, false);
if (IS_ERR(page)) {
errln("failed to get inode payload page (nid: %llu), err %ld",
vi->nid, PTR_ERR(page));
kfree(copied);
return page;
}
/* the rest of the inode sits at the start of the second page */
*ofs = vi->inode_isize - gotten;
memcpy((u8 *)copied + gotten, page_address(page), *ofs);
v2 = copied;
}
vi->xattr_isize = ondisk_xattr_ibody_size(v2->i_xattr_icount);
inode->i_mode = le16_to_cpu(v2->i_mode);
/* i_u is interpreted according to the file type */
if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)) {
vi->raw_blkaddr = le32_to_cpu(v2->i_u.raw_blkaddr);
} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
inode->i_rdev =
new_decode_dev(le32_to_cpu(v2->i_u.rdev));
} else if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
inode->i_rdev = 0;
} else {
goto bogusimode;
}
i_uid_write(inode, le32_to_cpu(v2->i_uid));
i_gid_write(inode, le32_to_cpu(v2->i_gid));
set_nlink(inode, le32_to_cpu(v2->i_nlink));
/* extended inode has its own timestamp */
inode->i_ctime.tv_sec = le64_to_cpu(v2->i_ctime);
inode->i_ctime.tv_nsec = le32_to_cpu(v2->i_ctime_nsec);
inode->i_size = le64_to_cpu(v2->i_size);
/* kfree(NULL) is a no-op when the inode was not split */
kfree(copied);
break;
case EROFS_INODE_LAYOUT_V1:
vi->inode_isize = sizeof(struct erofs_inode_v1);
*ofs += vi->inode_isize;
vi->xattr_isize = ondisk_xattr_ibody_size(v1->i_xattr_icount);
inode->i_mode = le16_to_cpu(v1->i_mode);
/* i_u is interpreted according to the file type */
if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)) {
vi->raw_blkaddr = le32_to_cpu(v1->i_u.raw_blkaddr);
} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
inode->i_rdev =
new_decode_dev(le32_to_cpu(v1->i_u.rdev));
} else if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
inode->i_rdev = 0;
} else {
goto bogusimode;
}
i_uid_write(inode, le16_to_cpu(v1->i_uid));
i_gid_write(inode, le16_to_cpu(v1->i_gid));
set_nlink(inode, le16_to_cpu(v1->i_nlink));
/* use build time for compact inodes */
inode->i_ctime.tv_sec = sbi->build_time;
inode->i_ctime.tv_nsec = sbi->build_time_nsec;
inode->i_size = le32_to_cpu(v1->i_size);
break;
default:
errln("unsupported on-disk inode version %u of nid %llu",
__inode_version(ifmt), vi->nid);
err = -EOPNOTSUPP;
goto err_out;
}
/* mtime and atime simply mirror ctime */
inode->i_mtime.tv_sec = inode->i_ctime.tv_sec;
inode->i_atime.tv_sec = inode->i_ctime.tv_sec;
inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec;
inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec;
/* measure inode.i_blocks as the generic filesystem (i_size rounded up to 512-byte units) */
inode->i_blocks = ((inode->i_size - 1) >> 9) + 1;
return page;
bogusimode:
errln("bogus i_mode (%o) @ nid %llu", inode->i_mode, vi->nid);
err = -EIO;
err_out:
/* with CONFIG_EROFS_FS_DEBUG this is BUG_ON(1); otherwise it is a no-op */
DBG_BUGON(1);
kfree(copied);
unlock_page(page);
put_page(page);
return ERR_PTR(err);
}
/*
 * fill_inline_data - set up a fast symlink from inline data
 * @inode:  inode being initialized
 * @data:   kernel address of the meta page returned by read_inode()
 * @m_pofs: offset in @data just past the on-disk inode
 *
 * Only inodes whose data mapping mode is EROFS_INODE_LAYOUT_INLINE are
 * considered.  For a symlink shorter than PAGE_SIZE, the target stored after
 * the inline xattr area is copied into a NUL-terminated buffer which becomes
 * inode->i_link, and the inode is switched to fast-symlink mode.
 *
 * Returns 0 on success (including when there is nothing to do), -ENOMEM if
 * the link buffer cannot be allocated, or -EIO if the inline symlink data
 * would cross the page boundary.
 *
 * NOTE(review): the comment that used to sit here discussed try-locking the
 * meta page; this function itself takes no lock.
 */
static int fill_inline_data(struct inode *inode, void *data, unsigned m_pofs)
{
	struct erofs_vnode *vi = EROFS_V(inode);
	struct erofs_sb_info *sbi = EROFS_I_SB(inode);
	int mode = vi->data_mapping_mode;

	DBG_BUGON(mode >= EROFS_INODE_LAYOUT_MAX);

	/* should be inode inline C */
	if (mode != EROFS_INODE_LAYOUT_INLINE)
		return 0;

	/* fast symlink (following ext4) */
	if (S_ISLNK(inode->i_mode) && inode->i_size < PAGE_SIZE) {
		char *lnk = erofs_kmalloc(sbi, inode->i_size + 1, GFP_KERNEL);

		if (unlikely(lnk == NULL))
			return -ENOMEM;

		/* the symlink target follows the inline xattr area */
		m_pofs += vi->xattr_isize;

		/* inline symlink data must not cross the page boundary either */
		if (unlikely(m_pofs + inode->i_size > PAGE_SIZE)) {
			DBG_BUGON(1);
			kfree(lnk);
			return -EIO;
		}

		/* get in-page inline data */
		memcpy(lnk, data + m_pofs, inode->i_size);
		lnk[inode->i_size] = '\0';

		inode->i_link = lnk;
		set_inode_fast_symlink(inode);
	}

	/* success used to be reported as -EAGAIN, which no caller could act on */
	return 0;
}
static int fill_inode(struct inode *inode, int isdir)
{
struct page *page;
unsigned int ofs;
int err = 0;
trace_erofs_fill_inode(inode, isdir);
/* read inode base data from disk */
page = read_inode(inode, &ofs);
if (IS_ERR(page)) {
return PTR_ERR(page);
} else {
/* setup the new inode */
if (S_ISREG(inode->i_mode)) {
#ifdef CONFIG_EROFS_FS_XATTR
inode->i_op = &erofs_generic_xattr_iops;
#endif
inode->i_fop = &generic_ro_fops;
} else if (S_ISDIR(inode->i_mode)) {
inode->i_op =
#ifdef CONFIG_EROFS_FS_XATTR
&erofs_dir_xattr_iops;
#else
&erofs_dir_iops;
#endif
inode->i_fop = &erofs_dir_fops;
} else if (S_ISLNK(inode->i_mode)) {
/* by default, page_get_link is used for symlink */
inode->i_op =
#ifdef CONFIG_EROFS_FS_XATTR
&erofs_symlink_xattr_iops,
#else
&page_symlink_inode_operations;
#endif
inode_nohighmem(inode);
} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
#ifdef CONFIG_EROFS_FS_XATTR
inode->i_op = &erofs_special_inode_operations;
#endif
init_special_inode(inode, inode->i_mode, inode->i_rdev);
} else {
err = -EIO;
goto out_unlock;
}
if (is_inode_layout_compression(inode)) {
#ifdef CONFIG_EROFS_FS_ZIP
inode->i_mapping->a_ops =
&z_erofs_vle_normalaccess_aops;
#else
err = -ENOTSUPP;
#endif
goto out_unlock;
}
inode->i_mapping->a_ops = &erofs_raw_access_aops;
/* fill last page if inline data is available */
fill_inline_data(inode, page_address(page), ofs);
}
out_unlock:
unlock_page(page);
put_page(page);
return err;
}
/*
 * erofs_iget - look up (or create and fill) the in-memory inode for @nid
 * @sb:    super block the inode belongs to
 * @nid:   on-disk node id, also used as the key passed to iget_locked()
 * @isdir: forwarded to fill_inode()
 *
 * Returns the inode, ERR_PTR(-ENOMEM) if iget_locked() fails, or the
 * ERR_PTR() of the error reported by fill_inode() for a new inode.
 */
struct inode *erofs_iget(struct super_block *sb,
			 erofs_nid_t nid, bool isdir)
{
	struct inode *inode = iget_locked(sb, nid);
	int ret;

	if (unlikely(!inode))
		return ERR_PTR(-ENOMEM);

	/* an inode that is not new needs no further setup */
	if (!(inode->i_state & I_NEW))
		return inode;

	/* read_inode() locates the on-disk inode through vi->nid */
	EROFS_V(inode)->nid = nid;

	ret = fill_inode(inode, isdir);
	if (unlikely(ret)) {
		iget_failed(inode);
		return ERR_PTR(ret);
	}

	unlock_new_inode(inode);
	return inode;
}
#ifdef CONFIG_EROFS_FS_XATTR
/* installed on regular files by fill_inode(): only adds xattr listing */
const struct inode_operations erofs_generic_xattr_iops = {
.listxattr = erofs_listxattr,
};
#endif
#ifdef CONFIG_EROFS_FS_XATTR
/* installed on symlinks by fill_inode(): target resolved through page_get_link */
const struct inode_operations erofs_symlink_xattr_iops = {
.get_link = page_get_link,
.listxattr = erofs_listxattr,
};
#endif
/*
 * installed on char/block/fifo/socket inodes by fill_inode() when xattr
 * support is enabled; the table is empty without CONFIG_EROFS_FS_XATTR
 */
const struct inode_operations erofs_special_inode_operations = {
#ifdef CONFIG_EROFS_FS_XATTR
.listxattr = erofs_listxattr,
#endif
};
#ifdef CONFIG_EROFS_FS_XATTR
/*
 * symlink operations resolving the target through simple_get_link
 * (presumably selected by set_inode_fast_symlink() -- confirm in xattr.h)
 */
const struct inode_operations erofs_fast_symlink_xattr_iops = {
.get_link = simple_get_link,
.listxattr = erofs_listxattr,
};
#endif

View File

@@ -1,585 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0
*
* linux/drivers/staging/erofs/internal.h
*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* http://www.huawei.com/
* Created by Gao Xiang <gaoxiang25@huawei.com>
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of the Linux
* distribution for more details.
*/
#ifndef __INTERNAL_H
#define __INTERNAL_H
#include <linux/fs.h>
#include <linux/dcache.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <linux/cleancache.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include "erofs_fs.h"
/* redefine pr_fmt "erofs: " */
#undef pr_fmt
#define pr_fmt(fmt) "erofs: " fmt
#define errln(x, ...) pr_err(x "\n", ##__VA_ARGS__)
#define infoln(x, ...) pr_info(x "\n", ##__VA_ARGS__)
#ifdef CONFIG_EROFS_FS_DEBUG
#define debugln(x, ...) pr_debug(x "\n", ##__VA_ARGS__)
#define dbg_might_sleep might_sleep
#define DBG_BUGON BUG_ON
#else
#define debugln(x, ...) ((void)0)
#define dbg_might_sleep() ((void)0)
#define DBG_BUGON(x) ((void)(x))
#endif
#ifdef CONFIG_EROFS_FAULT_INJECTION
/* injectable fault types; each one is a bit position in inject_type */
enum {
FAULT_KMALLOC,
FAULT_MAX,
};
/* printable names, indexed by fault type (defined outside this header) */
extern char *erofs_fault_name[FAULT_MAX];
/* non-zero if fault @type is selected in @fi */
#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type)))
/* per-superblock fault injection state, consumed by time_to_inject() */
struct erofs_fault_info {
/* number of eligible operations since the last injected fault */
atomic_t inject_ops;
/* inject once inject_ops reaches this value; 0 disables injection */
unsigned int inject_rate;
/* bitmask of enabled fault types */
unsigned int inject_type;
};
#endif
/*
 * Compressed page cache level, derived from the Kconfig decompression mode:
 * 2 = bipolar, 1 = unipolar, 0 = in-place decompression only.
 *
 * Kconfig symbols reach C code with the CONFIG_ prefix; testing the bare
 * name made the unipolar choice silently fall back to level 0.
 */
#ifdef CONFIG_EROFS_FS_ZIP_CACHE_BIPOLAR
#define EROFS_FS_ZIP_CACHE_LVL (2)
#elif defined(CONFIG_EROFS_FS_ZIP_CACHE_UNIPOLAR)
#define EROFS_FS_ZIP_CACHE_LVL (1)
#else
#define EROFS_FS_ZIP_CACHE_LVL (0)
#endif
/* any caching level above 0 needs the managed cache */
#if (!defined(EROFS_FS_HAS_MANAGED_CACHE) && (EROFS_FS_ZIP_CACHE_LVL > 0))
#define EROFS_FS_HAS_MANAGED_CACHE
#endif
/* EROFS_SUPER_MAGIC_V1 to represent the whole file system */
#define EROFS_SUPER_MAGIC EROFS_SUPER_MAGIC_V1
/* node id: iloc() turns it into the byte address of an on-disk inode */
typedef u64 erofs_nid_t;
/*
 * In-memory per-superblock information, reached through EROFS_SB() and
 * EROFS_I_SB().  Several fields share their names with erofs_super_block
 * fields (presumably filled from it at mount time -- confirm in super.c).
 */
struct erofs_sb_info {
/* list for all registered superblocks, mainly for shrinker */
struct list_head list;
struct mutex umount_mutex;
u32 blocks;
u32 meta_blkaddr;
#ifdef CONFIG_EROFS_FS_XATTR
u32 xattr_blkaddr;
#endif
/* inode slot unit size in bit shift */
unsigned char islotbits;
#ifdef CONFIG_EROFS_FS_ZIP
/* cluster size in bit shift */
unsigned char clusterbits;
/* the dedicated workstation for compression */
struct radix_tree_root workstn_tree;
#ifdef EROFS_FS_HAS_MANAGED_CACHE
struct inode *managed_cache;
#endif
#endif
/* used by read_inode() as the ctime of v1 inodes */
u32 build_time_nsec;
u64 build_time;
/* what we really care is nid, rather than ino.. */
erofs_nid_t root_nid;
/* used for statfs, f_files - f_favail */
u64 inos;
u8 uuid[16]; /* 128-bit uuid for volume */
u8 volume_name[16]; /* volume name */
u32 requirements;
char *dev_name;
/* EROFS_MOUNT_* bits, see set_opt()/clear_opt()/test_opt() */
unsigned int mount_opt;
unsigned int shrinker_run_no;
#ifdef CONFIG_EROFS_FAULT_INJECTION
struct erofs_fault_info fault_info; /* For fault injection */
#endif
};
#ifdef CONFIG_EROFS_FAULT_INJECTION
/* log the name of the injected fault, the current function and its return address */
#define erofs_show_injection_info(type) \
infoln("inject %s in %s of %pS", erofs_fault_name[type], \
__func__, __builtin_return_address(0))
/*
 * time_to_inject - decide whether a fault of @type should be injected now
 * @sbi:  super block info holding the fault injection state
 * @type: fault type (FAULT_*)
 *
 * Returns false when injection is disabled (rate 0) or @type is not
 * selected.  Otherwise the operation counter is bumped; once it reaches the
 * configured rate it is reset to 0 and true is returned.
 */
static inline bool time_to_inject(struct erofs_sb_info *sbi, int type)
{
	struct erofs_fault_info *const fi = &sbi->fault_info;

	/* injection disabled, or this fault type is not selected */
	if (!fi->inject_rate || !IS_FAULT_SET(fi, type))
		return false;

	atomic_inc(&fi->inject_ops);
	if (atomic_read(&fi->inject_ops) < fi->inject_rate)
		return false;

	/* rate reached: restart counting and inject this time */
	atomic_set(&fi->inject_ops, 0);
	return true;
}
#endif
/*
 * erofs_kmalloc - kmalloc() with optional fault injection
 * @sbi:   super block info (only consulted for fault injection)
 * @size:  number of bytes to allocate
 * @flags: gfp flags passed to kmalloc()
 *
 * With CONFIG_EROFS_FAULT_INJECTION, returns NULL without allocating
 * whenever time_to_inject() asks for a FAULT_KMALLOC fault; otherwise it is
 * a plain kmalloc().
 */
static inline void *erofs_kmalloc(struct erofs_sb_info *sbi,
				  size_t size, gfp_t flags)
{
#ifdef CONFIG_EROFS_FAULT_INJECTION
	const bool inject = time_to_inject(sbi, FAULT_KMALLOC);

	if (inject) {
		erofs_show_injection_info(FAULT_KMALLOC);
		return NULL;
	}
#endif
	return kmalloc(size, flags);
}
/* erofs_sb_info of a super block / of the super block an inode belongs to */
#define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info)
#define EROFS_I_SB(inode) ((struct erofs_sb_info *)(inode)->i_sb->s_fs_info)
/* Mount flags set via mount options or defaults */
#define EROFS_MOUNT_XATTR_USER 0x00000010
#define EROFS_MOUNT_POSIX_ACL 0x00000020
#define EROFS_MOUNT_FAULT_INJECTION 0x00000040
/* clear / set / test one EROFS_MOUNT_<option> bit in sbi->mount_opt */
#define clear_opt(sbi, option) ((sbi)->mount_opt &= ~EROFS_MOUNT_##option)
#define set_opt(sbi, option) ((sbi)->mount_opt |= EROFS_MOUNT_##option)
#define test_opt(sbi, option) ((sbi)->mount_opt & EROFS_MOUNT_##option)
#ifdef CONFIG_EROFS_FS_ZIP
/* lock / unlock the workstation tree of a super block */
#define erofs_workstn_lock(sbi) xa_lock(&(sbi)->workstn_tree)
#define erofs_workstn_unlock(sbi) xa_unlock(&(sbi)->workstn_tree)
/* basic unit of the workstation of a super_block */
struct erofs_workgroup {
/* the workgroup index in the workstation */
pgoff_t index;
/* overall workgroup reference count */
atomic_t refcount;
};
/* refcount value marking a workgroup as frozen, see erofs_workgroup_try_to_freeze() */
#define EROFS_LOCKED_MAGIC (INT_MIN | 0xE0F510CCL)
#if defined(CONFIG_SMP)
/*
 * Try to freeze @grp: atomically replace a refcount equal to @val with
 * EROFS_LOCKED_MAGIC.  Returns true on success, in which case preemption
 * stays disabled until erofs_workgroup_unfreeze(); returns false (with
 * preemption re-enabled) if the refcount was not @val.
 */
static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp,
int val)
{
preempt_disable();
if (val != atomic_cmpxchg(&grp->refcount, val, EROFS_LOCKED_MAGIC)) {
preempt_enable();
return false;
}
return true;
}
/*
 * Undo a successful erofs_workgroup_try_to_freeze(): restore the refcount to
 * @orig_val and re-enable preemption.
 */
static inline void erofs_workgroup_unfreeze(struct erofs_workgroup *grp,
int orig_val)
{
/*
* other observers should notice all modifications
* in the freezing period.
*/
smp_mb();
atomic_set(&grp->refcount, orig_val);
preempt_enable();
}
/* wait until @grp is no longer frozen and return the refcount observed */
static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
{
return atomic_cond_read_relaxed(&grp->refcount,
VAL != EROFS_LOCKED_MAGIC);
}
#else
/*
 * Uniprocessor variant: the refcount is only compared against @val and is
 * never changed; on success preemption stays disabled until
 * erofs_workgroup_unfreeze().
 */
static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp,
int val)
{
preempt_disable();
/* no need to spin on UP platforms, let's just disable preemption. */
if (val != atomic_read(&grp->refcount)) {
preempt_enable();
return false;
}
return true;
}
/* uniprocessor variant: only re-enables preemption, the refcount was never modified */
static inline void erofs_workgroup_unfreeze(struct erofs_workgroup *grp,
int orig_val)
{
preempt_enable();
}
/* uniprocessor variant: returns the current refcount without waiting */
static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
{
int v = atomic_read(&grp->refcount);
/* workgroup is never frozen on uniprocessor systems */
DBG_BUGON(v == EROFS_LOCKED_MAGIC);
return v;
}
#endif
/*
 * erofs_workgroup_get - take a reference on a workgroup
 * @grp:  workgroup to pin
 * @ocnt: on success, receives the reference count observed before the
 *        increment
 *
 * Waits for the workgroup to be unfrozen, then increments its reference
 * count with a compare-and-swap, retrying if the count changed underneath.
 *
 * Returns 0 on success, or -1 if the reference count is zero or negative,
 * in which case no reference is taken and @ocnt is left untouched.
 *
 * The return type is int rather than bool: the function returns -1/0, and
 * as a bool the failure value only happened to convert to "true".
 */
static inline int erofs_workgroup_get(struct erofs_workgroup *grp, int *ocnt)
{
	int o;

repeat:
	o = erofs_wait_on_workgroup_freezed(grp);

	if (unlikely(o <= 0))
		return -1;

	/* somebody changed the count in the meantime, start over */
	if (unlikely(atomic_cmpxchg(&grp->refcount, o, o + 1) != o))
		goto repeat;

	*ocnt = o;
	return 0;
}
/* unconditional refcount increment / decrement; unlike erofs_workgroup_get() they do not wait on a frozen workgroup */
#define __erofs_workgroup_get(grp) atomic_inc(&(grp)->refcount)
#define __erofs_workgroup_put(grp) atomic_dec(&(grp)->refcount)
extern int erofs_workgroup_put(struct erofs_workgroup *grp);
extern struct erofs_workgroup *erofs_find_workgroup(
struct super_block *sb, pgoff_t index, bool *tag);
extern int erofs_register_workgroup(struct super_block *sb,
struct erofs_workgroup *grp, bool tag);
extern unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
unsigned long nr_shrink, bool cleanup);
/*
 * Shrink the whole workstation of @sb: calls erofs_shrink_workstation()
 * with the largest possible nr_shrink and cleanup set to true.
 */
static inline void erofs_workstation_cleanup_all(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	erofs_shrink_workstation(sbi, ~0UL, true);
}
#ifdef EROFS_FS_HAS_MANAGED_CACHE
/* NOTE(review): sentinel pointer value, presumably marking a cached page slot that is not allocated yet -- confirm in unzip_vle.c */
#define EROFS_UNALLOCATED_CACHED_PAGE ((void *)0x5F0EF00D)
/* managed cache reclaim helpers, implemented outside this header */
extern int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
struct erofs_workgroup *egrp);
extern int erofs_try_to_free_cached_page(struct address_space *mapping,
struct page *page);
#endif
#endif
/* we strictly follow PAGE_SIZE and no buffer head yet */
#define LOG_BLOCK_SIZE PAGE_SHIFT

/* log2 of the number of 512-byte sectors per block */
#undef LOG_SECTORS_PER_BLOCK
#define LOG_SECTORS_PER_BLOCK (PAGE_SHIFT - 9)

/*
 * number of 512-byte sectors per block; this used to expand to itself
 * (1 << SECTORS_PER_BLOCK) and could not be used at all
 */
#undef SECTORS_PER_BLOCK
#define SECTORS_PER_BLOCK (1 << LOG_SECTORS_PER_BLOCK)

/* block size in bytes, equal to PAGE_SIZE */
#define EROFS_BLKSIZ (1 << LOG_BLOCK_SIZE)

/* the block size has to be a non-zero multiple of 4096 */
#if (EROFS_BLKSIZ % 4096 || !EROFS_BLKSIZ)
#error erofs cannot be used in this platform
#endif

/* root node id; @sb is a struct erofs_sb_info pointer, not a super_block */
#define ROOT_NID(sb) ((sb)->root_nid)
#ifdef CONFIG_EROFS_FS_ZIP
/* hard limit of pages per compressed cluster */
#define Z_EROFS_CLUSTER_MAX_PAGES (CONFIG_EROFS_FS_CLUSTER_PAGE_LIMIT)
/* page count of a compressed cluster */
#define erofs_clusterpages(sbi) ((1 << (sbi)->clusterbits) / PAGE_SIZE)
#endif
/* byte offset / address on the device */
typedef u64 erofs_off_t;
/* data type for filesystem-wide blocks number */
typedef u32 erofs_blk_t;
/* split a byte address into block number and in-block offset, and back */
#define erofs_blknr(addr) ((addr) / EROFS_BLKSIZ)
#define erofs_blkoff(addr) ((addr) % EROFS_BLKSIZ)
/* the cast widens nr to 64 bits before the multiplication */
#define blknr_to_addr(nr) ((erofs_off_t)(nr) * EROFS_BLKSIZ)
/*
 * iloc - byte address of the on-disk inode with node id @nid
 *
 * Inodes live in slots of (1 << islotbits) bytes counted from the start of
 * the metadata area at block meta_blkaddr.
 */
static inline erofs_off_t iloc(struct erofs_sb_info *sbi, erofs_nid_t nid)
{
	const erofs_off_t meta_start = blknr_to_addr(sbi->meta_blkaddr);
	const erofs_off_t slot_ofs = nid << sbi->islotbits;

	return meta_start + slot_ofs;
}
/* atomic flag definitions */
#define EROFS_V_EA_INITED_BIT 0
/* bitlock definitions (arranged in reverse order) */
#define EROFS_V_BL_XATTR_BIT (BITS_PER_LONG - 1)
/* erofs-specific in-memory inode; embeds the VFS inode, see EROFS_V() */
struct erofs_vnode {
erofs_nid_t nid;
/* atomic flags (including bitlocks) */
unsigned long flags;
/* one of EROFS_INODE_LAYOUT_*, decoded from i_advise by read_inode() */
unsigned char data_mapping_mode;
/* inline size in bytes */
unsigned char inode_isize;
unsigned short xattr_isize;
unsigned xattr_shared_count;
unsigned *xattr_shared_xattrs;
erofs_blk_t raw_blkaddr;
/* the corresponding vfs inode */
struct inode vfs_inode;
};
/* map a VFS inode pointer back to its enclosing erofs_vnode */
#define EROFS_V(ptr) \
container_of(ptr, struct erofs_vnode, vfs_inode)
/* extract the bit field of width @bits starting at @bit from i_advise value @x */
#define __inode_advise(x, bit, bits) \
(((x) >> (bit)) & ((1 << (bits)) - 1))
/* on-disk inode version (EROFS_INODE_LAYOUT_V1/V2) encoded in i_advise */
#define __inode_version(advise) \
__inode_advise(advise, EROFS_I_VERSION_BIT, \
EROFS_I_VERSION_BITS)
/* data mapping mode (EROFS_INODE_LAYOUT_*) encoded in i_advise */
#define __inode_data_mapping(advise) \
__inode_advise(advise, EROFS_I_DATA_MAPPING_BIT,\
EROFS_I_DATA_MAPPING_BITS)
static inline unsigned long inode_datablocks(struct inode *inode)
{
/* since i_size cannot be changed */
return DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
}
static inline bool is_inode_layout_plain(struct inode *inode)
{
return EROFS_V(inode)->data_mapping_mode == EROFS_INODE_LAYOUT_PLAIN;
}
static inline bool is_inode_layout_compression(struct inode *inode)
{
return EROFS_V(inode)->data_mapping_mode ==
EROFS_INODE_LAYOUT_COMPRESSION;
}
static inline bool is_inode_layout_inline(struct inode *inode)
{
return EROFS_V(inode)->data_mapping_mode == EROFS_INODE_LAYOUT_INLINE;
}
extern const struct super_operations erofs_sops;
extern const struct inode_operations erofs_dir_iops;
extern const struct file_operations erofs_dir_fops;
extern const struct address_space_operations erofs_raw_access_aops;
#ifdef CONFIG_EROFS_FS_ZIP
extern const struct address_space_operations z_erofs_vle_normalaccess_aops;
#endif
/*
* Logical to physical block mapping, used by erofs_map_blocks()
*
* Different with other file systems, it is used for 2 access modes:
*
* 1) RAW access mode:
*
* Users pass a valid (m_lblk, m_lofs -- usually 0) pair,
* and get the valid m_pblk, m_pofs and the longest m_len(in bytes).
*
* Note that m_lblk in the RAW access mode refers to the number of
* the compressed ondisk block rather than the uncompressed
* in-memory block for the compressed file.
*
* m_pofs equals to m_lofs except for the inline data page.
*
* 2) Normal access mode:
*
* If the inode is not compressed, it has no difference with
* the RAW access mode. However, if the inode is compressed,
* users should pass a valid (m_lblk, m_lofs) pair, and get
* the needed m_pblk, m_pofs, m_len to get the compressed data
* and the updated m_lblk, m_lofs which indicates the start
* of the corresponding uncompressed data in the file.
*/
/*
 * The EROFS_MAP_* flag bits below are built from buffer_head state bit
 * numbers; BH_Zipped takes the first private one (BH_PrivateStart).
 */
enum {
BH_Zipped = BH_PrivateStart,
};
/* Has a disk mapping */
#define EROFS_MAP_MAPPED (1 << BH_Mapped)
/* Located in metadata (could be copied from bd_inode) */
#define EROFS_MAP_META (1 << BH_Meta)
/* The extent has been compressed */
#define EROFS_MAP_ZIPPED (1 << BH_Zipped)
/*
 * Mapping descriptor handed to erofs_map_blocks().
 * NOTE(review): the field names suggest m_pa/m_la are the physical and
 * logical addresses and m_plen/m_llen the matching lengths -- confirm
 * against the implementation in data.c.
 */
struct erofs_map_blocks {
erofs_off_t m_pa, m_la;
u64 m_plen, m_llen;
/* presumably a combination of the EROFS_MAP_* bits -- TODO confirm */
unsigned int m_flags;
};
/* Flags used by erofs_map_blocks() */
#define EROFS_GET_BLOCKS_RAW 0x0001
/* data.c */
/*
 * prepare_bio - allocate a bio aimed at block @blkaddr of @sb's block device
 * @sb:       super block whose s_bdev the bio is bound to
 * @blkaddr:  start block; converted to a sector with LOG_SECTORS_PER_BLOCK
 * @nr_pages: number of bio vectors requested
 * @endio:    completion callback stored in bi_end_io
 *
 * If the first allocation fails, the request is halved until it succeeds;
 * the last resort is a single-vector bio allocated with __GFP_NOFAIL.
 * The returned bio may therefore hold fewer than @nr_pages vectors, but it
 * is never NULL.
 *
 * The fallback used to run only for PF_MEMALLOC tasks, which left every
 * other caller dereferencing a NULL bio below when bio_alloc() failed.
 */
static inline struct bio *prepare_bio(
	struct super_block *sb,
	erofs_blk_t blkaddr, unsigned nr_pages,
	bio_end_io_t endio)
{
	gfp_t gfp = GFP_NOIO;
	struct bio *bio = bio_alloc(gfp, nr_pages);

	/* shrink the request until an allocation goes through */
	while (unlikely(bio == NULL)) {
		nr_pages /= 2;
		if (unlikely(!nr_pages)) {
			bio = bio_alloc(gfp | __GFP_NOFAIL, 1);
			BUG_ON(bio == NULL);
			break;
		}
		bio = bio_alloc(gfp, nr_pages);
	}

	bio->bi_end_io = endio;
	bio_set_dev(bio, sb->s_bdev);
	bio->bi_iter.bi_sector = blkaddr << LOG_SECTORS_PER_BLOCK;
	return bio;
}
/*
 * __submit_bio - stamp @bio with operation @op and @op_flags, then submit it
 */
static inline void __submit_bio(struct bio *bio, unsigned op, unsigned op_flags)
{
	bio_set_op_attrs(bio, op, op_flags);
	submit_bio(bio);
}
/* Prototypes for helpers listed under the "data.c" marker above. */
extern struct page *erofs_get_meta_page(struct super_block *sb,
erofs_blk_t blkaddr, bool prio);
extern int erofs_map_blocks(struct inode *, struct erofs_map_blocks *, int);
extern int erofs_map_blocks_iter(struct inode *, struct erofs_map_blocks *,
struct page **, int);
/*
 * Pairs a mapping descriptor with a page pointer.
 * NOTE(review): presumably @mpage caches the page that
 * erofs_map_blocks_iter() receives through its struct page ** argument --
 * confirm against the callers.
 */
struct erofs_map_blocks_iter {
struct erofs_map_blocks map;
struct page *mpage;
};
/*
 * erofs_get_inline_page - fetch the meta page at @blkaddr for @inode
 *
 * Thin wrapper around erofs_get_meta_page(); the "prio" argument is set
 * when @inode is a directory.
 */
static inline struct page *
erofs_get_inline_page(struct inode *inode,
		      erofs_blk_t blkaddr)
{
	struct super_block *const sb = inode->i_sb;
	const bool prio = S_ISDIR(inode->i_mode);

	return erofs_get_meta_page(sb, blkaddr, prio);
}
/* inode.c */
extern struct inode *erofs_iget(struct super_block *sb,
erofs_nid_t nid, bool dir);
/* dir.c */
int erofs_namei(struct inode *dir, struct qstr *name,
erofs_nid_t *nid, unsigned *d_type);
/* xattr.c */
#ifdef CONFIG_EROFS_FS_XATTR
extern const struct xattr_handler *erofs_xattr_handlers[];
#endif
/* symlink */
/* these inode_operations tables are only declared with xattr support */
#ifdef CONFIG_EROFS_FS_XATTR
extern const struct inode_operations erofs_symlink_xattr_iops;
extern const struct inode_operations erofs_fast_symlink_xattr_iops;
extern const struct inode_operations erofs_special_inode_operations;
#endif
/*
 * set_inode_fast_symlink - install the fast-symlink inode operations
 *
 * Uses the xattr-aware table when CONFIG_EROFS_FS_XATTR is set and the
 * generic simple_symlink_inode_operations otherwise.
 */
static inline void set_inode_fast_symlink(struct inode *inode)
{
	const struct inode_operations *iops;

#ifdef CONFIG_EROFS_FS_XATTR
	iops = &erofs_fast_symlink_xattr_iops;
#else
	iops = &simple_symlink_inode_operations;
#endif
	inode->i_op = iops;
}
/*
 * is_inode_fast_symlink - was @inode set up by set_inode_fast_symlink()?
 *
 * Compares i_op with the same table that set_inode_fast_symlink() installs
 * for the current configuration.
 */
static inline bool is_inode_fast_symlink(struct inode *inode)
{
	const struct inode_operations *fast_iops;

#ifdef CONFIG_EROFS_FS_XATTR
	fast_iops = &erofs_fast_symlink_xattr_iops;
#else
	fast_iops = &simple_symlink_inode_operations;
#endif
	return inode->i_op == fast_iops;
}
/*
 * erofs_vmap - map @count pages into one virtually contiguous range
 *
 * With CONFIG_EROFS_FS_USE_VM_MAP_RAM, vm_map_ram() is tried up to three
 * times, calling vm_unmap_aliases() between failed attempts; otherwise
 * plain vmap() is used. Returns the mapped address, or NULL on failure.
 */
static inline void *erofs_vmap(struct page **pages, unsigned int count)
{
#ifdef CONFIG_EROFS_FS_USE_VM_MAP_RAM
	void *addr;
	int attempt;

	for (attempt = 1; ; ++attempt) {
		addr = vm_map_ram(pages, count, -1, PAGE_KERNEL);
		/* give up after the third attempt */
		if (addr != NULL || attempt >= 3)
			break;
		vm_unmap_aliases();
	}
	return addr;
#else
	return vmap(pages, count, VM_MAP, PAGE_KERNEL);
#endif
}
/*
 * erofs_vunmap - undo erofs_vmap()
 * @mem:   address returned by erofs_vmap()
 * @count: page count of the mapping (only used by vm_unmap_ram())
 */
static inline void erofs_vunmap(const void *mem, unsigned int count)
{
#ifndef CONFIG_EROFS_FS_USE_VM_MAP_RAM
	vunmap(mem);
#else
	vm_unmap_ram(mem, count);
#endif
}
/* utils.c */
extern struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp);
extern void erofs_register_super(struct super_block *sb);
extern void erofs_unregister_super(struct super_block *sb);
/* count/scan callbacks wired into the shrinker registered by super.c */
extern unsigned long erofs_shrink_count(struct shrinker *shrink,
struct shrink_control *sc);
extern unsigned long erofs_shrink_scan(struct shrinker *shrink,
struct shrink_control *sc);
/* fallback definition: the page at the tail (->prev) of an lru list */
#ifndef lru_to_page
#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
#endif
#endif

View File

@@ -1,227 +0,0 @@
#ifndef __LZ4DEFS_H__
#define __LZ4DEFS_H__
/*
* lz4defs.h -- common and architecture specific defines for the kernel usage
* LZ4 - Fast LZ compression algorithm
* Copyright (C) 2011-2016, Yann Collet.
* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* You can contact the author at :
* - LZ4 homepage : http://www.lz4.org
* - LZ4 source repository : https://github.com/lz4/lz4
*
* Changed for kernel usage by:
* Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
*/
#include <asm/unaligned.h>
#include <linux/string.h> /* memset, memcpy */
/* every helper in this header is forced inline */
#define FORCE_INLINE __always_inline
/*-************************************
* Basic Types
**************************************/
#include <linux/types.h>
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t S32;
typedef uint64_t U64;
typedef uintptr_t uptrval;
/*-************************************
* Architecture specifics
**************************************/
/* LZ4_ARCH64 selects the 8-byte path in LZ4_copy8() and LZ4_count() */
#if defined(CONFIG_64BIT)
#define LZ4_ARCH64 1
#else
#define LZ4_ARCH64 0
#endif
/* LZ4_LITTLE_ENDIAN selects the bit scan used by LZ4_NbCommonBytes() */
#if defined(__LITTLE_ENDIAN)
#define LZ4_LITTLE_ENDIAN 1
#else
#define LZ4_LITTLE_ENDIAN 0
#endif
/*-************************************
* Constants
**************************************/
#define MINMATCH 4
#define WILDCOPYLENGTH 8
#define LASTLITERALS 5
#define MFLIMIT (WILDCOPYLENGTH + MINMATCH)
/* Increase this value ==> compression run slower on incompressible data */
#define LZ4_SKIPTRIGGER 6
#define HASH_UNIT sizeof(size_t)
#define KB (1 << 10)
#define MB (1 << 20)
#define GB (1U << 30)
#define MAXD_LOG 16
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
/* LZ4_count() compares this many bytes per step */
#define STEPSIZE sizeof(size_t)
/* ML_BITS/RUN_BITS split an 8-bit value into two fields, with their masks */
#define ML_BITS 4
#define ML_MASK ((1U << ML_BITS) - 1)
#define RUN_BITS (8 - ML_BITS)
#define RUN_MASK ((1U << RUN_BITS) - 1)
/*-************************************
* Reading and writing into memory
**************************************/
/* Load a U16 from @ptr, which need not be aligned. */
static FORCE_INLINE U16 LZ4_read16(const void *ptr)
{
	const U16 *src = ptr;

	return get_unaligned(src);
}
/* Load a U32 from @ptr, which need not be aligned. */
static FORCE_INLINE U32 LZ4_read32(const void *ptr)
{
	const U32 *src = ptr;

	return get_unaligned(src);
}
/* Load one size_t-wide word from @ptr, which need not be aligned. */
static FORCE_INLINE size_t LZ4_read_ARCH(const void *ptr)
{
	const size_t *src = ptr;

	return get_unaligned(src);
}
/* Store @value as a U16 at @memPtr, which need not be aligned. */
static FORCE_INLINE void LZ4_write16(void *memPtr, U16 value)
{
	U16 *dst = memPtr;

	put_unaligned(value, dst);
}
/* Store @value as a U32 at @memPtr, which need not be aligned. */
static FORCE_INLINE void LZ4_write32(void *memPtr, U32 value)
{
	U32 *dst = memPtr;

	put_unaligned(value, dst);
}
/* Load a little-endian 16-bit value from the possibly unaligned @memPtr. */
static FORCE_INLINE U16 LZ4_readLE16(const void *memPtr)
{
	const U16 value = get_unaligned_le16(memPtr);

	return value;
}
/*
 * Store @value as a little-endian 16-bit quantity at the possibly
 * unaligned @memPtr.
 *
 * The function returns void, so the store is a plain statement: ISO C does
 * not allow "return <expression>;" in a void function.
 */
static FORCE_INLINE void LZ4_writeLE16(void *memPtr, U16 value)
{
	put_unaligned_le16(value, memPtr);
}
/*
 * Copy exactly 8 bytes from @src to @dst using unaligned accesses: one
 * 64-bit word when LZ4_ARCH64 is set, two 32-bit words otherwise (both
 * words are read before either is written).
 */
static FORCE_INLINE void LZ4_copy8(void *dst, const void *src)
{
#if LZ4_ARCH64
	const U64 *from = src;
	U64 *to = dst;
	U64 word = get_unaligned(from);

	put_unaligned(word, to);
#else
	const U32 *from = src;
	U32 *to = dst;
	U32 w0 = get_unaligned(from);
	U32 w1 = get_unaligned(from + 1);

	put_unaligned(w0, to);
	put_unaligned(w1, to + 1);
#endif
}
/*
* customized variant of memcpy,
* which can overwrite up to 7 bytes beyond dstEnd
*/
static FORCE_INLINE void LZ4_wildCopy(void *dstPtr,
const void *srcPtr, void *dstEnd)
{
BYTE *d = (BYTE *)dstPtr;
const BYTE *s = (const BYTE *)srcPtr;
BYTE *const e = (BYTE *)dstEnd;
do {
LZ4_copy8(d, s);
d += 8;
s += 8;
} while (d < e);
}
/*
 * Turn a bit position inside @val into a whole-byte count (position / 8).
 * Little-endian builds use the lowest set bit (__ffs); big-endian builds
 * use the number of zero bits above the highest set bit (via __fls).
 * LZ4_count() passes the XOR of two words here.
 */
static FORCE_INLINE unsigned int LZ4_NbCommonBytes(register size_t val)
{
#if LZ4_LITTLE_ENDIAN
	const unsigned long bits = __ffs(val);
#else
	const unsigned long bits = BITS_PER_LONG - 1 - __fls(val);
#endif

	return bits >> 3;
}
/*
 * LZ4_count - length of the common prefix of @pIn and @pMatch
 * @pIn:      start of the first byte run
 * @pMatch:   start of the second byte run
 * @pInLimit: end of the comparable part of @pIn (exclusive)
 *
 * Compares STEPSIZE bytes per iteration while a full word still fits
 * before @pInLimit, then finishes with 4-, 2- and 1-byte comparisons.
 * Returns the number of equal leading bytes.
 */
static FORCE_INLINE unsigned int LZ4_count(
const BYTE *pIn,
const BYTE *pMatch,
const BYTE *pInLimit)
{
const BYTE *const pStart = pIn;
while (likely(pIn < pInLimit - (STEPSIZE - 1))) {
/* XOR is zero exactly when both words are identical */
size_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
if (!diff) {
pIn += STEPSIZE;
pMatch += STEPSIZE;
continue;
}
/* first difference found: add the equal bytes of this word and stop */
pIn += LZ4_NbCommonBytes(diff);
return (unsigned int)(pIn - pStart);
}
/* fewer than STEPSIZE bytes left: narrow the comparison width */
#if LZ4_ARCH64
if ((pIn < (pInLimit - 3))
&& (LZ4_read32(pMatch) == LZ4_read32(pIn))) {
pIn += 4;
pMatch += 4;
}
#endif
if ((pIn < (pInLimit - 1))
&& (LZ4_read16(pMatch) == LZ4_read16(pIn))) {
pIn += 2;
pMatch += 2;
}
if ((pIn < pInLimit) && (*pMatch == *pIn))
pIn++;
return (unsigned int)(pIn - pStart);
}
/*
 * Mode selectors carried over from the LZ4 sources.
 * NOTE(review): none of them is referenced by the helpers defined in this
 * header; check the including files before pruning.
 */
typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;
typedef enum { byPtr, byU32, byU16 } tableType_t;
typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
typedef enum { full = 0, partial = 1 } earlyEnd_directive;
#endif

View File

@@ -1,661 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* linux/drivers/staging/erofs/super.c
*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* http://www.huawei.com/
* Created by Gao Xiang <gaoxiang25@huawei.com>
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of the Linux
* distribution for more details.
*/
#include <linux/module.h>
#include <linux/buffer_head.h>
#include <linux/statfs.h>
#include <linux/parser.h>
#include <linux/seq_file.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
#include <trace/events/erofs.h>
/* slab cache for struct erofs_vnode; created by erofs_init_inode_cache() */
static struct kmem_cache *erofs_inode_cachep __read_mostly;
/*
 * Slab constructor for erofs_inode_cachep: performs the one-time
 * initialisation of the embedded VFS inode.
 */
static void init_once(void *ptr)
{
	struct erofs_vnode *vnode = ptr;

	inode_init_once(&vnode->vfs_inode);
}
/*
 * Create the "erofs_inode" slab cache.
 *
 * Return: 0 on success, -ENOMEM if the cache could not be created.
 */
static int erofs_init_inode_cache(void)
{
	erofs_inode_cachep = kmem_cache_create("erofs_inode",
					       sizeof(struct erofs_vnode), 0,
					       SLAB_RECLAIM_ACCOUNT, init_once);
	if (erofs_inode_cachep == NULL)
		return -ENOMEM;

	return 0;
}
/* Destroy the slab cache created by erofs_init_inode_cache(). */
static void erofs_exit_inode_cache(void)
{
	kmem_cache_destroy(erofs_inode_cachep);
}
/*
 * ->alloc_inode: take an erofs_vnode from the slab cache and hand out its
 * embedded VFS inode.
 *
 * Return: the new inode, or NULL when the allocation fails.
 */
static struct inode *alloc_inode(struct super_block *sb)
{
	struct erofs_vnode *vnode;

	vnode = kmem_cache_alloc(erofs_inode_cachep, GFP_KERNEL);
	if (!vnode)
		return NULL;

	/* clear the bytes in front of vfs_inode; vfs_inode is left untouched */
	memset(vnode, 0, offsetof(struct erofs_vnode, vfs_inode));

	return &vnode->vfs_inode;
}
/*
 * RCU callback queued by destroy_inode(): frees the per-inode allocations
 * and returns the erofs_vnode to the slab cache.
 */
static void i_callback(struct rcu_head *head)
{
	struct inode *const inode = container_of(head, struct inode, i_rcu);
	struct erofs_vnode *const vnode = EROFS_V(inode);

	/* be careful RCU symlink path (see ext4_inode_info->i_data)! */
	if (is_inode_fast_symlink(inode))
		kfree(inode->i_link);

	kfree(vnode->xattr_shared_xattrs);
	kmem_cache_free(erofs_inode_cachep, vnode);
}
/* ->destroy_inode: defer the actual freeing to i_callback() via call_rcu(). */
static void destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, i_callback);
}
/*
 * check_layout_compatibility - verify the superblock's feature requirements
 * @sb:     super block being mounted; the raw requirement word is saved in
 *          its erofs_sb_info
 * @layout: on-disk superblock
 *
 * Return: true when every requirement bit is covered by
 * EROFS_ALL_REQUIREMENTS, false (after logging the unknown bits) otherwise.
 */
static bool check_layout_compatibility(struct super_block *sb,
				       struct erofs_super_block *layout)
{
	const unsigned int requirements = le32_to_cpu(layout->requirements);
	const unsigned int unknown = requirements & ~EROFS_ALL_REQUIREMENTS;

	EROFS_SB(sb)->requirements = requirements;

	/* all mandatory requirements are understood by this kernel */
	if (!unknown)
		return true;

	errln("unidentified requirements %x, please upgrade kernel version",
	      unknown);
	return false;
}
/*
 * superblock_read - read and validate the on-disk superblock
 * @sb: super block being mounted; sb->s_fs_info must already point to the
 *      erofs_sb_info that receives the decoded fields
 *
 * Reads block 0, checks the magic, the block size and the feature
 * requirements, then copies the layout fields into the in-memory
 * erofs_sb_info (and the uuid into @sb).
 *
 * Return: 0 on success, -EIO when block 0 cannot be read, -EINVAL when the
 * superblock is not acceptable.
 */
static int superblock_read(struct super_block *sb)
{
	struct erofs_sb_info *sbi;
	struct buffer_head *bh;
	struct erofs_super_block *layout;
	unsigned blkszbits;
	int ret;

	bh = sb_bread(sb, 0);
	if (bh == NULL) {
		errln("cannot read erofs superblock");
		return -EIO;
	}

	sbi = EROFS_SB(sb);
	layout = (struct erofs_super_block *)((u8 *)bh->b_data
		 + EROFS_SUPER_OFFSET);

	ret = -EINVAL;
	if (le32_to_cpu(layout->magic) != EROFS_SUPER_MAGIC_V1) {
		errln("cannot find valid erofs superblock");
		goto out;
	}

	blkszbits = layout->blkszbits;
	/* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */
	if (unlikely(blkszbits != LOG_BLOCK_SIZE)) {
		/*
		 * blkszbits comes straight from disk and is not validated
		 * yet: report the raw value instead of 1 << blkszbits,
		 * which is undefined once the shift count reaches the
		 * width of int.
		 */
		errln("blkszbits %u isn't supported on this platform",
		      blkszbits);
		goto out;
	}

	if (!check_layout_compatibility(sb, layout))
		goto out;

	sbi->blocks = le32_to_cpu(layout->blocks);
	sbi->meta_blkaddr = le32_to_cpu(layout->meta_blkaddr);
#ifdef CONFIG_EROFS_FS_XATTR
	sbi->xattr_blkaddr = le32_to_cpu(layout->xattr_blkaddr);
#endif
	sbi->islotbits = ffs(sizeof(struct erofs_inode_v1)) - 1;
#ifdef CONFIG_EROFS_FS_ZIP
	/* the cluster size is fixed here rather than read from disk */
	sbi->clusterbits = 12;

	if (1 << (sbi->clusterbits - 12) > Z_EROFS_CLUSTER_MAX_PAGES)
		errln("clusterbits %u is not supported on this kernel",
		      sbi->clusterbits);
#endif

	sbi->root_nid = le16_to_cpu(layout->root_nid);
	sbi->inos = le64_to_cpu(layout->inos);

	sbi->build_time = le64_to_cpu(layout->build_time);
	sbi->build_time_nsec = le32_to_cpu(layout->build_time_nsec);

	memcpy(&sb->s_uuid, layout->uuid, sizeof(layout->uuid));
	memcpy(sbi->volume_name, layout->volume_name,
	       sizeof(layout->volume_name));

	ret = 0;
out:
	brelse(bh);
	return ret;
}
#ifdef CONFIG_EROFS_FAULT_INJECTION
/* printable name of each fault type, indexed by the FAULT_* value */
char *erofs_fault_name[FAULT_MAX] = {
[FAULT_KMALLOC] = "kmalloc",
};
/*
 * erofs_build_fault_attr - (re)configure fault injection for @sbi
 * @rate: injection rate; 0 wipes the whole fault_info structure
 *
 * A non-zero @rate resets the operation counter, stores the rate and
 * enables every fault type below FAULT_MAX.
 */
static void erofs_build_fault_attr(struct erofs_sb_info *sbi,
				   unsigned int rate)
{
	struct erofs_fault_info *ffi = &sbi->fault_info;

	if (!rate) {
		memset(ffi, 0, sizeof(struct erofs_fault_info));
		return;
	}

	atomic_set(&ffi->inject_ops, 0);
	ffi->inject_rate = rate;
	ffi->inject_type = (1 << FAULT_MAX) - 1;
}
#endif
/*
 * Set the mount options that are on by default: XATTR_USER and POSIX_ACL,
 * each only when the matching config option is compiled in.
 */
static void default_options(struct erofs_sb_info *sbi)
{
#ifdef CONFIG_EROFS_FS_XATTR
	set_opt(sbi, XATTR_USER);
#endif

#ifdef CONFIG_EROFS_FS_POSIX_ACL
	set_opt(sbi, POSIX_ACL);
#endif
}
/* token ids returned by match_token() in parse_options() */
enum {
Opt_user_xattr,
Opt_nouser_xattr,
Opt_acl,
Opt_noacl,
Opt_fault_injection,
Opt_err
};
/* mount option patterns; the NULL pattern maps to Opt_err and ends the table */
static match_table_t erofs_tokens = {
{Opt_user_xattr, "user_xattr"},
{Opt_nouser_xattr, "nouser_xattr"},
{Opt_acl, "acl"},
{Opt_noacl, "noacl"},
{Opt_fault_injection, "fault_injection=%u"},
{Opt_err, NULL}
};
/*
 * parse_options - apply a comma-separated mount option string to @sb
 * @sb:      super block whose erofs_sb_info receives the option bits
 * @options: option string (modified in place by strsep()); may be NULL
 *
 * Options whose feature is compiled out are accepted and only logged.
 *
 * Return: 0 on success, -EINVAL for an unknown option or a bad
 * fault_injection value.
 */
static int parse_options(struct super_block *sb, char *options)
{
substring_t args[MAX_OPT_ARGS];
char *p;
int arg = 0;
if (!options)
return 0;
while ((p = strsep(&options, ",")) != NULL) {
int token;
/* skip empty items such as the one between two adjacent commas */
if (!*p)
continue;
args[0].to = args[0].from = NULL;
token = match_token(p, erofs_tokens, args);
switch (token) {
#ifdef CONFIG_EROFS_FS_XATTR
case Opt_user_xattr:
set_opt(EROFS_SB(sb), XATTR_USER);
break;
case Opt_nouser_xattr:
clear_opt(EROFS_SB(sb), XATTR_USER);
break;
#else
case Opt_user_xattr:
infoln("user_xattr options not supported");
break;
case Opt_nouser_xattr:
infoln("nouser_xattr options not supported");
break;
#endif
#ifdef CONFIG_EROFS_FS_POSIX_ACL
case Opt_acl:
set_opt(EROFS_SB(sb), POSIX_ACL);
break;
case Opt_noacl:
clear_opt(EROFS_SB(sb), POSIX_ACL);
break;
#else
case Opt_acl:
infoln("acl options not supported");
break;
case Opt_noacl:
infoln("noacl options not supported");
break;
#endif
case Opt_fault_injection:
/* the value is validated even when fault injection is compiled out */
if (args->from && match_int(args, &arg))
return -EINVAL;
#ifdef CONFIG_EROFS_FAULT_INJECTION
erofs_build_fault_attr(EROFS_SB(sb), arg);
set_opt(EROFS_SB(sb), FAULT_INJECTION);
#else
infoln("FAULT_INJECTION was not selected");
#endif
break;
default:
errln("Unrecognized mount option \"%s\" "
"or missing value", p);
return -EINVAL;
}
}
return 0;
}
#ifdef EROFS_FS_HAS_MANAGED_CACHE
/* forward declaration: managed_cache_releasepage() checks against this table */
static const struct address_space_operations managed_cache_aops;
/*
 * ->releasepage for the managed cache.
 *
 * Return: 1 when the page may be released (it has no private data, or
 * erofs_try_to_free_cached_page() let go of it), 0 when it is still busy.
 */
static int managed_cache_releasepage(struct page *page, gfp_t gfp_mask)
{
	struct address_space *const mapping = page->mapping;

	DBG_BUGON(!PageLocked(page));
	DBG_BUGON(mapping->a_ops != &managed_cache_aops);

	if (!PagePrivate(page))
		return 1;

	return erofs_try_to_free_cached_page(mapping, page);
}
/*
 * ->invalidatepage for the managed cache.
 *
 * Only whole-page invalidation does anything: releasing the page is
 * retried, with cond_resched() in between, until it succeeds.
 */
static void managed_cache_invalidatepage(struct page *page,
	unsigned int offset, unsigned int length)
{
	const unsigned int stop = length + offset;

	DBG_BUGON(!PageLocked(page));

	/* Check for potential overflow in debug mode */
	DBG_BUGON(stop > PAGE_SIZE || stop < length);

	/* partial invalidation: nothing to do */
	if (offset != 0 || stop != PAGE_SIZE)
		return;

	while (!managed_cache_releasepage(page, GFP_NOFS))
		cond_resched();
}
/* address space operations installed on the managed-cache inode's mapping */
static const struct address_space_operations managed_cache_aops = {
.releasepage = managed_cache_releasepage,
.invalidatepage = managed_cache_invalidatepage,
};
static struct inode *erofs_init_managed_cache(struct super_block *sb)
{
struct inode *inode = new_inode(sb);
if (unlikely(inode == NULL))
return ERR_PTR(-ENOMEM);
set_nlink(inode, 1);
inode->i_size = OFFSET_MAX;
inode->i_mapping->a_ops = &managed_cache_aops;
mapping_set_gfp_mask(inode->i_mapping,
GFP_NOFS | __GFP_HIGHMEM |
__GFP_MOVABLE | __GFP_NOFAIL);
return inode;
}
#endif
/*
 * erofs_read_super - fill in @sb for a mount
 * @sb:       super block being set up
 * @dev_name: device name; logged and copied into the erofs_sb_info
 * @data:     mount option string, may be NULL
 * @silent:   non-zero suppresses the informational mount messages
 *
 * Allocates the erofs_sb_info, reads the on-disk superblock, parses the
 * mount options, creates the managed-cache inode (if configured) and
 * instantiates the root dentry.
 *
 * Return: 0 on success or a negative errno. On failure everything set up
 * here is released again and both sb->s_fs_info and sb->s_root are NULL.
 */
static int erofs_read_super(struct super_block *sb,
	const char *dev_name, void *data, int silent)
{
	struct inode *inode;
	struct erofs_sb_info *sbi;
	int err = -EINVAL;

	infoln("read_super, device -> %s", dev_name);
	infoln("options -> %s", (char *)data);

	if (unlikely(!sb_set_blocksize(sb, EROFS_BLKSIZ))) {
		errln("failed to set erofs blksize");
		goto err;
	}

	sbi = kzalloc(sizeof(struct erofs_sb_info), GFP_KERNEL);
	if (unlikely(sbi == NULL)) {
		err = -ENOMEM;
		goto err;
	}
	sb->s_fs_info = sbi;

	err = superblock_read(sb);
	if (err)
		goto err_sbread;

	sb->s_magic = EROFS_SUPER_MAGIC;
	sb->s_flags |= SB_RDONLY | SB_NOATIME;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_time_gran = 1;

	sb->s_op = &erofs_sops;

#ifdef CONFIG_EROFS_FS_XATTR
	sb->s_xattr = erofs_xattr_handlers;
#endif

	/* set erofs default mount options */
	default_options(sbi);

	err = parse_options(sb, data);
	if (err)
		goto err_parseopt;

	if (!silent)
		infoln("root inode @ nid %llu", ROOT_NID(sbi));

#ifdef CONFIG_EROFS_FS_ZIP
	INIT_RADIX_TREE(&sbi->workstn_tree, GFP_ATOMIC);
#endif

#ifdef EROFS_FS_HAS_MANAGED_CACHE
	sbi->managed_cache = erofs_init_managed_cache(sb);
	if (IS_ERR(sbi->managed_cache)) {
		err = PTR_ERR(sbi->managed_cache);
		goto err_init_managed_cache;
	}
#endif

	/* get the root inode */
	inode = erofs_iget(sb, ROOT_NID(sbi), true);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		goto err_iget;
	}

	if (!S_ISDIR(inode->i_mode)) {
		errln("rootino(nid %llu) is not a directory(i_mode %o)",
		      ROOT_NID(sbi), inode->i_mode);
		err = -EINVAL;
		goto err_isdir;
	}

	sb->s_root = d_make_root(inode);
	if (sb->s_root == NULL) {
		err = -ENOMEM;
		goto err_makeroot;
	}

	/* save the device name to sbi */
	sbi->dev_name = __getname();
	if (sbi->dev_name == NULL) {
		err = -ENOMEM;
		goto err_devname;
	}

	snprintf(sbi->dev_name, PATH_MAX, "%s", dev_name);
	sbi->dev_name[PATH_MAX - 1] = '\0';

	erofs_register_super(sb);

	if (!silent)
		infoln("mounted on %s with opts: %s.", dev_name,
		       (char *)data);
	return 0;
	/*
	 * please add a label for each exit point and use
	 * the following name convention, thus new features
	 * can be integrated easily without renaming labels.
	 */
err_devname:
	/*
	 * Dropping the root dentry also drops the root inode it pins.
	 * Clear s_root so that nothing touches the freed dentry later.
	 */
	dput(sb->s_root);
	sb->s_root = NULL;
	goto err_iget;
err_isdir:
	/* the root inode was never attached to a dentry: drop it here */
	iput(inode);
err_makeroot:
	/* d_make_root() already dropped the inode when it failed */
err_iget:
#ifdef EROFS_FS_HAS_MANAGED_CACHE
	iput(sbi->managed_cache);
err_init_managed_cache:
#endif
err_parseopt:
err_sbread:
	sb->s_fs_info = NULL;
	kfree(sbi);
err:
	return err;
}
/*
* could be triggered after deactivate_locked_super()
* is called, thus including umount and failed to initialize.
*/
/*
 * ->put_super: release everything erofs_read_super() attached to @sb.
 * Does nothing when s_fs_info is already NULL.
 */
static void erofs_put_super(struct super_block *sb)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
/* for cases which are failed in "read_super" */
if (sbi == NULL)
return;
WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC);
infoln("unmounted for %s", sbi->dev_name);
__putname(sbi->dev_name);
#ifdef EROFS_FS_HAS_MANAGED_CACHE
iput(sbi->managed_cache);
#endif
/* workstation cleanup and unregistration both run under umount_mutex */
mutex_lock(&sbi->umount_mutex);
#ifdef CONFIG_EROFS_FS_ZIP
erofs_workstation_cleanup_all(sb);
#endif
erofs_unregister_super(sb);
mutex_unlock(&sbi->umount_mutex);
kfree(sbi);
sb->s_fs_info = NULL;
}
/* arguments that erofs_mount() passes through mount_bdev() to erofs_fill_super() */
struct erofs_mount_private {
const char *dev_name;
char *options;
};
/* support mount_bdev() with options */
static int erofs_fill_super(struct super_block *sb,
void *_priv, int silent)
{
struct erofs_mount_private *priv = _priv;
return erofs_read_super(sb, priv->dev_name,
priv->options, silent);
}
/*
 * ->mount: bundle the device name and option string so that both reach
 * erofs_fill_super(), then mount through mount_bdev().
 */
static struct dentry *erofs_mount(
	struct file_system_type *fs_type, int flags,
	const char *dev_name, void *data)
{
	struct erofs_mount_private priv;

	priv.dev_name = dev_name;
	priv.options = data;

	return mount_bdev(fs_type, flags, dev_name,
			  &priv, erofs_fill_super);
}
/* ->kill_sb: plain block-device teardown through kill_block_super(). */
static void erofs_kill_sb(struct super_block *sb)
{
	kill_block_super(sb);
}
/* shrinker registered at module init; its callbacks are declared with the utils.c helpers */
static struct shrinker erofs_shrinker_info = {
.scan_objects = erofs_shrink_scan,
.count_objects = erofs_shrink_count,
.seeks = DEFAULT_SEEKS,
};
/* filesystem type "erofs"; FS_REQUIRES_DEV is set, so it needs a device */
static struct file_system_type erofs_fs_type = {
.owner = THIS_MODULE,
.name = "erofs",
.mount = erofs_mount,
.kill_sb = erofs_kill_sb,
.fs_flags = FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("erofs");
/* init/exit hooks of the compression subsystem, used by module init/exit */
#ifdef CONFIG_EROFS_FS_ZIP
extern int z_erofs_init_zip_subsystem(void);
extern void z_erofs_exit_zip_subsystem(void);
#endif
/*
 * Module entry point: sets up the inode cache, the shrinker, the zip
 * subsystem (if configured) and finally registers the filesystem.
 * On failure the steps already completed are undone in reverse order.
 *
 * Return: 0 on success or the negative errno of the failing step.
 */
static int __init erofs_module_init(void)
{
int err;
erofs_check_ondisk_layout_definitions();
infoln("initializing erofs " EROFS_VERSION);
err = erofs_init_inode_cache();
if (err)
goto icache_err;
err = register_shrinker(&erofs_shrinker_info);
if (err)
goto shrinker_err;
#ifdef CONFIG_EROFS_FS_ZIP
err = z_erofs_init_zip_subsystem();
if (err)
goto zip_err;
#endif
err = register_filesystem(&erofs_fs_type);
if (err)
goto fs_err;
infoln("successfully to initialize erofs");
return 0;
/* unwind ladder: each label undoes the steps that succeeded before it */
fs_err:
#ifdef CONFIG_EROFS_FS_ZIP
z_erofs_exit_zip_subsystem();
zip_err:
#endif
unregister_shrinker(&erofs_shrinker_info);
shrinker_err:
erofs_exit_inode_cache();
icache_err:
return err;
}
/*
 * Module exit: undo erofs_module_init() in reverse order -- filesystem
 * type, zip subsystem (if configured), shrinker, inode cache.
 */
static void __exit erofs_module_exit(void)
{
	unregister_filesystem(&erofs_fs_type);

#ifdef CONFIG_EROFS_FS_ZIP
	z_erofs_exit_zip_subsystem();
#endif

	unregister_shrinker(&erofs_shrinker_info);
	erofs_exit_inode_cache();

	infoln("successfully finalize erofs");
}
/*
 * ->statfs: report filesystem statistics.
 *
 * Free and available block counts are always 0; the fsid is the encoded
 * device number of the backing block device, split into two 32-bit halves.
 */
static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *const sb = dentry->d_sb;
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	const u64 id = huge_encode_dev(sb->s_bdev->bd_dev);

	buf->f_type = sb->s_magic;
	buf->f_bsize = EROFS_BLKSIZ;
	buf->f_namelen = EROFS_NAME_LEN;

	buf->f_blocks = sbi->blocks;
	buf->f_bavail = 0;
	buf->f_bfree = 0;

	buf->f_files = ULLONG_MAX;
	buf->f_ffree = ULLONG_MAX - sbi->inos;

	buf->f_fsid.val[0] = (u32)id;
	buf->f_fsid.val[1] = (u32)(id >> 32);
	return 0;
}
/*
 * ->show_options: print the active mount options into @seq.
 *
 * Each option is emitted only when its feature is compiled in; the
 * fault_injection rate is shown only when that option is set.
 */
static int erofs_show_options(struct seq_file *seq, struct dentry *root)
{
	struct erofs_sb_info *sbi __maybe_unused = EROFS_SB(root->d_sb);

#ifdef CONFIG_EROFS_FS_XATTR
	seq_puts(seq, test_opt(sbi, XATTR_USER) ?
		 ",user_xattr" : ",nouser_xattr");
#endif

#ifdef CONFIG_EROFS_FS_POSIX_ACL
	seq_puts(seq, test_opt(sbi, POSIX_ACL) ? ",acl" : ",noacl");
#endif

#ifdef CONFIG_EROFS_FAULT_INJECTION
	if (test_opt(sbi, FAULT_INJECTION))
		seq_printf(seq, ",fault_injection=%u",
			   sbi->fault_info.inject_rate);
#endif
	return 0;
}
/*
 * ->remount_fs: the only effect is to force SB_RDONLY into the requested
 * flags; @data is ignored.
 */
static int erofs_remount(struct super_block *sb, int *flags, char *data)
{
	DBG_BUGON(!sb_rdonly(sb));

	*flags |= SB_RDONLY;
	return 0;
}
/* super block operations; erofs_read_super() installs this table as sb->s_op */
const struct super_operations erofs_sops = {
.put_super = erofs_put_super,
.alloc_inode = alloc_inode,
.destroy_inode = destroy_inode,
.statfs = erofs_statfs,
.show_options = erofs_show_options,
.remount_fs = erofs_remount,
};
/* module entry/exit hooks and metadata */
module_init(erofs_module_init);
module_exit(erofs_module_exit);
MODULE_DESCRIPTION("Enhanced ROM File System");
MODULE_AUTHOR("Gao Xiang, Yu Chao, Miao Xie, CONSUMER BG, HUAWEI Inc.");
MODULE_LICENSE("GPL");

View File

@@ -1,251 +0,0 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
* linux/drivers/staging/erofs/unzip_lz4.c
*
* Copyright (C) 2018 HUAWEI, Inc.
* http://www.huawei.com/
* Created by Gao Xiang <gaoxiang25@huawei.com>
*
* Original code taken from 'linux/lib/lz4/lz4_decompress.c'
*/
/*
* LZ4 - Fast LZ compression algorithm
* Copyright (C) 2011 - 2016, Yann Collet.
* BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* You can contact the author at :
* - LZ4 homepage : http://www.lz4.org
* - LZ4 source repository : https://github.com/lz4/lz4
*
* Changed for kernel usage by:
* Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
*/
#include "internal.h"
#include <asm/unaligned.h>
#include "lz4defs.h"
/*
* no public solution to solve our requirement yet.
* see: <required buffer size for LZ4_decompress_safe_partial>
* https://groups.google.com/forum/#!topic/lz4c/_3kkz5N6n00
*/
/*
 * customized_lz4_decompress_safe_partial - decode LZ4 sequences into @dest
 * @source:     compressed input
 * @dest:       output buffer
 * @inputSize:  number of bytes available at @source
 * @outputSize: capacity of @dest; decoding stops once it is filled
 *
 * Return: the number of bytes written to @dest, or -ERANGE when the decoder
 * detects malformed input. With @outputSize == 0 the result is 0 if the
 * input is the single byte 0 and -1 otherwise.
 */
static FORCE_INLINE int customized_lz4_decompress_safe_partial(
const void * const source,
void * const dest,
int inputSize,
int outputSize)
{
/* Local Variables */
const BYTE *ip = (const BYTE *) source;
const BYTE * const iend = ip + inputSize;
BYTE *op = (BYTE *) dest;
BYTE * const oend = op + outputSize;
BYTE *cpy;
/* per-offset adjustments of the match pointer for offsets below 8 */
static const unsigned int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };
static const int dec64table[] = { 0, 0, 0, -1, 0, 1, 2, 3 };
/* Empty output buffer */
if (unlikely(outputSize == 0))
return ((inputSize == 1) && (*ip == 0)) ? 0 : -1;
/* Main Loop : decode sequences */
while (1) {
size_t length;
const BYTE *match;
size_t offset;
/* get literal length */
unsigned int const token = *ip++;
length = token>>ML_BITS;
if (length == RUN_MASK) {
unsigned int s;
/* extension bytes are added up while they equal 255 */
do {
s = *ip++;
length += s;
/* note: bitwise '&', so both comparisons are always evaluated */
} while ((ip < iend - RUN_MASK) & (s == 255));
if (unlikely((size_t)(op + length) < (size_t)(op))) {
/* overflow detection */
goto _output_error;
}
if (unlikely((size_t)(ip + length) < (size_t)(ip))) {
/* overflow detection */
goto _output_error;
}
}
/* copy literals */
cpy = op + length;
if ((cpy > oend - WILDCOPYLENGTH) ||
(ip + length > iend - (2 + 1 + LASTLITERALS))) {
/* output would overflow: copy only what still fits, then stop */
if (cpy > oend) {
memcpy(op, ip, length = oend - op);
op += length;
break;
}
if (unlikely(ip + length > iend)) {
/*
* Error :
* read attempt beyond
* end of input buffer
*/
goto _output_error;
}
memcpy(op, ip, length);
ip += length;
op += length;
/* fewer than 2 input bytes left: no room for a match offset */
if (ip > iend - 2)
break;
/* Necessarily EOF, due to parsing restrictions */
/* break; */
} else {
LZ4_wildCopy(op, ip, cpy);
ip += length;
op = cpy;
}
/* get offset */
offset = LZ4_readLE16(ip);
ip += 2;
match = op - offset;
if (unlikely(match < (const BYTE *)dest)) {
/* Error : offset outside buffers */
goto _output_error;
}
/* get matchlength */
length = token & ML_MASK;
if (length == ML_MASK) {
unsigned int s;
do {
s = *ip++;
if (ip > iend - LASTLITERALS)
goto _output_error;
length += s;
} while (s == 255);
if (unlikely((size_t)(op + length) < (size_t)op)) {
/* overflow detection */
goto _output_error;
}
}
length += MINMATCH;
/* copy match within block */
cpy = op + length;
if (unlikely(cpy >= oend - WILDCOPYLENGTH)) {
/* match reaches the end of the output: copy bytewise and stop */
if (cpy >= oend) {
while (op < oend)
*op++ = *match++;
break;
}
/* too close to the end for 8-byte copies: use the bytewise loop below */
goto __match;
}
/* costs ~1%; silence an msan warning when offset == 0 */
LZ4_write32(op, (U32)offset);
if (unlikely(offset < 8)) {
/* source and destination overlap: copy 4 bytes singly, then adjust match */
const int dec64 = dec64table[offset];
op[0] = match[0];
op[1] = match[1];
op[2] = match[2];
op[3] = match[3];
match += dec32table[offset];
memcpy(op + 4, match, 4);
match -= dec64;
} else {
LZ4_copy8(op, match);
match += 8;
}
op += 8;
if (unlikely(cpy > oend - 12)) {
BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1);
if (op < oCopyLimit) {
LZ4_wildCopy(op, match, oCopyLimit);
match += oCopyLimit - op;
op = oCopyLimit;
}
__match:
while (op < cpy)
*op++ = *match++;
} else {
LZ4_copy8(op, match);
if (length > 16)
LZ4_wildCopy(op + 8, match + 8, cpy);
}
op = cpy; /* correction */
}
/* debug-only checks that neither cursor ran past its buffer */
DBG_BUGON((void *)ip - source > inputSize);
DBG_BUGON((void *)op - dest > outputSize);
/* Nb of output bytes decoded */
return (int) ((void *)op - dest);
/* Overflow error detected */
_output_error:
return -ERANGE;
}
/*
 * z_erofs_unzip_lz4 - decompress @inlen bytes at @in into @out
 * @outlen: capacity of @out
 *
 * Return: the decoder's non-negative result on success. On failure the
 * error is logged, both buffers are hex-dumped at KERN_DEBUG and -EIO is
 * returned.
 */
int z_erofs_unzip_lz4(void *in, void *out, size_t inlen, size_t outlen)
{
	const int decoded = customized_lz4_decompress_safe_partial(in,
						out, inlen, outlen);

	if (decoded < 0) {
		/* a negative result means the decoder rejected the input */
		errln("%s, failed to decompress, in[%p, %zu] outlen[%p, %zu]",
		      __func__, in, inlen, out, outlen);
		WARN_ON(1);

		print_hex_dump(KERN_DEBUG, "raw data [in]: ",
			       DUMP_PREFIX_OFFSET, 16, 1, in, inlen, true);
		print_hex_dump(KERN_DEBUG, "raw data [out]: ",
			       DUMP_PREFIX_OFFSET, 16, 1, out, outlen, true);
		return -EIO;
	}

	return decoded;
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,228 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0
*
* linux/drivers/staging/erofs/unzip_vle.h
*
* Copyright (C) 2018 HUAWEI, Inc.
* http://www.huawei.com/
* Created by Gao Xiang <gaoxiang25@huawei.com>
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of the Linux
* distribution for more details.
*/
#ifndef __EROFS_FS_UNZIP_VLE_H
#define __EROFS_FS_UNZIP_VLE_H
#include "internal.h"
#include "unzip_pagevec.h"
/*
* - 0x5A110C8D ('sallocated', Z_EROFS_MAPPING_STAGING) -
* used for temporary allocated pages (via erofs_allocpage),
* in order to separate those from NULL mapping (eg. truncated pages)
*/
#define Z_EROFS_MAPPING_STAGING ((void *)0x5A110C8D)
/* true when @page carries the staging sentinel in ->mapping */
#define z_erofs_is_stagingpage(page) \
((page)->mapping == Z_EROFS_MAPPING_STAGING)
/*
 * z_erofs_gather_if_stagingpage - collect @page if it is a staging page
 *
 * Return: true when @page was a staging page and has been added to
 * @page_pool, false when it was left alone.
 */
static inline bool z_erofs_gather_if_stagingpage(struct list_head *page_pool,
						 struct page *page)
{
	if (!z_erofs_is_stagingpage(page))
		return false;

	list_add(&page->lru, page_pool);
	return true;
}
/*
* Structure fields follow one of the following exclusion rules.
*
* I: Modifiable by initialization/destruction paths and read-only
* for everyone else.
*
*/
/* number of pagevec slots embedded in struct z_erofs_vle_work */
#define Z_EROFS_VLE_INLINE_PAGEVECS 3
/*
 * NOTE(review): the "L:" tag used on some fields is not defined by the
 * exclusion-rule comment preceding this struct; presumably it marks fields
 * protected by @lock -- confirm.
 */
struct z_erofs_vle_work {
struct mutex lock;
/* I: decompression offset in page */
unsigned short pageofs;
unsigned short nr_pages;
/* L: queued pages in pagevec[] */
unsigned vcnt;
/* pagevec[] and rcu share storage, so only one is valid at a time */
union {
/* L: pagevec */
erofs_vtptr_t pagevec[Z_EROFS_VLE_INLINE_PAGEVECS];
struct rcu_head rcu;
};
};
/* workgroup format values, kept in the low bit(s) of ->flags (see the mask) */
#define Z_EROFS_VLE_WORKGRP_FMT_PLAIN 0
#define Z_EROFS_VLE_WORKGRP_FMT_LZ4 1
#define Z_EROFS_VLE_WORKGRP_FMT_MASK 1
typedef struct z_erofs_vle_workgroup *z_erofs_vle_owned_workgrp_t;
/* one workgroup: embeds a single work item plus the compressed pages */
struct z_erofs_vle_workgroup {
struct erofs_workgroup obj;
struct z_erofs_vle_work work;
/* next owned workgroup */
z_erofs_vle_owned_workgrp_t next;
/* compressed pages (including multi-usage pages) */
struct page *compressed_pages[Z_EROFS_CLUSTER_MAX_PAGES];
/* NOTE(review): llen is presumably the logical length -- confirm */
unsigned int llen, flags;
};
/* let's avoid the valid 32-bit kernel addresses */
/* the chained workgroup hasn't submitted io (still open) */
#define Z_EROFS_VLE_WORKGRP_TAIL ((void *)0x5F0ECAFE)
/* the chained workgroup has already submitted io */
#define Z_EROFS_VLE_WORKGRP_TAIL_CLOSED ((void *)0x5F0EDEAD)
#define Z_EROFS_VLE_WORKGRP_NIL (NULL)
/* read the format bits out of a workgroup's ->flags */
#define z_erofs_vle_workgrp_fmt(grp) \
((grp)->flags & Z_EROFS_VLE_WORKGRP_FMT_MASK)
/*
 * z_erofs_vle_set_workgrp_fmt - replace the format bits of @grp->flags
 * @fmt: new format value; it is OR-ed in as is, after the old format bits
 *       have been cleared
 */
static inline void z_erofs_vle_set_workgrp_fmt(
	struct z_erofs_vle_workgroup *grp,
	unsigned int fmt)
{
	unsigned int flags = grp->flags;

	flags &= ~Z_EROFS_VLE_WORKGRP_FMT_MASK;
	flags |= fmt;
	grp->flags = flags;
}
/* definitions if multiref is disabled */
#define z_erofs_vle_grab_primary_work(grp) (&(grp)->work)
#define z_erofs_vle_grab_work(grp, pageofs) (&(grp)->work)
#define z_erofs_vle_work_workgroup(wrk, primary) \
((primary) ? container_of(wrk, \
struct z_erofs_vle_workgroup, work) : \
({ BUG(); (void *)NULL; }))
#define Z_EROFS_WORKGROUP_SIZE sizeof(struct z_erofs_vle_workgroup)
/*
 * State of one decompression I/O submission: an atomic count of bios still
 * pending and the head of the chain of owned workgroups.
 */
struct z_erofs_vle_unzip_io {
atomic_t pending_bios;
z_erofs_vle_owned_workgrp_t head;
/*
 * Either a wait queue or a work item; the two share storage.
 * NOTE(review): presumably wait is for synchronous callers and work for
 * deferred completion -- confirm against the users of this struct.
 */
union {
wait_queue_head_t wait;
struct work_struct work;
} u;
};
/* A z_erofs_vle_unzip_io bundled with the super block it belongs to. */
struct z_erofs_vle_unzip_io_sb {
struct z_erofs_vle_unzip_io io;
struct super_block *sb;
};
#define Z_EROFS_ONLINEPAGE_COUNT_BITS 2
#define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1)
#define Z_EROFS_ONLINEPAGE_INDEX_SHIFT (Z_EROFS_ONLINEPAGE_COUNT_BITS)
/*
* waiters (aka. ongoing_packs): # to unlock the page
* sub-index: 0 - for partial page, >= 1 full page sub-index
*/
typedef atomic_t z_erofs_onlinepage_t;
/* type punning */
union z_erofs_onlinepage_converter {
z_erofs_onlinepage_t *o;
unsigned long *v;
};
static inline unsigned z_erofs_onlinepage_index(struct page *page)
{
union z_erofs_onlinepage_converter u;
BUG_ON(!PagePrivate(page));
u.v = &page_private(page);
return atomic_read(u.o) >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
}
/*
 * Turn @page into an "online" page: its private word is initialised to an
 * atomic value of 1 (waiter count 1, sub-index 0) and PG_private is set.
 */
static inline void z_erofs_onlinepage_init(struct page *page)
{
/* local union used to reinterpret the atomic initialiser as an unsigned long */
union {
z_erofs_onlinepage_t o;
unsigned long v;
/* keep from being unlocked in advance */
} u = { .o = ATOMIC_INIT(1) };
set_page_private(page, u.v);
/* write barrier between storing page_private and setting PG_private */
smp_wmb();
SetPagePrivate(page);
}
/*
 * Atomically record @index as the sub-index of an online page and, when
 * @down is true, add one to its waiter count.
 *
 * If a non-zero sub-index is already stored and @index is 0, nothing is
 * changed.  The update is retried until the cmpxchg succeeds.
 */
static inline void z_erofs_onlinepage_fixup(struct page *page,
					    uintptr_t index, bool down)
{
	union z_erofs_onlinepage_converter conv = { .v = &page_private(page) };
	int seen, seen_index, updated;

	do {
		seen = atomic_read(conv.o);
		seen_index = seen >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;

		if (seen_index) {
			/* an index is already recorded; partial pages keep it */
			if (!index)
				return;

			DBG_BUGON(seen_index != index);
		}

		updated = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) |
			((seen & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down);
	} while (atomic_cmpxchg(conv.o, seen, updated) != seen);
}
/*
 * Drop one waiter from an online page.  When the waiter count (the low
 * Z_EROFS_ONLINEPAGE_COUNT_BITS bits) reaches zero, PG_private is cleared,
 * the page is marked uptodate unless PG_error is set, and it is unlocked.
 * The page must have PG_private set on entry.
 */
static inline void z_erofs_onlinepage_endio(struct page *page)
{
union z_erofs_onlinepage_converter u;
unsigned v;
BUG_ON(!PagePrivate(page));
u.v = &page_private(page);
v = atomic_dec_return(u.o);
/* last waiter gone: finish the page */
if (!(v & Z_EROFS_ONLINEPAGE_COUNT_MASK)) {
ClearPagePrivate(page);
if (!PageError(page))
SetPageUptodate(page);
unlock_page(page);
}
/* NOTE(review): this re-reads the counter after the page may have been unlocked */
debugln("%s, page %p value %x", __func__, page, atomic_read(u.o));
}
#define Z_EROFS_VLE_VMAP_ONSTACK_PAGES \
min_t(unsigned int, THREAD_SIZE / 8 / sizeof(struct page *), 96U)
#define Z_EROFS_VLE_VMAP_GLOBAL_PAGES 2048
/* unzip_vle_lz4.c */
extern int z_erofs_vle_plain_copy(struct page **compressed_pages,
unsigned clusterpages, struct page **pages,
unsigned nr_pages, unsigned short pageofs);
extern int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages,
unsigned clusterpages, struct page **pages,
unsigned int outlen, unsigned short pageofs);
extern int z_erofs_vle_unzip_vmap(struct page **compressed_pages,
unsigned clusterpages, void *vaddr, unsigned llen,
unsigned short pageofs, bool overlapped);
#endif

View File

@@ -1,210 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* linux/drivers/staging/erofs/unzip_vle_lz4.c
*
* Copyright (C) 2018 HUAWEI, Inc.
* http://www.huawei.com/
* Created by Gao Xiang <gaoxiang25@huawei.com>
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of the Linux
* distribution for more details.
*/
#include "unzip_vle.h"
#if Z_EROFS_CLUSTER_MAX_PAGES > Z_EROFS_VLE_INLINE_PAGEVECS
#define EROFS_PERCPU_NR_PAGES Z_EROFS_CLUSTER_MAX_PAGES
#else
#define EROFS_PERCPU_NR_PAGES Z_EROFS_VLE_INLINE_PAGEVECS
#endif
/*
 * One scratch buffer of EROFS_PERCPU_NR_PAGES pages per possible CPU slot
 * (NR_CPUS entries).  Users in this file index it with smp_processor_id()
 * while preemption is disabled.
 */
static struct {
char data[PAGE_SIZE * EROFS_PERCPU_NR_PAGES];
} erofs_pcpubuf[NR_CPUS];
/*
 * Copy an uncompressed ("plain") cluster into its output pages, shifting the
 * data by @pageofs bytes.
 *
 * Output page i receives the last @pageofs bytes of compressed page i-1 at
 * its start, followed by the first PAGE_SIZE - @pageofs bytes of compressed
 * page i.  NULL entries in @pages are skipped.  An output page that is also
 * one of the compressed pages is first saved ("mirrored") into the per-CPU
 * buffer so that its original content can still be read after it has been
 * overwritten.
 *
 * Runs with preemption disabled for the whole copy.  Always returns 0.
 */
int z_erofs_vle_plain_copy(struct page **compressed_pages,
unsigned clusterpages,
struct page **pages,
unsigned nr_pages,
unsigned short pageofs)
{
unsigned i, j;
void *src = NULL;
/* number of bytes taken from the head of each compressed page */
const unsigned righthalf = PAGE_SIZE - pageofs;
char *percpu_data;
/* mirrored[k]: compressed page k has been saved into percpu_data */
bool mirrored[Z_EROFS_CLUSTER_MAX_PAGES] = { 0 };
preempt_disable();
percpu_data = erofs_pcpubuf[smp_processor_id()].data;
j = 0;
/* j is reset to the previous value of i at the end of each iteration */
for (i = 0; i < nr_pages; j = i++) {
struct page *page = pages[i];
void *dst;
if (page == NULL) {
/* nothing to write here; drop any source mapping carried over */
if (src != NULL) {
if (!mirrored[j])
kunmap_atomic(src);
src = NULL;
}
continue;
}
dst = kmap_atomic(page);
/* if this output page doubles as a compressed page, save it first */
for (; j < clusterpages; ++j) {
if (compressed_pages[j] != page)
continue;
DBG_BUGON(mirrored[j]);
memcpy(percpu_data + j * PAGE_SIZE, dst, PAGE_SIZE);
mirrored[j] = true;
break;
}
if (i) {
/* left part: tail of the previous compressed page */
if (src == NULL)
src = mirrored[i-1] ?
percpu_data + (i-1) * PAGE_SIZE :
kmap_atomic(compressed_pages[i-1]);
memcpy(dst, src + righthalf, pageofs);
if (!mirrored[i-1])
kunmap_atomic(src);
/* no compressed page i exists to supply the right part */
if (unlikely(i >= clusterpages)) {
kunmap_atomic(dst);
break;
}
}
if (!righthalf)
src = NULL;
else {
/* right part: head of the current compressed page */
src = mirrored[i] ? percpu_data + i * PAGE_SIZE :
kmap_atomic(compressed_pages[i]);
memcpy(dst + pageofs, src, righthalf);
}
kunmap_atomic(dst);
}
/* release a source mapping left over from the final iteration */
if (src != NULL && !mirrored[j])
kunmap_atomic(src);
preempt_enable();
return 0;
}
extern int z_erofs_unzip_lz4(void *in, void *out, size_t inlen, size_t outlen);
/*
 * Decompress a cluster into the per-CPU buffer and then copy the result into
 * the output pages.
 *
 * @outlen bytes are produced starting at offset @pageofs of the first output
 * page; NULL entries in @pages are skipped.
 *
 * Returns 0 on success, -ENOTSUPP if @outlen + @pageofs does not fit in the
 * per-CPU buffer, -ENOMEM if the compressed pages cannot be mapped, or the
 * negative value returned by z_erofs_unzip_lz4().
 */
int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages,
unsigned clusterpages,
struct page **pages,
unsigned outlen,
unsigned short pageofs)
{
void *vin, *vout;
unsigned nr_pages, i, j;
int ret;
if (outlen + pageofs > EROFS_PERCPU_NR_PAGES * PAGE_SIZE)
return -ENOTSUPP;
nr_pages = DIV_ROUND_UP(outlen + pageofs, PAGE_SIZE);
/* a single compressed page is kmapped; several are mapped with erofs_vmap() */
if (clusterpages == 1) {
vin = kmap_atomic(compressed_pages[0]);
} else {
vin = erofs_vmap(compressed_pages, clusterpages);
if (!vin)
return -ENOMEM;
}
preempt_disable();
vout = erofs_pcpubuf[smp_processor_id()].data;
ret = z_erofs_unzip_lz4(vin, vout + pageofs,
clusterpages * PAGE_SIZE, outlen);
if (ret < 0)
goto out;
ret = 0;
for (i = 0; i < nr_pages; ++i) {
/* bytes belonging to this output page */
j = min((unsigned)PAGE_SIZE - pageofs, outlen);
if (pages[i] != NULL) {
/* the output page is the (already mapped) compressed page itself */
if (clusterpages == 1 &&
pages[i] == compressed_pages[0]) {
memcpy(vin + pageofs, vout + pageofs, j);
} else {
void *dst = kmap_atomic(pages[i]);
memcpy(dst + pageofs, vout + pageofs, j);
kunmap_atomic(dst);
}
}
vout += PAGE_SIZE;
outlen -= j;
/* only the first page starts at a non-zero offset */
pageofs = 0;
}
out:
preempt_enable();
if (clusterpages == 1)
kunmap_atomic(vin);
else
erofs_vunmap(vin, clusterpages);
return ret;
}
/*
 * Decompress a cluster directly into an already-mapped, virtually contiguous
 * output area.
 *
 * @compressed_pages: the @clusterpages pages holding the compressed data
 * @vout:             start of the mapped output area
 * @llen:             number of bytes to produce
 * @pageofs:          offset into @vout at which the output starts
 * @overlapped:       the compressed pages may also be output pages, so the
 *                    input is first copied into the per-CPU buffer
 *
 * Returns 0 on success, -ENOMEM if the compressed pages cannot be mapped, or
 * the negative value returned by z_erofs_unzip_lz4().
 */
int z_erofs_vle_unzip_vmap(struct page **compressed_pages,
			   unsigned clusterpages,
			   void *vout,
			   unsigned llen,
			   unsigned short pageofs,
			   bool overlapped)
{
	void *vin;
	unsigned i;
	int ret;

	if (overlapped) {
		/* the per-CPU buffer is only stable while preemption is off */
		preempt_disable();
		vin = erofs_pcpubuf[smp_processor_id()].data;

		for (i = 0; i < clusterpages; ++i) {
			void *t = kmap_atomic(compressed_pages[i]);

			memcpy(vin + PAGE_SIZE * i, t, PAGE_SIZE);
			kunmap_atomic(t);
		}
	} else if (clusterpages == 1) {
		vin = kmap_atomic(compressed_pages[0]);
	} else {
		vin = erofs_vmap(compressed_pages, clusterpages);
		/*
		 * Mapping can fail; bail out as
		 * z_erofs_vle_unzip_fast_percpu() does instead of handing a
		 * NULL source to the decompressor.
		 */
		if (!vin)
			return -ENOMEM;
	}

	ret = z_erofs_unzip_lz4(vin, vout + pageofs,
				clusterpages * PAGE_SIZE, llen);
	/* callers only distinguish success (0) from a negative error */
	if (ret > 0)
		ret = 0;

	if (!overlapped) {
		if (clusterpages == 1)
			kunmap_atomic(vin);
		else
			erofs_vunmap(vin, clusterpages);
	} else {
		preempt_enable();
	}
	return ret;
}

View File

@@ -1,93 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0
*
* linux/drivers/staging/erofs/xattr.h
*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* http://www.huawei.com/
* Created by Gao Xiang <gaoxiang25@huawei.com>
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of the Linux
* distribution for more details.
*/
#ifndef __EROFS_XATTR_H
#define __EROFS_XATTR_H
#include "internal.h"
#include <linux/posix_acl_xattr.h>
#include <linux/xattr.h>
/* Attribute not found */
#define ENOATTR ENODATA
static inline unsigned inlinexattr_header_size(struct inode *inode)
{
return sizeof(struct erofs_xattr_ibody_header)
+ sizeof(u32) * EROFS_V(inode)->xattr_shared_count;
}
/*
 * Block address holding shared xattr @xattr_id: the xattr base block plus the
 * number of whole blocks covered by @xattr_id 32-bit units.  Evaluates to 0
 * when xattr support is compiled out.
 */
static inline erofs_blk_t
xattrblock_addr(struct erofs_sb_info *sbi, unsigned xattr_id)
{
	erofs_blk_t blkaddr = 0;

#ifdef CONFIG_EROFS_FS_XATTR
	blkaddr = sbi->xattr_blkaddr +
		xattr_id * sizeof(__u32) / EROFS_BLKSIZ;
#endif
	return blkaddr;
}
/*
 * Byte offset of shared xattr @xattr_id inside its block, i.e. the position
 * of @xattr_id counted in 32-bit units, modulo the block size.
 */
static inline unsigned
xattrblock_offset(struct erofs_sb_info *sbi, unsigned xattr_id)
{
	const unsigned in_block = (xattr_id * sizeof(__u32)) % EROFS_BLKSIZ;

	return in_block;
}
extern const struct xattr_handler erofs_xattr_user_handler;
extern const struct xattr_handler erofs_xattr_trusted_handler;
#ifdef CONFIG_EROFS_FS_SECURITY
extern const struct xattr_handler erofs_xattr_security_handler;
#endif
/*
 * Look up the xattr handler for an on-disk name index.
 *
 * Returns NULL for index 0, for an index beyond the table, and for table
 * slots that are not populated in this configuration.
 */
static inline const struct xattr_handler *erofs_xattr_handler(unsigned index)
{
	static const struct xattr_handler *xattr_handler_map[] = {
		[EROFS_XATTR_INDEX_USER] = &erofs_xattr_user_handler,
#ifdef CONFIG_EROFS_FS_POSIX_ACL
		[EROFS_XATTR_INDEX_POSIX_ACL_ACCESS] = &posix_acl_access_xattr_handler,
		[EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT] =
			&posix_acl_default_xattr_handler,
#endif
		[EROFS_XATTR_INDEX_TRUSTED] = &erofs_xattr_trusted_handler,
#ifdef CONFIG_EROFS_FS_SECURITY
		[EROFS_XATTR_INDEX_SECURITY] = &erofs_xattr_security_handler,
#endif
	};

	if (!index || index >= ARRAY_SIZE(xattr_handler_map))
		return NULL;

	return xattr_handler_map[index];
}
#ifdef CONFIG_EROFS_FS_XATTR
extern const struct inode_operations erofs_generic_xattr_iops;
extern const struct inode_operations erofs_dir_xattr_iops;
int erofs_getxattr(struct inode *, int, const char *, void *, size_t);
ssize_t erofs_listxattr(struct dentry *, char *, size_t);
#else
/* Stub used when CONFIG_EROFS_FS_XATTR is not set: always fails with -ENOTSUPP. */
static int __maybe_unused erofs_getxattr(struct inode *inode, int index,
const char *name,
void *buffer, size_t buffer_size)
{
return -ENOTSUPP;
}
/* Stub used when CONFIG_EROFS_FS_XATTR is not set: always fails with -ENOTSUPP. */
static ssize_t __maybe_unused erofs_listxattr(struct dentry *dentry,
char *buffer, size_t buffer_size)
{
return -ENOTSUPP;
}
#endif
#endif

View File

@@ -260,6 +260,7 @@ source "fs/pstore/Kconfig"
source "fs/sysv/Kconfig"
source "fs/ufs/Kconfig"
source "fs/exofs/Kconfig"
source "fs/erofs/Kconfig"
endif # MISC_FILESYSTEMS

View File

@@ -133,3 +133,4 @@ obj-y += exofs/ # Multiple modules
obj-$(CONFIG_CEPH_FS) += ceph/
obj-$(CONFIG_PSTORE) += pstore/
obj-$(CONFIG_EFIVAR_FS) += efivarfs/
obj-$(CONFIG_EROFS_FS) += erofs/

78
fs/erofs/Kconfig Normal file
View File

@@ -0,0 +1,78 @@
# SPDX-License-Identifier: GPL-2.0-only
config EROFS_FS
tristate "EROFS filesystem support"
depends on BLOCK
select LIBCRC32C
help
EROFS (Enhanced Read-Only File System) is a lightweight
read-only file system with modern designs (eg. page-sized
blocks, inline xattrs/data, etc.) for scenarios which need
high-performance read-only requirements, e.g. Android OS
for mobile phones and LIVECDs.
It also provides fixed-sized output compression support,
which improves storage density, keeps relatively higher
compression ratios, which is more useful to achieve high
performance for embedded devices with limited memory.
If unsure, say N.
config EROFS_FS_DEBUG
bool "EROFS debugging feature"
depends on EROFS_FS
help
Print debugging messages and enable more BUG_ONs which check
filesystem consistency and find potential issues aggressively,
which can be used for Android eng build, for example.
For daily use, say N.
config EROFS_FS_XATTR
bool "EROFS extended attributes"
depends on EROFS_FS
default y
help
Extended attributes are name:value pairs associated with inodes by
the kernel or by users (see the attr(5) manual page, or visit
<http://acl.bestbits.at/> for details).
If unsure, say N.
config EROFS_FS_POSIX_ACL
bool "EROFS Access Control Lists"
depends on EROFS_FS_XATTR
select FS_POSIX_ACL
default y
help
Posix Access Control Lists (ACLs) support permissions for users and
groups beyond the owner/group/world scheme.
To learn more about Access Control Lists, visit the POSIX ACLs for
Linux website <http://acl.bestbits.at/>.
If you don't know what Access Control Lists are, say N.
config EROFS_FS_SECURITY
bool "EROFS Security Labels"
depends on EROFS_FS_XATTR
default y
help
Security labels provide an access control facility to support Linux
Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO
Linux. This option enables an extended attribute handler for file
security labels in the erofs filesystem, so that it requires enabling
the extended attribute support in advance.
If you are not using a security module, say N.
config EROFS_FS_ZIP
bool "EROFS Data Compression Support"
depends on EROFS_FS
select LZ4_DECOMPRESS
default y
help
Enable fixed-sized output compression for EROFS.
If you don't want to enable compression feature, say N.

6
fs/erofs/Makefile Normal file
View File

@@ -0,0 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_EROFS_FS) += erofs.o
erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o

86
fs/erofs/compress.h Normal file
View File

@@ -0,0 +1,86 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2019 HUAWEI, Inc.
* https://www.huawei.com/
*/
#ifndef __EROFS_FS_COMPRESS_H
#define __EROFS_FS_COMPRESS_H
#include "internal.h"
enum {
Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
Z_EROFS_COMPRESSION_RUNTIME_MAX
};
/* Parameters of one decompression request passed to z_erofs_decompress(). */
struct z_erofs_decompress_req {
struct super_block *sb;
/* arrays of input (compressed) and output (decompressed) pages */
struct page **in, **out;
/* byte offset in the first output page at which the output starts */
unsigned short pageofs_out;
/* sizes in bytes of the compressed input and of the output to produce */
unsigned int inputsize, outputsize;
/* indicate the algorithm will be used for decompression */
unsigned int alg;
/*
 * NOTE(review): inplace_io presumably means some input pages are also
 * output pages; partial_decoding that only a prefix of the compressed
 * extent's output is wanted -- confirm against the callers.
 */
bool inplace_io, partial_decoding;
};
/* some special page->private (unsigned long, see below) */
#define Z_EROFS_SHORTLIVED_PAGE (-1UL << 2)
#define Z_EROFS_PREALLOCATED_PAGE (-2UL << 2)
/*
* For all pages in a pcluster, page->private should be one of
* Type Last 2bits page->private
* short-lived page 00 Z_EROFS_SHORTLIVED_PAGE
* preallocated page (tryalloc) 00 Z_EROFS_PREALLOCATED_PAGE
* cached/managed page 00 pointer to z_erofs_pcluster
* online page (file-backed, 01/10/11 sub-index << 2 | count
* some pages can be used for inplace I/O)
*
* page->mapping should be one of
* Type page->mapping
* short-lived page NULL
* preallocated page NULL
* cached/managed page non-NULL or NULL (invalidated/truncated page)
* online page non-NULL
*
* For all managed pages, PG_private should be set with 1 extra refcount,
* which is used for page reclaim / migration.
*/
/*
* short-lived pages are pages directly from buddy system with specific
* page->private (no need to set PagePrivate since these are non-LRU /
* non-movable pages and bypass reclaim / migration code).
*/
/*
 * Tell whether @page is a short-lived page, i.e. whether its private word is
 * Z_EROFS_SHORTLIVED_PAGE.  Such a page is expected to have no mapping.
 */
static inline bool z_erofs_is_shortlived_page(struct page *page)
{
	const bool shortlived = page->private == Z_EROFS_SHORTLIVED_PAGE;

	if (shortlived) {
		DBG_BUGON(page->mapping);
	}
	return shortlived;
}
/*
 * Release a short-lived page.
 *
 * Returns false (doing nothing) if @page is not a short-lived page.
 * Otherwise returns true after either dropping one reference, when others
 * still hold the page, or clearing its private word and queueing it on
 * @pagepool.
 */
static inline bool z_erofs_put_shortlivedpage(struct list_head *pagepool,
					      struct page *page)
{
	if (!z_erofs_is_shortlived_page(page))
		return false;

	if (page_ref_count(page) <= 1) {
		/* follow the pcluster rule above. */
		set_page_private(page, 0);
		list_add(&page->lru, pagepool);
		return true;
	}

	/* short-lived pages should not be used by others at the same time */
	put_page(page);
	return true;
}
int z_erofs_decompress(struct z_erofs_decompress_req *rq,
struct list_head *pagepool);
#endif

View File

@@ -1,21 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/drivers/staging/erofs/data.c
*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* http://www.huawei.com/
* Created by Gao Xiang <gaoxiang25@huawei.com>
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of the Linux
* distribution for more details.
* https://www.huawei.com/
*/
#include "internal.h"
#include <linux/prefetch.h>
#include <trace/events/erofs.h>
static inline void read_endio(struct bio *bio)
static void erofs_readendio(struct bio *bio)
{
int i;
struct bio_vec *bvec;
@@ -27,7 +20,7 @@ static inline void read_endio(struct bio *bio)
/* page is already locked */
DBG_BUGON(PageUptodate(page));
if (unlikely(err))
if (err)
SetPageError(page);
else
SetPageUptodate(page);
@@ -38,70 +31,35 @@ static inline void read_endio(struct bio *bio)
bio_put(bio);
}
/* prio -- true is used for dir */
struct page *erofs_get_meta_page(struct super_block *sb,
erofs_blk_t blkaddr, bool prio)
struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr)
{
struct inode *bd_inode = sb->s_bdev->bd_inode;
struct address_space *mapping = bd_inode->i_mapping;
struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping;
struct page *page;
repeat:
page = find_or_create_page(mapping, blkaddr,
/*
* Prefer looping in the allocator rather than here,
* at least that code knows what it's doing.
*/
mapping_gfp_constraint(mapping, ~__GFP_FS) | __GFP_NOFAIL);
BUG_ON(!page || !PageLocked(page));
if (!PageUptodate(page)) {
struct bio *bio;
int err;
bio = prepare_bio(sb, blkaddr, 1, read_endio);
err = bio_add_page(bio, page, PAGE_SIZE, 0);
BUG_ON(err != PAGE_SIZE);
__submit_bio(bio, REQ_OP_READ,
REQ_META | (prio ? REQ_PRIO : 0));
page = read_cache_page_gfp(mapping, blkaddr,
mapping_gfp_constraint(mapping, ~__GFP_FS));
/* should already be PageUptodate */
if (!IS_ERR(page))
lock_page(page);
/* the page has been truncated by others? */
if (unlikely(page->mapping != mapping)) {
unlock_page(page);
put_page(page);
goto repeat;
}
/* more likely a read error */
if (unlikely(!PageUptodate(page))) {
unlock_page(page);
put_page(page);
page = ERR_PTR(-EIO);
}
}
return page;
}
static int erofs_map_blocks_flatmode(struct inode *inode,
struct erofs_map_blocks *map,
int flags)
struct erofs_map_blocks *map,
int flags)
{
int err = 0;
erofs_blk_t nblocks, lastblk;
u64 offset = map->m_la;
struct erofs_vnode *vi = EROFS_V(inode);
struct erofs_inode *vi = EROFS_I(inode);
bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);
trace_erofs_map_blocks_flatmode_enter(inode, map, flags);
nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
lastblk = nblocks - is_inode_layout_inline(inode);
lastblk = nblocks - tailendpacking;
if (unlikely(offset >= inode->i_size)) {
if (offset >= inode->i_size) {
/* leave out-of-bound access unmapped */
map->m_flags = 0;
map->m_plen = 0;
@@ -114,7 +72,7 @@ static int erofs_map_blocks_flatmode(struct inode *inode,
if (offset < blknr_to_addr(lastblk)) {
map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la;
map->m_plen = blknr_to_addr(lastblk) - offset;
} else if (is_inode_layout_inline(inode)) {
} else if (tailendpacking) {
/* 2 - inode inline B: inode, [xattrs], inline last blk... */
struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
@@ -122,17 +80,21 @@ static int erofs_map_blocks_flatmode(struct inode *inode,
vi->xattr_isize + erofs_blkoff(map->m_la);
map->m_plen = inode->i_size - offset;
/* inline data should locate in one meta block */
/* inline data should be located in one meta block */
if (erofs_blkoff(map->m_pa) + map->m_plen > PAGE_SIZE) {
erofs_err(inode->i_sb,
"inline data cross block boundary @ nid %llu",
vi->nid);
DBG_BUGON(1);
err = -EIO;
err = -EFSCORRUPTED;
goto err_out;
}
map->m_flags |= EROFS_MAP_META;
} else {
errln("internal error @ nid: %llu (size %llu), m_la 0x%llx",
vi->nid, inode->i_size, map->m_la);
erofs_err(inode->i_sb,
"internal error @ nid: %llu (size %llu), m_la 0x%llx",
vi->nid, inode->i_size, map->m_la);
DBG_BUGON(1);
err = -EIO;
goto err_out;
@@ -146,56 +108,15 @@ static int erofs_map_blocks_flatmode(struct inode *inode,
return err;
}
#ifdef CONFIG_EROFS_FS_ZIP
extern int z_erofs_map_blocks_iter(struct inode *,
struct erofs_map_blocks *, struct page **, int);
#endif
int erofs_map_blocks_iter(struct inode *inode,
struct erofs_map_blocks *map,
struct page **mpage_ret, int flags)
static inline struct bio *erofs_read_raw_page(struct bio *bio,
struct address_space *mapping,
struct page *page,
erofs_off_t *last_block,
unsigned int nblocks,
bool ra)
{
/* by default, reading raw data never use erofs_map_blocks_iter */
if (unlikely(!is_inode_layout_compression(inode))) {
if (*mpage_ret != NULL)
put_page(*mpage_ret);
*mpage_ret = NULL;
return erofs_map_blocks(inode, map, flags);
}
#ifdef CONFIG_EROFS_FS_ZIP
return z_erofs_map_blocks_iter(inode, map, mpage_ret, flags);
#else
/* data compression is not available */
return -ENOTSUPP;
#endif
}
int erofs_map_blocks(struct inode *inode,
struct erofs_map_blocks *map, int flags)
{
if (unlikely(is_inode_layout_compression(inode))) {
struct page *mpage = NULL;
int err;
err = erofs_map_blocks_iter(inode, map, &mpage, flags);
if (mpage != NULL)
put_page(mpage);
return err;
}
return erofs_map_blocks_flatmode(inode, map, flags);
}
static inline struct bio *erofs_read_raw_page(
struct bio *bio,
struct address_space *mapping,
struct page *page,
erofs_off_t *last_block,
unsigned nblocks,
bool ra)
{
struct inode *inode = mapping->host;
struct inode *const inode = mapping->host;
struct super_block *const sb = inode->i_sb;
erofs_off_t current_block = (erofs_off_t)page->index;
int err;
@@ -206,34 +127,28 @@ static inline struct bio *erofs_read_raw_page(
goto has_updated;
}
if (cleancache_get_page(page) == 0) {
err = 0;
SetPageUptodate(page);
goto has_updated;
}
/* note that for readpage case, bio also equals to NULL */
if (bio != NULL &&
/* not continuous */
*last_block + 1 != current_block) {
if (bio &&
/* not continuous */
*last_block + 1 != current_block) {
submit_bio_retry:
__submit_bio(bio, REQ_OP_READ, 0);
submit_bio(bio);
bio = NULL;
}
if (bio == NULL) {
if (!bio) {
struct erofs_map_blocks map = {
.m_la = blknr_to_addr(current_block),
};
erofs_blk_t blknr;
unsigned blkoff;
unsigned int blkoff;
err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
if (unlikely(err))
err = erofs_map_blocks_flatmode(inode, &map, EROFS_GET_BLOCKS_RAW);
if (err)
goto err_out;
/* zero out the holed page */
if (unlikely(!(map.m_flags & EROFS_MAP_MAPPED))) {
if (!(map.m_flags & EROFS_MAP_MAPPED)) {
zero_user_segment(page, 0, PAGE_SIZE);
SetPageUptodate(page);
@@ -254,7 +169,7 @@ static inline struct bio *erofs_read_raw_page(
DBG_BUGON(map.m_plen > PAGE_SIZE);
ipage = erofs_get_meta_page(inode->i_sb, blknr, 0);
ipage = erofs_get_meta_page(inode->i_sb, blknr);
if (IS_ERR(ipage)) {
err = PTR_ERR(ipage);
@@ -287,7 +202,13 @@ static inline struct bio *erofs_read_raw_page(
if (nblocks > BIO_MAX_PAGES)
nblocks = BIO_MAX_PAGES;
bio = prepare_bio(inode->i_sb, blknr, nblocks, read_endio);
bio = bio_alloc(GFP_NOIO, nblocks);
bio->bi_end_io = erofs_readendio;
bio_set_dev(bio, sb->s_bdev);
bio->bi_iter.bi_sector = (sector_t)blknr <<
LOG_SECTORS_PER_BLOCK;
bio->bi_opf = REQ_OP_READ | (ra ? REQ_RAHEAD : 0);
}
err = bio_add_page(bio, page, PAGE_SIZE, 0);
@@ -298,7 +219,7 @@ static inline struct bio *erofs_read_raw_page(
*last_block = current_block;
/* shift in advance in case of it followed by too many gaps */
if (unlikely(bio->bi_vcnt >= bio->bi_max_vecs)) {
if (bio->bi_iter.bi_size >= bio->bi_max_vecs * PAGE_SIZE) {
/* err should reassign to 0 after submitting */
err = 0;
goto submit_bio_out;
@@ -316,11 +237,10 @@ static inline struct bio *erofs_read_raw_page(
unlock_page(page);
/* if updated manually, continuous pages has a gap */
if (bio != NULL)
if (bio)
submit_bio_out:
__submit_bio(bio, REQ_OP_READ, 0);
return unlikely(err) ? ERR_PTR(err) : NULL;
submit_bio(bio);
return err ? ERR_PTR(err) : NULL;
}
/*
@@ -335,7 +255,7 @@ static int erofs_raw_access_readpage(struct file *file, struct page *page)
trace_erofs_readpage(page, true);
bio = erofs_read_raw_page(NULL, page->mapping,
page, &last_block, 1, false);
page, &last_block, 1, false);
if (IS_ERR(bio))
return PTR_ERR(bio);
@@ -345,8 +265,9 @@ static int erofs_raw_access_readpage(struct file *file, struct page *page)
}
static int erofs_raw_access_readpages(struct file *filp,
struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
struct address_space *mapping,
struct list_head *pages,
unsigned int nr_pages)
{
erofs_off_t last_block;
struct bio *bio = NULL;
@@ -363,13 +284,13 @@ static int erofs_raw_access_readpages(struct file *filp,
if (!add_to_page_cache_lru(page, mapping, page->index, gfp)) {
bio = erofs_read_raw_page(bio, mapping, page,
&last_block, nr_pages, true);
&last_block, nr_pages, true);
/* all the page errors are ignored when readahead */
if (IS_ERR(bio)) {
pr_err("%s, readahead error at page %lu of nid %llu\n",
__func__, page->index,
EROFS_V(mapping->host)->nid);
__func__, page->index,
EROFS_I(mapping->host)->nid);
bio = NULL;
}
@@ -381,8 +302,28 @@ static int erofs_raw_access_readpages(struct file *filp,
DBG_BUGON(!list_empty(pages));
/* the rare case (end in gaps) */
if (unlikely(bio != NULL))
__submit_bio(bio, REQ_OP_READ, 0);
if (bio)
submit_bio(bio);
return 0;
}
/*
 * ->bmap() for uncompressed inodes: translate logical @block of the file
 * into a block number on the device.
 *
 * Returns 0 when the flat-mode lookup fails, or when the inode uses the
 * FLAT_INLINE layout and the shifted @block is at or beyond the number of
 * whole blocks in i_size.
 */
static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *const host = mapping->host;
	struct erofs_map_blocks map = { .m_la = blknr_to_addr(block) };
	const bool tail_inline =
		EROFS_I(host)->datalayout == EROFS_INODE_FLAT_INLINE;

	if (tail_inline) {
		const erofs_blk_t whole_blks =
			i_size_read(host) >> LOG_BLOCK_SIZE;

		if ((block >> LOG_SECTORS_PER_BLOCK) >= whole_blks)
			return 0;
	}

	if (erofs_map_blocks_flatmode(host, &map, EROFS_GET_BLOCKS_RAW))
		return 0;

	return erofs_blknr(map.m_pa);
}
@@ -390,5 +331,5 @@ static int erofs_raw_access_readpages(struct file *filp,
const struct address_space_operations erofs_raw_access_aops = {
.readpage = erofs_raw_access_readpage,
.readpages = erofs_raw_access_readpages,
.bmap = erofs_bmap,
};

407
fs/erofs/decompressor.c Normal file
View File

@@ -0,0 +1,407 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2019 HUAWEI, Inc.
* https://www.huawei.com/
*/
#include "compress.h"
#include <linux/module.h>
#include <linux/lz4.h>
#ifndef LZ4_DISTANCE_MAX /* history window size */
#define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */
#endif
#define LZ4_MAX_DISTANCE_PAGES (DIV_ROUND_UP(LZ4_DISTANCE_MAX, PAGE_SIZE) + 1)
#ifndef LZ4_DECOMPRESS_INPLACE_MARGIN
#define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize) (((srcsize) >> 8) + 32)
#endif
/* Operations table describing one decompression algorithm. */
struct z_erofs_decompressor {
/*
 * If destpages contain sparse (missing) pages, fill them with bounce pages.
 * It also checks whether destpages form contiguous memory; the LZ4
 * implementation returns 1 in that case and 0 otherwise.
 */
int (*prepare_destpages)(struct z_erofs_decompress_req *rq,
struct list_head *pagepool);
/* decompress rq's input into the buffer starting at @out */
int (*decompress)(struct z_erofs_decompress_req *rq, u8 *out);
/* short algorithm name */
char *name;
};
int z_erofs_load_lz4_config(struct super_block *sb,
struct erofs_super_block *dsb,
struct z_erofs_lz4_cfgs *lz4, int size)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
u16 distance;
if (lz4) {
if (size < sizeof(struct z_erofs_lz4_cfgs)) {
erofs_err(sb, "invalid lz4 cfgs, size=%u", size);
return -EINVAL;
}
distance = le16_to_cpu(lz4->max_distance);
sbi->lz4.max_pclusterblks = le16_to_cpu(lz4->max_pclusterblks);
if (!sbi->lz4.max_pclusterblks) {
sbi->lz4.max_pclusterblks = 1; /* reserved case */
} else if (sbi->lz4.max_pclusterblks >
Z_EROFS_PCLUSTER_MAX_SIZE / EROFS_BLKSIZ) {
erofs_err(sb, "too large lz4 pclusterblks %u",
sbi->lz4.max_pclusterblks);
return -EINVAL;
} else if (sbi->lz4.max_pclusterblks >= 2) {
erofs_info(sb, "EXPERIMENTAL big pcluster feature in use. Use at your own risk!");
}
} else {
distance = le16_to_cpu(dsb->u1.lz4_max_distance);
sbi->lz4.max_pclusterblks = 1;
}
sbi->lz4.max_distance_pages = distance ?
DIV_ROUND_UP(distance, PAGE_SIZE) + 1 :
LZ4_MAX_DISTANCE_PAGES;
return erofs_pcpubuf_growsize(sbi->lz4.max_pclusterblks);
}
/*
 * Make sure every slot of rq->out holds a page before LZ4 decompression.
 *
 * Missing (NULL) output pages are replaced by bounce pages.  A bounce page
 * used lz4_max_distance_pages slots earlier is recycled (with an extra
 * reference) when available; otherwise a new page is allocated and tagged
 * Z_EROFS_SHORTLIVED_PAGE.
 *
 * Returns 1 if all output pages were present and their page_address()
 * values are consecutive, so the output can be addressed directly from the
 * first page; returns 0 otherwise.
 */
static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
struct list_head *pagepool)
{
const unsigned int nr =
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
/* stack of bounce pages that may be reused */
struct page *availables[LZ4_MAX_DISTANCE_PAGES] = { NULL };
/* bit j set: the most recent slot with (index % window) == j was bounced */
unsigned long bounced[DIV_ROUND_UP(LZ4_MAX_DISTANCE_PAGES,
BITS_PER_LONG)] = { 0 };
unsigned int lz4_max_distance_pages =
EROFS_SB(rq->sb)->lz4.max_distance_pages;
/* end address of the contiguous run so far, or NULL once broken */
void *kaddr = NULL;
unsigned int i, j, top;
top = 0;
/* j is i modulo lz4_max_distance_pages */
for (i = j = 0; i < nr; ++i, ++j) {
struct page *const page = rq->out[i];
struct page *victim;
if (j >= lz4_max_distance_pages)
j = 0;
/* 'valid' bounced can only be tested after a complete round */
if (test_bit(j, bounced)) {
DBG_BUGON(i < lz4_max_distance_pages);
DBG_BUGON(top >= lz4_max_distance_pages);
availables[top++] = rq->out[i - lz4_max_distance_pages];
}
if (page) {
__clear_bit(j, bounced);
if (kaddr) {
if (kaddr + PAGE_SIZE == page_address(page))
kaddr += PAGE_SIZE;
else
kaddr = NULL;
} else if (!i) {
/* contiguity tracking can only start at the first page */
kaddr = page_address(page);
}
continue;
}
/* a hole: the output is not directly addressable any more */
kaddr = NULL;
__set_bit(j, bounced);
if (top) {
victim = availables[--top];
get_page(victim);
} else {
victim = erofs_allocpage(pagepool,
GFP_KERNEL | __GFP_NOFAIL);
set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE);
}
rq->out[i] = victim;
}
return kaddr ? 1 : 0;
}
/*
 * Pick the address from which the compressed data will be read.
 *
 * @inpage is the caller's kmap_atomic() mapping of the first input page and
 * @inputmargin the offset of the data within it.  The result is reported
 * through *@maptype:
 *   0 - single input page: @inpage itself is returned;
 *   1 - several input pages mapped with erofs_vm_map_ram() (@inpage is
 *       unmapped first);
 *   2 - data copied into a per-CPU buffer (@inpage is unmapped and
 *       *@inputmargin is reset to 0).
 *
 * The copy (2) is chosen for in-place I/O when decoding is partial, when
 * 0padding is unsupported, when the slack after the output end is smaller
 * than LZ4_DECOMPRESS_INPLACE_MARGIN(), or when an input page also appears
 * too early among the output pages.
 *
 * Returns the source address, or an ERR_PTR() on failure; @inpage is already
 * unmapped in that case.
 */
static void *z_erofs_handle_inplace_io(struct z_erofs_decompress_req *rq,
void *inpage, unsigned int *inputmargin, int *maptype,
bool support_0padding)
{
unsigned int nrpages_in, nrpages_out;
unsigned int ofull, oend, inputsize, total, i, j;
struct page **in;
void *src, *tmp;
inputsize = rq->inputsize;
nrpages_in = PAGE_ALIGN(inputsize) >> PAGE_SHIFT;
oend = rq->pageofs_out + rq->outputsize;
ofull = PAGE_ALIGN(oend);
nrpages_out = ofull >> PAGE_SHIFT;
if (rq->inplace_io) {
if (rq->partial_decoding || !support_0padding ||
ofull - oend < LZ4_DECOMPRESS_INPLACE_MARGIN(inputsize))
goto docopy;
/* input page i must not be an output page before slot nrpages_out - nrpages_in + i */
for (i = 0; i < nrpages_in; ++i) {
DBG_BUGON(rq->in[i] == NULL);
for (j = 0; j < nrpages_out - nrpages_in + i; ++j)
if (rq->out[j] == rq->in[i])
goto docopy;
}
}
if (nrpages_in <= 1) {
*maptype = 0;
return inpage;
}
/* the atomic mapping must be dropped before a call that may sleep */
kunmap_atomic(inpage);
might_sleep();
src = erofs_vm_map_ram(rq->in, nrpages_in);
if (!src)
return ERR_PTR(-ENOMEM);
*maptype = 1;
return src;
docopy:
/* Or copy compressed data which can be overlapped to per-CPU buffer */
in = rq->in;
src = erofs_get_pcpubuf(nrpages_in);
if (!src) {
DBG_BUGON(1);
kunmap_atomic(inpage);
return ERR_PTR(-EFAULT);
}
tmp = src;
total = rq->inputsize;
while (total) {
unsigned int page_copycnt =
min_t(unsigned int, total, PAGE_SIZE - *inputmargin);
/* the first page is already mapped by the caller */
if (!inpage)
inpage = kmap_atomic(*in);
memcpy(tmp, inpage + *inputmargin, page_copycnt);
kunmap_atomic(inpage);
inpage = NULL;
tmp += page_copycnt;
total -= page_copycnt;
++in;
/* only the first page starts at a non-zero margin */
*inputmargin = 0;
}
*maptype = 2;
return src;
}
/*
 * LZ4-decompress the input pages of @rq into the buffer at @out.
 *
 * With the lz4 0padding feature, leading zero bytes of the first input page
 * are skipped; rq->inputsize is reduced by that amount (the request is
 * modified).
 *
 * Returns rq->outputsize (a non-negative byte count, not 0) on success.
 * Returns -EIO if the input is all padding or if the decoder did not produce
 * exactly rq->outputsize bytes; in the latter case the undecoded tail of
 * @out is zero-filled when the decoder returned a non-negative count.
 * Errors from z_erofs_handle_inplace_io() are passed through.
 */
static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
{
unsigned int inputmargin;
u8 *headpage, *src;
bool support_0padding;
int ret, maptype;
DBG_BUGON(*rq->in == NULL);
headpage = kmap_atomic(*rq->in);
inputmargin = 0;
support_0padding = false;
/* decompression inplace is only safe when 0padding is enabled */
if (erofs_sb_has_lz4_0padding(EROFS_SB(rq->sb))) {
support_0padding = true;
/* skip zero bytes; the scan stops at the end of the first page */
while (!headpage[inputmargin & ~PAGE_MASK])
if (!(++inputmargin & ~PAGE_MASK))
break;
if (inputmargin >= rq->inputsize) {
kunmap_atomic(headpage);
return -EIO;
}
}
rq->inputsize -= inputmargin;
/* on error the head page has already been unmapped by the helper */
src = z_erofs_handle_inplace_io(rq, headpage, &inputmargin, &maptype,
support_0padding);
if (IS_ERR(src))
return PTR_ERR(src);
/* legacy format could compress extra data in a pcluster. */
if (rq->partial_decoding || !support_0padding)
ret = LZ4_decompress_safe_partial(src + inputmargin, out,
rq->inputsize, rq->outputsize, rq->outputsize);
else
ret = LZ4_decompress_safe(src + inputmargin, out,
rq->inputsize, rq->outputsize);
if (ret != rq->outputsize) {
erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]",
ret, rq->inputsize, inputmargin, rq->outputsize);
print_hex_dump(KERN_DEBUG, "[ in]: ", DUMP_PREFIX_OFFSET,
16, 1, src + inputmargin, rq->inputsize, true);
print_hex_dump(KERN_DEBUG, "[out]: ", DUMP_PREFIX_OFFSET,
16, 1, out, rq->outputsize, true);
if (ret >= 0)
memset(out + ret, 0, rq->outputsize - ret);
ret = -EIO;
}
/* undo whichever source mapping z_erofs_handle_inplace_io() selected */
if (maptype == 0) {
kunmap_atomic(src);
} else if (maptype == 1) {
vm_unmap_ram(src, PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT);
} else if (maptype == 2) {
erofs_put_pcpubuf(src);
} else {
DBG_BUGON(1);
return -EFAULT;
}
return ret;
}
/*
 * Decompressor table indexed by algorithm id (rq->alg).  Only the LZ4 entry
 * provides callbacks; the "shifted" entry carries just a name.
 */
static struct z_erofs_decompressor decompressors[] = {
[Z_EROFS_COMPRESSION_SHIFTED] = {
.name = "shifted"
},
[Z_EROFS_COMPRESSION_LZ4] = {
.prepare_destpages = z_erofs_lz4_prepare_destpages,
.decompress = z_erofs_lz4_decompress,
.name = "lz4"
},
};
/*
 * Scatter @outputsize bytes of decompressed data from the linear buffer @dst
 * into the output page array @out.
 *
 * @out:          output pages; a NULL entry means that page is not wanted and
 *                the corresponding part of the data is skipped
 * @dst:          start of the decompressed data
 * @pageofs_out:  offset inside the first output page where the data begins
 * @outputsize:   number of valid bytes at @dst
 *
 * `cur` tracks which part of @dst corresponds to the start of the current
 * output page; for the first page it lies @pageofs_out bytes before @dst,
 * since the data begins in the middle of that page.
 */
static void copy_from_pcpubuf(struct page **out, const char *dst,
			      unsigned short pageofs_out,
			      unsigned int outputsize)
{
	const char *end = dst + outputsize;
	const unsigned int righthalf = PAGE_SIZE - pageofs_out;
	const char *cur = dst - pageofs_out;

	while (cur < end) {
		struct page *const page = *out++;

		if (page) {
			char *buf = kmap_atomic(page);

			if (cur >= dst) {
				/* page-aligned part: up to a full page */
				memcpy(buf, cur, min_t(uint, PAGE_SIZE,
						       end - cur));
			} else {
				/*
				 * First (partial) page: the data starts at
				 * @dst, so at most `end - dst` bytes are
				 * valid.  Bounding the copy by `end - cur`
				 * would count the pageofs_out bytes in front
				 * of @dst as payload and copy stale buffer
				 * contents past the decompressed data.
				 */
				memcpy(buf + pageofs_out, dst,
				       min_t(uint, righthalf, end - dst));
			}
			kunmap_atomic(buf);
		}
		cur += PAGE_SIZE;
	}
}
/*
 * Decompress @rq with the algorithm selected by rq->alg.
 *
 * Picks how the destination is made linearly addressable (single page,
 * per-CPU bounce buffer, directly addressable pages or a vmap area), calls
 * the algorithm's ->decompress() and tears the mapping down again.
 * @pagepool is handed to ->prepare_destpages().
 *
 * Returns the value of ->decompress(), or a negative errno if the
 * destination could not be set up.
 */
static int z_erofs_decompress_generic(struct z_erofs_decompress_req *rq,
struct list_head *pagepool)
{
/* number of output pages touched by [pageofs_out, pageofs_out + outputsize) */
const unsigned int nrpages_out =
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
const struct z_erofs_decompressor *alg = decompressors + rq->alg;
/* 0: kmap_atomic()ed page, 1: page_address() (nothing to undo), 2: vm_map_ram */
unsigned int dst_maptype;
void *dst;
int ret;
/* two optimized fast paths only for non bigpcluster cases yet */
if (rq->inputsize <= PAGE_SIZE) {
/* fast path 1: everything lands in one page which is not the input page */
if (nrpages_out == 1 && !rq->inplace_io) {
DBG_BUGON(!*rq->out);
dst = kmap_atomic(*rq->out);
dst_maptype = 0;
goto dstmap_out;
}
/*
* For the case of small output size (especially much less
* than PAGE_SIZE), memcpy the decompressed data rather than
* compressed data is preferred.
*/
if (rq->outputsize <= PAGE_SIZE * 7 / 8) {
/* fast path 2: decompress into a per-CPU buffer, then copy out */
dst = erofs_get_pcpubuf(1);
if (IS_ERR(dst))
return PTR_ERR(dst);
/* the output no longer aliases the input pages */
rq->inplace_io = false;
ret = alg->decompress(rq, dst);
/* copy only when ->decompress() returned 0 */
if (!ret)
copy_from_pcpubuf(rq->out, dst, rq->pageofs_out,
rq->outputsize);
erofs_put_pcpubuf(dst);
return ret;
}
}
/* general decoding path which can be used for all cases */
ret = alg->prepare_destpages(rq, pagepool);
if (ret < 0)
return ret;
/*
 * A positive return selects direct addressing through the first output
 * page; presumably the callback reports that the pages are contiguous in
 * the kernel address space -- confirm against ->prepare_destpages().
 */
if (ret) {
dst = page_address(*rq->out);
dst_maptype = 1;
goto dstmap_out;
}
/* otherwise build a virtually contiguous mapping of all output pages */
dst = erofs_vm_map_ram(rq->out, nrpages_out);
if (!dst)
return -ENOMEM;
dst_maptype = 2;
dstmap_out:
ret = alg->decompress(rq, dst + rq->pageofs_out);
/* undo whichever mapping was established above (type 1 needs nothing) */
if (!dst_maptype)
kunmap_atomic(dst);
else if (dst_maptype == 2)
vm_unmap_ram(dst, nrpages_out);
return ret;
}
/*
 * Handle an uncompressed ("shifted") cluster: the single input page holds
 * plain data which has to appear at offset rq->pageofs_out of the output,
 * so it is split over at most two output pages.
 *
 * Returns 0 on success or -EIO if the request spans more than two pages.
 * @pagepool is unused.
 */
static int z_erofs_shifted_transform(const struct z_erofs_decompress_req *rq,
				     struct list_head *pagepool)
{
	const unsigned int nrpages_out =
		PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
	/* bytes that fit into the first output page after pageofs_out */
	const unsigned int head_len = PAGE_SIZE - rq->pageofs_out;
	struct page *const inpage = *rq->in;
	unsigned char *from, *to;

	if (nrpages_out > 2) {
		DBG_BUGON(1);
		return -EIO;
	}

	/* the data already sits in its final page: nothing to move */
	if (rq->out[0] == inpage) {
		DBG_BUGON(nrpages_out != 1);
		return 0;
	}

	from = kmap_atomic(inpage);

	/* first output page (may be absent): receives the head of the data */
	if (rq->out[0]) {
		to = kmap_atomic(rq->out[0]);
		memcpy(to + rq->pageofs_out, from, head_len);
		kunmap_atomic(to);
	}

	/* second output page: receives the remaining pageofs_out bytes */
	if (nrpages_out == 2) {
		DBG_BUGON(!rq->out[1]);

		if (rq->out[1] != inpage) {
			to = kmap_atomic(rq->out[1]);
			memcpy(to, from + head_len, rq->pageofs_out);
			kunmap_atomic(to);
		} else {
			/* in-place: source and destination overlap */
			memmove(from, from + head_len, rq->pageofs_out);
		}
	}

	kunmap_atomic(from);
	return 0;
}
int z_erofs_decompress(struct z_erofs_decompress_req *rq,
struct list_head *pagepool)
{
if (rq->alg == Z_EROFS_COMPRESSION_SHIFTED)
return z_erofs_shifted_transform(rq, pagepool);
return z_erofs_decompress_generic(rq, pagepool);
}

View File

@@ -1,14 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/drivers/staging/erofs/dir.c
*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* http://www.huawei.com/
* Created by Gao Xiang <gaoxiang25@huawei.com>
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of the Linux
* distribution for more details.
* https://www.huawei.com/
*/
#include "internal.h"
@@ -33,19 +26,18 @@ static void debug_one_dentry(unsigned char d_type, const char *de_name,
memcpy(dbg_namebuf, de_name, de_namelen);
dbg_namebuf[de_namelen] = '\0';
debugln("found dirent %s de_len %u d_type %d", dbg_namebuf,
de_namelen, d_type);
erofs_dbg("found dirent %s de_len %u d_type %d", dbg_namebuf,
de_namelen, d_type);
#endif
}
static int erofs_fill_dentries(struct dir_context *ctx,
void *dentry_blk, unsigned *ofs,
unsigned nameoff, unsigned maxsize)
static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx,
void *dentry_blk, unsigned int *ofs,
unsigned int nameoff, unsigned int maxsize)
{
struct erofs_dirent *de = dentry_blk;
struct erofs_dirent *de = dentry_blk + *ofs;
const struct erofs_dirent *end = dentry_blk + nameoff;
de = dentry_blk + *ofs;
while (de < end) {
const char *de_name;
unsigned int de_namelen;
@@ -66,16 +58,18 @@ static int erofs_fill_dentries(struct dir_context *ctx,
de_namelen = le16_to_cpu(de[1].nameoff) - nameoff;
/* a corrupted entry is found */
if (unlikely(nameoff + de_namelen > maxsize ||
de_namelen > EROFS_NAME_LEN)) {
if (nameoff + de_namelen > maxsize ||
de_namelen > EROFS_NAME_LEN) {
erofs_err(dir->i_sb, "bogus dirent @ nid %llu",
EROFS_I(dir)->nid);
DBG_BUGON(1);
return -EIO;
return -EFSCORRUPTED;
}
debug_one_dentry(d_type, de_name, de_namelen);
if (!dir_emit(ctx, de_name, de_namelen,
le64_to_cpu(de->nid), d_type))
/* stoped by some reason */
le64_to_cpu(de->nid), d_type))
/* stopped by some reason */
return 1;
++de;
*ofs += sizeof(struct erofs_dirent);
@@ -89,62 +83,63 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
struct inode *dir = file_inode(f);
struct address_space *mapping = dir->i_mapping;
const size_t dirsize = i_size_read(dir);
unsigned i = ctx->pos / EROFS_BLKSIZ;
unsigned ofs = ctx->pos % EROFS_BLKSIZ;
unsigned int i = ctx->pos / EROFS_BLKSIZ;
unsigned int ofs = ctx->pos % EROFS_BLKSIZ;
int err = 0;
bool initial = true;
while (ctx->pos < dirsize) {
struct page *dentry_page;
struct erofs_dirent *de;
unsigned nameoff, maxsize;
unsigned int nameoff, maxsize;
dentry_page = read_mapping_page(mapping, i, NULL);
if (dentry_page == ERR_PTR(-ENOMEM)) {
err = -ENOMEM;
break;
} else if (IS_ERR(dentry_page)) {
errln("fail to readdir of logical block %u of nid %llu",
i, EROFS_V(dir)->nid);
err = PTR_ERR(dentry_page);
erofs_err(dir->i_sb,
"fail to readdir of logical block %u of nid %llu",
i, EROFS_I(dir)->nid);
err = -EFSCORRUPTED;
break;
}
lock_page(dentry_page);
de = (struct erofs_dirent *)kmap(dentry_page);
nameoff = le16_to_cpu(de->nameoff);
if (unlikely(nameoff < sizeof(struct erofs_dirent) ||
nameoff >= PAGE_SIZE)) {
errln("%s, invalid de[0].nameoff %u",
__func__, nameoff);
err = -EIO;
if (nameoff < sizeof(struct erofs_dirent) ||
nameoff >= PAGE_SIZE) {
erofs_err(dir->i_sb,
"invalid de[0].nameoff %u @ nid %llu",
nameoff, EROFS_I(dir)->nid);
err = -EFSCORRUPTED;
goto skip_this;
}
maxsize = min_t(unsigned, dirsize - ctx->pos + ofs, PAGE_SIZE);
maxsize = min_t(unsigned int,
dirsize - ctx->pos + ofs, PAGE_SIZE);
/* search dirents at the arbitrary position */
if (unlikely(initial)) {
if (initial) {
initial = false;
ofs = roundup(ofs, sizeof(struct erofs_dirent));
if (unlikely(ofs >= nameoff))
if (ofs >= nameoff)
goto skip_this;
}
err = erofs_fill_dentries(ctx, de, &ofs, nameoff, maxsize);
err = erofs_fill_dentries(dir, ctx, de, &ofs,
nameoff, maxsize);
skip_this:
kunmap(dentry_page);
unlock_page(dentry_page);
put_page(dentry_page);
ctx->pos = blknr_to_addr(i) + ofs;
if (unlikely(err))
if (err)
break;
++i;
ofs = 0;
@@ -155,6 +150,5 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
const struct file_operations erofs_dir_fops = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
.iterate = erofs_readdir,
.iterate_shared = erofs_readdir,
};

357
fs/erofs/erofs_fs.h Normal file
View File

@@ -0,0 +1,357 @@
/* SPDX-License-Identifier: GPL-2.0-only OR Apache-2.0 */
/*
* EROFS (Enhanced ROM File System) on-disk format definition
*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* https://www.huawei.com/
*/
#ifndef __EROFS_FS_H
#define __EROFS_FS_H
#define EROFS_SUPER_MAGIC_V1 0xE0F5E1E2
#define EROFS_SUPER_OFFSET 1024
#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001
/*
* Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should
* be incompatible with this kernel version.
*/
#define EROFS_FEATURE_INCOMPAT_LZ4_0PADDING 0x00000001
#define EROFS_FEATURE_INCOMPAT_COMPR_CFGS 0x00000002
#define EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER 0x00000002
#define EROFS_ALL_FEATURE_INCOMPAT \
(EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \
EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER)
#define EROFS_SB_EXTSLOT_SIZE 16
/* erofs on-disk super block (currently 128 bytes) */
/*
 * On-disk superblock.  All multi-byte fields are little-endian and the
 * layout is fixed: the field sizes below add up to exactly 128 bytes, which
 * erofs_check_ondisk_layout_definitions() verifies at build time.
 */
struct erofs_super_block {
__le32 magic; /* file system magic number */
__le32 checksum; /* crc32c(super_block) */
__le32 feature_compat; /* EROFS_FEATURE_COMPAT_* bits */
__u8 blkszbits; /* support block_size == PAGE_SIZE only */
__u8 sb_extslots; /* superblock size = 128 + sb_extslots * 16 */
__le16 root_nid; /* nid of root directory */
__le64 inos; /* total valid ino # (== f_files - f_favail) */
__le64 build_time; /* inode v1 time derivation */
__le32 build_time_nsec; /* inode v1 time derivation in nano scale */
__le32 blocks; /* used for statfs */
__le32 meta_blkaddr; /* start block address of metadata area */
__le32 xattr_blkaddr; /* start block address of shared xattr area */
__u8 uuid[16]; /* 128-bit uuid for volume */
__u8 volume_name[16]; /* volume name */
__le32 feature_incompat; /* EROFS_FEATURE_INCOMPAT_* bits */
/* the two members share the same 16 bits; only one meaning applies */
union {
/* bitmap for available compression algorithms */
__le16 available_compr_algs;
/* customized sliding window size instead of 64k by default */
__le16 lz4_max_distance;
} __packed u1;
__u8 reserved2[42]; /* pads the structure to 128 bytes */
};
/*
* erofs inode datalayout (i_format in on-disk inode):
* 0 - inode plain without inline data A:
* inode, [xattrs], ... | ... | no-holed data
* 1 - inode VLE compression B (legacy):
* inode, [xattrs], extents ... | ...
* 2 - inode plain with inline data C:
* inode, [xattrs], last_inline_data, ... | ... | no-holed data
* 3 - inode compression D:
* inode, [xattrs], map_header, extents ... | ...
* 4~7 - reserved
*/
/* data layout ids as stored in the datalayout bits of i_format */
enum {
	EROFS_INODE_FLAT_PLAIN			= 0,
	EROFS_INODE_FLAT_COMPRESSION_LEGACY	= 1,
	EROFS_INODE_FLAT_INLINE			= 2,
	EROFS_INODE_FLAT_COMPRESSION		= 3,
	EROFS_INODE_DATALAYOUT_MAX
};

/* Return true iff @datamode is one of the two compressed data layouts. */
static inline bool erofs_inode_is_data_compressed(unsigned int datamode)
{
	switch (datamode) {
	case EROFS_INODE_FLAT_COMPRESSION_LEGACY:
	case EROFS_INODE_FLAT_COMPRESSION:
		return true;
	default:
		return false;
	}
}
/* bit definitions of the on-disk inode i_format field */
#define EROFS_I_VERSION_BITS 1
#define EROFS_I_DATALAYOUT_BITS 3
#define EROFS_I_VERSION_BIT 0
#define EROFS_I_DATALAYOUT_BIT 1
#define EROFS_I_ALL \
((1 << (EROFS_I_DATALAYOUT_BIT + EROFS_I_DATALAYOUT_BITS)) - 1)
/* 32-byte reduced form of an ondisk inode */
/*
 * Compact on-disk inode.  Compared with the extended form it has 32-bit
 * size, 16-bit uid/gid/nlink and no timestamp of its own; the field sizes
 * add up to 32 bytes (checked at build time).
 */
struct erofs_inode_compact {
__le16 i_format; /* inode format hints */
/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */
__le16 i_xattr_icount;
__le16 i_mode;
__le16 i_nlink;
__le32 i_size;
__le32 i_reserved;
/* interpretation depends on the file type and data layout */
union {
/* file total compressed blocks for data mapping 1 */
__le32 compressed_blocks;
__le32 raw_blkaddr;
/* for device files, used to indicate old/new device # */
__le32 rdev;
} i_u;
__le32 i_ino; /* only used for 32-bit stat compatibility */
__le16 i_uid;
__le16 i_gid;
__le32 i_reserved2;
};
/* 32 bytes on-disk inode */
#define EROFS_INODE_LAYOUT_COMPACT 0
/* 64 bytes on-disk inode */
#define EROFS_INODE_LAYOUT_EXTENDED 1
/* 64-byte complete form of an ondisk inode */
/*
 * Extended on-disk inode: 64-bit size, 32-bit uid/gid/nlink and a per-inode
 * ctime.  The field sizes add up to 64 bytes (checked at build time).
 */
struct erofs_inode_extended {
__le16 i_format; /* inode format hints */
/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */
__le16 i_xattr_icount;
__le16 i_mode;
__le16 i_reserved;
__le64 i_size;
/* interpretation depends on the file type and data layout */
union {
/* file total compressed blocks for data mapping 1 */
__le32 compressed_blocks;
__le32 raw_blkaddr;
/* for device files, used to indicate old/new device # */
__le32 rdev;
} i_u;
/* only used for 32-bit stat compatibility */
__le32 i_ino;
__le32 i_uid;
__le32 i_gid;
__le64 i_ctime; /* seconds part of the inode timestamp */
__le32 i_ctime_nsec; /* nanoseconds part of the inode timestamp */
__le32 i_nlink;
__u8 i_reserved2[16];
};
#define EROFS_MAX_SHARED_XATTRS (128)
/* h_shared_count between 129 ... 255 are special # */
#define EROFS_SHARED_XATTR_EXTENT (255)
/*
* inline xattrs (n == i_xattr_icount):
* erofs_xattr_ibody_header(1) + (n - 1) * 4 bytes
* 12 bytes / \
* / \
* /-----------------------\
* | erofs_xattr_entries+ |
* +-----------------------+
* inline xattrs must start with an erofs_xattr_ibody_header;
* since the fs is read-only, there is no need to introduce h_refcount
*/
/*
 * Header of the inline xattr area of an inode (12 bytes, checked at build
 * time), immediately followed by h_shared_count shared xattr ids.
 */
struct erofs_xattr_ibody_header {
	__le32 h_reserved;
	__u8 h_shared_count;	/* number of entries in h_shared_xattrs[] */
	__u8 h_reserved2[7];
	/*
	 * C99 flexible array member instead of the deprecated GNU zero-length
	 * array; sizeof() of the structure is unchanged.
	 */
	__le32 h_shared_xattrs[];	/* shared xattr id array */
};
/* Name indexes */
#define EROFS_XATTR_INDEX_USER 1
#define EROFS_XATTR_INDEX_POSIX_ACL_ACCESS 2
#define EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT 3
#define EROFS_XATTR_INDEX_TRUSTED 4
#define EROFS_XATTR_INDEX_LUSTRE 5
#define EROFS_XATTR_INDEX_SECURITY 6
/* xattr entry (for both inline & shared xattrs) */
/*
 * Fixed 4-byte header of one xattr (checked at build time); the name bytes
 * and then the value bytes follow it directly.
 */
struct erofs_xattr_entry {
	__u8 e_name_len;	/* length of name */
	__u8 e_name_index;	/* attribute name index */
	__le16 e_value_size;	/* size of attribute value */
	/* followed by e_name and e_value */
	/*
	 * C99 flexible array member instead of the deprecated GNU zero-length
	 * array; sizeof() of the structure is unchanged.
	 */
	char e_name[];		/* attribute name */
};
/*
 * Size in bytes of the inline xattr area for the on-disk (little-endian)
 * count @i_xattr_icount: 0 if the count is zero, otherwise one ibody header
 * plus (count - 1) 4-byte units.
 */
static inline unsigned int erofs_xattr_ibody_size(__le16 i_xattr_icount)
{
	if (i_xattr_icount)
		return sizeof(struct erofs_xattr_ibody_header) +
			sizeof(__u32) * (le16_to_cpu(i_xattr_icount) - 1);

	/* zero is zero in either byte order: no inline xattr area */
	return 0;
}
#define EROFS_XATTR_ALIGN(size) round_up(size, sizeof(struct erofs_xattr_entry))
/*
 * Total on-disk size of xattr entry @e: header + name + value, rounded up
 * by EROFS_XATTR_ALIGN().
 */
static inline unsigned int erofs_xattr_entry_size(struct erofs_xattr_entry *e)
{
	const unsigned int payload = e->e_name_len +
				     le16_to_cpu(e->e_value_size);

	return EROFS_XATTR_ALIGN(sizeof(struct erofs_xattr_entry) + payload);
}
/* maximum supported size of a physical compression cluster */
#define Z_EROFS_PCLUSTER_MAX_SIZE (1024 * 1024)
/* available compression algorithm types (for h_algorithmtype) */
enum {
	Z_EROFS_COMPRESSION_LZ4	= 0,
	Z_EROFS_COMPRESSION_MAX
};
/*
 * Mask with one bit set for every algorithm id below Z_EROFS_COMPRESSION_MAX
 * (bit N <=> algorithm N).  The previous form, 1 << (MAX - 1), yields only
 * the bit of the highest id; the two agree solely while MAX == 1, so the
 * value is unchanged today but stays correct when algorithms are added.
 */
#define Z_EROFS_ALL_COMPR_ALGS		((1 << Z_EROFS_COMPRESSION_MAX) - 1)
/* 14 bytes (+ length field = 16 bytes) */
/* on-disk LZ4 configuration record: 2 + 2 + 10 = 14 bytes, little-endian */
struct z_erofs_lz4_cfgs {
/* presumably the LZ4 sliding window size (cf. lz4_max_distance) -- confirm */
__le16 max_distance;
/* presumably the largest pcluster size in blocks -- confirm */
__le16 max_pclusterblks;
u8 reserved[10];
} __packed;
/*
* bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
* e.g. for 4k logical cluster size, 4B if compacted 2B is off;
* (4B) + 2B + (4B) if compacted 2B is on.
* bit 1 : HEAD1 big pcluster (0 - off; 1 - on)
* bit 2 : HEAD2 big pcluster (0 - off; 1 - on)
*/
#define Z_EROFS_ADVISE_COMPACTED_2B 0x0001
#define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002
#define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004
/* on-disk header of a compressed inode's mapping area (8 bytes) */
struct z_erofs_map_header {
__le32 h_reserved1;
__le16 h_advise; /* Z_EROFS_ADVISE_* bits */
/*
* bit 0-3 : algorithm type of head 1 (logical cluster type 01);
* bit 4-7 : algorithm type of head 2 (logical cluster type 11).
*/
__u8 h_algorithmtype;
/*
* bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096;
* bit 3-7 : reserved.
*/
__u8 h_clusterbits;
};
#define Z_EROFS_VLE_LEGACY_HEADER_PADDING 8
/*
* Fixed-sized output compression ondisk Logical Extent cluster type:
* 0 - literal (uncompressed) cluster
* 1 - compressed cluster (for the head logical cluster)
* 2 - compressed cluster (for the other logical clusters)
*
* In detail,
* 0 - literal (uncompressed) cluster,
* di_advise = 0
* di_clusterofs = the literal data offset of the cluster
* di_blkaddr = the blkaddr of the literal cluster
*
* 1 - compressed cluster (for the head logical cluster)
* di_advise = 1
* di_clusterofs = the decompressed data offset of the cluster
* di_blkaddr = the blkaddr of the compressed cluster
*
* 2 - compressed cluster (for the other logical clusters)
* di_advise = 2
* di_clusterofs =
* the decompressed data offset in its own head cluster
* di_u.delta[0] = distance to its corresponding head cluster
* di_u.delta[1] = distance to its corresponding tail cluster
* (di_advise could be 0, 1 or 2)
*/
/*
 * Logical cluster types; the value is stored in a
 * Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS wide field, so at most four values
 * (0..3) are representable.
 */
enum {
Z_EROFS_VLE_CLUSTER_TYPE_PLAIN = 0, /* literal (uncompressed) cluster */
Z_EROFS_VLE_CLUSTER_TYPE_HEAD = 1, /* head logical cluster of a compressed extent */
Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD = 2, /* any other logical cluster of the extent */
Z_EROFS_VLE_CLUSTER_TYPE_RESERVED = 3,
Z_EROFS_VLE_CLUSTER_TYPE_MAX
};
#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS 2
#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT 0
/*
* D0_CBLKCNT will be marked _only_ at the 1st non-head lcluster to store the
* compressed block count of a compressed extent (in logical clusters, aka.
* block count of a pcluster).
*/
#define Z_EROFS_VLE_DI_D0_CBLKCNT (1 << 11)
/* on-disk index of one logical cluster (8 bytes, little-endian) */
struct z_erofs_vle_decompressed_index {
__le16 di_advise;
/* where to decompress in the head cluster */
__le16 di_clusterofs;
/* which member is valid depends on the cluster type */
union {
/* for the head cluster */
__le32 blkaddr;
/*
* for the rest clusters
* (e.g. for 4k page-sized clusters, maximum 4K * 64k = 256M)
* [0] - pointing to the head cluster
* [1] - pointing to the tail cluster
*/
__le16 delta[2];
} di_u;
};
#define Z_EROFS_VLE_LEGACY_INDEX_ALIGN(size) \
(round_up(size, sizeof(struct z_erofs_vle_decompressed_index)) + \
sizeof(struct z_erofs_map_header) + Z_EROFS_VLE_LEGACY_HEADER_PADDING)
/*
 * On-disk directory entry (12 bytes, packed).  Dirents are sorted in
 * alphabetical order, thus lookups can use binary search.
 */
struct erofs_dirent {
__le64 nid; /* node number */
__le16 nameoff; /* start offset of file name */
__u8 file_type; /* file type */
__u8 reserved; /* reserved */
} __packed;
/* file types used in inode_info->flags */
/* file type ids, numbered consecutively from 0; EROFS_FT_MAX is the count */
enum {
EROFS_FT_UNKNOWN,
EROFS_FT_REG_FILE,
EROFS_FT_DIR,
EROFS_FT_CHRDEV,
EROFS_FT_BLKDEV,
EROFS_FT_FIFO,
EROFS_FT_SOCK,
EROFS_FT_SYMLINK,
EROFS_FT_MAX
};
#define EROFS_NAME_LEN 255
/* check the EROFS on-disk layout strictly at compile time */
/*
 * Never does anything at run time: every statement is a BUILD_BUG_ON() that
 * breaks the build if an on-disk structure no longer has its expected size.
 */
static inline void erofs_check_ondisk_layout_definitions(void)
{
BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128);
BUILD_BUG_ON(sizeof(struct erofs_inode_compact) != 32);
BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64);
BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12);
BUILD_BUG_ON(sizeof(struct erofs_xattr_entry) != 4);
BUILD_BUG_ON(sizeof(struct z_erofs_map_header) != 8);
BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8);
BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12);
/* the cluster type field must be wide enough for the largest type value */
BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) <
Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1);
}
#endif

374
fs/erofs/inode.c Normal file
View File

@@ -0,0 +1,374 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* https://www.huawei.com/
*/
#include "xattr.h"
#include <trace/events/erofs.h>
/*
* if inode is successfully read, return its inode page (or sometimes
* the inode payload page if it's an extended inode) in order to fill
* inline data if possible.
*/
/*
 * Read the on-disk inode of @inode (located through its nid) and fill in the
 * generic and EROFS-specific in-memory fields.
 *
 * On success the meta page that holds the end of the on-disk inode is
 * returned, and *@ofs is the offset in that page just past the on-disk
 * inode.  The page is handed back without being unlocked or released here;
 * the caller does both.  On failure an ERR_PTR() is returned and no page is
 * held.
 */
static struct page *erofs_read_inode(struct inode *inode,
unsigned int *ofs)
{
struct super_block *sb = inode->i_sb;
struct erofs_sb_info *sbi = EROFS_SB(sb);
struct erofs_inode *vi = EROFS_I(inode);
const erofs_off_t inode_loc = iloc(sbi, vi->nid);
erofs_blk_t blkaddr, nblks = 0;
struct page *page;
struct erofs_inode_compact *dic;
/* `copied` is only set when an extended inode straddles two pages */
struct erofs_inode_extended *die, *copied = NULL;
unsigned int ifmt;
int err;
blkaddr = erofs_blknr(inode_loc);
*ofs = erofs_blkoff(inode_loc);
erofs_dbg("%s, reading inode nid %llu at %u of blkaddr %u",
__func__, vi->nid, *ofs, blkaddr);
page = erofs_get_meta_page(sb, blkaddr);
if (IS_ERR(page)) {
erofs_err(sb, "failed to get inode (nid: %llu) page, err %ld",
vi->nid, PTR_ERR(page));
return page;
}
/* both inode forms start with i_format, so read it through the compact view */
dic = page_address(page) + *ofs;
ifmt = le16_to_cpu(dic->i_format);
/* reject any i_format bit outside the version and datalayout fields */
if (ifmt & ~EROFS_I_ALL) {
erofs_err(inode->i_sb, "unsupported i_format %u of nid %llu",
ifmt, vi->nid);
err = -EOPNOTSUPP;
goto err_out;
}
vi->datalayout = erofs_inode_datalayout(ifmt);
if (vi->datalayout >= EROFS_INODE_DATALAYOUT_MAX) {
erofs_err(inode->i_sb, "unsupported datalayout %u of nid %llu",
vi->datalayout, vi->nid);
err = -EOPNOTSUPP;
goto err_out;
}
switch (erofs_inode_version(ifmt)) {
case EROFS_INODE_LAYOUT_EXTENDED:
vi->inode_isize = sizeof(struct erofs_inode_extended);
/* check if the inode crosses a page boundary */
if (*ofs + vi->inode_isize <= PAGE_SIZE) {
*ofs += vi->inode_isize;
die = (struct erofs_inode_extended *)dic;
} else {
/* bytes of the inode available in the first page */
const unsigned int gotten = PAGE_SIZE - *ofs;
/* assemble the split inode in a temporary buffer */
copied = kmalloc(vi->inode_isize, GFP_NOFS);
if (!copied) {
err = -ENOMEM;
goto err_out;
}
memcpy(copied, dic, gotten);
/* done with the first page; continue in the following block */
unlock_page(page);
put_page(page);
page = erofs_get_meta_page(sb, blkaddr + 1);
if (IS_ERR(page)) {
erofs_err(sb, "failed to get inode payload page (nid: %llu), err %ld",
vi->nid, PTR_ERR(page));
kfree(copied);
return page;
}
/* *ofs becomes the end of the inode within the second page */
*ofs = vi->inode_isize - gotten;
memcpy((u8 *)copied + gotten, page_address(page), *ofs);
die = copied;
}
vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount);
inode->i_mode = le16_to_cpu(die->i_mode);
/* the i_u union is interpreted according to the file type */
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
case S_IFDIR:
case S_IFLNK:
vi->raw_blkaddr = le32_to_cpu(die->i_u.raw_blkaddr);
break;
case S_IFCHR:
case S_IFBLK:
inode->i_rdev =
new_decode_dev(le32_to_cpu(die->i_u.rdev));
break;
case S_IFIFO:
case S_IFSOCK:
inode->i_rdev = 0;
break;
default:
/* `copied` (if any) is freed on the shared error path */
goto bogusimode;
}
i_uid_write(inode, le32_to_cpu(die->i_uid));
i_gid_write(inode, le32_to_cpu(die->i_gid));
set_nlink(inode, le32_to_cpu(die->i_nlink));
/* extended inode has its own timestamp */
inode->i_ctime.tv_sec = le64_to_cpu(die->i_ctime);
inode->i_ctime.tv_nsec = le32_to_cpu(die->i_ctime_nsec);
inode->i_size = le64_to_cpu(die->i_size);
/* total blocks for compressed files */
if (erofs_inode_is_data_compressed(vi->datalayout))
nblks = le32_to_cpu(die->i_u.compressed_blocks);
/* no error path is reachable after this point, so freeing here is safe */
kfree(copied);
break;
case EROFS_INODE_LAYOUT_COMPACT:
vi->inode_isize = sizeof(struct erofs_inode_compact);
*ofs += vi->inode_isize;
vi->xattr_isize = erofs_xattr_ibody_size(dic->i_xattr_icount);
inode->i_mode = le16_to_cpu(dic->i_mode);
/* the i_u union is interpreted according to the file type */
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
case S_IFDIR:
case S_IFLNK:
vi->raw_blkaddr = le32_to_cpu(dic->i_u.raw_blkaddr);
break;
case S_IFCHR:
case S_IFBLK:
inode->i_rdev =
new_decode_dev(le32_to_cpu(dic->i_u.rdev));
break;
case S_IFIFO:
case S_IFSOCK:
inode->i_rdev = 0;
break;
default:
goto bogusimode;
}
i_uid_write(inode, le16_to_cpu(dic->i_uid));
i_gid_write(inode, le16_to_cpu(dic->i_gid));
set_nlink(inode, le16_to_cpu(dic->i_nlink));
/* use build time for compact inodes */
inode->i_ctime.tv_sec = sbi->build_time;
inode->i_ctime.tv_nsec = sbi->build_time_nsec;
inode->i_size = le32_to_cpu(dic->i_size);
if (erofs_inode_is_data_compressed(vi->datalayout))
nblks = le32_to_cpu(dic->i_u.compressed_blocks);
break;
default:
erofs_err(inode->i_sb,
"unsupported on-disk inode version %u of nid %llu",
erofs_inode_version(ifmt), vi->nid);
err = -EOPNOTSUPP;
goto err_out;
}
/* only one timestamp is stored: mtime and atime mirror ctime */
inode->i_mtime.tv_sec = inode->i_ctime.tv_sec;
inode->i_atime.tv_sec = inode->i_ctime.tv_sec;
inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec;
inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec;
/* nblks stays 0 unless the inode is compressed and records a block count */
if (!nblks)
/* measure inode.i_blocks as generic filesystems */
inode->i_blocks = roundup(inode->i_size, EROFS_BLKSIZ) >> 9;
else
inode->i_blocks = nblks << LOG_SECTORS_PER_BLOCK;
return page;
bogusimode:
erofs_err(inode->i_sb, "bogus i_mode (%o) @ nid %llu",
inode->i_mode, vi->nid);
err = -EFSCORRUPTED;
/* shared error exit: drop the temporary copy and the currently held page */
err_out:
DBG_BUGON(1);
kfree(copied);
unlock_page(page);
put_page(page);
return ERR_PTR(err);
}
static int erofs_fill_symlink(struct inode *inode, void *data,
unsigned int m_pofs)
{
struct erofs_inode *vi = EROFS_I(inode);
char *lnk;
/* if it cannot be handled with fast symlink scheme */
if (vi->datalayout != EROFS_INODE_FLAT_INLINE ||
inode->i_size >= PAGE_SIZE) {
inode->i_op = &erofs_symlink_iops;
return 0;
}
lnk = kmalloc(inode->i_size + 1, GFP_KERNEL);
if (!lnk)
return -ENOMEM;
m_pofs += vi->xattr_isize;
/* inline symlink data shouldn't cross page boundary as well */
if (m_pofs + inode->i_size > PAGE_SIZE) {
kfree(lnk);
erofs_err(inode->i_sb,
"inline data cross block boundary @ nid %llu",
vi->nid);
DBG_BUGON(1);
return -EFSCORRUPTED;
}
memcpy(lnk, data + m_pofs, inode->i_size);
lnk[inode->i_size] = '\0';
inode->i_link = lnk;
inode->i_op = &erofs_fast_symlink_iops;
return 0;
}
/*
 * Initialize a newly allocated in-memory inode from disk: read the on-disk
 * inode, then install the inode/file/address-space operations that match
 * its file type and data layout.
 *
 * @isdir is only passed on to the tracepoint.
 * Returns 0 on success or a negative errno.
 */
static int erofs_fill_inode(struct inode *inode, int isdir)
{
struct erofs_inode *vi = EROFS_I(inode);
struct page *page;
unsigned int ofs;
int err = 0;
trace_erofs_fill_inode(inode, isdir);
/* read inode base data from disk */
page = erofs_read_inode(inode, &ofs);
if (IS_ERR(page))
return PTR_ERR(page);
/* setup the new inode */
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
inode->i_op = &erofs_generic_iops;
inode->i_fop = &generic_ro_fops;
break;
case S_IFDIR:
inode->i_op = &erofs_dir_iops;
inode->i_fop = &erofs_dir_fops;
break;
case S_IFLNK:
/* chooses i_op itself and may copy an inline target out of the page */
err = erofs_fill_symlink(inode, page_address(page), ofs);
if (err)
goto out_unlock;
inode_nohighmem(inode);
break;
case S_IFCHR:
case S_IFBLK:
case S_IFIFO:
case S_IFSOCK:
inode->i_op = &erofs_generic_iops;
init_special_inode(inode, inode->i_mode, inode->i_rdev);
/* special files get no address-space operations from here */
goto out_unlock;
default:
err = -EFSCORRUPTED;
goto out_unlock;
}
/* compressed inodes are set up by the compression code instead */
if (erofs_inode_is_data_compressed(vi->datalayout)) {
err = z_erofs_fill_inode(inode);
goto out_unlock;
}
inode->i_mapping->a_ops = &erofs_raw_access_aops;
out_unlock:
/* release the meta page handed over by erofs_read_inode() */
unlock_page(page);
put_page(page);
return err;
}
/*
* erofs nid is 64bits, but i_ino is 'unsigned long', therefore
* we should do more for 32-bit platform to find the right inode.
*/
/*
 * Match callback handed to iget5_locked(): @opaque points to the wanted nid;
 * returns non-zero iff @inode carries exactly that nid.
 */
static int erofs_ilookup_test_actor(struct inode *inode, void *opaque)
{
	const erofs_nid_t *wanted = opaque;

	return EROFS_I(inode)->nid == *wanted;
}
/*
 * Init callback handed to iget5_locked(): derive i_ino from the nid that
 * @opaque points to.  Always returns 0.
 */
static int erofs_iget_set_actor(struct inode *inode, void *opaque)
{
	const erofs_nid_t *nid = opaque;

	inode->i_ino = erofs_inode_hash(*nid);
	return 0;
}
/*
 * Look up (or allocate) the inode for @nid via iget5_locked(), hashing by
 * erofs_inode_hash(nid) and comparing the full nid in the test callback.
 */
static inline struct inode *erofs_iget_locked(struct super_block *sb,
					      erofs_nid_t nid)
{
	return iget5_locked(sb, erofs_inode_hash(nid),
			    erofs_ilookup_test_actor,
			    erofs_iget_set_actor, &nid);
}
/*
 * Get the in-memory inode for @nid on @sb, reading it from disk if it is
 * not cached yet.  @isdir is forwarded to erofs_fill_inode().
 *
 * Returns the inode, ERR_PTR(-ENOMEM) if no inode could be obtained, or
 * ERR_PTR(err) if filling a new inode failed.
 */
struct inode *erofs_iget(struct super_block *sb,
			 erofs_nid_t nid,
			 bool isdir)
{
	struct inode *inode = erofs_iget_locked(sb, nid);
	int err;

	if (!inode)
		return ERR_PTR(-ENOMEM);

	/* found in the inode cache: nothing to initialize */
	if (!(inode->i_state & I_NEW))
		return inode;

	EROFS_I(inode)->nid = nid;

	err = erofs_fill_inode(inode, isdir);
	if (err) {
		iget_failed(inode);
		return ERR_PTR(err);
	}

	unlock_new_inode(inode);
	return inode;
}
/*
 * ->getattr(): report every inode as immutable, compressed inodes
 * additionally as compressed, then fill in the generic attributes.
 * @request_mask and @query_flags are not consulted.  Always returns 0.
 */
int erofs_getattr(const struct path *path, struct kstat *stat,
		  u32 request_mask, unsigned int query_flags)
{
	struct inode *const inode = d_inode(path->dentry);
	u64 attrs = STATX_ATTR_IMMUTABLE;

	if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout))
		attrs |= STATX_ATTR_COMPRESSED;

	stat->attributes |= attrs;
	stat->attributes_mask |= STATX_ATTR_COMPRESSED | STATX_ATTR_IMMUTABLE;

	generic_fillattr(inode, stat);
	return 0;
}
/* installed by erofs_fill_inode() for regular and special files */
const struct inode_operations erofs_generic_iops = {
.getattr = erofs_getattr,
.listxattr = erofs_listxattr,
.get_acl = erofs_get_acl,
};
/* symlinks that do not qualify for the fast scheme in erofs_fill_symlink() */
const struct inode_operations erofs_symlink_iops = {
.get_link = page_get_link,
.getattr = erofs_getattr,
.listxattr = erofs_listxattr,
.get_acl = erofs_get_acl,
};
/* fast symlinks: the target was copied into inode->i_link at fill time */
const struct inode_operations erofs_fast_symlink_iops = {
.get_link = simple_get_link,
.getattr = erofs_getattr,
.listxattr = erofs_listxattr,
.get_acl = erofs_get_acl,
};

469
fs/erofs/internal.h Normal file
View File

@@ -0,0 +1,469 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* https://www.huawei.com/
*/
#ifndef __EROFS_INTERNAL_H
#define __EROFS_INTERNAL_H
#include <linux/fs.h>
#include <linux/dcache.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <linux/magic.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include "erofs_fs.h"
/* redefine pr_fmt "erofs: " */
#undef pr_fmt
#define pr_fmt(fmt) "erofs: " fmt
__printf(3, 4) void _erofs_err(struct super_block *sb,
const char *function, const char *fmt, ...);
#define erofs_err(sb, fmt, ...) \
_erofs_err(sb, __func__, fmt "\n", ##__VA_ARGS__)
__printf(3, 4) void _erofs_info(struct super_block *sb,
const char *function, const char *fmt, ...);
#define erofs_info(sb, fmt, ...) \
_erofs_info(sb, __func__, fmt "\n", ##__VA_ARGS__)
/*
 * Debug helpers.  With CONFIG_EROFS_FS_DEBUG, erofs_dbg() maps to pr_debug()
 * (appending a newline) and DBG_BUGON() is BUG_ON().  Without it, erofs_dbg()
 * expands to nothing, while DBG_BUGON() still evaluates its argument and
 * discards the result.
 */
#ifdef CONFIG_EROFS_FS_DEBUG
#define erofs_dbg(x, ...) pr_debug(x "\n", ##__VA_ARGS__)
#define DBG_BUGON BUG_ON
#else
#define erofs_dbg(x, ...) ((void)0)
#define DBG_BUGON(x) ((void)(x))
#endif /* !CONFIG_EROFS_FS_DEBUG */
/* EROFS_SUPER_MAGIC_V1 to represent the whole file system */
#define EROFS_SUPER_MAGIC EROFS_SUPER_MAGIC_V1
typedef u64 erofs_nid_t;
typedef u64 erofs_off_t;
/* data type for filesystem-wide blocks number */
typedef u32 erofs_blk_t;
/* all filesystem-wide lz4 configurations */
/* in-memory LZ4 parameters, embedded in struct erofs_sb_info as `lz4` */
struct erofs_sb_lz4_info {
/* # of pages needed for EROFS lz4 rolling decompression */
u16 max_distance_pages;
/* maximum possible blocks for pclusters in the filesystem */
u16 max_pclusterblks;
};
/*
 * Per-superblock in-memory state, reachable through sb->s_fs_info (see
 * EROFS_SB()).  Several fields share their names with on-disk superblock
 * fields; presumably they hold the CPU-endian copies -- confirm in the
 * superblock reading code.
 */
struct erofs_sb_info {
#ifdef CONFIG_EROFS_FS_ZIP
/* list for all registered superblocks, mainly for shrinker */
struct list_head list;
struct mutex umount_mutex;
/* the dedicated workstation for compression */
struct radix_tree_root workstn_tree;
/* strategy of sync decompression (false - auto, true - force on) */
bool readahead_sync_decompress;
/* threshold for decompression synchronously */
unsigned int max_sync_decompress_pages;
unsigned int shrinker_run_no;
u16 available_compr_algs;
/* current strategy of how to use managed cache */
unsigned char cache_strategy;
/* pseudo inode to manage cached pages */
struct inode *managed_cache;
struct erofs_sb_lz4_info lz4;
#endif /* CONFIG_EROFS_FS_ZIP */
u32 blocks;
/* start block of the metadata area; base of the nid -> offset mapping in iloc() */
u32 meta_blkaddr;
#ifdef CONFIG_EROFS_FS_XATTR
u32 xattr_blkaddr;
#endif
/* inode slot unit size in bit shift */
unsigned char islotbits;
u32 sb_size; /* total superblock size */
/* timestamp applied to compact inodes, which store none of their own */
u32 build_time_nsec;
u64 build_time;
/* what we really care is nid, rather than ino.. */
erofs_nid_t root_nid;
/* used for statfs, f_files - f_favail */
u64 inos;
u8 uuid[16]; /* 128-bit uuid for volume */
u8 volume_name[16]; /* volume name */
/* feature bits tested by the erofs_sb_has_*() helpers */
u32 feature_compat;
u32 feature_incompat;
/* EROFS_MOUNT_* flags, accessed via set_opt()/clear_opt()/test_opt() */
unsigned int mount_opt;
};
#define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info)
#define EROFS_I_SB(inode) ((struct erofs_sb_info *)(inode)->i_sb->s_fs_info)
/* Mount flags set via mount options or defaults */
#define EROFS_MOUNT_XATTR_USER 0x00000010
#define EROFS_MOUNT_POSIX_ACL 0x00000020
#define clear_opt(sbi, option) ((sbi)->mount_opt &= ~EROFS_MOUNT_##option)
#define set_opt(sbi, option) ((sbi)->mount_opt |= EROFS_MOUNT_##option)
#define test_opt(sbi, option) ((sbi)->mount_opt & EROFS_MOUNT_##option)
#ifdef CONFIG_EROFS_FS_ZIP
/* managed-cache strategies (cf. erofs_sb_info.cache_strategy) */
enum {
EROFS_ZIP_CACHE_DISABLED,
EROFS_ZIP_CACHE_READAHEAD,
EROFS_ZIP_CACHE_READAROUND
};
#define EROFS_LOCKED_MAGIC (INT_MIN | 0xE0F510CCL)
/* basic unit of the workstation of a super_block */
/*
 * The refcount doubles as a lock word: on SMP the freeze helpers swap it to
 * EROFS_LOCKED_MAGIC while a workgroup is frozen.
 */
struct erofs_workgroup {
/* the workgroup index in the workstation */
pgoff_t index;
/* overall workgroup reference count */
atomic_t refcount;
};
#if defined(CONFIG_SMP)
/*
 * SMP: try to freeze @grp by atomically replacing a refcount equal to @val
 * with EROFS_LOCKED_MAGIC.
 *
 * Returns true on success, in which case preemption stays disabled until
 * erofs_workgroup_unfreeze() is called; returns false (preemption
 * re-enabled) if the refcount was not @val.
 */
static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp,
int val)
{
preempt_disable();
if (val != atomic_cmpxchg(&grp->refcount, val, EROFS_LOCKED_MAGIC)) {
preempt_enable();
return false;
}
return true;
}
/*
 * SMP: end a freeze by storing @orig_val back into the refcount and
 * re-enabling the preemption disabled by a successful try_to_freeze.
 */
static inline void erofs_workgroup_unfreeze(struct erofs_workgroup *grp,
int orig_val)
{
/*
* other observers should notice all modifications
* in the freezing period.
*/
smp_mb();
atomic_set(&grp->refcount, orig_val);
preempt_enable();
}
/*
 * SMP: spin until the refcount is no longer EROFS_LOCKED_MAGIC and return
 * the value that was read.
 */
static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
{
return atomic_cond_read_relaxed(&grp->refcount,
VAL != EROFS_LOCKED_MAGIC);
}
#else
/*
 * UP: "freezing" only disables preemption; the refcount itself is left
 * untouched.  Returns true (preemption still disabled) if the refcount
 * equals @val, false (preemption re-enabled) otherwise.
 */
static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp,
int val)
{
preempt_disable();
/* no need to spin on UP platforms, let's just disable preemption. */
if (val != atomic_read(&grp->refcount)) {
preempt_enable();
return false;
}
return true;
}
/* UP: the refcount was never changed, so only re-enable preemption */
static inline void erofs_workgroup_unfreeze(struct erofs_workgroup *grp,
int orig_val)
{
preempt_enable();
}
/* UP: nothing to wait for; simply return the current refcount */
static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
{
int v = atomic_read(&grp->refcount);
/* workgroup is never freezed on uniprocessor systems */
DBG_BUGON(v == EROFS_LOCKED_MAGIC);
return v;
}
#endif /* !CONFIG_SMP */
#endif /* !CONFIG_EROFS_FS_ZIP */
/* we strictly follow PAGE_SIZE and no buffer head yet */
#define LOG_BLOCK_SIZE		PAGE_SHIFT

/* log2 of the number of 512-byte sectors per block */
#undef LOG_SECTORS_PER_BLOCK
#define LOG_SECTORS_PER_BLOCK	(PAGE_SHIFT - 9)

/*
 * Number of 512-byte sectors per block.  This must be derived from
 * LOG_SECTORS_PER_BLOCK: a macro defined in terms of its own name is not
 * re-expanded, so the previous self-referential definition could not be
 * used anywhere without a compile error.
 */
#undef SECTORS_PER_BLOCK
#define SECTORS_PER_BLOCK	(1 << LOG_SECTORS_PER_BLOCK)

/* block size in bytes */
#define EROFS_BLKSIZ		(1 << LOG_BLOCK_SIZE)
#if (EROFS_BLKSIZ % 4096 || !EROFS_BLKSIZ)
#error erofs cannot be used in this platform
#endif
#define ROOT_NID(sb) ((sb)->root_nid)
#define erofs_blknr(addr) ((addr) / EROFS_BLKSIZ)
#define erofs_blkoff(addr) ((addr) % EROFS_BLKSIZ)
#define blknr_to_addr(nr) ((erofs_off_t)(nr) * EROFS_BLKSIZ)
/*
 * Byte offset of the on-disk inode @nid: the start of the metadata area
 * plus @nid inode slots of (1 << islotbits) bytes each.
 */
static inline erofs_off_t iloc(struct erofs_sb_info *sbi, erofs_nid_t nid)
{
	const erofs_off_t meta_base = blknr_to_addr(sbi->meta_blkaddr);
	const erofs_off_t slot_ofs = nid << sbi->islotbits;

	return meta_base + slot_ofs;
}
/*
 * Generate erofs_sb_has_<name>(sbi), which tests the EROFS_FEATURE_<feature>
 * bit(s) in sbi->feature_<compat>, where <compat> is either "compat" or
 * "incompat".
 */
#define EROFS_FEATURE_FUNCS(name, compat, feature) \
static inline bool erofs_sb_has_##name(struct erofs_sb_info *sbi) \
{ \
return sbi->feature_##compat & EROFS_FEATURE_##feature; \
}
/* predicates for the incompat feature bits */
EROFS_FEATURE_FUNCS(lz4_0padding, incompat, INCOMPAT_LZ4_0PADDING)
EROFS_FEATURE_FUNCS(compr_cfgs, incompat, INCOMPAT_COMPR_CFGS)
EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER)
/* predicate for the compat feature bit */
EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
/*
 * atomic flag definitions
 * (presumably bit numbers within erofs_inode.flags -- confirm against users)
 */
#define EROFS_I_EA_INITED_BIT 0
#define EROFS_I_Z_INITED_BIT 1
/* bitlock definitions (arranged in reverse order, from the top bit down) */
#define EROFS_I_BL_XATTR_BIT (BITS_PER_LONG - 1)
#define EROFS_I_BL_Z_BIT (BITS_PER_LONG - 2)
/*
 * EROFS in-memory inode.  The VFS inode is embedded as the last member;
 * everything in front of it is EROFS-private state.
 */
struct erofs_inode {
erofs_nid_t nid;
/* atomic flags (including bitlocks) */
unsigned long flags;
unsigned char datalayout;
unsigned char inode_isize;
unsigned short xattr_isize;
unsigned int xattr_shared_count;
/* heap-allocated array; freed with kfree() when the inode is destroyed */
unsigned int *xattr_shared_xattrs;
/* raw_blkaddr shares storage with the z_* members below */
union {
erofs_blk_t raw_blkaddr;
#ifdef CONFIG_EROFS_FS_ZIP
/* only present when compression support is built in */
struct {
unsigned short z_advise;
unsigned char z_algorithmtype[2];
unsigned char z_logical_clusterbits;
};
#endif /* CONFIG_EROFS_FS_ZIP */
};
/* the corresponding vfs inode */
struct inode vfs_inode;
};
/* map a pointer to the embedded vfs_inode back to its struct erofs_inode */
#define EROFS_I(ptr) \
container_of(ptr, struct erofs_inode, vfs_inode)
/*
 * Number of EROFS_BLKSIZ-sized blocks needed to hold inode->i_size bytes,
 * rounded up.
 */
static inline unsigned long erofs_inode_datablocks(struct inode *inode)
{
/* since i_size cannot be changed */
return DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
}
/*
 * Extract a bit field from @value.
 *
 * @value: word to extract from
 * @bit:   position of the lowest bit of the field
 * @bits:  width of the field in bits
 *
 * Returns the field shifted down to bit 0.
 */
static inline unsigned int erofs_bitrange(unsigned int value, unsigned int bit,
					  unsigned int bits)
{
	const unsigned int mask = (1 << bits) - 1;

	return (value >> bit) & mask;
}
/*
 * Extract the inode version field (EROFS_I_VERSION_BITS wide, starting at
 * EROFS_I_VERSION_BIT) from @value.
 */
static inline unsigned int erofs_inode_version(unsigned int value)
{
	const unsigned int version = erofs_bitrange(value, EROFS_I_VERSION_BIT,
						    EROFS_I_VERSION_BITS);

	return version;
}
/*
 * Extract the data layout field (EROFS_I_DATALAYOUT_BITS wide, starting at
 * EROFS_I_DATALAYOUT_BIT) from @value.
 */
static inline unsigned int erofs_inode_datalayout(unsigned int value)
{
	const unsigned int layout = erofs_bitrange(value,
						   EROFS_I_DATALAYOUT_BIT,
						   EROFS_I_DATALAYOUT_BITS);

	return layout;
}
/* operation tables shared between translation units */
extern const struct super_operations erofs_sops;
extern const struct address_space_operations erofs_raw_access_aops;
extern const struct address_space_operations z_erofs_aops;
/*
* Logical to physical block mapping
*
* Unlike other file systems, it is used for 2 access modes:
*
* 1) RAW access mode:
*
* Users pass a valid (m_lblk, m_lofs -- usually 0) pair,
* and get the valid m_pblk, m_pofs and the longest m_len(in bytes).
*
* Note that m_lblk in the RAW access mode refers to the number of
* the compressed ondisk block rather than the uncompressed
* in-memory block for the compressed file.
*
* m_pofs equals to m_lofs except for the inline data page.
*
* 2) Normal access mode:
*
* If the inode is not compressed, it has no difference with
* the RAW access mode. However, if the inode is compressed,
* users should pass a valid (m_lblk, m_lofs) pair, and get
* the needed m_pblk, m_pofs, m_len to get the compressed data
* and the updated m_lblk, m_lofs which indicates the start
* of the corresponding uncompressed data in the file.
*/
/* EROFS-private buffer-head state bits, numbered from BH_PrivateStart */
enum {
BH_Zipped = BH_PrivateStart,
BH_FullMapped,
};
/* EROFS_MAP_* flag masks, each built from the matching BH_* bit number */
/* Has a disk mapping */
#define EROFS_MAP_MAPPED (1 << BH_Mapped)
/* Located in metadata (could be copied from bd_inode) */
#define EROFS_MAP_META (1 << BH_Meta)
/* The extent has been compressed */
#define EROFS_MAP_ZIPPED (1 << BH_Zipped)
/* The length of extent is full */
#define EROFS_MAP_FULL_MAPPED (1 << BH_FullMapped)
/*
 * Mapping request/result descriptor.
 * NOTE(review): the field names suggest m_pa/m_plen are the physical
 * address/length and m_la/m_llen the logical ones -- confirm against the
 * mapping code, which is not visible here.
 */
struct erofs_map_blocks {
erofs_off_t m_pa, m_la;
u64 m_plen, m_llen;
/* presumably a combination of the EROFS_MAP_* flags -- confirm */
unsigned int m_flags;
struct page *mpage;
};
/* Flags used by erofs_map_blocks_flatmode() */
#define EROFS_GET_BLOCKS_RAW 0x0001
/* zmap.c */
#ifdef CONFIG_EROFS_FS_ZIP
int z_erofs_fill_inode(struct inode *inode);
int z_erofs_map_blocks_iter(struct inode *inode,
struct erofs_map_blocks *map,
int flags);
#else
/* without compression support both entry points fail with -EOPNOTSUPP */
static inline int z_erofs_fill_inode(struct inode *inode) { return -EOPNOTSUPP; }
static inline int z_erofs_map_blocks_iter(struct inode *inode,
struct erofs_map_blocks *map,
int flags)
{
return -EOPNOTSUPP;
}
#endif /* !CONFIG_EROFS_FS_ZIP */
/* data.c */
/*
 * Get the metadata page for block @blkaddr.  Callers in super.c check the
 * result with IS_ERR() and later unlock_page() + put_page() it, so the page
 * is presumably returned locked with a reference held -- confirm in data.c.
 */
struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr);
/* inode.c */
/*
 * Fold a nid into an unsigned long hash value: where long is 32 bits wide
 * the upper and lower halves of @nid are XORed together, otherwise @nid is
 * used as is.
 */
static inline unsigned long erofs_inode_hash(erofs_nid_t nid)
{
	/* keeps only the low 32 bits where long is 32 bits wide */
	unsigned long hash = nid;

#if BITS_PER_LONG == 32
	hash ^= nid >> 32;
#endif
	return hash;
}
/* inode operation tables (inode.c) */
extern const struct inode_operations erofs_generic_iops;
extern const struct inode_operations erofs_symlink_iops;
extern const struct inode_operations erofs_fast_symlink_iops;
/* look up the inode @nid on @sb; callers check the result with IS_ERR() */
struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid, bool dir);
int erofs_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags);
/* namei.c */
extern const struct inode_operations erofs_dir_iops;
/* look up @name in @dir; on success *@nid and *@d_type are filled in */
int erofs_namei(struct inode *dir, struct qstr *name,
erofs_nid_t *nid, unsigned int *d_type);
/* dir.c */
extern const struct file_operations erofs_dir_fops;
/*
 * Map @count pages with vm_map_ram(), trying at most three times in total.
 * vm_unmap_aliases() is called between attempts.
 *
 * Returns the mapped address, or NULL if all three attempts failed.
 */
static inline void *erofs_vm_map_ram(struct page **pages, unsigned int count)
{
	void *addr;
	int attempt;

	for (attempt = 1; ; ++attempt) {
		addr = vm_map_ram(pages, count, -1, PAGE_KERNEL);
		/* stop on success or after the third try */
		if (addr || attempt == 3)
			break;
		vm_unmap_aliases();
	}
	return addr;
}
/* pcpubuf.c */
/* returns NULL if the current CPU's buffer has fewer than @requiredpages pages */
void *erofs_get_pcpubuf(unsigned int requiredpages);
/* releases the buffer handed out by erofs_get_pcpubuf() */
void erofs_put_pcpubuf(void *ptr);
/* grows every per-CPU buffer to @nrpages pages; never shrinks them */
int erofs_pcpubuf_growsize(unsigned int nrpages);
void erofs_pcpubuf_init(void);
void erofs_pcpubuf_exit(void);
/* utils.c / zdata.c */
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp);
#ifdef CONFIG_EROFS_FS_ZIP
/* workgroup management and shrinker; only built with compression support */
int erofs_workgroup_put(struct erofs_workgroup *grp);
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
pgoff_t index);
int erofs_register_workgroup(struct super_block *sb,
struct erofs_workgroup *grp);
void erofs_workgroup_free_rcu(struct erofs_workgroup *grp);
void erofs_shrinker_register(struct super_block *sb);
void erofs_shrinker_unregister(struct super_block *sb);
int __init erofs_init_shrinker(void);
void erofs_exit_shrinker(void);
int __init z_erofs_init_zip_subsystem(void);
void z_erofs_exit_zip_subsystem(void);
int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
struct erofs_workgroup *egrp);
int erofs_try_to_free_cached_page(struct address_space *mapping,
struct page *page);
/* @lz4 is NULL and @len is 0 when the image carries no compression configs */
int z_erofs_load_lz4_config(struct super_block *sb,
struct erofs_super_block *dsb,
struct z_erofs_lz4_cfgs *lz4, int len);
#else
/* no-op stubs used when compression support is not built in */
static inline void erofs_shrinker_register(struct super_block *sb) {}
static inline void erofs_shrinker_unregister(struct super_block *sb) {}
static inline int erofs_init_shrinker(void) { return 0; }
static inline void erofs_exit_shrinker(void) {}
static inline int z_erofs_init_zip_subsystem(void) { return 0; }
static inline void z_erofs_exit_zip_subsystem(void) {}
/*
 * Stub used when compression support is not built in: succeed only if the
 * image carries no lz4 configuration at all (no @lz4 blob and a zero
 * lz4_max_distance in the on-disk superblock), otherwise reject it with
 * -EINVAL.
 */
static inline int z_erofs_load_lz4_config(struct super_block *sb,
					  struct erofs_super_block *dsb,
					  struct z_erofs_lz4_cfgs *lz4, int len)
{
	if (!lz4 && !dsb->u1.lz4_max_distance)
		return 0;

	erofs_err(sb, "lz4 algorithm isn't enabled");
	return -EINVAL;
}
#endif /* !CONFIG_EROFS_FS_ZIP */
#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
/* the page at the tail of an lru-linked list; defined only if not provided already */
#ifndef lru_to_page
#define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
#endif
#endif /* __EROFS_INTERNAL_H */

View File

@@ -1,16 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/drivers/staging/erofs/namei.c
*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* http://www.huawei.com/
* Created by Gao Xiang <gaoxiang25@huawei.com>
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of the Linux
* distribution for more details.
* https://www.huawei.com/
*/
#include "internal.h"
#include "xattr.h"
#include <trace/events/erofs.h>
@@ -21,9 +13,9 @@ struct erofs_qstr {
};
/* based on the end of qn is accurate and it must have the trailing '\0' */
static inline int dirnamecmp(const struct erofs_qstr *qn,
const struct erofs_qstr *qd,
unsigned int *matched)
static inline int erofs_dirnamecmp(const struct erofs_qstr *qn,
const struct erofs_qstr *qd,
unsigned int *matched)
{
unsigned int i = *matched;
@@ -71,16 +63,16 @@ static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name,
unsigned int matched = min(startprfx, endprfx);
struct erofs_qstr dname = {
.name = data + nameoff,
.end = unlikely(mid >= ndirents - 1) ?
.end = mid >= ndirents - 1 ?
data + dirblksize :
data + nameoff_from_disk(de[mid + 1].nameoff,
dirblksize)
};
/* string comparison without already matched prefix */
int ret = dirnamecmp(name, &dname, &matched);
int ret = erofs_dirnamecmp(name, &dname, &matched);
if (unlikely(!ret)) {
if (!ret) {
return de + mid;
} else if (ret > 0) {
head = mid + 1;
@@ -105,7 +97,7 @@ static struct page *find_target_block_classic(struct inode *dir,
startprfx = endprfx = 0;
head = 0;
back = inode_datablocks(dir) - 1;
back = erofs_inode_datablocks(dir) - 1;
while (head <= back) {
const int mid = head + (back - head) / 2;
@@ -120,11 +112,14 @@ static struct page *find_target_block_classic(struct inode *dir,
unsigned int matched;
struct erofs_qstr dname;
if (unlikely(!ndirents)) {
DBG_BUGON(1);
if (!ndirents) {
kunmap_atomic(de);
put_page(page);
page = ERR_PTR(-EIO);
erofs_err(dir->i_sb,
"corrupted dir block %d @ nid %llu",
mid, EROFS_I(dir)->nid);
DBG_BUGON(1);
page = ERR_PTR(-EFSCORRUPTED);
goto out;
}
@@ -139,17 +134,17 @@ static struct page *find_target_block_classic(struct inode *dir,
EROFS_BLKSIZ);
/* string comparison without already matched prefix */
diff = dirnamecmp(name, &dname, &matched);
diff = erofs_dirnamecmp(name, &dname, &matched);
kunmap_atomic(de);
if (unlikely(!diff)) {
if (!diff) {
*_ndirents = 0;
goto out;
} else if (diff > 0) {
head = mid + 1;
startprfx = matched;
if (likely(!IS_ERR(candidate)))
if (!IS_ERR(candidate))
put_page(candidate);
candidate = page;
*_ndirents = ndirents;
@@ -179,7 +174,7 @@ int erofs_namei(struct inode *dir,
struct erofs_dirent *de;
struct erofs_qstr qn;
if (unlikely(!dir->i_size))
if (!dir->i_size)
return -ENOENT;
qn.name = name->name;
@@ -188,7 +183,7 @@ int erofs_namei(struct inode *dir,
ndirents = 0;
page = find_target_block_classic(dir, &qn, &ndirents);
if (unlikely(IS_ERR(page)))
if (IS_ERR(page))
return PTR_ERR(page);
data = kmap_atomic(page);
@@ -198,7 +193,7 @@ int erofs_namei(struct inode *dir,
else
de = (struct erofs_dirent *)data;
if (likely(!IS_ERR(de))) {
if (!IS_ERR(de)) {
*nid = le64_to_cpu(de->nid);
*d_type = de->file_type;
}
@@ -211,11 +206,12 @@ int erofs_namei(struct inode *dir,
/* NOTE: i_mutex is already held by vfs */
static struct dentry *erofs_lookup(struct inode *dir,
struct dentry *dentry, unsigned int flags)
struct dentry *dentry,
unsigned int flags)
{
int err;
erofs_nid_t nid;
unsigned d_type;
unsigned int d_type;
struct inode *inode;
DBG_BUGON(!d_really_is_negative(dentry));
@@ -225,7 +221,7 @@ static struct dentry *erofs_lookup(struct inode *dir,
trace_erofs_lookup(dir, dentry, flags);
/* file name exceeds fs limit */
if (unlikely(dentry->d_name.len > EROFS_NAME_LEN))
if (dentry->d_name.len > EROFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
/* false uninitialized warnings on gcc 4.8.x */
@@ -234,29 +230,19 @@ static struct dentry *erofs_lookup(struct inode *dir,
if (err == -ENOENT) {
/* negative dentry */
inode = NULL;
goto negative_out;
} else if (unlikely(err))
return ERR_PTR(err);
debugln("%s, %s (nid %llu) found, d_type %u", __func__,
dentry->d_name.name, nid, d_type);
inode = erofs_iget(dir->i_sb, nid, d_type == EROFS_FT_DIR);
if (IS_ERR(inode))
return ERR_CAST(inode);
negative_out:
} else if (err) {
inode = ERR_PTR(err);
} else {
erofs_dbg("%s, %s (nid %llu) found, d_type %u", __func__,
dentry->d_name.name, nid, d_type);
inode = erofs_iget(dir->i_sb, nid, d_type == EROFS_FT_DIR);
}
return d_splice_alias(inode, dentry);
}
const struct inode_operations erofs_dir_iops = {
.lookup = erofs_lookup,
};
const struct inode_operations erofs_dir_xattr_iops = {
.lookup = erofs_lookup,
#ifdef CONFIG_EROFS_FS_XATTR
.getattr = erofs_getattr,
.listxattr = erofs_listxattr,
#endif
.get_acl = erofs_get_acl,
};

148
fs/erofs/pcpubuf.c Normal file
View File

@@ -0,0 +1,148 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) Gao Xiang <xiang@kernel.org>
*
* For low-latency decompression algorithms (e.g. lz4), reserve consecutive
* per-CPU virtual memory (in pages) in advance to store such inplace I/O
* data if inplace decompression is failed (due to unmet inplace margin for
* example).
*/
#include "internal.h"
/* one per-CPU buffer: a virtually contiguous mapping of @nrpages pages */
struct erofs_pcpubuf {
/* held from erofs_get_pcpubuf() until erofs_put_pcpubuf() */
raw_spinlock_t lock;
/* vmap()ed address of @pages, or NULL while no buffer is set up */
void *ptr;
/* kmalloc'ed array of the backing pages */
struct page **pages;
/* number of entries in @pages */
unsigned int nrpages;
};
static DEFINE_PER_CPU(struct erofs_pcpubuf, erofs_pcb);
/*
 * Pin the current CPU (get_cpu_var) and lock its buffer.
 *
 * Returns the buffer's mapped address if it holds at least @requiredpages
 * pages; in that case the lock and the CPU pin stay held until
 * erofs_put_pcpubuf().  Otherwise both are dropped again here and NULL is
 * returned.
 */
void *erofs_get_pcpubuf(unsigned int requiredpages)
__acquires(pcb->lock)
{
struct erofs_pcpubuf *pcb = &get_cpu_var(erofs_pcb);
raw_spin_lock(&pcb->lock);
/* check if the per-CPU buffer is too small */
if (requiredpages > pcb->nrpages) {
raw_spin_unlock(&pcb->lock);
put_cpu_var(erofs_pcb);
/* (for sparse checker) pretend pcb->lock is still taken */
__acquire(pcb->lock);
return NULL;
}
return pcb->ptr;
}
/*
 * Release the current CPU's buffer: drop the buffer lock and the CPU pin.
 * @ptr is only used for a debug check against the buffer address.
 */
void erofs_put_pcpubuf(void *ptr) __releases(pcb->lock)
{
struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, smp_processor_id());
DBG_BUGON(pcb->ptr != ptr);
raw_spin_unlock(&pcb->lock);
put_cpu_var(erofs_pcb);
}
/* the next step: support hotplug for the per-CPU page buffers */
/*
 * Grow the buffer of every possible CPU to @nrpages pages.  Calls are
 * serialized by a function-local mutex, and a request that is not larger
 * than the recorded size returns 0 without doing anything.
 *
 * Returns 0 on success or -ENOMEM if an allocation or the vmap() failed.
 *
 * NOTE(review): pcb_nrpages is updated to @nrpages even when the loop is
 * left early with -ENOMEM, so CPUs that were not reached keep their old
 * size while a later call with the same @nrpages returns early -- confirm
 * this is intended.
 */
int erofs_pcpubuf_growsize(unsigned int nrpages)
{
static DEFINE_MUTEX(pcb_resize_mutex);
/* size recorded by the previous call */
static unsigned int pcb_nrpages;
/* pages to be released at the end; also a source for erofs_allocpage() */
LIST_HEAD(pagepool);
int delta, cpu, ret, i;
mutex_lock(&pcb_resize_mutex);
delta = nrpages - pcb_nrpages;
ret = 0;
/* avoid shrinking pcpubuf, since no idea how many fses rely on */
if (delta <= 0)
goto out;
for_each_possible_cpu(cpu) {
struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
struct page **pages, **oldpages;
void *ptr, *old_ptr;
pages = kmalloc_array(nrpages, sizeof(*pages), GFP_KERNEL);
if (!pages) {
ret = -ENOMEM;
break;
}
for (i = 0; i < nrpages; ++i) {
pages[i] = erofs_allocpage(&pagepool, GFP_KERNEL);
if (!pages[i]) {
ret = -ENOMEM;
/* recycle the i pages obtained so far through the shared tail */
oldpages = pages;
goto free_pagearray;
}
}
ptr = vmap(pages, nrpages, VM_MAP, PAGE_KERNEL);
if (!ptr) {
ret = -ENOMEM;
/* i == nrpages here: the whole new array is recycled */
oldpages = pages;
goto free_pagearray;
}
/* swap in the new buffer under the lock; i takes the old page count */
raw_spin_lock(&pcb->lock);
old_ptr = pcb->ptr;
pcb->ptr = ptr;
oldpages = pcb->pages;
pcb->pages = pages;
i = pcb->nrpages;
pcb->nrpages = nrpages;
raw_spin_unlock(&pcb->lock);
if (!oldpages) {
DBG_BUGON(old_ptr);
continue;
}
if (old_ptr)
vunmap(old_ptr);
free_pagearray:
/* move the first i pages of oldpages into the pool and free the array */
while (i)
list_add(&oldpages[--i]->lru, &pagepool);
kfree(oldpages);
if (ret)
break;
}
pcb_nrpages = nrpages;
put_pages_list(&pagepool);
out:
mutex_unlock(&pcb_resize_mutex);
return ret;
}
/* initialize the buffer lock of every possible CPU; nothing is allocated here */
void erofs_pcpubuf_init(void)
{
int cpu;
for_each_possible_cpu(cpu) {
struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
raw_spin_lock_init(&pcb->lock);
}
}
/*
 * Tear down the buffers of all possible CPUs: unmap each buffer, drop the
 * reference on every backing page and free the page array.
 */
void erofs_pcpubuf_exit(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct erofs_pcpubuf *buf = &per_cpu(erofs_pcb, cpu);
		struct page **pages = buf->pages;
		unsigned int idx;

		if (buf->ptr) {
			vunmap(buf->ptr);
			buf->ptr = NULL;
		}

		if (pages) {
			for (idx = 0; idx < buf->nrpages; ++idx) {
				if (pages[idx])
					put_page(pages[idx]);
			}
			kfree(pages);
			buf->pages = NULL;
		}
	}
}

791
fs/erofs/super.c Normal file
View File

@@ -0,0 +1,791 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* https://www.huawei.com/
*/
#include <linux/module.h>
#include <linux/buffer_head.h>
#include <linux/statfs.h>
#include <linux/parser.h>
#include <linux/seq_file.h>
#include <linux/crc32c.h>
#include "xattr.h"
#define CREATE_TRACE_POINTS
#include <trace/events/erofs.h>
static struct kmem_cache *erofs_inode_cachep __read_mostly;
void _erofs_err(struct super_block *sb, const char *function,
const char *fmt, ...)
{
struct va_format vaf;
va_list args;
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
pr_err("(device %s): %s: %pV", sb->s_id, function, &vaf);
va_end(args);
}
void _erofs_info(struct super_block *sb, const char *function,
const char *fmt, ...)
{
struct va_format vaf;
va_list args;
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
pr_info("(device %s): %pV", sb->s_id, &vaf);
va_end(args);
}
static int erofs_superblock_csum_verify(struct super_block *sb, void *sbdata)
{
struct erofs_super_block *dsb;
u32 expected_crc, crc;
dsb = kmemdup(sbdata + EROFS_SUPER_OFFSET,
EROFS_BLKSIZ - EROFS_SUPER_OFFSET, GFP_KERNEL);
if (!dsb)
return -ENOMEM;
expected_crc = le32_to_cpu(dsb->checksum);
dsb->checksum = 0;
/* to allow for x86 boot sectors and other oddities. */
crc = crc32c(~0, dsb, EROFS_BLKSIZ - EROFS_SUPER_OFFSET);
kfree(dsb);
if (crc != expected_crc) {
erofs_err(sb, "invalid checksum 0x%08x, 0x%08x expected",
crc, expected_crc);
return -EBADMSG;
}
return 0;
}
/* slab constructor: run the one-time VFS initialization of the embedded inode */
static void erofs_inode_init_once(void *ptr)
{
	struct erofs_inode *const einode = ptr;

	inode_init_once(&einode->vfs_inode);
}
/*
 * Allocate an EROFS inode from the inode cache and hand its embedded VFS
 * inode to the caller.  Returns NULL if the allocation fails.
 */
static struct inode *erofs_alloc_inode(struct super_block *sb)
{
	struct erofs_inode *vi;

	vi = kmem_cache_alloc(erofs_inode_cachep, GFP_KERNEL);
	if (vi) {
		/* zero out everything except vfs_inode */
		memset(vi, 0, offsetof(struct erofs_inode, vfs_inode));
		return &vi->vfs_inode;
	}
	return NULL;
}
static void i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
struct erofs_inode *vi = EROFS_I(inode);
/* be careful of RCU symlink path */
if (inode->i_op == &erofs_fast_symlink_iops)
kfree(inode->i_link);
kfree(vi->xattr_shared_xattrs);
kmem_cache_free(erofs_inode_cachep, vi);
}
/* defer the actual freeing of @inode to i_callback() through call_rcu() */
static void erofs_destroy_inode(struct inode *inode)
{
	struct rcu_head *const head = &inode->i_rcu;

	call_rcu(head, i_callback);
}
/*
 * Record the on-disk incompat feature word in the superblock info and check
 * it against the features this kernel understands.
 *
 * Returns true if every set bit is known, false (after logging the unknown
 * bits) otherwise.
 */
static bool check_layout_compatibility(struct super_block *sb,
				       struct erofs_super_block *dsb)
{
	const unsigned int feature = le32_to_cpu(dsb->feature_incompat);
	const unsigned int unknown = feature & ~EROFS_ALL_FEATURE_INCOMPAT;

	EROFS_SB(sb)->feature_incompat = feature;

	/* check if current kernel meets all mandatory requirements */
	if (!unknown)
		return true;

	erofs_err(sb,
		  "unidentified incompatible feature %x, please upgrade kernel version",
		  unknown);
	return false;
}
#ifdef CONFIG_EROFS_FS_ZIP
/*
 * Read one variable-sized metadata record.  *@offset is first rounded up to
 * a multiple of 4 bytes.
 *
 * At the aligned offset a little-endian 16-bit length is read; a stored
 * length of 0 stands for U16_MAX + 1 bytes.  The payload that follows is
 * copied into a freshly kmalloc'ed buffer, crossing into later metadata
 * blocks as needed.
 *
 * @pagep:   in: a page kept from a previous call, or NULL; out: the last
 *           page used (the caller later does unlock_page() + put_page()),
 *           or NULL if reading a page failed
 * @offset:  in/out byte offset; advanced past the length and the payload
 * @lengthp: out: payload length in bytes
 *
 * Returns the buffer (to be kfree()d by the caller), ERR_PTR(-ENOMEM), or
 * the error pointer returned by erofs_get_meta_page().
 */
static void *erofs_read_metadata(struct super_block *sb, struct page **pagep,
erofs_off_t *offset, int *lengthp)
{
struct page *page = *pagep;
u8 *buffer, *ptr;
int len, i, cnt;
erofs_blk_t blk;
*offset = round_up(*offset, 4);
blk = erofs_blknr(*offset);
/* switch pages if the cached one does not cover the wanted block */
if (!page || page->index != blk) {
if (page) {
unlock_page(page);
put_page(page);
}
page = erofs_get_meta_page(sb, blk);
if (IS_ERR(page))
goto err_nullpage;
}
ptr = kmap(page);
len = le16_to_cpu(*(__le16 *)&ptr[erofs_blkoff(*offset)]);
/* a stored length of zero encodes the maximum size */
if (!len)
len = U16_MAX + 1;
buffer = kmalloc(len, GFP_KERNEL);
if (!buffer) {
buffer = ERR_PTR(-ENOMEM);
goto out;
}
*offset += sizeof(__le16);
*lengthp = len;
for (i = 0; i < len; i += cnt) {
/* copy at most up to the end of the current block */
cnt = min(EROFS_BLKSIZ - (int)erofs_blkoff(*offset), len - i);
blk = erofs_blknr(*offset);
if (!page || page->index != blk) {
if (page) {
kunmap(page);
unlock_page(page);
put_page(page);
}
page = erofs_get_meta_page(sb, blk);
if (IS_ERR(page)) {
kfree(buffer);
goto err_nullpage;
}
ptr = kmap(page);
}
memcpy(buffer + i, ptr + erofs_blkoff(*offset), cnt);
*offset += cnt;
}
out:
/* the page stays locked and referenced; only the mapping is dropped */
kunmap(page);
*pagep = page;
return buffer;
err_nullpage:
/* here page holds the error pointer from erofs_get_meta_page() */
*pagep = NULL;
return page;
}
/*
 * Load the per-algorithm compression configurations that follow the
 * on-disk superblock.
 *
 * Each set bit in available_compr_algs selects one algorithm (the bit
 * position is the algorithm number) whose configuration record is read with
 * erofs_read_metadata() and handed to the matching loader.
 *
 * Returns 0 on success, -EINVAL if the image uses algorithms outside
 * Z_EROFS_ALL_COMPR_ALGS, or the error from reading/loading a record.
 */
static int erofs_load_compr_cfgs(struct super_block *sb,
struct erofs_super_block *dsb)
{
struct erofs_sb_info *sbi;
struct page *page;
unsigned int algs, alg;
erofs_off_t offset;
int size, ret;
sbi = EROFS_SB(sb);
sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs);
if (sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS) {
erofs_err(sb, "try to load compressed fs with unsupported algorithms %x",
sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS);
return -EINVAL;
}
/* the records start right behind the superblock and its extension slots */
offset = EROFS_SUPER_OFFSET + sbi->sb_size;
page = NULL;
alg = 0;
ret = 0;
for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) {
void *data;
if (!(algs & 1))
continue;
data = erofs_read_metadata(sb, &page, &offset, &size);
if (IS_ERR(data)) {
ret = PTR_ERR(data);
goto err;
}
switch (alg) {
case Z_EROFS_COMPRESSION_LZ4:
ret = z_erofs_load_lz4_config(sb, dsb, data, size);
break;
default:
DBG_BUGON(1);
ret = -EFAULT;
}
kfree(data);
if (ret)
goto err;
}
err:
/* also reached on success: release the page kept by erofs_read_metadata() */
if (page) {
unlock_page(page);
put_page(page);
}
return ret;
}
#else
/*
 * Variant used when compression support is not built in: accept the image
 * only if it declares no compression algorithms at all, otherwise fail
 * with -EINVAL.
 */
static int erofs_load_compr_cfgs(struct super_block *sb,
				 struct erofs_super_block *dsb)
{
	if (!dsb->u1.available_compr_algs)
		return 0;

	erofs_err(sb, "try to load compressed fs when compression is disabled");
	return -EINVAL;
}
#endif
/*
 * Read and validate the on-disk superblock, which lives EROFS_SUPER_OFFSET
 * bytes into page 0 of the block device, and copy its fields into the
 * in-memory superblock info.
 *
 * Checks, in order: the magic number, the checksum (only if the sb_chksum
 * compat feature is set), the block size, the incompat feature set, the
 * superblock size and the volume name termination.  Finally the
 * compression configuration is loaded.
 *
 * Returns 0 on success or a negative errno.
 */
static int erofs_read_superblock(struct super_block *sb)
{
struct erofs_sb_info *sbi;
struct page *page;
struct erofs_super_block *dsb;
unsigned int blkszbits;
void *data;
int ret;
page = read_mapping_page(sb->s_bdev->bd_inode->i_mapping, 0, NULL);
if (IS_ERR(page)) {
erofs_err(sb, "cannot read erofs superblock");
return PTR_ERR(page);
}
sbi = EROFS_SB(sb);
data = kmap(page);
dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET);
ret = -EINVAL;
if (le32_to_cpu(dsb->magic) != EROFS_SUPER_MAGIC_V1) {
erofs_err(sb, "cannot find valid erofs superblock");
goto out;
}
/* feature_compat must be set before erofs_sb_has_sb_chksum() is usable */
sbi->feature_compat = le32_to_cpu(dsb->feature_compat);
if (erofs_sb_has_sb_chksum(sbi)) {
ret = erofs_superblock_csum_verify(sb, data);
if (ret)
goto out;
}
/* the checksum call may have overwritten ret with 0; re-arm the default */
ret = -EINVAL;
blkszbits = dsb->blkszbits;
/* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */
if (blkszbits != LOG_BLOCK_SIZE) {
erofs_err(sb, "blkszbits %u isn't supported on this platform",
blkszbits);
goto out;
}
if (!check_layout_compatibility(sb, dsb))
goto out;
/* 128 bytes plus the extension slots */
sbi->sb_size = 128 + dsb->sb_extslots * EROFS_SB_EXTSLOT_SIZE;
if (sbi->sb_size > EROFS_BLKSIZ) {
/* NOTE(review): the value logged here is sb_size, not sb_extslots */
erofs_err(sb, "invalid sb_extslots %u (more than a fs block)",
sbi->sb_size);
goto out;
}
sbi->blocks = le32_to_cpu(dsb->blocks);
sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr);
#ifdef CONFIG_EROFS_FS_XATTR
sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
#endif
/* inode slots are sized like a compact on-disk inode */
sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact));
sbi->root_nid = le16_to_cpu(dsb->root_nid);
sbi->inos = le64_to_cpu(dsb->inos);
sbi->build_time = le64_to_cpu(dsb->build_time);
sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec);
memcpy(&sb->s_uuid, dsb->uuid, sizeof(dsb->uuid));
ret = strscpy(sbi->volume_name, dsb->volume_name,
sizeof(dsb->volume_name));
if (ret < 0) { /* -E2BIG */
erofs_err(sb, "bad volume name without NIL terminator");
ret = -EFSCORRUPTED;
goto out;
}
/* parse on-disk compression configurations */
if (erofs_sb_has_compr_cfgs(sbi))
ret = erofs_load_compr_cfgs(sb, dsb);
else
ret = z_erofs_load_lz4_config(sb, dsb, NULL, 0);
out:
kunmap(page);
put_page(page);
return ret;
}
#ifdef CONFIG_EROFS_FS_ZIP
/*
 * Parse the value of the "cache_strategy=" mount option and store the
 * matching strategy in the superblock info.
 *
 * Returns 0 on success, -EINVAL for an unknown strategy name, or -ENOMEM if
 * the option value cannot be duplicated.
 */
static int erofs_build_cache_strategy(struct super_block *sb,
				      substring_t *args)
{
	static const struct {
		const char *name;
		int strategy;
	} known[] = {
		{ "disabled",   EROFS_ZIP_CACHE_DISABLED },
		{ "readahead",  EROFS_ZIP_CACHE_READAHEAD },
		{ "readaround", EROFS_ZIP_CACHE_READAROUND },
	};
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	const char *cs = match_strdup(args);
	unsigned int idx;
	int err = -EINVAL;

	if (!cs) {
		erofs_err(sb, "Not enough memory to store cache strategy");
		return -ENOMEM;
	}

	for (idx = 0; idx < ARRAY_SIZE(known); ++idx) {
		if (strcmp(cs, known[idx].name))
			continue;
		sbi->cache_strategy = known[idx].strategy;
		err = 0;
		break;
	}

	if (err)
		erofs_err(sb, "Unrecognized cache strategy \"%s\"", cs);

	kfree(cs);
	return err;
}
#else
/*
 * Variant used when compression support is not built in: the option is
 * accepted but only logged as ignored.  Always returns 0.
 */
static int erofs_build_cache_strategy(struct super_block *sb,
				      substring_t *args)
{
	erofs_info(sb, "EROFS compression is disabled, so cache strategy is ignored");

	return 0;
}
#endif
/*
 * Set up the default EROFS parameters; each group only exists when the
 * corresponding feature is built in.
 */
static void erofs_default_options(struct erofs_sb_info *sbi)
{
#ifdef CONFIG_EROFS_FS_ZIP
/* compression defaults: readaround caching, 3 pages, no sync readahead */
sbi->cache_strategy = EROFS_ZIP_CACHE_READAROUND;
sbi->max_sync_decompress_pages = 3;
sbi->readahead_sync_decompress = false;
#endif
#ifdef CONFIG_EROFS_FS_XATTR
/* user xattrs are on by default */
set_opt(sbi, XATTR_USER);
#endif
#ifdef CONFIG_EROFS_FS_POSIX_ACL
/* POSIX ACLs are on by default */
set_opt(sbi, POSIX_ACL);
#endif
}
/* token ids returned by match_token() for the mount options below */
enum {
Opt_user_xattr,
Opt_nouser_xattr,
Opt_acl,
Opt_noacl,
Opt_cache_strategy,
Opt_err
};
/* mount option patterns; the NULL pattern terminates the table */
static match_table_t erofs_tokens = {
{Opt_user_xattr, "user_xattr"},
{Opt_nouser_xattr, "nouser_xattr"},
{Opt_acl, "acl"},
{Opt_noacl, "noacl"},
{Opt_cache_strategy, "cache_strategy=%s"},
{Opt_err, NULL}
};
/*
 * Parse a comma-separated mount option string and apply each option to the
 * superblock info as soon as it is recognized.
 *
 * @options is modified in place by strsep(); a NULL string means "no
 * options".  Options belonging to a feature that is not built in are logged
 * and otherwise ignored.
 *
 * Returns 0 on success, -EINVAL for an unrecognized option, or the error
 * from erofs_build_cache_strategy().  Options seen before a failing one
 * have already been applied at that point.
 */
static int erofs_parse_options(struct super_block *sb, char *options)
{
substring_t args[MAX_OPT_ARGS];
char *p;
int err;
if (!options)
return 0;
while ((p = strsep(&options, ","))) {
int token;
/* skip empty items such as the ones produced by ",," */
if (!*p)
continue;
args[0].to = args[0].from = NULL;
token = match_token(p, erofs_tokens, args);
switch (token) {
#ifdef CONFIG_EROFS_FS_XATTR
case Opt_user_xattr:
set_opt(EROFS_SB(sb), XATTR_USER);
break;
case Opt_nouser_xattr:
clear_opt(EROFS_SB(sb), XATTR_USER);
break;
#else
case Opt_user_xattr:
erofs_info(sb, "user_xattr options not supported");
break;
case Opt_nouser_xattr:
erofs_info(sb, "nouser_xattr options not supported");
break;
#endif
#ifdef CONFIG_EROFS_FS_POSIX_ACL
case Opt_acl:
set_opt(EROFS_SB(sb), POSIX_ACL);
break;
case Opt_noacl:
clear_opt(EROFS_SB(sb), POSIX_ACL);
break;
#else
case Opt_acl:
erofs_info(sb, "acl options not supported");
break;
case Opt_noacl:
erofs_info(sb, "noacl options not supported");
break;
#endif
case Opt_cache_strategy:
err = erofs_build_cache_strategy(sb, args);
if (err)
return err;
break;
default:
erofs_err(sb, "Unrecognized mount option \"%s\" or missing value", p);
return -EINVAL;
}
}
return 0;
}
#ifdef CONFIG_EROFS_FS_ZIP
static const struct address_space_operations managed_cache_aops;
/*
 * ->releasepage for the managed cache.
 *
 * Returns 1 if @page carries no private data; otherwise returns whatever
 * erofs_try_to_free_cached_page() reports (0 means the page is busy).
 */
static int erofs_managed_cache_releasepage(struct page *page, gfp_t gfp_mask)
{
	struct address_space *const mapping = page->mapping;

	DBG_BUGON(!PageLocked(page));
	DBG_BUGON(mapping->a_ops != &managed_cache_aops);

	if (!PagePrivate(page))
		return 1;

	return erofs_try_to_free_cached_page(mapping, page);
}
/*
 * ->invalidatepage for the managed cache.  Only a whole-page invalidation
 * does anything: the page is released, retrying (with cond_resched() in
 * between) for as long as it is reported busy.
 */
static void erofs_managed_cache_invalidatepage(struct page *page,
					       unsigned int offset,
					       unsigned int length)
{
	const unsigned int stop = length + offset;

	DBG_BUGON(!PageLocked(page));

	/* Check for potential overflow in debug mode */
	DBG_BUGON(stop > PAGE_SIZE || stop < length);

	/* partial invalidations are ignored */
	if (offset != 0 || stop != PAGE_SIZE)
		return;

	while (!erofs_managed_cache_releasepage(page, GFP_NOFS))
		cond_resched();
}
/* address space operations of the managed cache pseudo inode */
static const struct address_space_operations managed_cache_aops = {
.releasepage = erofs_managed_cache_releasepage,
.invalidatepage = erofs_managed_cache_invalidatepage,
};
static int erofs_init_managed_cache(struct super_block *sb)
{
struct erofs_sb_info *const sbi = EROFS_SB(sb);
struct inode *const inode = new_inode(sb);
if (!inode)
return -ENOMEM;
set_nlink(inode, 1);
inode->i_size = OFFSET_MAX;
inode->i_mapping->a_ops = &managed_cache_aops;
mapping_set_gfp_mask(inode->i_mapping,
GFP_NOFS | __GFP_HIGHMEM | __GFP_MOVABLE);
sbi->managed_cache = inode;
return 0;
}
#else
/* no managed cache without compression support; always succeeds */
static int erofs_init_managed_cache(struct super_block *sb) { return 0; }
#endif
/*
 * Fill in a freshly allocated super_block for an EROFS mount: set the block
 * size, read the on-disk superblock, apply default and user-supplied mount
 * options, look up the root inode and set up the shrinker and managed
 * cache.
 *
 * @data is the mount option string (may be NULL); @silent is unused.
 *
 * Returns 0 on success or a negative errno.  The error paths return without
 * freeing sbi here.
 * NOTE(review): sbi is then presumably released through erofs_kill_sb(),
 * which frees sb->s_fs_info -- confirm that it runs after a failed
 * fill_super.
 */
static int erofs_fill_super(struct super_block *sb, void *data, int silent)
{
struct inode *inode;
struct erofs_sb_info *sbi;
int err;
sb->s_magic = EROFS_SUPER_MAGIC;
if (!sb_set_blocksize(sb, EROFS_BLKSIZ)) {
erofs_err(sb, "failed to set erofs blksize");
return -EINVAL;
}
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
return -ENOMEM;
/* from here on sbi is owned by the super_block */
sb->s_fs_info = sbi;
err = erofs_read_superblock(sb);
if (err)
return err;
sb->s_flags |= SB_RDONLY | SB_NOATIME;
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_time_gran = 1;
sb->s_op = &erofs_sops;
sb->s_xattr = erofs_xattr_handlers;
/* set erofs default mount options */
erofs_default_options(sbi);
err = erofs_parse_options(sb, data);
if (err)
return err;
/* mirror the parsed ACL option into the VFS superblock flags */
if (test_opt(sbi, POSIX_ACL))
sb->s_flags |= SB_POSIXACL;
else
sb->s_flags &= ~SB_POSIXACL;
#ifdef CONFIG_EROFS_FS_ZIP
INIT_RADIX_TREE(&sbi->workstn_tree, GFP_ATOMIC);
#endif
/* get the root inode */
inode = erofs_iget(sb, ROOT_NID(sbi), true);
if (IS_ERR(inode))
return PTR_ERR(inode);
if (!S_ISDIR(inode->i_mode)) {
erofs_err(sb, "rootino(nid %llu) is not a directory(i_mode %o)",
ROOT_NID(sbi), inode->i_mode);
iput(inode);
return -EINVAL;
}
sb->s_root = d_make_root(inode);
if (!sb->s_root)
return -ENOMEM;
erofs_shrinker_register(sb);
/* sb->s_umount is already locked, SB_ACTIVE and SB_BORN are not set */
err = erofs_init_managed_cache(sb);
if (err)
return err;
/* NOTE(review): strsep() in erofs_parse_options() has already cut @data at the first comma */
erofs_info(sb, "mounted with opts: %s, root inode @ nid %llu.",
(char *)data, ROOT_NID(sbi));
return 0;
}
/* ->mount: mount an EROFS image from a block device via mount_bdev() */
static struct dentry *erofs_mount(struct file_system_type *fs_type, int flags,
				  const char *dev_name, void *data)
{
	struct dentry *root;

	root = mount_bdev(fs_type, flags, dev_name, data, erofs_fill_super);
	return root;
}
/*
 * ->kill_sb: shut down the block superblock and then free the EROFS
 * superblock info, if one was attached.
 *
 * could be triggered after deactivate_locked_super()
 * is called, thus including umount and failed to initialize.
 */
static void erofs_kill_sb(struct super_block *sb)
{
	struct erofs_sb_info *sbi;

	WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC);

	kill_block_super(sb);

	sbi = EROFS_SB(sb);
	if (sbi) {
		kfree(sbi);
		sb->s_fs_info = NULL;
	}
}
/*
 * ->put_super: unregister the shrinker and, with compression support, drop
 * the managed cache pseudo inode.
 *
 * called when ->s_root is non-NULL
 */
static void erofs_put_super(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	DBG_BUGON(!sbi);

	erofs_shrinker_unregister(sb);

#ifdef CONFIG_EROFS_FS_ZIP
	iput(sbi->managed_cache);
	sbi->managed_cache = NULL;
#endif
}
/* filesystem type registered as "erofs"; requires a backing block device */
static struct file_system_type erofs_fs_type = {
.owner = THIS_MODULE,
.name = "erofs",
.mount = erofs_mount,
.kill_sb = erofs_kill_sb,
.fs_flags = FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("erofs");
/*
 * Module init: create the inode cache, set up the shrinker, the per-CPU
 * buffers and the compression subsystem, then register the filesystem.
 * On failure the steps that already succeeded are undone in reverse order.
 *
 * Returns 0 on success or a negative errno.
 */
static int __init erofs_module_init(void)
{
int err;
erofs_check_ondisk_layout_definitions();
erofs_inode_cachep = kmem_cache_create("erofs_inode",
sizeof(struct erofs_inode), 0,
SLAB_RECLAIM_ACCOUNT,
erofs_inode_init_once);
if (!erofs_inode_cachep) {
err = -ENOMEM;
goto icache_err;
}
err = erofs_init_shrinker();
if (err)
goto shrinker_err;
/* returns void, so there is no matching step in the unwind ladder below */
erofs_pcpubuf_init();
err = z_erofs_init_zip_subsystem();
if (err)
goto zip_err;
err = register_filesystem(&erofs_fs_type);
if (err)
goto fs_err;
return 0;
/* unwind ladder: each label undoes the step before the one that failed */
fs_err:
z_erofs_exit_zip_subsystem();
zip_err:
erofs_exit_shrinker();
shrinker_err:
kmem_cache_destroy(erofs_inode_cachep);
icache_err:
return err;
}
/*
 * Module exit: unregister the filesystem first, then tear down the
 * compression subsystem, the shrinker, the inode cache and the per-CPU
 * buffers.
 */
static void __exit erofs_module_exit(void)
{
unregister_filesystem(&erofs_fs_type);
z_erofs_exit_zip_subsystem();
erofs_exit_shrinker();
/* Ensure all RCU free inodes are safe before cache is destroyed. */
rcu_barrier();
kmem_cache_destroy(erofs_inode_cachep);
erofs_pcpubuf_exit();
}
/* get filesystem statistics */
static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
struct erofs_sb_info *sbi = EROFS_SB(sb);
u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
buf->f_type = sb->s_magic;
buf->f_bsize = EROFS_BLKSIZ;
buf->f_blocks = sbi->blocks;
buf->f_bfree = buf->f_bavail = 0;
buf->f_files = ULLONG_MAX;
buf->f_ffree = ULLONG_MAX - sbi->inos;
buf->f_namelen = EROFS_NAME_LEN;
buf->f_fsid.val[0] = (u32)id;
buf->f_fsid.val[1] = (u32)(id >> 32);
return 0;
}
/*
 * ->show_options: emit the effective mount options.  Only options whose
 * feature is built in are shown.  Always returns 0.
 */
static int erofs_show_options(struct seq_file *seq, struct dentry *root)
{
	struct erofs_sb_info *sbi __maybe_unused = EROFS_SB(root->d_sb);

#ifdef CONFIG_EROFS_FS_XATTR
	seq_puts(seq, test_opt(sbi, XATTR_USER) ?
		      ",user_xattr" : ",nouser_xattr");
#endif
#ifdef CONFIG_EROFS_FS_POSIX_ACL
	seq_puts(seq, test_opt(sbi, POSIX_ACL) ? ",acl" : ",noacl");
#endif
#ifdef CONFIG_EROFS_FS_ZIP
	{
		const char *strategy = NULL;

		if (sbi->cache_strategy == EROFS_ZIP_CACHE_DISABLED)
			strategy = ",cache_strategy=disabled";
		else if (sbi->cache_strategy == EROFS_ZIP_CACHE_READAHEAD)
			strategy = ",cache_strategy=readahead";
		else if (sbi->cache_strategy == EROFS_ZIP_CACHE_READAROUND)
			strategy = ",cache_strategy=readaround";

		/* an unknown strategy value prints nothing */
		if (strategy)
			seq_puts(seq, strategy);
	}
#endif
	return 0;
}
/*
 * ->remount_fs: re-parse the mount options and apply them.
 *
 * erofs_parse_options() applies each option as soon as it is recognized, so
 * a failure on a later token leaves earlier ones applied.  To keep a failed
 * remount from changing anything, every setting the parser can touch is
 * saved first and restored on error: the mount_opt bit mask and, with
 * compression support, the cache strategy.
 *
 * @flags: remount flags; SB_RDONLY is always forced on
 * @data:  option string (may be NULL)
 *
 * Returns 0 on success or the error from erofs_parse_options().
 */
static int erofs_remount(struct super_block *sb, int *flags, char *data)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	unsigned int org_mnt_opt = sbi->mount_opt;
#ifdef CONFIG_EROFS_FS_ZIP
	unsigned int org_cache_strategy = sbi->cache_strategy;
#endif
	int err;

	DBG_BUGON(!sb_rdonly(sb));
	err = erofs_parse_options(sb, data);
	if (err)
		goto out;

	/* mirror the parsed ACL option into the VFS superblock flags */
	if (test_opt(sbi, POSIX_ACL))
		sb->s_flags |= SB_POSIXACL;
	else
		sb->s_flags &= ~SB_POSIXACL;

	*flags |= SB_RDONLY;
	return 0;
out:
	/* roll back everything a partially parsed option string changed */
	sbi->mount_opt = org_mnt_opt;
#ifdef CONFIG_EROFS_FS_ZIP
	sbi->cache_strategy = org_cache_strategy;
#endif
	return err;
}
/* superblock operations installed by erofs_fill_super() */
const struct super_operations erofs_sops = {
.put_super = erofs_put_super,
.alloc_inode = erofs_alloc_inode,
.destroy_inode = erofs_destroy_inode,
.statfs = erofs_statfs,
.show_options = erofs_show_options,
.remount_fs = erofs_remount,
};
module_init(erofs_module_init);
module_exit(erofs_module_exit);
MODULE_DESCRIPTION("Enhanced ROM File System");
MODULE_AUTHOR("Gao Xiang, Chao Yu, Miao Xie, CONSUMER BG, HUAWEI Inc.");
MODULE_LICENSE("GPL");

View File

@@ -1,11 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0
*
* Tagged pointer implementation
*
* Copyright (C) 2018 Gao Xiang <gaoxiang25@huawei.com>
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* A tagged pointer implementation
*/
#ifndef _LINUX_TAGPTR_H
#define _LINUX_TAGPTR_H
#ifndef __EROFS_FS_TAGPTR_H
#define __EROFS_FS_TAGPTR_H
#include <linux/types.h>
#include <linux/build_bug.h>
@@ -106,5 +104,4 @@ tagptr_init(o, cmpxchg(&ptptr->v, o.v, n.v)); })
ptptr->v &= ~tags; \
*ptptr; })
#endif
#endif /* __EROFS_FS_TAGPTR_H */

View File

@@ -1,16 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/drivers/staging/erofs/utils.c
*
* Copyright (C) 2018 HUAWEI, Inc.
* http://www.huawei.com/
* Created by Gao Xiang <gaoxiang25@huawei.com>
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of the Linux
* distribution for more details.
* https://www.huawei.com/
*/
#include "internal.h"
#include <linux/pagevec.h>
@@ -20,43 +12,55 @@ struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp)
if (!list_empty(pool)) {
page = lru_to_page(pool);
DBG_BUGON(page_ref_count(page) != 1);
list_del(&page->lru);
} else {
page = alloc_pages(gfp | __GFP_NOFAIL, 0);
page = alloc_page(gfp);
}
return page;
}
#ifdef CONFIG_EROFS_FS_ZIP
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;
#ifdef CONFIG_EROFS_FS_ZIP
#define __erofs_workgroup_get(grp) atomic_inc(&(grp)->refcount)
#define __erofs_workgroup_put(grp) atomic_dec(&(grp)->refcount)
/* radix_tree and the future XArray both don't use tagptr_t yet */
struct erofs_workgroup *erofs_find_workgroup(
struct super_block *sb, pgoff_t index, bool *tag)
/*
 * Try to take one reference on @grp.
 *
 * The current count is obtained from erofs_wait_on_workgroup_freezed();
 * a value <= 0 makes this function give up and return -1.  Otherwise the
 * count is bumped with a compare-and-exchange, retrying from the start
 * whenever the count changed underneath us.  Returns 0 on success.
 */
static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
	int cur;

	do {
		cur = erofs_wait_on_workgroup_freezed(grp);
		if (cur <= 0)
			return -1;
	} while (atomic_cmpxchg(&grp->refcount, cur, cur + 1) != cur);

	/* decrease refcount paired by erofs_workgroup_put */
	if (cur == 1)
		atomic_long_dec(&erofs_global_shrink_cnt);
	return 0;
}
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
pgoff_t index)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
struct erofs_workgroup *grp;
int oldcount;
repeat:
rcu_read_lock();
grp = radix_tree_lookup(&sbi->workstn_tree, index);
if (grp != NULL) {
*tag = radix_tree_exceptional_entry(grp);
grp = (void *)((unsigned long)grp &
~RADIX_TREE_EXCEPTIONAL_ENTRY);
if (erofs_workgroup_get(grp, &oldcount)) {
if (grp) {
if (erofs_workgroup_get(grp)) {
/* prefer to relax rcu read side */
rcu_read_unlock();
goto repeat;
}
/* decrease refcount added by erofs_workgroup_put */
if (unlikely(oldcount == 1))
atomic_long_dec(&erofs_global_shrink_cnt);
DBG_BUGON(index != grp->index);
}
rcu_read_unlock();
@@ -64,14 +68,13 @@ struct erofs_workgroup *erofs_find_workgroup(
}
int erofs_register_workgroup(struct super_block *sb,
struct erofs_workgroup *grp,
bool tag)
struct erofs_workgroup *grp)
{
struct erofs_sb_info *sbi;
int err;
/* grp shouldn't be broken or used before */
if (unlikely(atomic_read(&grp->refcount) != 1)) {
if (atomic_read(&grp->refcount) != 1) {
DBG_BUGON(1);
return -EINVAL;
}
@@ -81,35 +84,28 @@ int erofs_register_workgroup(struct super_block *sb,
return err;
sbi = EROFS_SB(sb);
erofs_workstn_lock(sbi);
if (tag)
grp = (void *)((unsigned long)grp |
1UL << RADIX_TREE_EXCEPTIONAL_SHIFT);
xa_lock(&sbi->workstn_tree);
/*
* Bump up reference count before making this workgroup
* visible to other users in order to avoid potential UAF
* without serialized by erofs_workstn_lock.
* without serialized by workstn_lock.
*/
__erofs_workgroup_get(grp);
err = radix_tree_insert(&sbi->workstn_tree,
grp->index, grp);
if (unlikely(err))
err = radix_tree_insert(&sbi->workstn_tree, grp->index, grp);
if (err)
/*
* it's safe to decrease since the workgroup isn't visible
* and refcount >= 2 (cannot be freezed).
*/
__erofs_workgroup_put(grp);
erofs_workstn_unlock(sbi);
xa_unlock(&sbi->workstn_tree);
radix_tree_preload_end();
return err;
}
extern void erofs_workgroup_free_rcu(struct erofs_workgroup *grp);
static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
atomic_long_dec(&erofs_global_shrink_cnt);
@@ -127,33 +123,22 @@ int erofs_workgroup_put(struct erofs_workgroup *grp)
return count;
}
#ifdef EROFS_FS_HAS_MANAGED_CACHE
/* for cache-managed case, customized reclaim paths exist */
static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
struct erofs_workgroup *grp)
{
erofs_workgroup_unfreeze(grp, 0);
__erofs_workgroup_free(grp);
}
bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
struct erofs_workgroup *grp,
bool cleanup)
{
void *entry;
/*
* for managed cache enabled, the refcount of workgroups
* themselves could be < 0 (freezed). So there is no guarantee
* that all refcount > 0 if managed cache is enabled.
* If managed cache is on, refcount of workgroups
* themselves could be < 0 (freezed). In other words,
* there is no guarantee that all refcounts > 0.
*/
if (!erofs_workgroup_try_to_freeze(grp, 1))
return false;
/*
* note that all cached pages should be unlinked
* before delete it from the radix tree.
* Otherwise some cached pages of an orphan old workgroup
* could be still linked after the new one is available.
* Note that all cached pages should be unattached
* before deleted from the radix tree. Otherwise some
* cached pages could be still attached to the orphan
* old workgroup when the new one is available in the tree.
*/
if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
erofs_workgroup_unfreeze(grp, 1);
@@ -161,87 +146,52 @@ bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
}
/*
* it is impossible to fail after the workgroup is freezed,
* It's impossible to fail after the workgroup is freezed,
* however in order to avoid some race conditions, add a
* DBG_BUGON to observe this in advance.
*/
entry = radix_tree_delete(&sbi->workstn_tree, grp->index);
DBG_BUGON((void *)((unsigned long)entry &
~RADIX_TREE_EXCEPTIONAL_ENTRY) != grp);
DBG_BUGON(radix_tree_delete(&sbi->workstn_tree, grp->index) != grp);
/*
* if managed cache is enable, the last refcount
* should indicate the related workstation.
*/
erofs_workgroup_unfreeze_final(grp);
/* last refcount should be connected with its managed pslot. */
erofs_workgroup_unfreeze(grp, 0);
__erofs_workgroup_free(grp);
return true;
}
#else
/* for nocache case, no customized reclaim path at all */
bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
struct erofs_workgroup *grp,
bool cleanup)
{
int cnt = atomic_read(&grp->refcount);
void *entry;
DBG_BUGON(cnt <= 0);
DBG_BUGON(cleanup && cnt != 1);
if (cnt > 1)
return false;
entry = radix_tree_delete(&sbi->workstn_tree, grp->index);
DBG_BUGON((void *)((unsigned long)entry &
~RADIX_TREE_EXCEPTIONAL_ENTRY) != grp);
/* (rarely) could be grabbed again when freeing */
erofs_workgroup_put(grp);
return true;
}
#endif
unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
unsigned long nr_shrink,
bool cleanup)
static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
unsigned long nr_shrink)
{
pgoff_t first_index = 0;
void *batch[PAGEVEC_SIZE];
unsigned freed = 0;
unsigned int freed = 0;
int i, found;
repeat:
erofs_workstn_lock(sbi);
xa_lock(&sbi->workstn_tree);
found = radix_tree_gang_lookup(&sbi->workstn_tree,
batch, first_index, PAGEVEC_SIZE);
batch, first_index, PAGEVEC_SIZE);
for (i = 0; i < found; ++i) {
struct erofs_workgroup *grp = (void *)
((unsigned long)batch[i] &
~RADIX_TREE_EXCEPTIONAL_ENTRY);
struct erofs_workgroup *grp = batch[i];
first_index = grp->index + 1;
/* try to shrink each valid workgroup */
if (!erofs_try_to_release_workgroup(sbi, grp, cleanup))
if (!erofs_try_to_release_workgroup(sbi, grp))
continue;
++freed;
if (unlikely(!--nr_shrink))
if (!--nr_shrink)
break;
}
erofs_workstn_unlock(sbi);
xa_unlock(&sbi->workstn_tree);
if (i && nr_shrink)
goto repeat;
return freed;
}
#endif
/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;
@@ -249,7 +199,7 @@ static unsigned int shrinker_run_no;
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);
void erofs_register_super(struct super_block *sb)
void erofs_shrinker_register(struct super_block *sb)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
@@ -260,21 +210,28 @@ void erofs_register_super(struct super_block *sb)
spin_unlock(&erofs_sb_list_lock);
}
void erofs_unregister_super(struct super_block *sb)
void erofs_shrinker_unregister(struct super_block *sb)
{
struct erofs_sb_info *const sbi = EROFS_SB(sb);
mutex_lock(&sbi->umount_mutex);
/* clean up all remaining workgroups in memory */
erofs_shrink_workstation(sbi, ~0UL);
spin_lock(&erofs_sb_list_lock);
list_del(&EROFS_SB(sb)->list);
list_del(&sbi->list);
spin_unlock(&erofs_sb_list_lock);
mutex_unlock(&sbi->umount_mutex);
}
unsigned long erofs_shrink_count(struct shrinker *shrink,
struct shrink_control *sc)
static unsigned long erofs_shrink_count(struct shrinker *shrink,
struct shrink_control *sc)
{
return atomic_long_read(&erofs_global_shrink_cnt);
}
unsigned long erofs_shrink_scan(struct shrinker *shrink,
struct shrink_control *sc)
static unsigned long erofs_shrink_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
struct erofs_sb_info *sbi;
struct list_head *p;
@@ -284,9 +241,9 @@ unsigned long erofs_shrink_scan(struct shrinker *shrink,
unsigned long freed = 0;
spin_lock(&erofs_sb_list_lock);
do
do {
run_no = ++shrinker_run_no;
while (run_no == 0);
} while (run_no == 0);
/* Iterate over all mounted superblocks and try to shrink them */
p = erofs_sb_list.next;
@@ -308,9 +265,7 @@ unsigned long erofs_shrink_scan(struct shrinker *shrink,
spin_unlock(&erofs_sb_list_lock);
sbi->shrinker_run_no = run_no;
#ifdef CONFIG_EROFS_FS_ZIP
freed += erofs_shrink_workstation(sbi, nr - freed, false);
#endif
freed += erofs_shrink_workstation(sbi, nr - freed);
spin_lock(&erofs_sb_list_lock);
/* Get the next list element before we move this one */
@@ -330,3 +285,19 @@ unsigned long erofs_shrink_scan(struct shrinker *shrink,
return freed;
}
/* shrinker descriptor: count/scan callbacks with the default seek cost */
static struct shrinker erofs_shrinker_info = {
	.count_objects	= erofs_shrink_count,
	.scan_objects	= erofs_shrink_scan,
	.seeks		= DEFAULT_SEEKS,
};

/* Register the EROFS shrinker; returns register_shrinker()'s result. */
int __init erofs_init_shrinker(void)
{
	int err = register_shrinker(&erofs_shrinker_info);

	return err;
}

/* Unregister the EROFS shrinker registered by erofs_init_shrinker(). */
void erofs_exit_shrinker(void)
{
	unregister_shrinker(&erofs_shrinker_info);
}
#endif /* !CONFIG_EROFS_FS_ZIP */

View File

@@ -1,14 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/drivers/staging/erofs/xattr.c
*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* http://www.huawei.com/
* Created by Gao Xiang <gaoxiang25@huawei.com>
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of the Linux
* distribution for more details.
* https://www.huawei.com/
*/
#include <linux/security.h>
#include "xattr.h"
@@ -19,13 +12,13 @@ struct xattr_iter {
void *kaddr;
erofs_blk_t blkaddr;
unsigned ofs;
unsigned int ofs;
};
static inline void xattr_iter_end(struct xattr_iter *it, bool atomic)
{
/* the only user of kunmap() is 'init_inode_xattrs' */
if (unlikely(!atomic))
if (!atomic)
kunmap(it->page);
else
kunmap_atomic(it->kaddr);
@@ -44,23 +37,30 @@ static inline void xattr_iter_end_final(struct xattr_iter *it)
static int init_inode_xattrs(struct inode *inode)
{
struct erofs_vnode *const vi = EROFS_V(inode);
struct erofs_inode *const vi = EROFS_I(inode);
struct xattr_iter it;
unsigned i;
unsigned int i;
struct erofs_xattr_ibody_header *ih;
struct super_block *sb;
struct erofs_sb_info *sbi;
bool atomic_map;
int ret = 0;
/* the most case is that xattrs of this inode are initialized. */
if (test_bit(EROFS_V_EA_INITED_BIT, &vi->flags))
if (test_bit(EROFS_I_EA_INITED_BIT, &vi->flags)) {
/*
* paired with smp_mb() at the end of the function to ensure
* fields will only be observed after the bit is set.
*/
smp_mb();
return 0;
}
if (wait_on_bit_lock(&vi->flags, EROFS_V_BL_XATTR_BIT, TASK_KILLABLE))
if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_XATTR_BIT, TASK_KILLABLE))
return -ERESTARTSYS;
/* someone has initialized xattrs for us? */
if (test_bit(EROFS_V_EA_INITED_BIT, &vi->flags))
if (test_bit(EROFS_I_EA_INITED_BIT, &vi->flags))
goto out_unlock;
/*
@@ -72,25 +72,29 @@ static int init_inode_xattrs(struct inode *inode)
* undefined right now (maybe use later with some new sb feature).
*/
if (vi->xattr_isize == sizeof(struct erofs_xattr_ibody_header)) {
errln("xattr_isize %d of nid %llu is not supported yet",
vi->xattr_isize, vi->nid);
ret = -ENOTSUPP;
erofs_err(inode->i_sb,
"xattr_isize %d of nid %llu is not supported yet",
vi->xattr_isize, vi->nid);
ret = -EOPNOTSUPP;
goto out_unlock;
} else if (vi->xattr_isize < sizeof(struct erofs_xattr_ibody_header)) {
if (unlikely(vi->xattr_isize)) {
if (vi->xattr_isize) {
erofs_err(inode->i_sb,
"bogus xattr ibody @ nid %llu", vi->nid);
DBG_BUGON(1);
ret = -EIO;
ret = -EFSCORRUPTED;
goto out_unlock; /* xattr ondisk layout error */
}
ret = -ENOATTR;
goto out_unlock;
}
sbi = EROFS_I_SB(inode);
sb = inode->i_sb;
sbi = EROFS_SB(sb);
it.blkaddr = erofs_blknr(iloc(sbi, vi->nid) + vi->inode_isize);
it.ofs = erofs_blkoff(iloc(sbi, vi->nid) + vi->inode_isize);
it.page = erofs_get_inline_page(inode, it.blkaddr);
it.page = erofs_get_meta_page(sb, it.blkaddr);
if (IS_ERR(it.page)) {
ret = PTR_ERR(it.page);
goto out_unlock;
@@ -115,13 +119,12 @@ static int init_inode_xattrs(struct inode *inode)
it.ofs += sizeof(struct erofs_xattr_ibody_header);
for (i = 0; i < vi->xattr_shared_count; ++i) {
if (unlikely(it.ofs >= EROFS_BLKSIZ)) {
if (it.ofs >= EROFS_BLKSIZ) {
/* cannot be unaligned */
BUG_ON(it.ofs != EROFS_BLKSIZ);
DBG_BUGON(it.ofs != EROFS_BLKSIZ);
xattr_iter_end(&it, atomic_map);
it.page = erofs_get_meta_page(inode->i_sb,
++it.blkaddr, S_ISDIR(inode->i_mode));
it.page = erofs_get_meta_page(sb, ++it.blkaddr);
if (IS_ERR(it.page)) {
kfree(vi->xattr_shared_xattrs);
vi->xattr_shared_xattrs = NULL;
@@ -139,18 +142,29 @@ static int init_inode_xattrs(struct inode *inode)
}
xattr_iter_end(&it, atomic_map);
set_bit(EROFS_V_EA_INITED_BIT, &vi->flags);
/* paired with smp_mb() at the beginning of the function. */
smp_mb();
set_bit(EROFS_I_EA_INITED_BIT, &vi->flags);
out_unlock:
clear_and_wake_up_bit(EROFS_V_BL_XATTR_BIT, &vi->flags);
clear_and_wake_up_bit(EROFS_I_BL_XATTR_BIT, &vi->flags);
return ret;
}
/*
* the general idea for these return values is
* if 0 is returned, go on processing the current xattr;
* 1 (> 0) is returned, skip this round to process the next xattr;
* -err (< 0) is returned, an error (maybe ENOATTR) occurred
* and need to be handled
*/
struct xattr_iter_handlers {
int (*entry)(struct xattr_iter *, struct erofs_xattr_entry *);
int (*name)(struct xattr_iter *, unsigned, char *, unsigned);
int (*alloc_buffer)(struct xattr_iter *, unsigned);
void (*value)(struct xattr_iter *, unsigned, char *, unsigned);
int (*entry)(struct xattr_iter *_it, struct erofs_xattr_entry *entry);
int (*name)(struct xattr_iter *_it, unsigned int processed, char *buf,
unsigned int len);
int (*alloc_buffer)(struct xattr_iter *_it, unsigned int value_sz);
void (*value)(struct xattr_iter *_it, unsigned int processed, char *buf,
unsigned int len);
};
static inline int xattr_iter_fixup(struct xattr_iter *it)
@@ -161,7 +175,8 @@ static inline int xattr_iter_fixup(struct xattr_iter *it)
xattr_iter_end(it, true);
it->blkaddr += erofs_blknr(it->ofs);
it->page = erofs_get_meta_page(it->sb, it->blkaddr, false);
it->page = erofs_get_meta_page(it->sb, it->blkaddr);
if (IS_ERR(it->page)) {
int err = PTR_ERR(it->page);
@@ -175,15 +190,15 @@ static inline int xattr_iter_fixup(struct xattr_iter *it)
}
static int inline_xattr_iter_begin(struct xattr_iter *it,
struct inode *inode)
struct inode *inode)
{
struct erofs_vnode *const vi = EROFS_V(inode);
struct erofs_inode *const vi = EROFS_I(inode);
struct erofs_sb_info *const sbi = EROFS_SB(inode->i_sb);
unsigned xattr_header_sz, inline_xattr_ofs;
unsigned int xattr_header_sz, inline_xattr_ofs;
xattr_header_sz = inlinexattr_header_size(inode);
if (unlikely(xattr_header_sz >= vi->xattr_isize)) {
BUG_ON(xattr_header_sz > vi->xattr_isize);
if (xattr_header_sz >= vi->xattr_isize) {
DBG_BUGON(xattr_header_sz > vi->xattr_isize);
return -ENOATTR;
}
@@ -192,7 +207,7 @@ static int inline_xattr_iter_begin(struct xattr_iter *it,
it->blkaddr = erofs_blknr(iloc(sbi, vi->nid) + inline_xattr_ofs);
it->ofs = erofs_blkoff(iloc(sbi, vi->nid) + inline_xattr_ofs);
it->page = erofs_get_inline_page(inode, it->blkaddr);
it->page = erofs_get_meta_page(inode->i_sb, it->blkaddr);
if (IS_ERR(it->page))
return PTR_ERR(it->page);
@@ -200,11 +215,16 @@ static int inline_xattr_iter_begin(struct xattr_iter *it,
return vi->xattr_isize - xattr_header_sz;
}
/*
* Regardless of success or failure, `xattr_foreach' will end up with
* `ofs' pointing to the next xattr item rather than an arbitrary position.
*/
static int xattr_foreach(struct xattr_iter *it,
const struct xattr_iter_handlers *op, unsigned int *tlimit)
const struct xattr_iter_handlers *op,
unsigned int *tlimit)
{
struct erofs_xattr_entry entry;
unsigned value_sz, processed, slice;
unsigned int value_sz, processed, slice;
int err;
/* 0. fixup blkaddr, ofs, ipage */
@@ -218,10 +238,14 @@ static int xattr_foreach(struct xattr_iter *it,
* therefore entry should be in the page
*/
entry = *(struct erofs_xattr_entry *)(it->kaddr + it->ofs);
if (tlimit != NULL) {
unsigned entry_sz = EROFS_XATTR_ENTRY_SIZE(&entry);
if (tlimit) {
unsigned int entry_sz = erofs_xattr_entry_size(&entry);
BUG_ON(*tlimit < entry_sz);
/* xattr on-disk corruption: xattr entry beyond xattr_isize */
if (*tlimit < entry_sz) {
DBG_BUGON(1);
return -EFSCORRUPTED;
}
*tlimit -= entry_sz;
}
@@ -240,7 +264,7 @@ static int xattr_foreach(struct xattr_iter *it,
while (processed < entry.e_name_len) {
if (it->ofs >= EROFS_BLKSIZ) {
BUG_ON(it->ofs > EROFS_BLKSIZ);
DBG_BUGON(it->ofs > EROFS_BLKSIZ);
err = xattr_iter_fixup(it);
if (err)
@@ -248,8 +272,8 @@ static int xattr_foreach(struct xattr_iter *it,
it->ofs = 0;
}
slice = min_t(unsigned, PAGE_SIZE - it->ofs,
entry.e_name_len - processed);
slice = min_t(unsigned int, PAGE_SIZE - it->ofs,
entry.e_name_len - processed);
/* handle name */
err = op->name(it, processed, it->kaddr + it->ofs, slice);
@@ -265,7 +289,7 @@ static int xattr_foreach(struct xattr_iter *it,
/* 3. handle xattr value */
processed = 0;
if (op->alloc_buffer != NULL) {
if (op->alloc_buffer) {
err = op->alloc_buffer(it, value_sz);
if (err) {
it->ofs += value_sz;
@@ -275,7 +299,7 @@ static int xattr_foreach(struct xattr_iter *it,
while (processed < value_sz) {
if (it->ofs >= EROFS_BLKSIZ) {
BUG_ON(it->ofs > EROFS_BLKSIZ);
DBG_BUGON(it->ofs > EROFS_BLKSIZ);
err = xattr_iter_fixup(it);
if (err)
@@ -283,17 +307,17 @@ static int xattr_foreach(struct xattr_iter *it,
it->ofs = 0;
}
slice = min_t(unsigned, PAGE_SIZE - it->ofs,
value_sz - processed);
slice = min_t(unsigned int, PAGE_SIZE - it->ofs,
value_sz - processed);
op->value(it, processed, it->kaddr + it->ofs, slice);
it->ofs += slice;
processed += slice;
}
out:
/* we assume that ofs is aligned with 4 bytes */
/* xattrs should be 4-byte aligned (on-disk constraint) */
it->ofs = EROFS_XATTR_ALIGN(it->ofs);
return err;
return err < 0 ? err : 0;
}
struct getxattr_iter {
@@ -305,7 +329,7 @@ struct getxattr_iter {
};
static int xattr_entrymatch(struct xattr_iter *_it,
struct erofs_xattr_entry *entry)
struct erofs_xattr_entry *entry)
{
struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
@@ -314,7 +338,7 @@ static int xattr_entrymatch(struct xattr_iter *_it,
}
static int xattr_namematch(struct xattr_iter *_it,
unsigned processed, char *buf, unsigned len)
unsigned int processed, char *buf, unsigned int len)
{
struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
@@ -322,17 +346,18 @@ static int xattr_namematch(struct xattr_iter *_it,
}
static int xattr_checkbuffer(struct xattr_iter *_it,
unsigned value_sz)
unsigned int value_sz)
{
struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
int err = it->buffer_size < value_sz ? -ERANGE : 0;
it->buffer_size = value_sz;
return it->buffer == NULL ? 1 : err;
return !it->buffer ? 1 : err;
}
static void xattr_copyvalue(struct xattr_iter *_it,
unsigned processed, char *buf, unsigned len)
unsigned int processed,
char *buf, unsigned int len)
{
struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
@@ -349,7 +374,7 @@ static const struct xattr_iter_handlers find_xattr_handlers = {
static int inline_getxattr(struct inode *inode, struct getxattr_iter *it)
{
int ret;
unsigned remaining;
unsigned int remaining;
ret = inline_xattr_iter_begin(&it->it, inode);
if (ret < 0)
@@ -358,22 +383,20 @@ static int inline_getxattr(struct inode *inode, struct getxattr_iter *it)
remaining = ret;
while (remaining) {
ret = xattr_foreach(&it->it, &find_xattr_handlers, &remaining);
if (ret >= 0)
break;
if (ret != -ENOATTR) /* -ENOMEM, -EIO, etc. */
if (ret != -ENOATTR)
break;
}
xattr_iter_end_final(&it->it);
return ret < 0 ? ret : it->buffer_size;
return ret ? ret : it->buffer_size;
}
static int shared_getxattr(struct inode *inode, struct getxattr_iter *it)
{
struct erofs_vnode *const vi = EROFS_V(inode);
struct erofs_sb_info *const sbi = EROFS_SB(inode->i_sb);
unsigned i;
struct erofs_inode *const vi = EROFS_I(inode);
struct super_block *const sb = inode->i_sb;
struct erofs_sb_info *const sbi = EROFS_SB(sb);
unsigned int i;
int ret = -ENOATTR;
for (i = 0; i < vi->xattr_shared_count; ++i) {
@@ -386,8 +409,7 @@ static int shared_getxattr(struct inode *inode, struct getxattr_iter *it)
if (i)
xattr_iter_end(&it->it, true);
it->it.page = erofs_get_meta_page(inode->i_sb,
blkaddr, false);
it->it.page = erofs_get_meta_page(sb, blkaddr);
if (IS_ERR(it->it.page))
return PTR_ERR(it->it.page);
@@ -396,16 +418,13 @@ static int shared_getxattr(struct inode *inode, struct getxattr_iter *it)
}
ret = xattr_foreach(&it->it, &find_xattr_handlers, NULL);
if (ret >= 0)
break;
if (ret != -ENOATTR) /* -ENOMEM, -EIO, etc. */
if (ret != -ENOATTR)
break;
}
if (vi->xattr_shared_count)
xattr_iter_end_final(&it->it);
return ret < 0 ? ret : it->buffer_size;
return ret ? ret : it->buffer_size;
}
static bool erofs_xattr_user_list(struct dentry *dentry)
@@ -419,13 +438,13 @@ static bool erofs_xattr_trusted_list(struct dentry *dentry)
}
int erofs_getxattr(struct inode *inode, int index,
const char *name,
void *buffer, size_t buffer_size)
const char *name,
void *buffer, size_t buffer_size)
{
int ret;
struct getxattr_iter it;
if (unlikely(name == NULL))
if (!name)
return -EINVAL;
ret = init_inode_xattrs(inode);
@@ -450,8 +469,8 @@ int erofs_getxattr(struct inode *inode, int index,
}
static int erofs_xattr_generic_get(const struct xattr_handler *handler,
struct dentry *unused, struct inode *inode,
const char *name, void *buffer, size_t size)
struct dentry *unused, struct inode *inode,
const char *name, void *buffer, size_t size)
{
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
@@ -461,8 +480,6 @@ static int erofs_xattr_generic_get(const struct xattr_handler *handler,
return -EOPNOTSUPP;
break;
case EROFS_XATTR_INDEX_TRUSTED:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
break;
case EROFS_XATTR_INDEX_SECURITY:
break;
@@ -517,24 +534,23 @@ struct listxattr_iter {
};
static int xattr_entrylist(struct xattr_iter *_it,
struct erofs_xattr_entry *entry)
struct erofs_xattr_entry *entry)
{
struct listxattr_iter *it =
container_of(_it, struct listxattr_iter, it);
unsigned prefix_len;
unsigned int prefix_len;
const char *prefix;
const struct xattr_handler *h =
erofs_xattr_handler(entry->e_name_index);
if (h == NULL || (h->list != NULL && !h->list(it->dentry)))
if (!h || (h->list && !h->list(it->dentry)))
return 1;
/* Note that at least one of 'prefix' and 'name' should be non-NULL */
prefix = h->prefix != NULL ? h->prefix : h->name;
prefix = xattr_prefix(h);
prefix_len = strlen(prefix);
if (it->buffer == NULL) {
if (!it->buffer) {
it->buffer_ofs += prefix_len + entry->e_name_len + 1;
return 1;
}
@@ -549,7 +565,7 @@ static int xattr_entrylist(struct xattr_iter *_it,
}
static int xattr_namelist(struct xattr_iter *_it,
unsigned processed, char *buf, unsigned len)
unsigned int processed, char *buf, unsigned int len)
{
struct listxattr_iter *it =
container_of(_it, struct listxattr_iter, it);
@@ -560,7 +576,7 @@ static int xattr_namelist(struct xattr_iter *_it,
}
static int xattr_skipvalue(struct xattr_iter *_it,
unsigned value_sz)
unsigned int value_sz)
{
struct listxattr_iter *it =
container_of(_it, struct listxattr_iter, it);
@@ -579,7 +595,7 @@ static const struct xattr_iter_handlers list_xattr_handlers = {
static int inline_listxattr(struct listxattr_iter *it)
{
int ret;
unsigned remaining;
unsigned int remaining;
ret = inline_xattr_iter_begin(&it->it, d_inode(it->dentry));
if (ret < 0)
@@ -588,19 +604,20 @@ static int inline_listxattr(struct listxattr_iter *it)
remaining = ret;
while (remaining) {
ret = xattr_foreach(&it->it, &list_xattr_handlers, &remaining);
if (ret < 0)
if (ret)
break;
}
xattr_iter_end_final(&it->it);
return ret < 0 ? ret : it->buffer_ofs;
return ret ? ret : it->buffer_ofs;
}
static int shared_listxattr(struct listxattr_iter *it)
{
struct inode *const inode = d_inode(it->dentry);
struct erofs_vnode *const vi = EROFS_V(inode);
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
unsigned i;
struct erofs_inode *const vi = EROFS_I(inode);
struct super_block *const sb = inode->i_sb;
struct erofs_sb_info *const sbi = EROFS_SB(sb);
unsigned int i;
int ret = 0;
for (i = 0; i < vi->xattr_shared_count; ++i) {
@@ -612,8 +629,7 @@ static int shared_listxattr(struct listxattr_iter *it)
if (i)
xattr_iter_end(&it->it, true);
it->it.page = erofs_get_meta_page(inode->i_sb,
blkaddr, false);
it->it.page = erofs_get_meta_page(sb, blkaddr);
if (IS_ERR(it->it.page))
return PTR_ERR(it->it.page);
@@ -622,17 +638,17 @@ static int shared_listxattr(struct listxattr_iter *it)
}
ret = xattr_foreach(&it->it, &list_xattr_handlers, NULL);
if (ret < 0)
if (ret)
break;
}
if (vi->xattr_shared_count)
xattr_iter_end_final(&it->it);
return ret < 0 ? ret : it->buffer_ofs;
return ret ? ret : it->buffer_ofs;
}
ssize_t erofs_listxattr(struct dentry *dentry,
char *buffer, size_t buffer_size)
char *buffer, size_t buffer_size)
{
int ret;
struct listxattr_iter it;
@@ -656,3 +672,39 @@ ssize_t erofs_listxattr(struct dentry *dentry,
return shared_listxattr(&it);
}
#ifdef CONFIG_EROFS_FS_POSIX_ACL
/*
 * Look up the POSIX ACL of the given @type on @inode.
 *
 * @type selects the xattr name index (access or default ACL); any other
 * value yields ERR_PTR(-EINVAL).  The xattr is fetched in two steps: a
 * first erofs_getxattr() call without a buffer, then, if that returned a
 * positive length, a second call into a kmalloc()ed buffer of that size.
 *
 * Returns NULL when the xattr lookup reports -ENOATTR, an ERR_PTR() for
 * any other negative result (or -ENOMEM if the buffer cannot be
 * allocated), and otherwise whatever posix_acl_from_xattr() produces.
 */
struct posix_acl *erofs_get_acl(struct inode *inode, int type)
{
	struct posix_acl *acl;
	char *buf = NULL;
	int name_index, len;

	if (type == ACL_TYPE_ACCESS)
		name_index = EROFS_XATTR_INDEX_POSIX_ACL_ACCESS;
	else if (type == ACL_TYPE_DEFAULT)
		name_index = EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT;
	else
		return ERR_PTR(-EINVAL);

	/* first call: no buffer supplied */
	len = erofs_getxattr(inode, name_index, "", NULL, 0);
	if (len > 0) {
		buf = kmalloc(len, GFP_KERNEL);
		if (!buf)
			return ERR_PTR(-ENOMEM);

		/* second call: fetch the value into the new buffer */
		len = erofs_getxattr(inode, name_index, "", buf, len);
	}

	if (len >= 0)
		acl = posix_acl_from_xattr(&init_user_ns, buf, len);
	else if (len == -ENOATTR)
		acl = NULL;
	else
		acl = ERR_PTR(len);

	kfree(buf);
	return acl;
}
#endif

89
fs/erofs/xattr.h Normal file
View File

@@ -0,0 +1,89 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2017-2018 HUAWEI, Inc.
* https://www.huawei.com/
*/
#ifndef __EROFS_XATTR_H
#define __EROFS_XATTR_H
#include "internal.h"
#include <linux/posix_acl_xattr.h>
#include <linux/xattr.h>
/* Attribute not found */
#define ENOATTR ENODATA
/*
 * Size in bytes of the inline xattr header of @inode: the fixed
 * erofs_xattr_ibody_header plus one u32 per shared xattr entry.
 */
static inline unsigned int inlinexattr_header_size(struct inode *inode)
{
	const unsigned int nr_shared = EROFS_I(inode)->xattr_shared_count;

	return sizeof(struct erofs_xattr_ibody_header) +
		sizeof(u32) * nr_shared;
}
/*
 * Block address holding shared xattr @xattr_id: sbi->xattr_blkaddr plus
 * the number of whole blocks covered by xattr_id * sizeof(__u32) bytes.
 * Without CONFIG_EROFS_FS_XATTR this always returns 0.
 */
static inline erofs_blk_t xattrblock_addr(struct erofs_sb_info *sbi,
					  unsigned int xattr_id)
{
#ifndef CONFIG_EROFS_FS_XATTR
	return 0;
#else
	/*
	 * NOTE(review): this product is computed in size_t, so on targets
	 * with a 32-bit size_t it can wrap for very large xattr_id values;
	 * confirm ids are bounded before they get here.
	 */
	const size_t byte_ofs = xattr_id * sizeof(__u32);

	return sbi->xattr_blkaddr + byte_ofs / EROFS_BLKSIZ;
#endif
}
static inline unsigned int xattrblock_offset(struct erofs_sb_info *sbi,
unsigned int xattr_id)
{
return (xattr_id * sizeof(__u32)) % EROFS_BLKSIZ;
}
#ifdef CONFIG_EROFS_FS_XATTR
extern const struct xattr_handler erofs_xattr_user_handler;
extern const struct xattr_handler erofs_xattr_trusted_handler;
#ifdef CONFIG_EROFS_FS_SECURITY
extern const struct xattr_handler erofs_xattr_security_handler;
#endif
/*
 * Map an on-disk xattr name index to its xattr_handler.
 *
 * Returns NULL for index 0, for an index past the end of the table, and
 * for table slots left unset because the matching config option
 * (POSIX ACL, security) is disabled.
 */
static inline const struct xattr_handler *erofs_xattr_handler(unsigned int idx)
{
	static const struct xattr_handler *handlers[] = {
		[EROFS_XATTR_INDEX_USER] = &erofs_xattr_user_handler,
#ifdef CONFIG_EROFS_FS_POSIX_ACL
		[EROFS_XATTR_INDEX_POSIX_ACL_ACCESS] =
			&posix_acl_access_xattr_handler,
		[EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT] =
			&posix_acl_default_xattr_handler,
#endif
		[EROFS_XATTR_INDEX_TRUSTED] = &erofs_xattr_trusted_handler,
#ifdef CONFIG_EROFS_FS_SECURITY
		[EROFS_XATTR_INDEX_SECURITY] = &erofs_xattr_security_handler,
#endif
	};

	/* index 0 and out-of-range indexes have no handler */
	if (!idx || idx >= ARRAY_SIZE(handlers))
		return NULL;

	return handlers[idx];
}
extern const struct xattr_handler *erofs_xattr_handlers[];
int erofs_getxattr(struct inode *, int, const char *, void *, size_t);
ssize_t erofs_listxattr(struct dentry *, char *, size_t);
#else
/*
 * Stub used when CONFIG_EROFS_FS_XATTR is not set: no xattr can be read,
 * so every lookup fails with -EOPNOTSUPP and all arguments are ignored.
 */
static inline int erofs_getxattr(struct inode *inode, int index,
const char *name, void *buffer,
size_t buffer_size)
{
return -EOPNOTSUPP;
}
#define erofs_listxattr (NULL)
#define erofs_xattr_handlers (NULL)
#endif /* !CONFIG_EROFS_FS_XATTR */
#ifdef CONFIG_EROFS_FS_POSIX_ACL
struct posix_acl *erofs_get_acl(struct inode *inode, int type);
#else
#define erofs_get_acl (NULL)
#endif
#endif

1479
fs/erofs/zdata.c Normal file

File diff suppressed because it is too large Load Diff

189
fs/erofs/zdata.h Normal file
View File

@@ -0,0 +1,189 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2018 HUAWEI, Inc.
* https://www.huawei.com/
*/
#ifndef __EROFS_FS_ZDATA_H
#define __EROFS_FS_ZDATA_H
#include "internal.h"
#include "zpvec.h"
#define Z_EROFS_PCLUSTER_MAX_PAGES (Z_EROFS_PCLUSTER_MAX_SIZE / PAGE_SIZE)
#define Z_EROFS_NR_INLINE_PAGEVECS 3
/*
* Structure fields follow one of the following exclusion rules.
*
* I: Modifiable by initialization/destruction paths and read-only
* for everyone else;
*
* L: Field should be protected by pageset lock;
*
* A: Field should be accessed / updated in atomic for parallelized code.
*/
/*
 * Bookkeeping for the pages collected for one decompression target.
 * Each field is tagged with the exclusion rule (I / L / A) that applies
 * to it.
 */
struct z_erofs_collection {
/* presumably the "pageset lock" meant by the L rule -- TODO confirm */
struct mutex lock;
/* I: page offset of start position of decompression */
unsigned short pageofs;
/* L: maximum relative page index in pagevec[] */
unsigned short nr_pages;
/* L: total number of pages in pagevec[] */
unsigned int vcnt;
/* the two members below share storage */
union {
/* L: inline a certain number of pagevecs for bootstrap */
erofs_vtptr_t pagevec[Z_EROFS_NR_INLINE_PAGEVECS];
/* I: can be used to free the pcluster by RCU. */
struct rcu_head rcu;
};
};
#define Z_EROFS_PCLUSTER_FULL_LENGTH 0x00000001
#define Z_EROFS_PCLUSTER_LENGTH_BIT 1
/*
* let's leave a type here in case of introducing
* another tagged pointer later.
*/
typedef void *z_erofs_next_pcluster_t;
/*
 * A physical cluster: an erofs_workgroup object followed by its primary
 * page collection, chaining/length state and a trailing flexible array
 * of compressed page pointers.  Field tags (I / A) name the exclusion
 * rule that applies to each field.
 */
struct z_erofs_pcluster {
/* embedded workgroup object */
struct erofs_workgroup obj;
/* embedded collection, reached via z_erofs_primarycollection() */
struct z_erofs_collection primary_collection;
/* A: point to next chained pcluster or TAILs */
z_erofs_next_pcluster_t next;
/* A: lower limit of decompressed length and if full length or not */
unsigned int length;
/* I: physical cluster size in pages */
unsigned short pclusterpages;
/* I: compression algorithm format */
unsigned char algorithmformat;
/* A: compressed pages (can be cached or inplaced pages) */
/* presumably pclusterpages entries long -- TODO confirm at allocation */
struct page *compressed_pages[];
};
#define z_erofs_primarycollection(pcluster) (&(pcluster)->primary_collection)
/* let's avoid the valid 32-bit kernel addresses */
/* the chained workgroup hasn't submitted io (still open) */
#define Z_EROFS_PCLUSTER_TAIL ((void *)0x5F0ECAFE)
/* the chained workgroup has already submitted io */
#define Z_EROFS_PCLUSTER_TAIL_CLOSED ((void *)0x5F0EDEAD)
#define Z_EROFS_PCLUSTER_NIL (NULL)
/*
 * A queue of chained pclusters awaiting decompression for one superblock.
 */
struct z_erofs_decompressqueue {
/* superblock this queue belongs to */
struct super_block *sb;
/* atomic counter; by its name, the bios still in flight -- TODO confirm */
atomic_t pending_bios;
/* head of the pcluster chain */
z_erofs_next_pcluster_t head;
/* a wait queue head or a work item, sharing storage */
/* NOTE(review): which member is live is decided by the users -- confirm */
union {
wait_queue_head_t wait;
struct work_struct work;
} u;
};
/* mapping reached through the managed_cache member of @sbi */
#define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping)

/*
 * Report whether @page currently belongs to the managed-cache mapping
 * of @sbi, i.e. whether page->mapping equals MNGD_MAPPING(sbi).
 */
static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,
					 struct page *page)
{
	const struct address_space *const managed = MNGD_MAPPING(sbi);

	return page->mapping == managed;
}
/*
 * Layout of the "onlinepage" word kept in page_private(): the low
 * Z_EROFS_ONLINEPAGE_COUNT_BITS bits hold a count and the bits above
 * Z_EROFS_ONLINEPAGE_INDEX_SHIFT hold a sub-index.
 */
#define Z_EROFS_ONLINEPAGE_COUNT_BITS 2
#define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1)
#define Z_EROFS_ONLINEPAGE_INDEX_SHIFT (Z_EROFS_ONLINEPAGE_COUNT_BITS)
/*
 * waiters (aka. ongoing_packs): # to unlock the page
 * sub-index: 0 - for partial page, >= 1 full page sub-index
 */
typedef atomic_t z_erofs_onlinepage_t;
/*
 * type punning: lets the unsigned long behind page_private() be accessed
 * through an atomic_t pointer.
 */
union z_erofs_onlinepage_converter {
z_erofs_onlinepage_t *o;
unsigned long *v;
};
static inline unsigned int z_erofs_onlinepage_index(struct page *page)
{
union z_erofs_onlinepage_converter u;
DBG_BUGON(!PagePrivate(page));
u.v = &page_private(page);
return atomic_read(u.o) >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
}
/*
 * Set up the onlinepage word of @page: the atomic is initialised to 1
 * (count == 1, sub-index == 0), stored through set_page_private(), and
 * only then is PagePrivate set on the page.
 */
static inline void z_erofs_onlinepage_init(struct page *page)
{
union {
z_erofs_onlinepage_t o;
unsigned long v;
/* keep from being unlocked in advance */
} u = { .o = ATOMIC_INIT(1) };
set_page_private(page, u.v);
/*
 * order the private-word store before the PagePrivate flag store
 * NOTE(review): presumably pairs with readers that test PagePrivate()
 * before reading page_private() - confirm.
 */
smp_wmb();
SetPagePrivate(page);
}
/*
 * Atomically rewrite the onlinepage word of @page so that its sub-index
 * becomes @index and its count grows by one when @down is true.
 *
 * If a non-zero sub-index is already recorded and @index is 0, the word is
 * left untouched. A recorded non-zero sub-index that differs from a non-zero
 * @index trips DBG_BUGON. The update is retried until the compare-and-swap
 * succeeds.
 */
static inline void z_erofs_onlinepage_fixup(struct page *page,
					    uintptr_t index, bool down)
{
	union z_erofs_onlinepage_converter cvt = { .v = &page_private(page) };
	int seen, seen_index, updated;

	do {
		seen = atomic_read(cvt.o);
		seen_index = seen >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;

		if (seen_index) {
			/* nothing to record for a partial page */
			if (!index)
				return;

			DBG_BUGON(seen_index != index);
		}

		updated = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) |
			((seen & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down);
	} while (atomic_cmpxchg(cvt.o, seen, updated) != seen);
}
static inline void z_erofs_onlinepage_endio(struct page *page)
{
union z_erofs_onlinepage_converter u;
unsigned int v;
DBG_BUGON(!PagePrivate(page));
u.v = &page_private(page);
v = atomic_dec_return(u.o);
if (!(v & Z_EROFS_ONLINEPAGE_COUNT_MASK)) {
set_page_private(page, 0);
ClearPagePrivate(page);
if (!PageError(page))
SetPageUptodate(page);
unlock_page(page);
}
erofs_dbg("%s, page %p value %x", __func__, page, atomic_read(u.o));
}
/*
 * Number of page pointers that fit in one eighth of THREAD_SIZE,
 * capped at 96.
 */
#define Z_EROFS_VMAP_ONSTACK_PAGES \
min_t(unsigned int, THREAD_SIZE / 8 / sizeof(struct page *), 96U)
/*
 * NOTE(review): presumably the capacity of a shared (non-stack) page-pointer
 * array used when the on-stack budget above is not enough - confirm.
 */
#define Z_EROFS_VMAP_GLOBAL_PAGES 2048
#endif

598
fs/erofs/zmap.c Normal file
View File

@@ -0,0 +1,598 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2018-2019 HUAWEI, Inc.
* https://www.huawei.com/
*/
#include "internal.h"
#include <asm/unaligned.h>
#include <trace/events/erofs.h>
int z_erofs_fill_inode(struct inode *inode)
{
struct erofs_inode *const vi = EROFS_I(inode);
struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
if (!erofs_sb_has_big_pcluster(sbi) &&
vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
vi->z_advise = 0;
vi->z_algorithmtype[0] = 0;
vi->z_algorithmtype[1] = 0;
vi->z_logical_clusterbits = LOG_BLOCK_SIZE;
set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
}
inode->i_mapping->a_ops = &z_erofs_aops;
return 0;
}
/*
 * Read the on-disk z_erofs_map_header of @inode on first use and fill in
 * vi->z_advise, vi->z_algorithmtype[] and vi->z_logical_clusterbits.
 *
 * Initialisation is serialised with the EROFS_I_BL_Z_BIT bit lock and
 * published by setting EROFS_I_Z_INITED_BIT.
 *
 * Returns 0 on success (including when the inode was already initialised),
 * -ERESTARTSYS if waiting for the bit lock was interrupted, the error from
 * erofs_get_meta_page(), -EOPNOTSUPP for an unknown compression format, or
 * -EFSCORRUPTED for inconsistent big pcluster advise bits.
 */
static int z_erofs_fill_inode_lazy(struct inode *inode)
{
struct erofs_inode *const vi = EROFS_I(inode);
struct super_block *const sb = inode->i_sb;
int err;
erofs_off_t pos;
struct page *page;
void *kaddr;
struct z_erofs_map_header *h;
/* fast path: already initialised, no locking needed */
if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) {
/*
* paired with smp_mb() at the end of the function to ensure
* fields will only be observed after the bit is set.
*/
smp_mb();
return 0;
}
/* slow path: take the per-inode bit lock (killable wait) */
if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_Z_BIT, TASK_KILLABLE))
return -ERESTARTSYS;
err = 0;
/* re-check under the lock: somebody else may have finished meanwhile */
if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags))
goto out_unlock;
/* this combination is marked initialised by z_erofs_fill_inode() already */
DBG_BUGON(!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY);
/* the map header follows the inode and its xattrs, aligned to 8 bytes */
pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
vi->xattr_isize, 8);
page = erofs_get_meta_page(sb, erofs_blknr(pos));
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto out_unlock;
}
kaddr = kmap_atomic(page);
h = kaddr + erofs_blkoff(pos);
vi->z_advise = le16_to_cpu(h->h_advise);
/* low nibble: algorithm type 0, high nibble: algorithm type 1 */
vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX) {
erofs_err(sb, "unknown compression format %u for nid %llu, please upgrade kernel",
vi->z_algorithmtype[0], vi->nid);
err = -EOPNOTSUPP;
goto unmap_done;
}
/* only the low 3 bits of h_clusterbits are used here */
vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7);
/* per-inode big pcluster bits require the superblock feature */
if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu",
vi->nid);
err = -EFSCORRUPTED;
goto unmap_done;
}
/* for the compact layout both big pcluster bits must be set or both clear */
if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION &&
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu",
vi->nid);
err = -EFSCORRUPTED;
goto unmap_done;
}
/* paired with smp_mb() at the beginning of the function */
smp_mb();
set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
unmap_done:
/* drop the mapping, the page lock and the page reference */
kunmap_atomic(kaddr);
unlock_page(page);
put_page(page);
out_unlock:
/* release the bit lock and wake up anybody waiting on it */
clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags);
return err;
}
/*
 * Scratch state shared by the index-loading helpers in this file while one
 * mapping request is resolved.
 */
struct z_erofs_maprecorder {
/* inode being mapped */
struct inode *inode;
/* mapping request/result; map->mpage caches the current index page */
struct erofs_map_blocks *map;
/* kmap_atomic() address of map->mpage, or NULL when it is not mapped */
void *kaddr;
/* logical cluster number recorded by the most recent load */
unsigned long lcn;
/* compression extent information gathered */
/* lcluster type of the loaded index (Z_EROFS_VLE_CLUSTER_TYPE_*) */
u8 type;
/* clusterofs from the index; set to the lcluster size for NONHEAD */
u16 clusterofs;
/* lookback/lookahead distances taken from NONHEAD indexes */
u16 delta[2];
/* pblk: block address from a HEAD/PLAIN index; compressedlcs: CBLKCNT value */
erofs_blk_t pblk, compressedlcs;
};
/*
 * Make sure metadata block @eblk is the page cached in m->map->mpage and
 * that it is mapped at m->kaddr.
 *
 * A cached page with a matching index is reused (and mapped if needed).
 * Otherwise the old page is unmapped and released, and a new one is fetched
 * with erofs_get_meta_page(), mapped and unlocked.
 *
 * Returns 0 on success or the error from erofs_get_meta_page(), in which
 * case map->mpage is reset to NULL.
 */
static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m,
				  erofs_blk_t eblk)
{
	struct erofs_map_blocks *const map = m->map;
	struct page *const cached = map->mpage;
	struct page *fresh;

	if (cached && cached->index == eblk) {
		/* right page already held, only the mapping may be missing */
		if (!m->kaddr)
			m->kaddr = kmap_atomic(cached);
		return 0;
	}

	if (cached) {
		/* wrong page: drop its mapping first, then the reference */
		if (m->kaddr) {
			kunmap_atomic(m->kaddr);
			m->kaddr = NULL;
		}
		put_page(cached);
	}

	fresh = erofs_get_meta_page(m->inode->i_sb, eblk);
	if (IS_ERR(fresh)) {
		map->mpage = NULL;
		return PTR_ERR(fresh);
	}

	m->kaddr = kmap_atomic(fresh);
	unlock_page(fresh);
	map->mpage = fresh;
	return 0;
}
/*
 * Load the legacy (full, non-compacted) index of logical cluster @lcn and
 * record its contents in @m.
 *
 * For NONHEAD indexes, clusterofs is set to the lcluster size and delta[]
 * is filled; a delta[0] carrying Z_EROFS_VLE_DI_D0_CBLKCNT is turned into
 * compressedlcs with delta[0] forced to 1. For PLAIN/HEAD indexes,
 * clusterofs and pblk are filled.
 *
 * Returns 0 on success, the error from z_erofs_reload_indexes(),
 * -EFSCORRUPTED if CBLKCNT shows up without the big pcluster advise bit,
 * or -EOPNOTSUPP for an unknown cluster type.
 */
static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
					 unsigned long lcn)
{
	struct inode *const inode = m->inode;
	struct erofs_inode *const vi = EROFS_I(inode);
	const erofs_off_t ibase = iloc(EROFS_I_SB(inode), vi->nid);
	const erofs_off_t pos =
		Z_EROFS_VLE_LEGACY_INDEX_ALIGN(ibase + vi->inode_isize +
					       vi->xattr_isize) +
		lcn * sizeof(struct z_erofs_vle_decompressed_index);
	const unsigned int typemask =
		(1 << Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) - 1;
	struct z_erofs_vle_decompressed_index *di;
	unsigned int advise, type;
	int err;

	err = z_erofs_reload_indexes(m, erofs_blknr(pos));
	if (err)
		return err;

	m->lcn = lcn;
	di = m->kaddr + erofs_blkoff(pos);

	advise = le16_to_cpu(di->di_advise);
	type = (advise >> Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT) & typemask;

	switch (type) {
	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
		m->clusterofs = le16_to_cpu(di->di_clusterofs);
		m->pblk = le32_to_cpu(di->di_u.blkaddr);
		break;
	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
		m->clusterofs = 1 << vi->z_logical_clusterbits;
		m->delta[0] = le16_to_cpu(di->di_u.delta[0]);
		if (m->delta[0] & Z_EROFS_VLE_DI_D0_CBLKCNT) {
			/* CBLKCNT is only valid together with big pcluster */
			if (!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) {
				DBG_BUGON(1);
				return -EFSCORRUPTED;
			}
			m->compressedlcs = m->delta[0] &
				~Z_EROFS_VLE_DI_D0_CBLKCNT;
			m->delta[0] = 1;
		}
		m->delta[1] = le16_to_cpu(di->di_u.delta[1]);
		break;
	default:
		DBG_BUGON(1);
		return -EOPNOTSUPP;
	}

	m->type = type;
	return 0;
}
/*
 * Decode one compacted index entry that starts at bit position @pos of @in.
 *
 * A little-endian 32-bit word is read from the byte containing @pos and
 * shifted right by the bit offset within that byte. The value masked with
 * @lomask is returned, and the two bits starting at bit @lobits are stored
 * in *@type.
 */
static unsigned int decode_compactedbits(unsigned int lobits,
					 unsigned int lomask,
					 u8 *in, unsigned int pos, u8 *type)
{
	const u8 *const src = in + pos / 8;
	const unsigned int bitofs = pos & 7;
	const unsigned int v = get_unaligned_le32(src) >> bitofs;

	*type = (v >> lobits) & 3;
	return v & lomask;
}
/*
 * Decode the compacted index located at byte offset @eofs of the mapped
 * index page (m->kaddr) and record the result in @m.
 *
 * @amortizedshift: log2 of the amortized entry size in bytes; 2 (4-byte
 *                  entries, 2 per pack) and 1 (2-byte entries, 16 per pack,
 *                  4KiB lclusters only) are accepted.
 * @eofs:           byte offset of the entry inside the index page.
 *
 * Fills m->type and, depending on the type, m->clusterofs, m->delta[0],
 * m->compressedlcs and m->pblk.
 *
 * Returns 0 on success, -EOPNOTSUPP for an unsupported entry size, or
 * -EFSCORRUPTED for inconsistent big pcluster indexes.
 */
static int unpack_compacted_index(struct z_erofs_maprecorder *m,
unsigned int amortizedshift,
unsigned int eofs)
{
struct erofs_inode *const vi = EROFS_I(m->inode);
const unsigned int lclusterbits = vi->z_logical_clusterbits;
const unsigned int lomask = (1 << lclusterbits) - 1;
unsigned int vcnt, base, lo, encodebits, nblk;
int i;
u8 *in, type;
bool big_pcluster;
/* vcnt: number of entries that make up one pack */
if (1 << amortizedshift == 4)
vcnt = 2;
else if (1 << amortizedshift == 2 && lclusterbits == 12)
vcnt = 16;
else
return -EOPNOTSUPP;
big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
/* bits per entry once the trailing __le32 of the pack is set aside */
encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
/* start of the pack containing @eofs, and the entry's slot within it */
base = round_down(eofs, vcnt << amortizedshift);
in = m->kaddr + base;
i = (eofs - base) >> amortizedshift;
lo = decode_compactedbits(lclusterbits, lomask,
in, encodebits * i, &type);
m->type = type;
if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
m->clusterofs = 1 << lclusterbits;
if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) {
/* CBLKCNT entries are only valid with big pcluster */
if (!big_pcluster) {
DBG_BUGON(1);
return -EFSCORRUPTED;
}
m->compressedlcs = lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT;
m->delta[0] = 1;
return 0;
} else if (i + 1 != (int)vcnt) {
/* not the last entry of the pack: lo is delta[0] itself */
m->delta[0] = lo;
return 0;
}
/*
* since the last lcluster in the pack is special,
* of which lo saves delta[1] rather than delta[0].
* Hence, get delta[0] by the previous lcluster indirectly.
*/
lo = decode_compactedbits(lclusterbits, lomask,
in, encodebits * (i - 1), &type);
if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
lo = 0;
else if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT)
lo = 1;
m->delta[0] = lo + 1;
return 0;
}
/* HEAD or PLAIN entry: lo is the cluster offset */
m->clusterofs = lo;
m->delta[0] = 0;
/* figure out blkaddr (pblk) for HEAD lclusters */
if (!big_pcluster) {
/* walk the earlier entries of the pack backwards, counting into nblk */
nblk = 1;
while (i > 0) {
--i;
lo = decode_compactedbits(lclusterbits, lomask,
in, encodebits * i, &type);
/* a NONHEAD entry lets us jump back by its distance */
if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
i -= lo;
if (i >= 0)
++nblk;
}
} else {
/* big pcluster: add up CBLKCNT values and single HEAD/PLAIN entries */
nblk = 0;
while (i > 0) {
--i;
lo = decode_compactedbits(lclusterbits, lomask,
in, encodebits * i, &type);
if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) {
/* also step over the entry preceding the CBLKCNT one */
--i;
nblk += lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT;
continue;
}
/* bigpcluster shouldn't have plain d0 == 1 */
if (lo <= 1) {
DBG_BUGON(1);
return -EFSCORRUPTED;
}
i -= lo - 2;
continue;
}
++nblk;
}
}
/* the pack ends with a __le32 base block address; add the counted blocks */
in += (vcnt << amortizedshift) - sizeof(__le32);
m->pblk = le32_to_cpu(*(__le32 *)in) + nblk;
return 0;
}
/*
 * Locate and decode the compacted index of logical cluster @lcn.
 *
 * The index area starts right after the z_erofs_map_header and consists of
 * up to three runs: an initial run of 4-byte entries that brings the offset
 * to 32-byte alignment, an optional run of 2-byte entries (only with
 * Z_EROFS_ADVISE_COMPACTED_2B), and a final run of 4-byte entries.
 *
 * Returns 0 on success, -EOPNOTSUPP if the lcluster size is not 4KiB,
 * -EINVAL if @lcn is beyond the last index, or the error returned by
 * z_erofs_reload_indexes() / unpack_compacted_index().
 */
static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
					    unsigned long lcn)
{
	struct inode *const inode = m->inode;
	struct erofs_inode *const vi = EROFS_I(inode);
	const unsigned int lclusterbits = vi->z_logical_clusterbits;
	const erofs_off_t ebase = ALIGN(iloc(EROFS_I_SB(inode), vi->nid) +
					vi->inode_isize + vi->xattr_isize, 8) +
		sizeof(struct z_erofs_map_header);
	const unsigned int totalidx = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
	unsigned int compacted_4b_initial, compacted_2b;
	unsigned int amortizedshift;
	erofs_off_t pos;
	int err;

	if (lclusterbits != 12)
		return -EOPNOTSUPP;

	if (lcn >= totalidx)
		return -EINVAL;

	m->lcn = lcn;
	/* used to align to 32-byte (compacted_2b) alignment */
	compacted_4b_initial = (32 - ebase % 32) / 4;
	if (compacted_4b_initial == 32 / 4)
		compacted_4b_initial = 0;

	/*
	 * When the initial 4-byte run already covers every index there is no
	 * 2-byte run at all; skipping the computation also keeps the unsigned
	 * subtraction below from wrapping around.
	 */
	compacted_2b = 0;
	if ((vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) &&
	    compacted_4b_initial < totalidx)
		compacted_2b = rounddown(totalidx - compacted_4b_initial, 16);

	pos = ebase;
	if (lcn < compacted_4b_initial) {
		amortizedshift = 2;
		goto out;
	}
	pos += compacted_4b_initial * 4;
	lcn -= compacted_4b_initial;

	if (lcn < compacted_2b) {
		amortizedshift = 1;
		goto out;
	}
	pos += compacted_2b * 2;
	lcn -= compacted_2b;
	amortizedshift = 2;
out:
	/* lcn is now relative to the start of the selected run */
	pos += lcn * (1 << amortizedshift);
	err = z_erofs_reload_indexes(m, erofs_blknr(pos));
	if (err)
		return err;
	return unpack_compacted_index(m, amortizedshift, erofs_blkoff(pos));
}
/*
 * Load the index of logical cluster @lcn using the loader that matches the
 * inode's data layout and record the result in @m.
 *
 * @lcn is unsigned long, like m->lcn and the parameter of both loaders, so
 * that a caller's value is never silently truncated on 64-bit kernels.
 *
 * Returns the loader's result, or -EINVAL if the data layout is neither of
 * the two compression layouts.
 */
static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m,
					  unsigned long lcn)
{
	const unsigned int datamode = EROFS_I(m->inode)->datalayout;

	if (datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY)
		return legacy_load_cluster_from_disk(m, lcn);

	if (datamode == EROFS_INODE_FLAT_COMPRESSION)
		return compacted_load_cluster_from_disk(m, lcn);

	return -EINVAL;
}
/*
 * Walk backwards from the logical cluster recorded in m->lcn until the
 * HEAD or PLAIN lcluster that starts the extent is found, then set
 * map->m_la to the logical start of that extent. A PLAIN head also clears
 * EROFS_MAP_ZIPPED in map->m_flags.
 *
 * @lookback_distance: number of lclusters to step back first; each NONHEAD
 *                     lcluster met on the way supplies the next distance
 *                     through m->delta[0].
 *
 * The walk is a loop rather than self-recursion because the number of hops
 * is dictated by on-disk data, and stack usage must not depend on it.
 *
 * Returns 0 on success, -EFSCORRUPTED for a distance that is zero or reaches
 * before lcluster 0, -EOPNOTSUPP for an unknown lcluster type, or the error
 * from z_erofs_load_cluster_from_disk().
 */
static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
				   unsigned int lookback_distance)
{
	struct erofs_inode *const vi = EROFS_I(m->inode);
	struct erofs_map_blocks *const map = m->map;
	const unsigned int lclusterbits = vi->z_logical_clusterbits;

	for (;;) {
		unsigned long lcn = m->lcn;
		int err;

		if (lcn < lookback_distance) {
			erofs_err(m->inode->i_sb,
				  "bogus lookback distance @ nid %llu", vi->nid);
			DBG_BUGON(1);
			return -EFSCORRUPTED;
		}

		/* load extent head logical cluster if needed */
		lcn -= lookback_distance;
		err = z_erofs_load_cluster_from_disk(m, lcn);
		if (err)
			return err;

		switch (m->type) {
		case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
			if (!m->delta[0]) {
				erofs_err(m->inode->i_sb,
					  "invalid lookback distance 0 @ nid %llu",
					  vi->nid);
				DBG_BUGON(1);
				return -EFSCORRUPTED;
			}
			/* still inside the extent: hop further back */
			lookback_distance = m->delta[0];
			continue;
		case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
			map->m_flags &= ~EROFS_MAP_ZIPPED;
			/* fallthrough */
		case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
			map->m_la = (lcn << lclusterbits) | m->clusterofs;
			return 0;
		default:
			erofs_err(m->inode->i_sb,
				  "unknown type %u @ lcn %lu of nid %llu",
				  m->type, lcn, vi->nid);
			DBG_BUGON(1);
			return -EOPNOTSUPP;
		}
	}
}
static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
unsigned int initial_lcn)
{
struct erofs_inode *const vi = EROFS_I(m->inode);
struct erofs_map_blocks *const map = m->map;
const unsigned int lclusterbits = vi->z_logical_clusterbits;
unsigned long lcn;
int err;
DBG_BUGON(m->type != Z_EROFS_VLE_CLUSTER_TYPE_PLAIN &&
m->type != Z_EROFS_VLE_CLUSTER_TYPE_HEAD);
if (!(map->m_flags & EROFS_MAP_ZIPPED) ||
!(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) {
map->m_plen = 1 << lclusterbits;
return 0;
}
lcn = m->lcn + 1;
if (m->compressedlcs)
goto out;
err = z_erofs_load_cluster_from_disk(m, lcn);
if (err)
return err;
/*
* If the 1st NONHEAD lcluster has already been handled initially w/o
* valid compressedlcs, which means at least it mustn't be CBLKCNT, or
* an internal implemenatation error is detected.
*
* The following code can also handle it properly anyway, but let's
* BUG_ON in the debugging mode only for developers to notice that.
*/
DBG_BUGON(lcn == initial_lcn &&
m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD);
switch (m->type) {
case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
/*
* if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
* rather than CBLKCNT, it's a 1 lcluster-sized pcluster.
*/
m->compressedlcs = 1;
break;
case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
if (m->delta[0] != 1)
goto err_bonus_cblkcnt;
if (m->compressedlcs)
break;
/* fallthrough */
default:
erofs_err(m->inode->i_sb,
"cannot found CBLKCNT @ lcn %lu of nid %llu",
lcn, vi->nid);
DBG_BUGON(1);
return -EFSCORRUPTED;
}
out:
map->m_plen = m->compressedlcs << lclusterbits;
return 0;
err_bonus_cblkcnt:
erofs_err(m->inode->i_sb,
"bogus CBLKCNT @ lcn %lu of nid %llu",
lcn, vi->nid);
DBG_BUGON(1);
return -EFSCORRUPTED;
}
/*
 * Map the logical offset map->m_la of a compressed inode to its extent.
 *
 * On success @map describes the extent covering the offset: m_la/m_llen
 * (logical start and length), m_pa/m_plen (physical start and compressed
 * length) and m_flags (EROFS_MAP_MAPPED, plus EROFS_MAP_ZIPPED and
 * EROFS_MAP_FULL_MAPPED where applicable). An offset at or beyond i_size is
 * reported as an unmapped range with m_flags == 0.
 *
 * @inode: compressed inode to look up
 * @map:   in: m_la (and the cached map->mpage); out: the fields above
 * @flags: only forwarded to the tracepoints
 *
 * Returns 0 on success or a negative errno from the helpers
 * (e.g. -EFSCORRUPTED, -EOPNOTSUPP).
 *
 * The atomic mapping of the index page (m.kaddr) is always dropped before
 * returning, on error paths as well. The page itself stays referenced in
 * map->mpage and is not released here.
 */
int z_erofs_map_blocks_iter(struct inode *inode,
			    struct erofs_map_blocks *map,
			    int flags)
{
	struct erofs_inode *const vi = EROFS_I(inode);
	struct z_erofs_maprecorder m = {
		.inode = inode,
		.map = map,
	};
	int err = 0;
	unsigned int lclusterbits, endoff;
	unsigned long initial_lcn;
	unsigned long long ofs, end;

	trace_z_erofs_map_blocks_iter_enter(inode, map, flags);

	/* when trying to read beyond EOF, leave it unmapped */
	if (map->m_la >= inode->i_size) {
		map->m_llen = map->m_la + 1 - inode->i_size;
		map->m_la = inode->i_size;
		map->m_flags = 0;
		goto out;
	}

	err = z_erofs_fill_inode_lazy(inode);
	if (err)
		goto out;

	lclusterbits = vi->z_logical_clusterbits;
	ofs = map->m_la;
	initial_lcn = ofs >> lclusterbits;
	endoff = ofs & ((1 << lclusterbits) - 1);

	err = z_erofs_load_cluster_from_disk(&m, initial_lcn);
	if (err)
		goto unmap_out;

	map->m_flags = EROFS_MAP_ZIPPED;	/* by default, compressed */
	end = (m.lcn + 1ULL) << lclusterbits;

	switch (m.type) {
	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
		if (endoff >= m.clusterofs)
			map->m_flags &= ~EROFS_MAP_ZIPPED;
		/* fallthrough */
	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
		if (endoff >= m.clusterofs) {
			/* the offset lies in the extent starting in this lcluster */
			map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
			break;
		}
		/* m.lcn should be >= 1 if endoff < m.clusterofs */
		if (!m.lcn) {
			erofs_err(inode->i_sb,
				  "invalid logical cluster 0 at nid %llu",
				  vi->nid);
			err = -EFSCORRUPTED;
			goto unmap_out;
		}
		/* the offset belongs to the previous extent, which ends here */
		end = (m.lcn << lclusterbits) | m.clusterofs;
		map->m_flags |= EROFS_MAP_FULL_MAPPED;
		m.delta[0] = 1;
		/* fallthrough */
	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
		/* get the corresponding first chunk */
		err = z_erofs_extent_lookback(&m, m.delta[0]);
		if (err)
			goto unmap_out;
		break;
	default:
		erofs_err(inode->i_sb,
			  "unknown type %u @ offset %llu of nid %llu",
			  m.type, ofs, vi->nid);
		err = -EOPNOTSUPP;
		goto unmap_out;
	}

	map->m_llen = end - map->m_la;
	map->m_pa = blknr_to_addr(m.pblk);
	map->m_flags |= EROFS_MAP_MAPPED;

	err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
	if (err) {
		/* the index page is still kmapped here, so it must be unmapped */
		goto unmap_out;
	}
unmap_out:
	if (m.kaddr)
		kunmap_atomic(m.kaddr);

out:
	erofs_dbg("%s, m_la %llu m_pa %llu m_llen %llu m_plen %llu m_flags 0%o",
		  __func__, map->m_la, map->m_pa,
		  map->m_llen, map->m_plen, map->m_flags);

	trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);

	/* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */
	DBG_BUGON(err < 0 && err != -ENOMEM);
	return err;
}

View File

@@ -1,21 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0
*
* linux/drivers/staging/erofs/unzip_pagevec.h
*
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2018 HUAWEI, Inc.
* http://www.huawei.com/
* Created by Gao Xiang <gaoxiang25@huawei.com>
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of the Linux
* distribution for more details.
* https://www.huawei.com/
*/
#ifndef __EROFS_UNZIP_PAGEVEC_H
#define __EROFS_UNZIP_PAGEVEC_H
#ifndef __EROFS_FS_ZPVEC_H
#define __EROFS_FS_ZPVEC_H
#include <linux/tagptr.h>
#include "tagptr.h"
/* page type in pagevec for unzip subsystem */
/* page type in pagevec for decompress subsystem */
enum z_erofs_page_type {
/* including Z_EROFS_VLE_PAGE_TAIL_EXCLUSIVE */
Z_EROFS_PAGE_TYPE_EXCLUSIVE,
@@ -43,7 +36,7 @@ struct z_erofs_pagevec_ctor {
static inline void z_erofs_pagevec_ctor_exit(struct z_erofs_pagevec_ctor *ctor,
bool atomic)
{
if (ctor->curr == NULL)
if (!ctor->curr)
return;
if (atomic)
@@ -54,25 +47,22 @@ static inline void z_erofs_pagevec_ctor_exit(struct z_erofs_pagevec_ctor *ctor,
static inline struct page *
z_erofs_pagevec_ctor_next_page(struct z_erofs_pagevec_ctor *ctor,
unsigned nr)
unsigned int nr)
{
unsigned index;
unsigned int index;
/* keep away from occupied pages */
if (ctor->next != NULL)
if (ctor->next)
return ctor->next;
for (index = 0; index < nr; ++index) {
const erofs_vtptr_t t = ctor->pages[index];
const unsigned tags = tagptr_unfold_tags(t);
const unsigned int tags = tagptr_unfold_tags(t);
if (tags == Z_EROFS_PAGE_TYPE_EXCLUSIVE)
return tagptr_unfold_ptr(t);
}
if (unlikely(nr >= ctor->nr))
BUG();
DBG_BUGON(nr >= ctor->nr);
return NULL;
}
@@ -94,8 +84,9 @@ z_erofs_pagevec_ctor_pagedown(struct z_erofs_pagevec_ctor *ctor,
}
static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor,
unsigned nr,
erofs_vtptr_t *pages, unsigned i)
unsigned int nr,
erofs_vtptr_t *pages,
unsigned int i)
{
ctor->nr = nr;
ctor->curr = ctor->next = NULL;
@@ -109,16 +100,14 @@ static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor,
z_erofs_pagevec_ctor_pagedown(ctor, false);
}
}
ctor->next = z_erofs_pagevec_ctor_next_page(ctor, i);
ctor->index = i;
}
static inline bool
z_erofs_pagevec_ctor_enqueue(struct z_erofs_pagevec_ctor *ctor,
struct page *page,
enum z_erofs_page_type type,
bool pvec_safereuse)
static inline bool z_erofs_pagevec_enqueue(struct z_erofs_pagevec_ctor *ctor,
struct page *page,
enum z_erofs_page_type type,
bool pvec_safereuse)
{
if (!ctor->next) {
/* some pages cannot be reused as pvec safely without I/O */
@@ -130,7 +119,7 @@ z_erofs_pagevec_ctor_enqueue(struct z_erofs_pagevec_ctor *ctor,
return false;
}
if (unlikely(ctor->index >= ctor->nr))
if (ctor->index >= ctor->nr)
z_erofs_pagevec_ctor_pagedown(ctor, false);
/* exclusive page type must be 0 */
@@ -141,19 +130,17 @@ z_erofs_pagevec_ctor_enqueue(struct z_erofs_pagevec_ctor *ctor,
if (type == (uintptr_t)ctor->next) {
ctor->next = page;
}
ctor->pages[ctor->index++] =
tagptr_fold(erofs_vtptr_t, page, type);
ctor->pages[ctor->index++] = tagptr_fold(erofs_vtptr_t, page, type);
return true;
}
static inline struct page *
z_erofs_pagevec_ctor_dequeue(struct z_erofs_pagevec_ctor *ctor,
enum z_erofs_page_type *type)
z_erofs_pagevec_dequeue(struct z_erofs_pagevec_ctor *ctor,
enum z_erofs_page_type *type)
{
erofs_vtptr_t t;
if (unlikely(ctor->index >= ctor->nr)) {
if (ctor->index >= ctor->nr) {
DBG_BUGON(!ctor->next);
z_erofs_pagevec_ctor_pagedown(ctor, true);
}
@@ -166,11 +153,7 @@ z_erofs_pagevec_ctor_dequeue(struct z_erofs_pagevec_ctor *ctor,
if (*type == (uintptr_t)ctor->next)
ctor->next = tagptr_unfold_ptr(t);
ctor->pages[ctor->index++] =
tagptr_fold(erofs_vtptr_t, NULL, 0);
ctor->pages[ctor->index++] = tagptr_fold(erofs_vtptr_t, NULL, 0);
return tagptr_unfold_ptr(t);
}
#endif

View File

@@ -1,4 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* SPDX-License-Identifier: GPL-2.0-only */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM erofs
@@ -6,6 +6,9 @@
#define _TRACE_EROFS_H
#include <linux/tracepoint.h>
#include <linux/fs.h>
struct erofs_map_blocks;
#define show_dev(dev) MAJOR(dev), MINOR(dev)
#define show_dev_nid(entry) show_dev(entry->dev), entry->nid
@@ -38,7 +41,7 @@ TRACE_EVENT(erofs_lookup,
TP_fast_assign(
__entry->dev = dir->i_sb->s_dev;
__entry->nid = EROFS_V(dir)->nid;
__entry->nid = EROFS_I(dir)->nid;
__assign_str(name, dentry->d_name.name);
__entry->flags = flags;
),
@@ -63,7 +66,7 @@ TRACE_EVENT(erofs_fill_inode,
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->nid = EROFS_V(inode)->nid;
__entry->nid = EROFS_I(inode)->nid;
__entry->blkaddr = erofs_blknr(iloc(EROFS_I_SB(inode), __entry->nid));
__entry->ofs = erofs_blkoff(iloc(EROFS_I_SB(inode), __entry->nid));
__entry->isdir = isdir;
@@ -92,7 +95,7 @@ TRACE_EVENT(erofs_readpage,
TP_fast_assign(
__entry->dev = page->mapping->host->i_sb->s_dev;
__entry->nid = EROFS_V(page->mapping->host)->nid;
__entry->nid = EROFS_I(page->mapping->host)->nid;
__entry->dir = S_ISDIR(page->mapping->host->i_mode);
__entry->index = page->index;
__entry->uptodate = PageUptodate(page);
@@ -125,7 +128,7 @@ TRACE_EVENT(erofs_readpages,
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->nid = EROFS_V(inode)->nid;
__entry->nid = EROFS_I(inode)->nid;
__entry->start = page->index;
__entry->nrpage = nrpage;
__entry->raw = raw;
@@ -154,7 +157,7 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_enter,
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->nid = EROFS_V(inode)->nid;
__entry->nid = EROFS_I(inode)->nid;
__entry->la = map->m_la;
__entry->llen = map->m_llen;
__entry->flags = flags;
@@ -162,7 +165,8 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_enter,
TP_printk("dev = (%d,%d), nid = %llu, la %llu llen %llu flags %s",
show_dev_nid(__entry),
__entry->la, __entry->llen, show_map_flags(__entry->flags))
__entry->la, __entry->llen,
__entry->flags ? show_map_flags(__entry->flags) : "NULL")
);
DEFINE_EVENT(erofs__map_blocks_enter, erofs_map_blocks_flatmode_enter,
@@ -172,6 +176,13 @@ DEFINE_EVENT(erofs__map_blocks_enter, erofs_map_blocks_flatmode_enter,
TP_ARGS(inode, map, flags)
);
DEFINE_EVENT(erofs__map_blocks_enter, z_erofs_map_blocks_iter_enter,
TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
unsigned int flags),
TP_ARGS(inode, map, flags)
);
DECLARE_EVENT_CLASS(erofs__map_blocks_exit,
TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
unsigned int flags, int ret),
@@ -192,7 +203,7 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_exit,
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->nid = EROFS_V(inode)->nid;
__entry->nid = EROFS_I(inode)->nid;
__entry->flags = flags;
__entry->la = map->m_la;
__entry->pa = map->m_pa;
@@ -204,7 +215,8 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_exit,
TP_printk("dev = (%d,%d), nid = %llu, flags %s "
"la %llu pa %llu llen %llu plen %llu mflags %s ret %d",
show_dev_nid(__entry), show_map_flags(__entry->flags),
show_dev_nid(__entry),
__entry->flags ? show_map_flags(__entry->flags) : "NULL",
__entry->la, __entry->pa, __entry->llen, __entry->plen,
show_mflags(__entry->mflags), __entry->ret)
);
@@ -216,6 +228,13 @@ DEFINE_EVENT(erofs__map_blocks_exit, erofs_map_blocks_flatmode_exit,
TP_ARGS(inode, map, flags, ret)
);
DEFINE_EVENT(erofs__map_blocks_exit, z_erofs_map_blocks_iter_exit,
TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
unsigned int flags, int ret),
TP_ARGS(inode, map, flags, ret)
);
TRACE_EVENT(erofs_destroy_inode,
TP_PROTO(struct inode *inode),
@@ -228,7 +247,7 @@ TRACE_EVENT(erofs_destroy_inode,
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->nid = EROFS_V(inode)->nid;
__entry->nid = EROFS_I(inode)->nid;
),
TP_printk("dev = (%d,%d), nid = %llu", show_dev_nid(__entry))

View File

@@ -446,7 +446,7 @@ static FORCE_INLINE int LZ4_compress_generic(
*op++ = (BYTE)(lastRun << ML_BITS);
}
memcpy(op, anchor, lastRun);
LZ4_memcpy(op, anchor, lastRun);
op += lastRun;
}
@@ -708,7 +708,7 @@ static int LZ4_compress_destSize_generic(
} else {
*op++ = (BYTE)(lastRunSize<<ML_BITS);
}
memcpy(op, anchor, lastRunSize);
LZ4_memcpy(op, anchor, lastRunSize);
op += lastRunSize;
}

View File

@@ -43,30 +43,36 @@
/*-*****************************
* Decompression functions
*******************************/
/* LZ4_decompress_generic() :
* This generic decompression function cover all use cases.
* It shall be instantiated several times, using different sets of directives
* Note that it is important this generic function is really inlined,
#define DEBUGLOG(l, ...) {} /* disabled */
#ifndef assert
#define assert(condition) ((void)0)
#endif
/*
* LZ4_decompress_generic() :
* This generic decompression function covers all use cases.
* It shall be instantiated several times, using different sets of directives.
* Note that it is important for performance that this function really get inlined,
* in order to remove useless branches during compilation optimization.
*/
static FORCE_INLINE int LZ4_decompress_generic(
const char * const source,
char * const dest,
int inputSize,
const char * const src,
char * const dst,
int srcSize,
/*
* If endOnInput == endOnInputSize,
* this value is the max size of Output Buffer.
* this value is `dstCapacity`
*/
int outputSize,
/* endOnOutputSize, endOnInputSize */
int endOnInput,
endCondition_directive endOnInput,
/* full, partial */
int partialDecoding,
/* only used if partialDecoding == partial */
int targetOutputSize,
earlyEnd_directive partialDecoding,
/* noDict, withPrefix64k, usingExtDict */
int dict,
/* == dest when no prefix */
dict_directive dict,
/* always <= dst, == dst when no prefix */
const BYTE * const lowPrefix,
/* only if dict == usingExtDict */
const BYTE * const dictStart,
@@ -74,35 +80,43 @@ static FORCE_INLINE int LZ4_decompress_generic(
const size_t dictSize
)
{
/* Local Variables */
const BYTE *ip = (const BYTE *) source;
const BYTE * const iend = ip + inputSize;
const BYTE *ip = (const BYTE *) src;
const BYTE * const iend = ip + srcSize;
BYTE *op = (BYTE *) dest;
BYTE *op = (BYTE *) dst;
BYTE * const oend = op + outputSize;
BYTE *cpy;
BYTE *oexit = op + targetOutputSize;
const BYTE * const lowLimit = lowPrefix - dictSize;
const BYTE * const dictEnd = (const BYTE *)dictStart + dictSize;
static const unsigned int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };
static const int dec64table[] = { 0, 0, 0, -1, 0, 1, 2, 3 };
static const unsigned int inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
const int safeDecode = (endOnInput == endOnInputSize);
const int checkOffset = ((safeDecode) && (dictSize < (int)(64 * KB)));
/* Set up the "end" pointers for the shortcut. */
const BYTE *const shortiend = iend -
(endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
const BYTE *const shortoend = oend -
(endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;
DEBUGLOG(5, "%s (srcSize:%i, dstSize:%i)", __func__,
srcSize, outputSize);
/* Special cases */
/* targetOutputSize too high => decode everything */
if ((partialDecoding) && (oexit > oend - MFLIMIT))
oexit = oend - MFLIMIT;
assert(lowPrefix <= op);
assert(src != NULL);
/* Empty output buffer */
if ((endOnInput) && (unlikely(outputSize == 0)))
return ((inputSize == 1) && (*ip == 0)) ? 0 : -1;
return ((srcSize == 1) && (*ip == 0)) ? 0 : -1;
if ((!endOnInput) && (unlikely(outputSize == 0)))
return (*ip == 0 ? 1 : -1);
if ((endOnInput) && unlikely(srcSize == 0))
return -1;
/* Main Loop : decode sequences */
while (1) {
size_t length;
@@ -111,12 +125,74 @@ static FORCE_INLINE int LZ4_decompress_generic(
/* get literal length */
unsigned int const token = *ip++;
length = token>>ML_BITS;
/* ip < iend before the increment */
assert(!endOnInput || ip <= iend);
/*
* A two-stage shortcut for the most common case:
* 1) If the literal length is 0..14, and there is enough
* space, enter the shortcut and copy 16 bytes on behalf
* of the literals (in the fast mode, only 8 bytes can be
* safely copied this way).
* 2) Further if the match length is 4..18, copy 18 bytes
* in a similar manner; but we ensure that there's enough
* space in the output for those 18 bytes earlier, upon
* entering the shortcut (in other words, there is a
* combined check for both stages).
*/
if ((endOnInput ? length != RUN_MASK : length <= 8)
/*
* strictly "less than" on input, to re-enter
* the loop with at least one byte
*/
&& likely((endOnInput ? ip < shortiend : 1) &
(op <= shortoend))) {
/* Copy the literals */
LZ4_memcpy(op, ip, endOnInput ? 16 : 8);
op += length; ip += length;
/*
* The second stage:
* prepare for match copying, decode full info.
* If it doesn't work out, the info won't be wasted.
*/
length = token & ML_MASK; /* match length */
offset = LZ4_readLE16(ip);
ip += 2;
match = op - offset;
assert(match <= op); /* check overflow */
/* Do not deal with overlapping matches. */
if ((length != ML_MASK) &&
(offset >= 8) &&
(dict == withPrefix64k || match >= lowPrefix)) {
/* Copy the match. */
LZ4_memcpy(op + 0, match + 0, 8);
LZ4_memcpy(op + 8, match + 8, 8);
LZ4_memcpy(op + 16, match + 16, 2);
op += length + MINMATCH;
/* Both stages worked, load the next token. */
continue;
}
/*
* The second stage didn't work out, but the info
* is ready. Propel it right to the point of match
* copying.
*/
goto _copy_match;
}
/* decode literal length */
if (length == RUN_MASK) {
unsigned int s;
if (unlikely(endOnInput ? ip >= iend - RUN_MASK : 0)) {
/* overflow detection */
goto _output_error;
}
do {
s = *ip++;
length += s;
@@ -125,14 +201,14 @@ static FORCE_INLINE int LZ4_decompress_generic(
: 1) & (s == 255));
if ((safeDecode)
&& unlikely(
(size_t)(op + length) < (size_t)(op))) {
&& unlikely((uptrval)(op) +
length < (uptrval)(op))) {
/* overflow detection */
goto _output_error;
}
if ((safeDecode)
&& unlikely(
(size_t)(ip + length) < (size_t)(ip))) {
&& unlikely((uptrval)(ip) +
length < (uptrval)(ip))) {
/* overflow detection */
goto _output_error;
}
@@ -140,16 +216,19 @@ static FORCE_INLINE int LZ4_decompress_generic(
/* copy literals */
cpy = op + length;
if (((endOnInput) && ((cpy > (partialDecoding ? oexit : oend - MFLIMIT))
LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
if (((endOnInput) && ((cpy > oend - MFLIMIT)
|| (ip + length > iend - (2 + 1 + LASTLITERALS))))
|| ((!endOnInput) && (cpy > oend - WILDCOPYLENGTH))) {
if (partialDecoding) {
if (cpy > oend) {
/*
* Error :
* write attempt beyond end of output buffer
* Partial decoding :
* stop in the middle of literal segment
*/
goto _output_error;
cpy = oend;
length = oend - op;
}
if ((endOnInput)
&& (ip + length > iend)) {
@@ -181,32 +260,50 @@ static FORCE_INLINE int LZ4_decompress_generic(
}
}
memcpy(op, ip, length);
/*
* supports overlapping memory regions; only matters
* for in-place decompression scenarios
*/
LZ4_memmove(op, ip, length);
ip += length;
op += length;
/* Necessarily EOF, due to parsing restrictions */
break;
}
LZ4_wildCopy(op, ip, cpy);
ip += length;
op = cpy;
/* Necessarily EOF, due to parsing restrictions */
if (!partialDecoding || (cpy == oend))
break;
} else {
/* may overwrite up to WILDCOPYLENGTH beyond cpy */
LZ4_wildCopy(op, ip, cpy);
ip += length;
op = cpy;
}
/* get offset */
offset = LZ4_readLE16(ip);
ip += 2;
match = op - offset;
if ((checkOffset) && (unlikely(match < lowLimit))) {
/* get matchlength */
length = token & ML_MASK;
_copy_match:
if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) {
/* Error : offset outside buffers */
goto _output_error;
}
/* costs ~1%; silence an msan warning when offset == 0 */
LZ4_write32(op, (U32)offset);
/*
* note : when partialDecoding, there is no guarantee that
* at least 4 bytes remain available in output buffer
*/
if (!partialDecoding) {
assert(oend > op);
assert(oend - op >= 4);
LZ4_write32(op, (U32)offset);
}
/* get matchlength */
length = token & ML_MASK;
if (length == ML_MASK) {
unsigned int s;
@@ -221,7 +318,7 @@ static FORCE_INLINE int LZ4_decompress_generic(
if ((safeDecode)
&& unlikely(
(size_t)(op + length) < (size_t)op)) {
(uptrval)(op) + length < (uptrval)op)) {
/* overflow detection */
goto _output_error;
}
@@ -229,32 +326,33 @@ static FORCE_INLINE int LZ4_decompress_generic(
length += MINMATCH;
/* check external dictionary */
/* match starting within external dictionary */
if ((dict == usingExtDict) && (match < lowPrefix)) {
if (unlikely(op + length > oend - LASTLITERALS)) {
/* doesn't respect parsing restriction */
goto _output_error;
if (!partialDecoding)
goto _output_error;
length = min(length, (size_t)(oend - op));
}
if (length <= (size_t)(lowPrefix - match)) {
/*
* match can be copied as a single segment
* from external dictionary
* match fits entirely within external
* dictionary : just copy
*/
memmove(op, dictEnd - (lowPrefix - match),
length);
op += length;
} else {
/*
* match encompass external
* match stretches into both external
* dictionary and current block
*/
size_t const copySize = (size_t)(lowPrefix - match);
size_t const restSize = length - copySize;
memcpy(op, dictEnd - copySize, copySize);
LZ4_memcpy(op, dictEnd - copySize, copySize);
op += copySize;
if (restSize > (size_t)(op - lowPrefix)) {
/* overlap copy */
BYTE * const endOfMatch = op + restSize;
@@ -263,27 +361,48 @@ static FORCE_INLINE int LZ4_decompress_generic(
while (op < endOfMatch)
*op++ = *copyFrom++;
} else {
memcpy(op, lowPrefix, restSize);
LZ4_memcpy(op, lowPrefix, restSize);
op += restSize;
}
}
continue;
}
/* copy match within block */
cpy = op + length;
if (unlikely(offset < 8)) {
const int dec64 = dec64table[offset];
/*
* partialDecoding :
* may not respect endBlock parsing restrictions
*/
assert(op <= oend);
if (partialDecoding &&
(cpy > oend - MATCH_SAFEGUARD_DISTANCE)) {
size_t const mlen = min(length, (size_t)(oend - op));
const BYTE * const matchEnd = match + mlen;
BYTE * const copyEnd = op + mlen;
if (matchEnd > op) {
/* overlap copy */
while (op < copyEnd)
*op++ = *match++;
} else {
LZ4_memcpy(op, match, mlen);
}
op = copyEnd;
if (op == oend)
break;
continue;
}
if (unlikely(offset < 8)) {
op[0] = match[0];
op[1] = match[1];
op[2] = match[2];
op[3] = match[3];
match += dec32table[offset];
memcpy(op + 4, match, 4);
match -= dec64;
match += inc32table[offset];
LZ4_memcpy(op + 4, match, 4);
match -= dec64table[offset];
} else {
LZ4_copy8(op, match);
match += 8;
@@ -291,7 +410,7 @@ static FORCE_INLINE int LZ4_decompress_generic(
op += 8;
if (unlikely(cpy > oend - 12)) {
if (unlikely(cpy > oend - MATCH_SAFEGUARD_DISTANCE)) {
BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1);
if (cpy > oend - LASTLITERALS) {
@@ -307,60 +426,139 @@ static FORCE_INLINE int LZ4_decompress_generic(
match += oCopyLimit - op;
op = oCopyLimit;
}
while (op < cpy)
*op++ = *match++;
} else {
LZ4_copy8(op, match);
if (length > 16)
LZ4_wildCopy(op + 8, match + 8, cpy);
}
op = cpy; /* correction */
op = cpy; /* wildcopy correction */
}
/* end of decoding */
if (endOnInput) {
/* Nb of output bytes decoded */
return (int) (((char *)op) - dest);
return (int) (((char *)op) - dst);
} else {
/* Nb of input bytes read */
return (int) (((const char *)ip) - source);
return (int) (((const char *)ip) - src);
}
/* Overflow error detected */
_output_error:
return -1;
return (int) (-(((const char *)ip) - src)) - 1;
}
int LZ4_decompress_safe(const char *source, char *dest,
int compressedSize, int maxDecompressedSize)
{
return LZ4_decompress_generic(source, dest, compressedSize,
maxDecompressedSize, endOnInputSize, full, 0,
noDict, (BYTE *)dest, NULL, 0);
return LZ4_decompress_generic(source, dest,
compressedSize, maxDecompressedSize,
endOnInputSize, decode_full_block,
noDict, (BYTE *)dest, NULL, 0);
}
int LZ4_decompress_safe_partial(const char *source, char *dest,
int compressedSize, int targetOutputSize, int maxDecompressedSize)
int LZ4_decompress_safe_partial(const char *src, char *dst,
int compressedSize, int targetOutputSize, int dstCapacity)
{
return LZ4_decompress_generic(source, dest, compressedSize,
maxDecompressedSize, endOnInputSize, partial,
targetOutputSize, noDict, (BYTE *)dest, NULL, 0);
dstCapacity = min(targetOutputSize, dstCapacity);
return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity,
endOnInputSize, partial_decode,
noDict, (BYTE *)dst, NULL, 0);
}
int LZ4_decompress_fast(const char *source, char *dest, int originalSize)
{
return LZ4_decompress_generic(source, dest, 0, originalSize,
endOnOutputSize, full, 0, withPrefix64k,
(BYTE *)(dest - 64 * KB), NULL, 64 * KB);
endOnOutputSize, decode_full_block,
withPrefix64k,
(BYTE *)dest - 64 * KB, NULL, 0);
}
/* ===== Instantiate a few more decoding cases, used more than once. ===== */
/*
 * LZ4_decompress_safe_withPrefix64k() - decode one full block, bounded by
 * the compressed input size, in the withPrefix64k dictionary mode.
 *
 * Thin wrapper around LZ4_decompress_generic(): it selects endOnInputSize,
 * decode_full_block and withPrefix64k, passes dest - 64 KB as the start of
 * the prefix and supplies no external dictionary (NULL, 0).
 *
 * The callers visible in this file only pick this variant when at least
 * 64 KB - 1 bytes of earlier data end exactly at dest.
 *
 * Return: the result of LZ4_decompress_generic(); its visible tail returns
 * the number of bytes written to dest in endOnInput mode, or a negative
 * value on error.
 */
int LZ4_decompress_safe_withPrefix64k(const char *source, char *dest,
int compressedSize, int maxOutputSize)
{
return LZ4_decompress_generic(source, dest,
compressedSize, maxOutputSize,
endOnInputSize, decode_full_block,
withPrefix64k,
/* prefix start: 64 KB before dest; no external dictionary */
(BYTE *)dest - 64 * KB, NULL, 0);
}
/*
 * LZ4_decompress_safe_withSmallPrefix() - decode one full block, bounded by
 * the compressed input size, with a prefix of prefixSize bytes before dest.
 *
 * Thin wrapper around LZ4_decompress_generic(): it selects endOnInputSize,
 * decode_full_block and the noDict mode, passes dest - prefixSize as the
 * start of the prefix and supplies no external dictionary (NULL, 0).
 *
 * The callers visible in this file use it when the earlier data ends
 * exactly at dest and is shorter than 64 KB - 1 bytes.
 *
 * Return: the result of LZ4_decompress_generic(); its visible tail returns
 * the number of bytes written to dest in endOnInput mode, or a negative
 * value on error.
 */
static int LZ4_decompress_safe_withSmallPrefix(const char *source, char *dest,
int compressedSize,
int maxOutputSize,
size_t prefixSize)
{
return LZ4_decompress_generic(source, dest,
compressedSize, maxOutputSize,
endOnInputSize, decode_full_block,
noDict,
/* prefix start: prefixSize bytes before dest; no external dictionary */
(BYTE *)dest - prefixSize, NULL, 0);
}
/*
 * LZ4_decompress_safe_forceExtDict() - decode one full block, bounded by
 * the compressed input size, against an external dictionary only.
 *
 * Thin wrapper around LZ4_decompress_generic(): it selects endOnInputSize,
 * decode_full_block and usingExtDict, passes dest itself as the start of
 * the prefix (so no data before dest is offered as prefix) and forwards
 * dictStart / dictSize as the external dictionary.
 *
 * The callers visible in this file use it when the dictionary does not end
 * at dest (the output buffer wrapped around or a different buffer is used).
 *
 * Return: the result of LZ4_decompress_generic(); its visible tail returns
 * the number of bytes written to dest in endOnInput mode, or a negative
 * value on error.
 */
int LZ4_decompress_safe_forceExtDict(const char *source, char *dest,
int compressedSize, int maxOutputSize,
const void *dictStart, size_t dictSize)
{
return LZ4_decompress_generic(source, dest,
compressedSize, maxOutputSize,
endOnInputSize, decode_full_block,
usingExtDict, (BYTE *)dest,
(const BYTE *)dictStart, dictSize);
}
/*
 * LZ4_decompress_fast_extDict() - decode one full block, bounded by the
 * expected output size, against an external dictionary only.
 *
 * Thin wrapper around LZ4_decompress_generic(): the compressed size is
 * passed as 0 because endOnOutputSize is selected and originalSize drives
 * termination. dest is passed as the start of the prefix and
 * dictStart / dictSize as the external dictionary (usingExtDict).
 *
 * Return: the result of LZ4_decompress_generic(); its visible tail returns
 * the number of input bytes read when not in endOnInput mode, or a negative
 * value on error.
 */
static int LZ4_decompress_fast_extDict(const char *source, char *dest,
int originalSize,
const void *dictStart, size_t dictSize)
{
return LZ4_decompress_generic(source, dest,
0, originalSize,
endOnOutputSize, decode_full_block,
usingExtDict, (BYTE *)dest,
(const BYTE *)dictStart, dictSize);
}
/*
* The "double dictionary" mode, for use with e.g. ring buffers: the first part
* of the dictionary is passed as prefix, and the second via dictStart + dictSize.
* These routines are used only once, in LZ4_decompress_*_continue().
*/
/*
 * LZ4_decompress_safe_doubleDict() - decode one full block, bounded by the
 * compressed input size, with both a prefix and an external dictionary.
 *
 * Thin wrapper around LZ4_decompress_generic(): it selects endOnInputSize,
 * decode_full_block and usingExtDict, passes dest - prefixSize as the start
 * of the prefix and forwards dictStart / dictSize as the external
 * dictionary.
 *
 * Return: the result of LZ4_decompress_generic(); its visible tail returns
 * the number of bytes written to dest in endOnInput mode, or a negative
 * value on error.
 */
static FORCE_INLINE
int LZ4_decompress_safe_doubleDict(const char *source, char *dest,
int compressedSize, int maxOutputSize,
size_t prefixSize,
const void *dictStart, size_t dictSize)
{
return LZ4_decompress_generic(source, dest,
compressedSize, maxOutputSize,
endOnInputSize, decode_full_block,
/* first dictionary part: the prefixSize bytes right before dest */
usingExtDict, (BYTE *)dest - prefixSize,
/* second dictionary part: the separate external buffer */
(const BYTE *)dictStart, dictSize);
}
/*
 * LZ4_decompress_fast_doubleDict() - decode one full block, bounded by the
 * expected output size, with both a prefix and an external dictionary.
 *
 * Thin wrapper around LZ4_decompress_generic(): the compressed size is
 * passed as 0 because endOnOutputSize is selected and originalSize drives
 * termination. dest - prefixSize is passed as the start of the prefix and
 * dictStart / dictSize as the external dictionary (usingExtDict).
 *
 * Return: the result of LZ4_decompress_generic(); its visible tail returns
 * the number of input bytes read when not in endOnInput mode, or a negative
 * value on error.
 */
static FORCE_INLINE
int LZ4_decompress_fast_doubleDict(const char *source, char *dest,
int originalSize, size_t prefixSize,
const void *dictStart, size_t dictSize)
{
return LZ4_decompress_generic(source, dest,
0, originalSize,
endOnOutputSize, decode_full_block,
/* first dictionary part: the prefixSize bytes right before dest */
usingExtDict, (BYTE *)dest - prefixSize,
/* second dictionary part: the separate external buffer */
(const BYTE *)dictStart, dictSize);
}
/* ===== streaming decompression functions ===== */
int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
const char *dictionary, int dictSize)
{
LZ4_streamDecode_t_internal *lz4sd = (LZ4_streamDecode_t_internal *) LZ4_streamDecode;
LZ4_streamDecode_t_internal *lz4sd =
&LZ4_streamDecode->internal_donotuse;
lz4sd->prefixSize = (size_t) dictSize;
lz4sd->prefixEnd = (const BYTE *) dictionary + dictSize;
@@ -382,35 +580,51 @@ int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
const char *source, char *dest, int compressedSize, int maxOutputSize)
{
LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
LZ4_streamDecode_t_internal *lz4sd =
&LZ4_streamDecode->internal_donotuse;
int result;
if (lz4sd->prefixEnd == (BYTE *)dest) {
result = LZ4_decompress_generic(source, dest,
compressedSize,
maxOutputSize,
endOnInputSize, full, 0,
usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize,
lz4sd->externalDict,
lz4sd->extDictSize);
if (lz4sd->prefixSize == 0) {
/* The first call, no dictionary yet. */
assert(lz4sd->extDictSize == 0);
result = LZ4_decompress_safe(source, dest,
compressedSize, maxOutputSize);
if (result <= 0)
return result;
lz4sd->prefixSize = result;
lz4sd->prefixEnd = (BYTE *)dest + result;
} else if (lz4sd->prefixEnd == (BYTE *)dest) {
/* They're rolling the current segment. */
if (lz4sd->prefixSize >= 64 * KB - 1)
result = LZ4_decompress_safe_withPrefix64k(source, dest,
compressedSize, maxOutputSize);
else if (lz4sd->extDictSize == 0)
result = LZ4_decompress_safe_withSmallPrefix(source,
dest, compressedSize, maxOutputSize,
lz4sd->prefixSize);
else
result = LZ4_decompress_safe_doubleDict(source, dest,
compressedSize, maxOutputSize,
lz4sd->prefixSize,
lz4sd->externalDict, lz4sd->extDictSize);
if (result <= 0)
return result;
lz4sd->prefixSize += result;
lz4sd->prefixEnd += result;
lz4sd->prefixEnd += result;
} else {
/*
* The buffer wraps around, or they're
* switching to another buffer.
*/
lz4sd->extDictSize = lz4sd->prefixSize;
lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
result = LZ4_decompress_generic(source, dest,
result = LZ4_decompress_safe_forceExtDict(source, dest,
compressedSize, maxOutputSize,
endOnInputSize, full, 0,
usingExtDict, (BYTE *)dest,
lz4sd->externalDict, lz4sd->extDictSize);
if (result <= 0)
return result;
lz4sd->prefixSize = result;
lz4sd->prefixEnd = (BYTE *)dest + result;
lz4sd->prefixEnd = (BYTE *)dest + result;
}
return result;
@@ -422,75 +636,66 @@ int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
int result;
if (lz4sd->prefixEnd == (BYTE *)dest) {
result = LZ4_decompress_generic(source, dest, 0, originalSize,
endOnOutputSize, full, 0,
usingExtDict,
lz4sd->prefixEnd - lz4sd->prefixSize,
lz4sd->externalDict, lz4sd->extDictSize);
if (result <= 0)
return result;
lz4sd->prefixSize += originalSize;
lz4sd->prefixEnd += originalSize;
} else {
lz4sd->extDictSize = lz4sd->prefixSize;
lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
result = LZ4_decompress_generic(source, dest, 0, originalSize,
endOnOutputSize, full, 0,
usingExtDict, (BYTE *)dest,
lz4sd->externalDict, lz4sd->extDictSize);
if (lz4sd->prefixSize == 0) {
assert(lz4sd->extDictSize == 0);
result = LZ4_decompress_fast(source, dest, originalSize);
if (result <= 0)
return result;
lz4sd->prefixSize = originalSize;
lz4sd->prefixEnd = (BYTE *)dest + originalSize;
lz4sd->prefixEnd = (BYTE *)dest + originalSize;
} else if (lz4sd->prefixEnd == (BYTE *)dest) {
if (lz4sd->prefixSize >= 64 * KB - 1 ||
lz4sd->extDictSize == 0)
result = LZ4_decompress_fast(source, dest,
originalSize);
else
result = LZ4_decompress_fast_doubleDict(source, dest,
originalSize, lz4sd->prefixSize,
lz4sd->externalDict, lz4sd->extDictSize);
if (result <= 0)
return result;
lz4sd->prefixSize += originalSize;
lz4sd->prefixEnd += originalSize;
} else {
lz4sd->extDictSize = lz4sd->prefixSize;
lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
result = LZ4_decompress_fast_extDict(source, dest,
originalSize, lz4sd->externalDict, lz4sd->extDictSize);
if (result <= 0)
return result;
lz4sd->prefixSize = originalSize;
lz4sd->prefixEnd = (BYTE *)dest + originalSize;
}
return result;
}
/*
* Advanced decoding functions :
* *_usingDict() :
* These decoding functions work the same as "_continue" ones,
* the dictionary must be explicitly provided within parameters
*/
static FORCE_INLINE int LZ4_decompress_usingDict_generic(const char *source,
char *dest, int compressedSize, int maxOutputSize, int safe,
const char *dictStart, int dictSize)
int LZ4_decompress_safe_usingDict(const char *source, char *dest,
int compressedSize, int maxOutputSize,
const char *dictStart, int dictSize)
{
if (dictSize == 0)
return LZ4_decompress_generic(source, dest,
compressedSize, maxOutputSize, safe, full, 0,
noDict, (BYTE *)dest, NULL, 0);
if (dictStart + dictSize == dest) {
if (dictSize >= (int)(64 * KB - 1))
return LZ4_decompress_generic(source, dest,
compressedSize, maxOutputSize, safe, full, 0,
withPrefix64k, (BYTE *)dest - 64 * KB, NULL, 0);
return LZ4_decompress_generic(source, dest, compressedSize,
maxOutputSize, safe, full, 0, noDict,
(BYTE *)dest - dictSize, NULL, 0);
return LZ4_decompress_safe(source, dest,
compressedSize, maxOutputSize);
if (dictStart+dictSize == dest) {
if (dictSize >= 64 * KB - 1)
return LZ4_decompress_safe_withPrefix64k(source, dest,
compressedSize, maxOutputSize);
return LZ4_decompress_safe_withSmallPrefix(source, dest,
compressedSize, maxOutputSize, dictSize);
}
return LZ4_decompress_generic(source, dest, compressedSize,
maxOutputSize, safe, full, 0, usingExtDict,
(BYTE *)dest, (const BYTE *)dictStart, dictSize);
}
int LZ4_decompress_safe_usingDict(const char *source, char *dest,
int compressedSize, int maxOutputSize,
const char *dictStart, int dictSize)
{
return LZ4_decompress_usingDict_generic(source, dest,
compressedSize, maxOutputSize, 1, dictStart, dictSize);
return LZ4_decompress_safe_forceExtDict(source, dest,
compressedSize, maxOutputSize, dictStart, dictSize);
}
int LZ4_decompress_fast_usingDict(const char *source, char *dest,
int originalSize, const char *dictStart, int dictSize)
int originalSize,
const char *dictStart, int dictSize)
{
return LZ4_decompress_usingDict_generic(source, dest, 0,
originalSize, 0, dictStart, dictSize);
if (dictSize == 0 || dictStart + dictSize == dest)
return LZ4_decompress_fast(source, dest, originalSize);
return LZ4_decompress_fast_extDict(source, dest, originalSize,
dictStart, dictSize);
}
#ifndef STATIC

View File

@@ -75,6 +75,11 @@ typedef uintptr_t uptrval;
#define WILDCOPYLENGTH 8
#define LASTLITERALS 5
#define MFLIMIT (WILDCOPYLENGTH + MINMATCH)
/*
* ensure it's possible to write 2 x wildcopyLength
* without overflowing output buffer
*/
#define MATCH_SAFEGUARD_DISTANCE ((2 * WILDCOPYLENGTH) - MINMATCH)
/* Increasing this value makes compression run slower on incompressible data */
#define LZ4_SKIPTRIGGER 6
@@ -132,6 +137,17 @@ static FORCE_INLINE void LZ4_writeLE16(void *memPtr, U16 value)
return put_unaligned_le16(value, memPtr);
}
/*
* LZ4 relies on memcpy with a constant size being inlined. In freestanding
* environments, the compiler can't assume the implementation of memcpy() is
* standard compliant, so it can't apply its specialized memcpy() inlining logic. When
* possible, use __builtin_memcpy() to tell the compiler to analyze memcpy()
* as-if it were standard compliant, so it can inline it in freestanding
* environments. This is needed when decompressing the Linux Kernel, for example.
*/
#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
#define LZ4_memmove(dst, src, size) __builtin_memmove(dst, src, size)
static FORCE_INLINE void LZ4_copy8(void *dst, const void *src)
{
#if LZ4_ARCH64
@@ -222,6 +238,8 @@ typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
typedef enum { full = 0, partial = 1 } earlyEnd_directive;
typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;
#define LZ4_STATIC_ASSERT(c) BUILD_BUG_ON(!(c))
#endif

View File

@@ -570,7 +570,7 @@ static int LZ4HC_compress_generic(
*op++ = (BYTE) lastRun;
} else
*op++ = (BYTE)(lastRun<<ML_BITS);
memcpy(op, anchor, iend - anchor);
LZ4_memcpy(op, anchor, iend - anchor);
op += iend - anchor;
}