Files
kernel_xiaomi_sm8250/fs/ext4/page-io.c
Michael Bestas e4b3323f61 Merge tag 'ASB-2023-04-05_4.19-stable' of https://android.googlesource.com/kernel/common into android13-4.19-kona
https://source.android.com/docs/security/bulletin/2023-04-01
CVE-2022-4696
CVE-2023-20941

* tag 'ASB-2023-04-05_4.19-stable' of https://android.googlesource.com/kernel/common:
  UPSTREAM: ext4: fix kernel BUG in 'ext4_write_inline_data_end()'
  UPSTREAM: fsverity: don't drop pagecache at end of FS_IOC_ENABLE_VERITY
  UPSTREAM: fsverity: Remove WQ_UNBOUND from fsverity read workqueue
  BACKPORT: blk-mq: clear stale request in tags->rq[] before freeing one request pool
  Linux 4.19.279
  HID: uhid: Over-ride the default maximum data buffer value with our own
  HID: core: Provide new max_buffer_size attribute to over-ride the default
  serial: 8250_em: Fix UART port type
  drm/i915: Don't use stolen memory for ring buffers with LLC
  x86/mm: Fix use of uninitialized buffer in sme_enable()
  fbdev: stifb: Provide valid pixelclock and add fb_check_var() checks
  ftrace: Fix invalid address access in lookup_rec() when index is 0
  tracing: Make tracepoint lockdep check actually test something
  tracing: Check field value in hist_field_name()
  sh: intc: Avoid spurious sizeof-pointer-div warning
  drm/amdkfd: Fix an illegal memory access
  ext4: fix task hung in ext4_xattr_delete_inode
  ext4: fail ext4_iget if special inode unallocated
  jffs2: correct logic when creating a hole in jffs2_write_begin
  mmc: atmel-mci: fix race between stop command and start of next command
  media: m5mols: fix off-by-one loop termination error
  hwmon: (xgene) Fix use after free bug in xgene_hwmon_remove due to race condition
  hwmon: (adt7475) Fix masking of hysteresis registers
  hwmon: (adt7475) Display smoothing attributes in correct order
  ethernet: sun: add check for the mdesc_grab()
  net/iucv: Fix size of interrupt data
  net: usb: smsc75xx: Move packet length check to prevent kernel panic in skb_pull
  ipv4: Fix incorrect table ID in IOCTL path
  block: sunvdc: add check for mdesc_grab() returning NULL
  nvmet: avoid potential UAF in nvmet_req_complete()
  net: usb: smsc75xx: Limit packet length to skb->len
  nfc: st-nci: Fix use after free bug in ndlc_remove due to race condition
  net: phy: smsc: bail out in lan87xx_read_status if genphy_read_status fails
  net: tunnels: annotate lockless accesses to dev->needed_headroom
  qed/qed_dev: guard against a possible division by zero
  nfc: pn533: initialize struct pn533_out_arg properly
  tcp: tcp_make_synack() can be called from process context
  clk: HI655X: select REGMAP instead of depending on it
  fs: sysfs_emit_at: Remove PAGE_SIZE alignment check
  ext4: fix cgroup writeback accounting with fs-layer encryption
  UPSTREAM: ext4: fix another off-by-one fsmap error on 1k block filesystems
  Linux 4.19.278
  ila: do not generate empty messages in ila_xlat_nl_cmd_get_mapping()
  nfc: fdp: add null check of devm_kmalloc_array in fdp_nci_i2c_read_device_properties
  net: caif: Fix use-after-free in cfusbl_device_notify()
  drm/i915: Don't use BAR mappings for ring buffers with LLC
  tipc: improve function tipc_wait_for_cond()
  media: ov5640: Fix analogue gain control
  PCI: Add SolidRun vendor ID
  macintosh: windfarm: Use unsigned type for 1-bit bitfields
  alpha: fix R_ALPHA_LITERAL reloc for large modules
  MIPS: Fix a compilation issue
  Revert "spi: mt7621: Fix an error message in mt7621_spi_probe()"
  scsi: core: Remove the /proc/scsi/${proc_name} directory earlier
  kbuild: generate modules.order only in directories visited by obj-y/m
  kbuild: fix false-positive need-builtin calculation
  udf: Detect system inodes linked into directory hierarchy
  udf: Preserve link count of system files
  udf: Remove pointless union in udf_inode_info
  udf: reduce leakage of blocks related to named streams
  udf: Explain handling of load_nls() failure
  nfc: change order inside nfc_se_io error path
  ext4: zero i_disksize when initializing the bootloader inode
  ext4: fix WARNING in ext4_update_inline_data
  ext4: move where set the MAY_INLINE_DATA flag is set
  ext4: fix another off-by-one fsmap error on 1k block filesystems
  ext4: fix RENAME_WHITEOUT handling for inline directories
  x86/CPU/AMD: Disable XSAVES on AMD family 0x17
  fs: prevent out-of-bounds array speculation when closing a file descriptor
  Linux 4.19.277
  staging: rtl8192e: Remove call_usermodehelper starting RadioPower.sh
  staging: rtl8192e: Remove function ..dm_check_ac_dc_power calling a script
  wifi: cfg80211: Partial revert "wifi: cfg80211: Fix use after free for wext"
  Linux 4.19.276
  thermal: intel: powerclamp: Fix cur_state for multi package system
  f2fs: fix cgroup writeback accounting with fs-layer encryption
  media: uvcvideo: Fix race condition with usb_kill_urb
  media: uvcvideo: Provide sync and async uvc_ctrl_status_event
  tcp: Fix listen() regression in 4.19.270
  s390/setup: init jump labels before command line parsing
  s390/maccess: add no DAT mode to kernel_write
  Bluetooth: hci_sock: purge socket queues in the destruct() callback
  phy: rockchip-typec: Fix unsigned comparison with less than zero
  usb: uvc: Enumerate valid values for color matching
  USB: ene_usb6250: Allocate enough memory for full object
  usb: host: xhci: mvebu: Iterate over array indexes instead of using pointer math
  iio: accel: mma9551_core: Prevent uninitialized variable in mma9551_read_config_word()
  iio: accel: mma9551_core: Prevent uninitialized variable in mma9551_read_status_word()
  tools/iio/iio_utils:fix memory leak
  mei: bus-fixup:upon error print return values of send and receive
  tty: serial: fsl_lpuart: disable the CTS when send break signal
  tty: fix out-of-bounds access in tty_driver_lookup_tty()
  media: uvcvideo: Silence memcpy() run-time false positive warnings
  media: uvcvideo: Handle errors from calls to usb_string
  media: uvcvideo: Handle cameras with invalid descriptors
  firmware/efi sysfb_efi: Add quirk for Lenovo IdeaPad Duet 3
  tracing: Add NULL checks for buffer in ring_buffer_free_read_page()
  thermal: intel: quark_dts: fix error pointer dereference
  scsi: ipr: Work around fortify-string warning
  vc_screen: modify vcs_size() handling in vcs_read()
  tcp: tcp_check_req() can be called from process context
  ARM: dts: spear320-hmi: correct STMPE GPIO compatible
  nfc: fix memory leak of se_io context in nfc_genl_se_io
  9p/rdma: unmap receive dma buffer in rdma_request()/post_recv()
  9p/xen: fix connection sequence
  9p/xen: fix version parsing
  net: fix __dev_kfree_skb_any() vs drop monitor
  netfilter: ctnetlink: fix possible refcount leak in ctnetlink_create_conntrack()
  watchdog: pcwd_usb: Fix attempting to access uninitialized memory
  watchdog: Fix kmemleak in watchdog_cdev_register
  watchdog: at91sam9_wdt: use devm_request_irq to avoid missing free_irq() in error path
  x86: um: vdso: Add '%rcx' and '%r11' to the syscall clobber list
  ubi: ubi_wl_put_peb: Fix infinite loop when wear-leveling work failed
  ubi: Fix UAF wear-leveling entry in eraseblk_count_seq_show()
  ubifs: ubifs_writepage: Mark page dirty after writing inode failed
  ubifs: dirty_cow_znode: Fix memleak in error handling path
  ubifs: Re-statistic cleaned znode count if commit failed
  ubi: Fix possible null-ptr-deref in ubi_free_volume()
  ubi: Fix unreferenced object reported by kmemleak in ubi_resize_volume()
  ubi: Fix use-after-free when volume resizing failed
  ubifs: Reserve one leb for each journal head while doing budget
  ubifs: do_rename: Fix wrong space budget when target inode's nlink > 1
  ubifs: Fix wrong dirty space budget for dirty inode
  ubifs: Rectify space budget for ubifs_xrename()
  ubifs: Rectify space budget for ubifs_symlink() if symlink is encrypted
  ubi: ensure that VID header offset + VID header size <= alloc, size
  um: vector: Fix memory leak in vector_config
  pwm: stm32-lp: fix the check on arr and cmp registers update
  fs/jfs: fix shift exponent db_agl2size negative
  net/sched: Retire tcindex classifier
  kbuild: Port silent mode detection to future gnu make.
  wifi: ath9k: use proper statements in conditionals
  drm/radeon: Fix eDP for single-display iMac11,2
  PCI: Avoid FLR for AMD FCH AHCI adapters
  scsi: ses: Fix slab-out-of-bounds in ses_intf_remove()
  scsi: ses: Fix possible desc_ptr out-of-bounds accesses
  scsi: ses: Fix possible addl_desc_ptr out-of-bounds accesses
  scsi: ses: Fix slab-out-of-bounds in ses_enclosure_data_process()
  scsi: ses: Don't attach if enclosure has no components
  scsi: qla2xxx: Fix erroneous link down
  scsi: qla2xxx: Fix link failure in NPIV environment
  ktest.pl: Add RUN_TIMEOUT option with default unlimited
  ktest.pl: Fix missing "end_monitor" when machine check fails
  ktest.pl: Give back console on Ctrt^C on monitor
  media: ipu3-cio2: Fix PM runtime usage_count in driver unbind
  mips: fix syscall_get_nr
  alpha: fix FEN fault handling
  rbd: avoid use-after-free in do_rbd_add() when rbd_dev_create() fails
  ARM: dts: exynos: correct TMU phandle in Odroid XU
  ARM: dts: exynos: correct TMU phandle in Exynos4
  dm flakey: don't corrupt the zero page
  dm flakey: fix logic when corrupting a bio
  wifi: cfg80211: Fix use after free for wext
  wifi: rtl8xxxu: Use a longer retry limit of 48
  ext4: refuse to create ea block when umounted
  ext4: optimize ea_inode block expansion
  ALSA: ice1712: Do not left ice->gpio_mutex locked in aureon_add_controls()
  irqdomain: Drop bogus fwspec-mapping error handling
  irqdomain: Fix disassociation race
  irqdomain: Fix association race
  ima: Align ima_file_mmap() parameters with mmap_file LSM hook
  Documentation/hw-vuln: Document the interaction between IBRS and STIBP
  x86/speculation: Allow enabling STIBP with legacy IBRS
  x86/microcode/AMD: Fix mixed steppings support
  x86/microcode/AMD: Add a @cpu parameter to the reloading functions
  x86/microcode/amd: Remove load_microcode_amd()'s bsp parameter
  x86/kprobes: Fix arch_check_optimized_kprobe check within optimized_kprobe range
  x86/kprobes: Fix __recover_optprobed_insn check optimizing logic
  x86/reboot: Disable SVM, not just VMX, when stopping CPUs
  x86/reboot: Disable virtualization in an emergency if SVM is supported
  x86/crash: Disable virt in core NMI crash handler to avoid double shootdown
  x86/virt: Force GIF=1 prior to disabling SVM (for reboot flows)
  udf: Fix file corruption when appending just after end of preallocated extent
  udf: Do not update file length for failed writes to inline files
  udf: Do not bother merging very long extents
  udf: Truncate added extents on failed expansion
  ocfs2: fix non-auto defrag path not working issue
  ocfs2: fix defrag path triggering jbd2 ASSERT
  f2fs: fix information leak in f2fs_move_inline_dirents()
  fs: hfsplus: fix UAF issue in hfsplus_put_super
  hfs: fix missing hfs_bnode_get() in __hfs_bnode_create
  ARM: dts: exynos: correct HDMI phy compatible in Exynos4
  s390/kprobes: fix current_kprobe never cleared after kprobes reenter
  s390/kprobes: fix irq mask clobbering on kprobe reenter from post_handler
  s390: discard .interp section
  rtc: pm8xxx: fix set-alarm race
  firmware: coreboot: framebuffer: Ignore reserved pixel color bits
  wifi: rtl8xxxu: fixing transmisison failure for rtl8192eu
  dm cache: add cond_resched() to various workqueue loops
  dm thin: add cond_resched() to various workqueue loops
  pinctrl: at91: use devm_kasprintf() to avoid potential leaks
  regulator: s5m8767: Bounds check id indexing into arrays
  regulator: max77802: Bounds check regulator id against opmode
  ASoC: kirkwood: Iterate over array indexes instead of using pointer math
  docs/scripts/gdb: add necessary make scripts_gdb step
  drm/msm/dsi: Add missing check for alloc_ordered_workqueue
  drm/radeon: free iio for atombios when driver shutdown
  drm/amd/display: Fix potential null-deref in dm_resume
  net/mlx5: fw_tracer: Fix debug print
  ACPI: video: Fix Lenovo Ideapad Z570 DMI match
  m68k: Check syscall_trace_enter() return code
  net: bcmgenet: Add a check for oversized packets
  ACPI: Don't build ACPICA with '-Os'
  inet: fix fast path in __inet_hash_connect()
  wifi: brcmfmac: ensure CLM version is null-terminated to prevent stack-out-of-bounds
  x86/bugs: Reset speculation control settings on init
  timers: Prevent union confusion from unexpected restart_syscall()
  thermal: intel: Fix unsigned comparison with less than zero
  rcu: Suppress smp_processor_id() complaint in synchronize_rcu_expedited_wait()
  wifi: brcmfmac: Fix potential stack-out-of-bounds in brcmf_c_preinit_dcmds()
  ARM: dts: exynos: Use Exynos5420 compatible for the MIPI video phy
  udf: Define EFSCORRUPTED error code
  rpmsg: glink: Avoid infinite loop on intent for missing channel
  media: usb: siano: Fix use after free bugs caused by do_submit_urb
  media: i2c: ov7670: 0 instead of -EINVAL was returned
  media: rc: Fix use-after-free bugs caused by ene_tx_irqsim()
  media: i2c: ov772x: Fix memleak in ov772x_probe()
  powerpc: Remove linker flag from KBUILD_AFLAGS
  media: platform: ti: Add missing check for devm_regulator_get
  MIPS: vpe-mt: drop physical_memsize
  powerpc/rtas: ensure 4KB alignment for rtas_data_buf
  powerpc/rtas: make all exports GPL
  powerpc/pseries/lparcfg: add missing RTAS retry status handling
  clk: Honor CLK_OPS_PARENT_ENABLE in clk_core_is_enabled()
  powerpc/powernv/ioda: Skip unallocated resources when mapping to PE
  Input: ads7846 - don't check penirq immediately for 7845
  Input: ads7846 - don't report pressure for ads7845
  mtd: rawnand: sunxi: Fix the size of the last OOB region
  mfd: pcf50633-adc: Fix potential memleak in pcf50633_adc_async_read()
  selftests/ftrace: Fix bash specific "==" operator
  sparc: allow PM configs for sparc32 COMPILE_TEST
  perf tools: Fix auto-complete on aarch64
  perf llvm: Fix inadvertent file creation
  gfs2: jdata writepage fix
  cifs: Fix warning and UAF when destroy the MR list
  cifs: Fix lost destroy smbd connection when MR allocate failed
  nfsd: fix race to check ls_layouts
  dm: remove flush_scheduled_work() during local_exit()
  hwmon: (mlxreg-fan) Return zero speed for broken fan
  spi: bcm63xx-hsspi: Fix multi-bit mode setting
  spi: bcm63xx-hsspi: fix pm_runtime
  scsi: aic94xx: Add missing check for dma_map_single()
  hwmon: (ltc2945) Handle error case in ltc2945_value_store
  gpio: vf610: connect GPIO label to dev name
  ASoC: soc-compress.c: fixup private_data on snd_soc_new_compress()
  drm/mediatek: Clean dangling pointer on bind error path
  drm/mediatek: Drop unbalanced obj unref
  gpu: host1x: Don't skip assigning syncpoints to channels
  drm/msm/dpu: Add check for pstates
  drm/msm: use strscpy instead of strncpy
  drm/mipi-dsi: Fix byte order of 16-bit DCS set/get brightness
  ALSA: hda/ca0132: minor fix for allocation size
  pinctrl: rockchip: Fix refcount leak in rockchip_pinctrl_parse_groups
  pinctrl: pinctrl-rockchip: Fix a bunch of kerneldoc misdemeanours
  drm/msm/hdmi: Add missing check for alloc_ordered_workqueue
  gpu: ipu-v3: common: Add of_node_put() for reference returned by of_graph_get_port_by_id()
  drm/vc4: dpi: Fix format mapping for RGB565
  drm/vc4: dpi: Add option for inverting pixel clock and output enable
  drm: Clarify definition of the DRM_BUS_FLAG_(PIXDATA|SYNC)_* macros
  drm/bridge: megachips: Fix error handling in i2c_register_driver()
  drm: mxsfb: DRM_MXSFB should depend on ARCH_MXS || ARCH_MXC
  selftest: fib_tests: Always cleanup before exit
  irqchip/irq-bcm7120-l2: Set IRQ_LEVEL for level triggered interrupts
  irqchip/irq-brcmstb-l2: Set IRQ_LEVEL for level triggered interrupts
  can: esd_usb: Move mislocated storage of SJA1000_ECC_SEG bits in case of a bus error
  wifi: mac80211: make rate u32 in sta_set_rate_info_rx()
  crypto: crypto4xx - Call dma_unmap_page when done
  wifi: mwifiex: fix loop iterator in mwifiex_update_ampdu_txwinsize()
  wifi: iwl4965: Add missing check for create_singlethread_workqueue()
  wifi: iwl3945: Add missing check for create_singlethread_workqueue
  RISC-V: time: initialize hrtimer based broadcast clock event device
  m68k: /proc/hardware should depend on PROC_FS
  crypto: rsa-pkcs1pad - Use akcipher_request_complete
  rds: rds_rm_zerocopy_callback() correct order for list_add_tail()
  libbpf: Fix alen calculation in libbpf_nla_dump_errormsg()
  Bluetooth: L2CAP: Fix potential user-after-free
  irqchip/irq-mvebu-gicp: Fix refcount leak in mvebu_gicp_probe
  irqchip/alpine-msi: Fix refcount leak in alpine_msix_init_domains
  net/mlx5: Enhance debug print in page allocation failure
  powercap: fix possible name leak in powercap_register_zone()
  crypto: seqiv - Handle EBUSY correctly
  ACPI: battery: Fix missing NUL-termination with large strings
  wifi: ath9k: Fix potential stack-out-of-bounds write in ath9k_wmi_rsp_callback()
  wifi: ath9k: hif_usb: clean up skbs if ath9k_hif_usb_rx_stream() fails
  ath9k: htc: clean up statistics macros
  ath9k: hif_usb: simplify if-if to if-else
  wifi: ath9k: htc_hst: free skb in ath9k_htc_rx_msg() if there is no callback function
  wifi: orinoco: check return value of hermes_write_wordrec()
  ACPICA: nsrepair: handle cases without a return value correctly
  lib/mpi: Fix buffer overrun when SG is too long
  genirq: Fix the return type of kstat_cpu_irqs_sum()
  ACPICA: Drop port I/O validation for some regions
  wifi: wl3501_cs: don't call kfree_skb() under spin_lock_irqsave()
  wifi: libertas: cmdresp: don't call kfree_skb() under spin_lock_irqsave()
  wifi: libertas: main: don't call kfree_skb() under spin_lock_irqsave()
  wifi: libertas: if_usb: don't call kfree_skb() under spin_lock_irqsave()
  wifi: libertas_tf: don't call kfree_skb() under spin_lock_irqsave()
  wifi: brcmfmac: unmap dma buffer in brcmf_msgbuf_alloc_pktid()
  wifi: brcmfmac: fix potential memory leak in brcmf_netdev_start_xmit()
  wifi: ipw2200: fix memory leak in ipw_wdev_init()
  wifi: ipw2x00: don't call dev_kfree_skb() under spin_lock_irqsave()
  ipw2x00: switch from 'pci_' to 'dma_' API
  wifi: rtlwifi: Fix global-out-of-bounds bug in _rtl8812ae_phy_set_txpower_limit()
  rtlwifi: fix -Wpointer-sign warning
  wifi: rtl8xxxu: don't call dev_kfree_skb() under spin_lock_irqsave()
  wifi: libertas: fix memory leak in lbs_init_adapter()
  wifi: rsi: Fix memory leak in rsi_coex_attach()
  block: bio-integrity: Copy flags when bio_integrity_payload is cloned
  blk-mq: remove stale comment for blk_mq_sched_mark_restart_hctx
  arm64: dts: mediatek: mt7622: Add missing pwm-cells to pwm node
  arm64: dts: amlogic: meson-gxl: add missing unit address to eth-phy-mux node name
  arm64: dts: amlogic: meson-gx: add missing unit address to rng node name
  arm64: dts: amlogic: meson-gx: add missing SCPI sensors compatible
  arm64: dts: amlogic: meson-axg: fix SCPI clock dvfs node name
  arm64: dts: meson-axg: enable SCPI
  arm64: dts: amlogic: meson-gx: fix SCPI clock dvfs node name
  ARM: imx: Call ida_simple_remove() for ida_simple_get
  ARM: dts: exynos: correct wr-active property in Exynos3250 Rinato
  ARM: OMAP1: call platform_device_put() in error case in omap1_dm_timer_init()
  arm64: dts: meson-gx: Fix the SCPI DVFS node name and unit address
  arm64: dts: meson-gx: Fix Ethernet MAC address unit name
  ARM: zynq: Fix refcount leak in zynq_early_slcr_init
  ARM: OMAP2+: Fix memory leak in realtime_counter_init()
  HID: asus: use spinlock to safely schedule workers
  HID: asus: use spinlock to protect concurrent accesses
  HID: asus: Remove check for same LED brightness on set

Change-Id: Ie09175b59aef5de140e476316d94097cac7a3031
2023-04-06 14:00:28 +03:00

542 lines
14 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/page-io.c
*
* This contains the new page_io functions for ext4
*
* Written by Theodore Ts'o, 2010.
*/
#include <linux/fs.h>
#include <linux/time.h>
#include <linux/highuid.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include <linux/string.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/namei.h>
#include <linux/uio.h>
#include <linux/bio.h>
#include <linux/workqueue.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/backing-dev.h>
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
static struct kmem_cache *io_end_cachep;
int __init ext4_init_pageio(void)
{
io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
if (io_end_cachep == NULL)
return -ENOMEM;
return 0;
}
void ext4_exit_pageio(void)
{
kmem_cache_destroy(io_end_cachep);
}
/*
* Print an buffer I/O error compatible with the fs/buffer.c. This
* provides compatibility with dmesg scrapers that look for a specific
* buffer I/O error message. We really need a unified error reporting
* structure to userspace ala Digital Unix's uerf system, but it's
* probably not going to happen in my lifetime, due to LKML politics...
*/
static void buffer_io_error(struct buffer_head *bh)
{
printk_ratelimited(KERN_ERR "Buffer I/O error on device %pg, logical block %llu\n",
bh->b_bdev,
(unsigned long long)bh->b_blocknr);
}
static void ext4_finish_bio(struct bio *bio)
{
int i;
struct bio_vec *bvec;
bio_for_each_segment_all(bvec, bio, i) {
struct page *page = bvec->bv_page;
struct page *bounce_page = NULL;
struct buffer_head *bh, *head;
unsigned bio_start = bvec->bv_offset;
unsigned bio_end = bio_start + bvec->bv_len;
unsigned under_io = 0;
unsigned long flags;
if (!page)
continue;
if (fscrypt_is_bounce_page(page)) {
bounce_page = page;
page = fscrypt_pagecache_page(bounce_page);
}
if (bio->bi_status) {
SetPageError(page);
mapping_set_error(page->mapping, -EIO);
}
bh = head = page_buffers(page);
/*
* We check all buffers in the page under BH_Uptodate_Lock
* to avoid races with other end io clearing async_write flags
*/
local_irq_save(flags);
bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
do {
if (bh_offset(bh) < bio_start ||
bh_offset(bh) + bh->b_size > bio_end) {
if (buffer_async_write(bh))
under_io++;
continue;
}
clear_buffer_async_write(bh);
if (bio->bi_status) {
set_buffer_write_io_error(bh);
buffer_io_error(bh);
}
} while ((bh = bh->b_this_page) != head);
bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
local_irq_restore(flags);
if (!under_io) {
fscrypt_free_bounce_page(bounce_page);
end_page_writeback(page);
}
}
}
static void ext4_release_io_end(ext4_io_end_t *io_end)
{
struct bio *bio, *next_bio;
BUG_ON(!list_empty(&io_end->list));
BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
WARN_ON(io_end->handle);
for (bio = io_end->bio; bio; bio = next_bio) {
next_bio = bio->bi_private;
ext4_finish_bio(bio);
bio_put(bio);
}
kmem_cache_free(io_end_cachep, io_end);
}
/*
* Check a range of space and convert unwritten extents to written. Note that
* we are protected from truncate touching same part of extent tree by the
* fact that truncate code waits for all DIO to finish (thus exclusion from
* direct IO is achieved) and also waits for PageWriteback bits. Thus we
* cannot get to ext4_ext_truncate() before all IOs overlapping that range are
* completed (happens from ext4_free_ioend()).
*/
static int ext4_end_io(ext4_io_end_t *io)
{
struct inode *inode = io->inode;
loff_t offset = io->offset;
ssize_t size = io->size;
handle_t *handle = io->handle;
int ret = 0;
ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
"list->prev 0x%p\n",
io, inode->i_ino, io->list.next, io->list.prev);
io->handle = NULL; /* Following call will use up the handle */
ret = ext4_convert_unwritten_extents(handle, inode, offset, size);
if (ret < 0 && !ext4_forced_shutdown(EXT4_SB(inode->i_sb))) {
ext4_msg(inode->i_sb, KERN_EMERG,
"failed to convert unwritten extents to written "
"extents -- potential data loss! "
"(inode %lu, offset %llu, size %zd, error %d)",
inode->i_ino, offset, size, ret);
}
ext4_clear_io_unwritten_flag(io);
ext4_release_io_end(io);
return ret;
}
static void dump_completed_IO(struct inode *inode, struct list_head *head)
{
#ifdef EXT4FS_DEBUG
struct list_head *cur, *before, *after;
ext4_io_end_t *io, *io0, *io1;
if (list_empty(head))
return;
ext4_debug("Dump inode %lu completed io list\n", inode->i_ino);
list_for_each_entry(io, head, list) {
cur = &io->list;
before = cur->prev;
io0 = container_of(before, ext4_io_end_t, list);
after = cur->next;
io1 = container_of(after, ext4_io_end_t, list);
ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
io, inode->i_ino, io0, io1);
}
#endif
}
/* Add the io_end to per-inode completed end_io list. */
static void ext4_add_complete_io(ext4_io_end_t *io_end)
{
struct ext4_inode_info *ei = EXT4_I(io_end->inode);
struct ext4_sb_info *sbi = EXT4_SB(io_end->inode->i_sb);
struct workqueue_struct *wq;
unsigned long flags;
/* Only reserved conversions from writeback should enter here */
WARN_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
WARN_ON(!io_end->handle && sbi->s_journal);
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
wq = sbi->rsv_conversion_wq;
if (list_empty(&ei->i_rsv_conversion_list))
queue_work(wq, &ei->i_rsv_conversion_work);
list_add_tail(&io_end->list, &ei->i_rsv_conversion_list);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
}
static int ext4_do_flush_completed_IO(struct inode *inode,
struct list_head *head)
{
ext4_io_end_t *io;
struct list_head unwritten;
unsigned long flags;
struct ext4_inode_info *ei = EXT4_I(inode);
int err, ret = 0;
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
dump_completed_IO(inode, head);
list_replace_init(head, &unwritten);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
while (!list_empty(&unwritten)) {
io = list_entry(unwritten.next, ext4_io_end_t, list);
BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN));
list_del_init(&io->list);
err = ext4_end_io(io);
if (unlikely(!ret && err))
ret = err;
}
return ret;
}
/*
* work on completed IO, to convert unwritten extents to extents
*/
void ext4_end_io_rsv_work(struct work_struct *work)
{
struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info,
i_rsv_conversion_work);
ext4_do_flush_completed_IO(&ei->vfs_inode, &ei->i_rsv_conversion_list);
}
ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
{
ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags);
if (io) {
io->inode = inode;
INIT_LIST_HEAD(&io->list);
atomic_set(&io->count, 1);
}
return io;
}
void ext4_put_io_end_defer(ext4_io_end_t *io_end)
{
if (atomic_dec_and_test(&io_end->count)) {
if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
ext4_release_io_end(io_end);
return;
}
ext4_add_complete_io(io_end);
}
}
int ext4_put_io_end(ext4_io_end_t *io_end)
{
int err = 0;
if (atomic_dec_and_test(&io_end->count)) {
if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
err = ext4_convert_unwritten_extents(io_end->handle,
io_end->inode, io_end->offset,
io_end->size);
io_end->handle = NULL;
ext4_clear_io_unwritten_flag(io_end);
}
ext4_release_io_end(io_end);
}
return err;
}
ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
{
atomic_inc(&io_end->count);
return io_end;
}
/* BIO completion function for page writeback */
static void ext4_end_bio(struct bio *bio)
{
ext4_io_end_t *io_end = bio->bi_private;
sector_t bi_sector = bio->bi_iter.bi_sector;
char b[BDEVNAME_SIZE];
if (WARN_ONCE(!io_end, "io_end is NULL: %s: sector %Lu len %u err %d\n",
bio_devname(bio, b),
(long long) bio->bi_iter.bi_sector,
(unsigned) bio_sectors(bio),
bio->bi_status)) {
ext4_finish_bio(bio);
bio_put(bio);
return;
}
bio->bi_end_io = NULL;
if (bio->bi_status) {
struct inode *inode = io_end->inode;
ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
"(offset %llu size %ld starting block %llu)",
bio->bi_status, inode->i_ino,
(unsigned long long) io_end->offset,
(long) io_end->size,
(unsigned long long)
bi_sector >> (inode->i_blkbits - 9));
mapping_set_error(inode->i_mapping,
blk_status_to_errno(bio->bi_status));
}
if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
/*
* Link bio into list hanging from io_end. We have to do it
* atomically as bio completions can be racing against each
* other.
*/
bio->bi_private = xchg(&io_end->bio, bio);
ext4_put_io_end_defer(io_end);
} else {
/*
* Drop io_end reference early. Inode can get freed once
* we finish the bio.
*/
ext4_put_io_end_defer(io_end);
ext4_finish_bio(bio);
bio_put(bio);
}
}
void ext4_io_submit(struct ext4_io_submit *io)
{
struct bio *bio = io->io_bio;
if (bio) {
int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ?
REQ_SYNC : 0;
io->io_bio->bi_write_hint = io->io_end->inode->i_write_hint;
bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags);
submit_bio(io->io_bio);
}
io->io_bio = NULL;
}
void ext4_io_submit_init(struct ext4_io_submit *io,
struct writeback_control *wbc)
{
io->io_wbc = wbc;
io->io_bio = NULL;
io->io_end = NULL;
}
static int io_submit_init_bio(struct ext4_io_submit *io,
struct buffer_head *bh)
{
struct bio *bio;
bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
if (!bio)
return -ENOMEM;
fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
wbc_init_bio(io->io_wbc, bio);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio_set_dev(bio, bh->b_bdev);
bio->bi_end_io = ext4_end_bio;
bio->bi_private = ext4_get_io_end(io->io_end);
io->io_bio = bio;
io->io_next_block = bh->b_blocknr;
return 0;
}
static int io_submit_add_bh(struct ext4_io_submit *io,
struct inode *inode,
struct page *pagecache_page,
struct page *bounce_page,
struct buffer_head *bh)
{
int ret;
if (io->io_bio && (bh->b_blocknr != io->io_next_block ||
!fscrypt_mergeable_bio_bh(io->io_bio, bh))) {
submit_and_retry:
ext4_io_submit(io);
}
if (io->io_bio == NULL) {
ret = io_submit_init_bio(io, bh);
if (ret)
return ret;
io->io_bio->bi_write_hint = inode->i_write_hint;
}
ret = bio_add_page(io->io_bio, bounce_page ?: pagecache_page,
bh->b_size, bh_offset(bh));
if (ret != bh->b_size)
goto submit_and_retry;
wbc_account_io(io->io_wbc, pagecache_page, bh->b_size);
io->io_next_block++;
return 0;
}
int ext4_bio_write_page(struct ext4_io_submit *io,
struct page *page,
int len,
struct writeback_control *wbc,
bool keep_towrite)
{
struct page *bounce_page = NULL;
struct inode *inode = page->mapping->host;
unsigned block_start;
struct buffer_head *bh, *head;
int ret = 0;
int nr_submitted = 0;
int nr_to_submit = 0;
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
if (keep_towrite)
set_page_writeback_keepwrite(page);
else
set_page_writeback(page);
ClearPageError(page);
/*
* Comments copied from block_write_full_page:
*
* The page straddles i_size. It must be zeroed out on each and every
* writepage invocation because it may be mmapped. "A file is mapped
* in multiples of the page size. For a file that is not a multiple of
* the page size, the remaining memory is zeroed when mapped, and
* writes to that region are not written out to the file."
*/
if (len < PAGE_SIZE)
zero_user_segment(page, len, PAGE_SIZE);
/*
* In the first loop we prepare and mark buffers to submit. We have to
* mark all buffers in the page before submitting so that
* end_page_writeback() cannot be called from ext4_bio_end_io() when IO
* on the first buffer finishes and we are still working on submitting
* the second buffer.
*/
bh = head = page_buffers(page);
do {
block_start = bh_offset(bh);
if (block_start >= len) {
clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
continue;
}
if (!buffer_dirty(bh) || buffer_delay(bh) ||
!buffer_mapped(bh) || buffer_unwritten(bh)) {
/* A hole? We can safely clear the dirty bit */
if (!buffer_mapped(bh))
clear_buffer_dirty(bh);
if (io->io_bio)
ext4_io_submit(io);
continue;
}
if (buffer_new(bh)) {
clear_buffer_new(bh);
clean_bdev_bh_alias(bh);
}
set_buffer_async_write(bh);
nr_to_submit++;
} while ((bh = bh->b_this_page) != head);
bh = head = page_buffers(page);
if (fscrypt_inode_uses_fs_layer_crypto(inode) && nr_to_submit) {
gfp_t gfp_flags = GFP_NOFS;
/*
* Since bounce page allocation uses a mempool, we can only use
* a waiting mask (i.e. request guaranteed allocation) on the
* first page of the bio. Otherwise it can deadlock.
*/
if (io->io_bio)
gfp_flags = GFP_NOWAIT | __GFP_NOWARN;
retry_encrypt:
bounce_page = fscrypt_encrypt_pagecache_blocks(page,
PAGE_SIZE,0, gfp_flags);
if (IS_ERR(bounce_page)) {
ret = PTR_ERR(bounce_page);
if (ret == -ENOMEM &&
(io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) {
gfp_flags = GFP_NOFS;
if (io->io_bio)
ext4_io_submit(io);
else
gfp_flags |= __GFP_NOFAIL;
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry_encrypt;
}
bounce_page = NULL;
goto out;
}
}
/* Now submit buffers to write */
do {
if (!buffer_async_write(bh))
continue;
ret = io_submit_add_bh(io, inode, page, bounce_page, bh);
if (ret) {
/*
* We only get here on ENOMEM. Not much else
* we can do but mark the page as dirty, and
* better luck next time.
*/
break;
}
nr_submitted++;
clear_buffer_dirty(bh);
} while ((bh = bh->b_this_page) != head);
/* Error stopped previous loop? Clean up buffers... */
if (ret) {
out:
fscrypt_free_bounce_page(bounce_page);
printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
redirty_page_for_writepage(wbc, page);
do {
clear_buffer_async_write(bh);
bh = bh->b_this_page;
} while (bh != head);
}
unlock_page(page);
/* Nothing submitted - we have to end page writeback */
if (!nr_submitted)
end_page_writeback(page);
return ret;
}