Files
kernel_xiaomi_sm8250/mm/mremap.c
Greg Kroah-Hartman ce7025b713 Merge 4.19.238 into android-4.19-stable
Changes in 4.19.238
	USB: serial: pl2303: add IBM device IDs
	USB: serial: simple: add Nokia phone driver
	netdevice: add the case if dev is NULL
	xfrm: fix tunnel model fragmentation behavior
	virtio_console: break out of buf poll on remove
	ethernet: sun: Free the coherent when failing in probing
	spi: Fix invalid sgs value
	net:mcf8390: Use platform_get_irq() to get the interrupt
	spi: Fix erroneous sgs value with min_t()
	af_key: add __GFP_ZERO flag for compose_sadb_supported in function pfkey_register
	fuse: fix pipe buffer lifetime for direct_io
	tpm: fix reference counting for struct tpm_chip
	block: Add a helper to validate the block size
	virtio-blk: Use blk_validate_block_size() to validate block size
	USB: usb-storage: Fix use of bitfields for hardware data in ene_ub6250.c
	xhci: make xhci_handshake timeout for xhci_reset() adjustable
	coresight: Fix TRCCONFIGR.QE sysfs interface
	iio: afe: rescale: use s64 for temporary scale calculations
	iio: inkern: apply consumer scale on IIO_VAL_INT cases
	iio: inkern: apply consumer scale when no channel scale is available
	iio: inkern: make a best effort on offset calculation
	clk: uniphier: Fix fixed-rate initialization
	ptrace: Check PTRACE_O_SUSPEND_SECCOMP permission on PTRACE_SEIZE
	Documentation: add link to stable release candidate tree
	Documentation: update stable tree link
	SUNRPC: avoid race between mod_timer() and del_timer_sync()
	NFSD: prevent underflow in nfssvc_decode_writeargs()
	NFSD: prevent integer overflow on 32 bit systems
	f2fs: fix to unlock page correctly in error path of is_alive()
	pinctrl: samsung: drop pin banks references on error paths
	can: ems_usb: ems_usb_start_xmit(): fix double dev_kfree_skb() in error path
	jffs2: fix use-after-free in jffs2_clear_xattr_subsystem
	jffs2: fix memory leak in jffs2_do_mount_fs
	jffs2: fix memory leak in jffs2_scan_medium
	mm/pages_alloc.c: don't create ZONE_MOVABLE beyond the end of a node
	mm: invalidate hwpoison page cache page in fault path
	mempolicy: mbind_range() set_policy() after vma_merge()
	scsi: libsas: Fix sas_ata_qc_issue() handling of NCQ NON DATA commands
	qed: display VF trust config
	qed: validate and restrict untrusted VFs vlan promisc mode
	Revert "Input: clear BTN_RIGHT/MIDDLE on buttonpads"
	ALSA: cs4236: fix an incorrect NULL check on list iterator
	ALSA: hda/realtek: Fix audio regression on Mi Notebook Pro 2020
	mm,hwpoison: unmap poisoned page before invalidation
	drbd: fix potential silent data corruption
	powerpc/kvm: Fix kvm_use_magic_page
	ACPI: properties: Consistently return -ENOENT if there are no more references
	drivers: hamradio: 6pack: fix UAF bug caused by mod_timer()
	block: don't merge across cgroup boundaries if blkcg is enabled
	drm/edid: check basic audio support on CEA extension block
	video: fbdev: sm712fb: Fix crash in smtcfb_read()
	video: fbdev: atari: Atari 2 bpp (STe) palette bugfix
	ARM: dts: at91: sama5d2: Fix PMERRLOC resource size
	ARM: dts: exynos: fix UART3 pins configuration in Exynos5250
	ARM: dts: exynos: add missing HDMI supplies on SMDK5250
	ARM: dts: exynos: add missing HDMI supplies on SMDK5420
	carl9170: fix missing bit-wise or operator for tx_params
	thermal: int340x: Increase bitmap size
	lib/raid6/test: fix multiple definition linking error
	DEC: Limit PMAX memory probing to R3k systems
	media: davinci: vpif: fix unbalanced runtime PM get
	brcmfmac: firmware: Allocate space for default boardrev in nvram
	brcmfmac: pcie: Replace brcmf_pcie_copy_mem_todev with memcpy_toio
	PCI: pciehp: Clear cmd_busy bit in polling mode
	regulator: qcom_smd: fix for_each_child.cocci warnings
	crypto: authenc - Fix sleep in atomic context in decrypt_tail
	crypto: mxs-dcp - Fix scatterlist processing
	spi: tegra114: Add missing IRQ check in tegra_spi_probe
	selftests/x86: Add validity check and allow field splitting
	spi: pxa2xx-pci: Balance reference count for PCI DMA device
	hwmon: (pmbus) Add mutex to regulator ops
	hwmon: (sch56xx-common) Replace WDOG_ACTIVE with WDOG_HW_RUNNING
	block: don't delete queue kobject before its children
	PM: hibernate: fix __setup handler error handling
	PM: suspend: fix return value of __setup handler
	hwrng: atmel - disable trng on failure path
	crypto: vmx - add missing dependencies
	clocksource/drivers/timer-of: Check return value of of_iomap in timer_of_base_init()
	ACPI: APEI: fix return value of __setup handlers
	crypto: ccp - ccp_dmaengine_unregister release dma channels
	hwmon: (pmbus) Add Vin unit off handling
	clocksource: acpi_pm: fix return value of __setup handler
	sched/debug: Remove mpol_get/put and task_lock/unlock from sched_show_numa
	perf/core: Fix address filter parser for multiple filters
	perf/x86/intel/pt: Fix address filter config for 32-bit kernel
	media: coda: Fix missing put_device() call in coda_get_vdoa_data
	video: fbdev: smscufx: Fix null-ptr-deref in ufx_usb_probe()
	video: fbdev: fbcvt.c: fix printing in fb_cvt_print_name()
	ARM: dts: qcom: ipq4019: fix sleep clock
	soc: ti: wkup_m3_ipc: Fix IRQ check in wkup_m3_ipc_probe
	media: em28xx: initialize refcount before kref_get
	media: usb: go7007: s2250-board: fix leak in probe()
	ASoC: rt5663: check the return value of devm_kzalloc() in rt5663_parse_dp()
	ASoC: ti: davinci-i2s: Add check for clk_enable()
	ALSA: spi: Add check for clk_enable()
	arm64: dts: ns2: Fix spi-cpol and spi-cpha property
	arm64: dts: broadcom: Fix sata nodename
	printk: fix return value of printk.devkmsg __setup handler
	ASoC: mxs-saif: Handle errors for clk_enable
	ASoC: atmel_ssc_dai: Handle errors for clk_enable
	memory: emif: Add check for setup_interrupts
	memory: emif: check the pointer temp in get_device_details()
	ALSA: firewire-lib: fix uninitialized flag for AV/C deferred transaction
	media: stk1160: If start stream fails, return buffers with VB2_BUF_STATE_QUEUED
	ASoC: atmel: Add missing of_node_put() in at91sam9g20ek_audio_probe
	ASoC: wm8350: Handle error for wm8350_register_irq
	ASoC: fsi: Add check for clk_enable
	video: fbdev: omapfb: Add missing of_node_put() in dvic_probe_of
	ASoC: dmaengine: do not use a NULL prepare_slave_config() callback
	ASoC: mxs: Fix error handling in mxs_sgtl5000_probe
	ASoC: imx-es8328: Fix error return code in imx_es8328_probe()
	ASoC: msm8916-wcd-digital: Fix missing clk_disable_unprepare() in msm8916_wcd_digital_probe
	mmc: davinci_mmc: Handle error for clk_enable
	drm/bridge: Fix free wrong object in sii8620_init_rcp_input_dev
	ath10k: fix memory overwrite of the WoWLAN wakeup packet pattern
	Bluetooth: hci_serdev: call init_rwsem() before p->open()
	mtd: onenand: Check for error irq
	drm/edid: Don't clear formats if using deep color
	drm/amd/display: Fix a NULL pointer dereference in amdgpu_dm_connector_add_common_modes()
	ath9k_htc: fix uninit value bugs
	KVM: PPC: Fix vmx/vsx mixup in mmio emulation
	power: reset: gemini-poweroff: Fix IRQ check in gemini_poweroff_probe
	ray_cs: Check ioremap return value
	power: supply: ab8500: Fix memory leak in ab8500_fg_sysfs_init
	HID: i2c-hid: fix GET/SET_REPORT for unnumbered reports
	iwlwifi: Fix -EIO error code that is never returned
	dm crypt: fix get_key_size compiler warning if !CONFIG_KEYS
	scsi: pm8001: Fix command initialization in pm80XX_send_read_log()
	scsi: pm8001: Fix command initialization in pm8001_chip_ssp_tm_req()
	scsi: pm8001: Fix payload initialization in pm80xx_set_thermal_config()
	scsi: pm8001: Fix abort all task initialization
	TOMOYO: fix __setup handlers return values
	ext2: correct max file size computing
	drm/tegra: Fix reference leak in tegra_dsi_ganged_probe
	power: supply: bq24190_charger: Fix bq24190_vbus_is_enabled() wrong false return
	drm/bridge: cdns-dsi: Make sure to to create proper aliases for dt
	powerpc/Makefile: Don't pass -mcpu=powerpc64 when building 32-bit
	KVM: x86: Fix emulation in writing cr8
	KVM: x86/emulator: Defer not-present segment check in __load_segment_descriptor()
	hv_balloon: rate-limit "Unhandled message" warning
	i2c: xiic: Make bus names unique
	power: supply: wm8350-power: Handle error for wm8350_register_irq
	power: supply: wm8350-power: Add missing free in free_charger_irq
	PCI: Reduce warnings on possible RW1C corruption
	powerpc/sysdev: fix incorrect use to determine if list is empty
	mfd: mc13xxx: Add check for mc13xxx_irq_request
	vxcan: enable local echo for sent CAN frames
	MIPS: RB532: fix return value of __setup handler
	mtd: rawnand: atmel: fix refcount issue in atmel_nand_controller_init
	USB: storage: ums-realtek: fix error code in rts51x_read_mem()
	af_netlink: Fix shift out of bounds in group mask calculation
	i2c: mux: demux-pinctrl: do not deactivate a master that is not active
	selftests/bpf/test_lirc_mode2.sh: Exit with proper code
	tcp: ensure PMTU updates are processed during fastopen
	mfd: asic3: Add missing iounmap() on error asic3_mfd_probe
	mxser: fix xmit_buf leak in activate when LSR == 0xff
	pwm: lpc18xx-sct: Initialize driver data and hardware before pwmchip_add()
	staging:iio:adc:ad7280a: Fix handing of device address bit reversing.
	clk: qcom: ipq8074: Use floor ops for SDCC1 clock
	serial: 8250_mid: Balance reference count for PCI DMA device
	serial: 8250: Fix race condition in RTS-after-send handling
	iio: adc: Add check for devm_request_threaded_irq
	dma-debug: fix return value of __setup handlers
	clk: qcom: clk-rcg2: Update the frac table for pixel clock
	remoteproc: qcom_wcnss: Add missing of_node_put() in wcnss_alloc_memory_region
	clk: actions: Terminate clk_div_table with sentinel element
	clk: loongson1: Terminate clk_div_table with sentinel element
	clk: clps711x: Terminate clk_div_table with sentinel element
	clk: tegra: tegra124-emc: Fix missing put_device() call in emc_ensure_emc_driver
	NFS: remove unneeded check in decode_devicenotify_args()
	pinctrl: mediatek: Fix missing of_node_put() in mtk_pctrl_init
	pinctrl: nomadik: Add missing of_node_put() in nmk_pinctrl_probe
	pinctrl/rockchip: Add missing of_node_put() in rockchip_pinctrl_probe
	tty: hvc: fix return value of __setup handler
	kgdboc: fix return value of __setup handler
	kgdbts: fix return value of __setup handler
	jfs: fix divide error in dbNextAG
	netfilter: nf_conntrack_tcp: preserve liberal flag in tcp options
	clk: qcom: gcc-msm8994: Fix gpll4 width
	xen: fix is_xen_pmu()
	net: phy: broadcom: Fix brcm_fet_config_init()
	qlcnic: dcb: default to returning -EOPNOTSUPP
	net/x25: Fix null-ptr-deref caused by x25_disconnect
	NFSv4/pNFS: Fix another issue with a list iterator pointing to the head
	lib/test: use after free in register_test_dev_kmod()
	selinux: use correct type for context length
	loop: use sysfs_emit() in the sysfs xxx show()
	Fix incorrect type in assignment of ipv6 port for audit
	irqchip/qcom-pdc: Fix broken locking
	irqchip/nvic: Release nvic_base upon failure
	bfq: fix use-after-free in bfq_dispatch_request
	ACPICA: Avoid walking the ACPI Namespace if it is not there
	lib/raid6/test/Makefile: Use $(pound) instead of \# for Make 4.3
	Revert "Revert "block, bfq: honor already-setup queue merges""
	ACPI/APEI: Limit printable size of BERT table data
	PM: core: keep irq flags in device_pm_check_callbacks()
	spi: tegra20: Use of_device_get_match_data()
	ext4: don't BUG if someone dirty pages without asking ext4 first
	ntfs: add sanity check on allocation size
	video: fbdev: nvidiafb: Use strscpy() to prevent buffer overflow
	video: fbdev: w100fb: Reset global state
	video: fbdev: cirrusfb: check pixclock to avoid divide by zero
	video: fbdev: omapfb: acx565akm: replace snprintf with sysfs_emit
	ARM: dts: qcom: fix gic_irq_domain_translate warnings for msm8960
	ARM: dts: bcm2837: Add the missing L1/L2 cache information
	video: fbdev: omapfb: panel-dsi-cm: Use sysfs_emit() instead of snprintf()
	video: fbdev: omapfb: panel-tpo-td043mtea1: Use sysfs_emit() instead of snprintf()
	video: fbdev: udlfb: replace snprintf in show functions with sysfs_emit
	ASoC: soc-core: skip zero num_dai component in searching dai name
	media: cx88-mpeg: clear interrupt status register before streaming video
	ARM: tegra: tamonten: Fix I2C3 pad setting
	ARM: mmp: Fix failure to remove sram device
	video: fbdev: sm712fb: Fix crash in smtcfb_write()
	media: Revert "media: em28xx: add missing em28xx_close_extension"
	media: hdpvr: initialize dev->worker at hdpvr_register_videodev
	mmc: host: Return an error when ->enable_sdio_irq() ops is missing
	powerpc/lib/sstep: Fix 'sthcx' instruction
	powerpc/lib/sstep: Fix build errors with newer binutils
	powerpc: Fix build errors with newer binutils
	scsi: qla2xxx: Fix stuck session in gpdb
	scsi: qla2xxx: Fix warning for missing error code
	scsi: qla2xxx: Check for firmware dump already collected
	scsi: qla2xxx: Suppress a kernel complaint in qla_create_qpair()
	scsi: qla2xxx: Fix incorrect reporting of task management failure
	scsi: qla2xxx: Fix hang due to session stuck
	scsi: qla2xxx: Reduce false trigger to login
	scsi: qla2xxx: Use correct feature type field during RFF_ID processing
	KVM: Prevent module exit until all VMs are freed
	KVM: x86: fix sending PV IPI
	ubifs: rename_whiteout: Fix double free for whiteout_ui->data
	ubifs: Fix deadlock in concurrent rename whiteout and inode writeback
	ubifs: Add missing iput if do_tmpfile() failed in rename whiteout
	ubifs: setflags: Make dirtied_ino_d 8 bytes aligned
	ubifs: Fix read out-of-bounds in ubifs_wbuf_write_nolock()
	ubifs: rename_whiteout: correct old_dir size computing
	can: mcba_usb: mcba_usb_start_xmit(): fix double dev_kfree_skb in error path
	can: mcba_usb: properly check endpoint type
	gfs2: Make sure FITRIM minlen is rounded up to fs block size
	pinctrl: pinconf-generic: Print arguments for bias-pull-*
	ubi: Fix race condition between ctrl_cdev_ioctl and ubi_cdev_ioctl
	ACPI: CPPC: Avoid out of bounds access when parsing _CPC data
	mm/mmap: return 1 from stack_guard_gap __setup() handler
	mm/memcontrol: return 1 from cgroup.memory __setup() handler
	mm/usercopy: return 1 from hardened_usercopy __setup() handler
	bpf: Fix comment for helper bpf_current_task_under_cgroup()
	ubi: fastmap: Return error code if memory allocation fails in add_aeb()
	ASoC: topology: Allow TLV control to be either read or write
	ARM: dts: spear1340: Update serial node properties
	ARM: dts: spear13xx: Update SPI dma properties
	um: Fix uml_mconsole stop/go
	openvswitch: Fixed nd target mask field in the flow dump.
	KVM: x86: Forbid VMM to set SYNIC/STIMER MSRs when SynIC wasn't activated
	ubifs: Rectify space amount budget for mkdir/tmpfile operations
	rtc: wm8350: Handle error for wm8350_register_irq
	riscv module: remove (NOLOAD)
	ARM: 9187/1: JIVE: fix return value of __setup handler
	KVM: x86/svm: Clear reserved bits written to PerfEvtSeln MSRs
	drm: Add orientation quirk for GPD Win Max
	ath5k: fix OOB in ath5k_eeprom_read_pcal_info_5111
	drm/amd/amdgpu/amdgpu_cs: fix refcount leak of a dma_fence obj
	ptp: replace snprintf with sysfs_emit
	powerpc: dts: t104xrdb: fix phy type for FMAN 4/5
	scsi: mvsas: Replace snprintf() with sysfs_emit()
	scsi: bfa: Replace snprintf() with sysfs_emit()
	power: supply: axp20x_battery: properly report current when discharging
	powerpc: Set crashkernel offset to mid of RMA region
	PCI: aardvark: Fix support for MSI interrupts
	iommu/arm-smmu-v3: fix event handling soft lockup
	usb: ehci: add pci device support for Aspeed platforms
	PCI: pciehp: Add Qualcomm quirk for Command Completed erratum
	ipv4: Invalidate neighbour for broadcast address upon address addition
	dm ioctl: prevent potential spectre v1 gadget
	drm/amdkfd: make CRAT table missing message informational only
	scsi: pm8001: Fix pm8001_mpi_task_abort_resp()
	scsi: aha152x: Fix aha152x_setup() __setup handler return value
	net/smc: correct settings of RMB window update limit
	macvtap: advertise link netns via netlink
	bnxt_en: Eliminate unintended link toggle during FW reset
	MIPS: fix fortify panic when copying asm exception handlers
	scsi: libfc: Fix use after free in fc_exch_abts_resp()
	usb: dwc3: omap: fix "unbalanced disables for smps10_out1" on omap5evm
	xtensa: fix DTC warning unit_address_format
	Bluetooth: Fix use after free in hci_send_acl
	init/main.c: return 1 from handled __setup() functions
	minix: fix bug when opening a file with O_DIRECT
	w1: w1_therm: fixes w1_seq for ds28ea00 sensors
	NFSv4: Protect the state recovery thread against direct reclaim
	xen: delay xen_hvm_init_time_ops() if kdump is boot on vcpu>=32
	clk: Enforce that disjoints limits are invalid
	SUNRPC/call_alloc: async tasks mustn't block waiting for memory
	NFS: swap IO handling is slightly different for O_DIRECT IO
	NFS: swap-out must always use STABLE writes.
	serial: samsung_tty: do not unlock port->lock for uart_write_wakeup()
	virtio_console: eliminate anonymous module_init & module_exit
	jfs: prevent NULL deref in diFree
	parisc: Fix CPU affinity for Lasi, WAX and Dino chips
	net: add missing SOF_TIMESTAMPING_OPT_ID support
	mm: fix race between MADV_FREE reclaim and blkdev direct IO read
	KVM: arm64: Check arm64_get_bp_hardening_data() didn't return NULL
	drm/amdgpu: fix off by one in amdgpu_gfx_kiq_acquire()
	Drivers: hv: vmbus: Fix potential crash on module unload
	scsi: zorro7xx: Fix a resource leak in zorro7xx_remove_one()
	net: stmmac: Fix unset max_speed difference between DT and non-DT platforms
	drm/imx: Fix memory leak in imx_pd_connector_get_modes
	net: openvswitch: don't send internal clone attribute to the userspace.
	rxrpc: fix a race in rxrpc_exit_net()
	qede: confirm skb is allocated before using
	spi: bcm-qspi: fix MSPI only access with bcm_qspi_exec_mem_op()
	drbd: Fix five use after free bugs in get_initial_state
	Revert "mmc: sdhci-xenon: fix annoying 1.8V regulator warning"
	mmc: renesas_sdhi: don't overwrite TAP settings when HS400 tuning is complete
	mmmremap.c: avoid pointless invalidate_range_start/end on mremap(old_size=0)
	mm/mempolicy: fix mpol_new leak in shared_policy_replace
	x86/pm: Save the MSR validity status at context setup
	x86/speculation: Restore speculation related MSRs during S3 resume
	btrfs: fix qgroup reserve overflow the qgroup limit
	arm64: patch_text: Fixup last cpu should be master
	ata: sata_dwc_460ex: Fix crash due to OOB write
	perf: qcom_l2_pmu: fix an incorrect NULL check on list iterator
	irqchip/gic-v3: Fix GICR_CTLR.RWP polling
	tools build: Filter out options and warnings not supported by clang
	tools build: Use $(shell ) instead of `` to get embedded libperl's ccopts
	dmaengine: Revert "dmaengine: shdma: Fix runtime PM imbalance on error"
	mm: don't skip swap entry even if zap_details specified
	arm64: module: remove (NOLOAD) from linker script
	mm/sparsemem: fix 'mem_section' will never be NULL gcc 12 warning
	cgroup: Use open-time credentials for process migraton perm checks
	cgroup: Allocate cgroup_file_ctx for kernfs_open_file->priv
	cgroup: Use open-time cgroup namespace for process migration perm checks
	selftests: cgroup: Make cg_create() use 0755 for permission instead of 0644
	selftests: cgroup: Test open-time credential usage for migration checks
	selftests: cgroup: Test open-time cgroup namespace usage for migration checks
	xfrm: policy: match with both mark and mask on user interfaces
	drm/amdgpu: Check if fd really is an amdgpu fd.
	drm/amdkfd: Use drm_priv to pass VM from KFD to amdgpu
	Linux 4.19.238

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I55a3615d2fbf9bde9ac152456701b36a6c9d20b6
2022-04-18 09:57:50 +02:00

643 lines
17 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* mm/mremap.c
*
* (C) Copyright 1996 Linus Torvalds
*
* Address space accounting code <alan@lxorguk.ukuu.org.uk>
* (C) Copyright 2002 Red Hat Inc, All Rights Reserved
*/
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/ksm.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/swapops.h>
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/mmu_notifier.h>
#include <linux/uaccess.h>
#include <linux/mm-arch-hooks.h>
#include <linux/userfaultfd_k.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include "internal.h"
static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pgd = pgd_offset(mm, addr);
if (pgd_none_or_clear_bad(pgd))
return NULL;
p4d = p4d_offset(pgd, addr);
if (p4d_none_or_clear_bad(p4d))
return NULL;
pud = pud_offset(p4d, addr);
if (pud_none_or_clear_bad(pud))
return NULL;
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd))
return NULL;
return pmd;
}
static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr)
{
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pgd = pgd_offset(mm, addr);
p4d = p4d_alloc(mm, pgd, addr);
if (!p4d)
return NULL;
pud = pud_alloc(mm, p4d, addr);
if (!pud)
return NULL;
pmd = pmd_alloc(mm, pud, addr);
if (!pmd)
return NULL;
VM_BUG_ON(pmd_trans_huge(*pmd));
return pmd;
}
static void take_rmap_locks(struct vm_area_struct *vma)
{
if (vma->vm_file)
i_mmap_lock_write(vma->vm_file->f_mapping);
if (vma->anon_vma)
anon_vma_lock_write(vma->anon_vma);
}
static void drop_rmap_locks(struct vm_area_struct *vma)
{
if (vma->anon_vma)
anon_vma_unlock_write(vma->anon_vma);
if (vma->vm_file)
i_mmap_unlock_write(vma->vm_file->f_mapping);
}
static pte_t move_soft_dirty_pte(pte_t pte)
{
/*
* Set soft dirty bit so we can notice
* in userspace the ptes were moved.
*/
#ifdef CONFIG_MEM_SOFT_DIRTY
if (pte_present(pte))
pte = pte_mksoft_dirty(pte);
else if (is_swap_pte(pte))
pte = pte_swp_mksoft_dirty(pte);
#endif
return pte;
}
static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
unsigned long old_addr, unsigned long old_end,
struct vm_area_struct *new_vma, pmd_t *new_pmd,
unsigned long new_addr, bool need_rmap_locks)
{
struct mm_struct *mm = vma->vm_mm;
pte_t *old_pte, *new_pte, pte;
spinlock_t *old_ptl, *new_ptl;
bool force_flush = false;
unsigned long len = old_end - old_addr;
/*
* When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma
* locks to ensure that rmap will always observe either the old or the
* new ptes. This is the easiest way to avoid races with
* truncate_pagecache(), page migration, etc...
*
* When need_rmap_locks is false, we use other ways to avoid
* such races:
*
* - During exec() shift_arg_pages(), we use a specially tagged vma
* which rmap call sites look for using is_vma_temporary_stack().
*
* - During mremap(), new_vma is often known to be placed after vma
* in rmap traversal order. This ensures rmap will always observe
* either the old pte, or the new pte, or both (the page table locks
* serialize access to individual ptes, but only rmap traversal
* order guarantees that we won't miss both the old and new ptes).
*/
if (need_rmap_locks)
take_rmap_locks(vma);
/*
* We don't have to worry about the ordering of src and dst
* pte locks because exclusive mmap_sem prevents deadlock.
*/
old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
new_pte = pte_offset_map(new_pmd, new_addr);
new_ptl = pte_lockptr(mm, new_pmd);
if (new_ptl != old_ptl)
spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
flush_tlb_batched_pending(vma->vm_mm);
arch_enter_lazy_mmu_mode();
for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
new_pte++, new_addr += PAGE_SIZE) {
if (pte_none(*old_pte))
continue;
pte = ptep_get_and_clear(mm, old_addr, old_pte);
/*
* If we are remapping a valid PTE, make sure
* to flush TLB before we drop the PTL for the
* PTE.
*
* NOTE! Both old and new PTL matter: the old one
* for racing with page_mkclean(), the new one to
* make sure the physical page stays valid until
* the TLB entry for the old mapping has been
* flushed.
*/
if (pte_present(pte))
force_flush = true;
pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
pte = move_soft_dirty_pte(pte);
set_pte_at(mm, new_addr, new_pte, pte);
}
arch_leave_lazy_mmu_mode();
if (force_flush)
flush_tlb_range(vma, old_end - len, old_end);
if (new_ptl != old_ptl)
spin_unlock(new_ptl);
pte_unmap(new_pte - 1);
pte_unmap_unlock(old_pte - 1, old_ptl);
if (need_rmap_locks)
drop_rmap_locks(vma);
}
unsigned long move_page_tables(struct vm_area_struct *vma,
unsigned long old_addr, struct vm_area_struct *new_vma,
unsigned long new_addr, unsigned long len,
bool need_rmap_locks)
{
unsigned long extent, next, old_end;
pmd_t *old_pmd, *new_pmd;
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
if (!len)
return 0;
old_end = old_addr + len;
flush_cache_range(vma, old_addr, old_end);
mmun_start = old_addr;
mmun_end = old_end;
mmu_notifier_invalidate_range_start(vma->vm_mm, mmun_start, mmun_end);
for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
cond_resched();
next = (old_addr + PMD_SIZE) & PMD_MASK;
/* even if next overflowed, extent below will be ok */
extent = next - old_addr;
if (extent > old_end - old_addr)
extent = old_end - old_addr;
old_pmd = get_old_pmd(vma->vm_mm, old_addr);
if (!old_pmd)
continue;
new_pmd = alloc_new_pmd(vma->vm_mm, vma, new_addr);
if (!new_pmd)
break;
if (is_swap_pmd(*old_pmd) || pmd_trans_huge(*old_pmd) || pmd_devmap(*old_pmd)) {
if (extent == HPAGE_PMD_SIZE) {
bool moved;
/* See comment in move_ptes() */
if (need_rmap_locks)
take_rmap_locks(vma);
moved = move_huge_pmd(vma, old_addr, new_addr,
old_end, old_pmd, new_pmd);
if (need_rmap_locks)
drop_rmap_locks(vma);
if (moved)
continue;
}
split_huge_pmd(vma, old_pmd, old_addr);
if (pmd_trans_unstable(old_pmd))
continue;
}
if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr))
break;
next = (new_addr + PMD_SIZE) & PMD_MASK;
if (extent > next - new_addr)
extent = next - new_addr;
move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma,
new_pmd, new_addr, need_rmap_locks);
}
mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
return len + old_addr - old_end; /* how much done */
}
static unsigned long move_vma(struct vm_area_struct *vma,
unsigned long old_addr, unsigned long old_len,
unsigned long new_len, unsigned long new_addr,
bool *locked, struct vm_userfaultfd_ctx *uf,
struct list_head *uf_unmap)
{
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *new_vma;
unsigned long vm_flags = vma->vm_flags;
unsigned long new_pgoff;
unsigned long moved_len;
unsigned long excess = 0;
unsigned long hiwater_vm;
int split = 0;
int err;
bool need_rmap_locks;
/*
* We'd prefer to avoid failure later on in do_munmap:
* which may split one vma into three before unmapping.
*/
if (mm->map_count >= sysctl_max_map_count - 3)
return -ENOMEM;
/*
* Advise KSM to break any KSM pages in the area to be moved:
* it would be confusing if they were to turn up at the new
* location, where they happen to coincide with different KSM
* pages recently unmapped. But leave vma->vm_flags as it was,
* so KSM can come around to merge on vma and new_vma afterwards.
*/
err = ksm_madvise(vma, old_addr, old_addr + old_len,
MADV_UNMERGEABLE, &vm_flags);
if (err)
return err;
new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff,
&need_rmap_locks);
if (!new_vma)
return -ENOMEM;
moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len,
need_rmap_locks);
if (moved_len < old_len) {
err = -ENOMEM;
} else if (vma->vm_ops && vma->vm_ops->mremap) {
err = vma->vm_ops->mremap(new_vma);
}
if (unlikely(err)) {
/*
* On error, move entries back from new area to old,
* which will succeed since page tables still there,
* and then proceed to unmap new area instead of old.
*/
move_page_tables(new_vma, new_addr, vma, old_addr, moved_len,
true);
vma = new_vma;
old_len = new_len;
old_addr = new_addr;
new_addr = err;
} else {
mremap_userfaultfd_prep(new_vma, uf);
arch_remap(mm, old_addr, old_addr + old_len,
new_addr, new_addr + new_len);
}
/* Conceal VM_ACCOUNT so old reservation is not undone */
if (vm_flags & VM_ACCOUNT) {
vma->vm_flags &= ~VM_ACCOUNT;
excess = vma->vm_end - vma->vm_start - old_len;
if (old_addr > vma->vm_start &&
old_addr + old_len < vma->vm_end)
split = 1;
}
/*
* If we failed to move page tables we still do total_vm increment
* since do_munmap() will decrement it by old_len == new_len.
*
* Since total_vm is about to be raised artificially high for a
* moment, we need to restore high watermark afterwards: if stats
* are taken meanwhile, total_vm and hiwater_vm appear too high.
* If this were a serious issue, we'd add a flag to do_munmap().
*/
hiwater_vm = mm->hiwater_vm;
vm_stat_account(mm, vma->vm_flags, new_len >> PAGE_SHIFT);
/* Tell pfnmap has moved from this vma */
if (unlikely(vma->vm_flags & VM_PFNMAP))
untrack_pfn_moved(vma);
if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) {
/* OOM: unable to split vma, just get accounts right */
vm_unacct_memory(excess >> PAGE_SHIFT);
excess = 0;
}
mm->hiwater_vm = hiwater_vm;
/* Restore VM_ACCOUNT if one or two pieces of vma left */
if (excess) {
vma->vm_flags |= VM_ACCOUNT;
if (split)
vma->vm_next->vm_flags |= VM_ACCOUNT;
}
if (vm_flags & VM_LOCKED) {
mm->locked_vm += new_len >> PAGE_SHIFT;
*locked = true;
}
return new_addr;
}
static struct vm_area_struct *vma_to_resize(unsigned long addr,
unsigned long old_len, unsigned long new_len, unsigned long *p)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma = find_vma(mm, addr);
unsigned long pgoff;
if (!vma || vma->vm_start > addr)
return ERR_PTR(-EFAULT);
/*
* !old_len is a special case where an attempt is made to 'duplicate'
* a mapping. This makes no sense for private mappings as it will
* instead create a fresh/new mapping unrelated to the original. This
* is contrary to the basic idea of mremap which creates new mappings
* based on the original. There are no known use cases for this
* behavior. As a result, fail such attempts.
*/
if (!old_len && !(vma->vm_flags & (VM_SHARED | VM_MAYSHARE))) {
pr_warn_once("%s (%d): attempted to duplicate a private mapping with mremap. This is not supported.\n", current->comm, current->pid);
return ERR_PTR(-EINVAL);
}
if (is_vm_hugetlb_page(vma))
return ERR_PTR(-EINVAL);
/* We can't remap across vm area boundaries */
if (old_len > vma->vm_end - addr)
return ERR_PTR(-EFAULT);
if (new_len == old_len)
return vma;
/* Need to be careful about a growing mapping */
pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
pgoff += vma->vm_pgoff;
if (pgoff + (new_len >> PAGE_SHIFT) < pgoff)
return ERR_PTR(-EINVAL);
if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP))
return ERR_PTR(-EFAULT);
if (vma->vm_flags & VM_LOCKED) {
unsigned long locked, lock_limit;
locked = mm->locked_vm << PAGE_SHIFT;
lock_limit = rlimit(RLIMIT_MEMLOCK);
locked += new_len - old_len;
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
return ERR_PTR(-EAGAIN);
}
if (!may_expand_vm(mm, vma->vm_flags,
(new_len - old_len) >> PAGE_SHIFT))
return ERR_PTR(-ENOMEM);
if (vma->vm_flags & VM_ACCOUNT) {
unsigned long charged = (new_len - old_len) >> PAGE_SHIFT;
if (security_vm_enough_memory_mm(mm, charged))
return ERR_PTR(-ENOMEM);
*p = charged;
}
return vma;
}
static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
unsigned long new_addr, unsigned long new_len, bool *locked,
struct vm_userfaultfd_ctx *uf,
struct list_head *uf_unmap_early,
struct list_head *uf_unmap)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long ret = -EINVAL;
unsigned long charged = 0;
unsigned long map_flags;
if (offset_in_page(new_addr))
goto out;
if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
goto out;
/* Ensure the old/new locations do not overlap */
if (addr + old_len > new_addr && new_addr + new_len > addr)
goto out;
ret = do_munmap(mm, new_addr, new_len, uf_unmap_early);
if (ret)
goto out;
if (old_len >= new_len) {
ret = do_munmap(mm, addr+new_len, old_len - new_len, uf_unmap);
if (ret && old_len != new_len)
goto out;
old_len = new_len;
}
vma = vma_to_resize(addr, old_len, new_len, &charged);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto out;
}
map_flags = MAP_FIXED;
if (vma->vm_flags & VM_MAYSHARE)
map_flags |= MAP_SHARED;
ret = get_unmapped_area(vma->vm_file, new_addr, new_len, vma->vm_pgoff +
((addr - vma->vm_start) >> PAGE_SHIFT),
map_flags);
if (offset_in_page(ret))
goto out1;
ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf,
uf_unmap);
if (!(offset_in_page(ret)))
goto out;
out1:
vm_unacct_memory(charged);
out:
return ret;
}
static int vma_expandable(struct vm_area_struct *vma, unsigned long delta)
{
unsigned long end = vma->vm_end + delta;
if (end < vma->vm_end) /* overflow */
return 0;
if (vma->vm_next && vma->vm_next->vm_start < end) /* intersection */
return 0;
if (get_unmapped_area(NULL, vma->vm_start, end - vma->vm_start,
0, MAP_FIXED) & ~PAGE_MASK)
return 0;
return 1;
}
/*
* Expand (or shrink) an existing mapping, potentially moving it at the
* same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
*
* MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise
* This option implies MREMAP_MAYMOVE.
*/
SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
unsigned long, new_len, unsigned long, flags,
unsigned long, new_addr)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long ret = -EINVAL;
unsigned long charged = 0;
bool locked = false;
struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
LIST_HEAD(uf_unmap_early);
LIST_HEAD(uf_unmap);
addr = untagged_addr(addr);
if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
return ret;
if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE))
return ret;
if (offset_in_page(addr))
return ret;
old_len = PAGE_ALIGN(old_len);
new_len = PAGE_ALIGN(new_len);
/*
* We allow a zero old-len as a special case
* for DOS-emu "duplicate shm area" thing. But
* a zero new-len is nonsensical.
*/
if (!new_len)
return ret;
if (down_write_killable(&current->mm->mmap_sem))
return -EINTR;
if (flags & MREMAP_FIXED) {
ret = mremap_to(addr, old_len, new_addr, new_len,
&locked, &uf, &uf_unmap_early, &uf_unmap);
goto out;
}
/*
* Always allow a shrinking remap: that just unmaps
* the unnecessary pages..
* do_munmap does all the needed commit accounting
*/
if (old_len >= new_len) {
ret = do_munmap(mm, addr+new_len, old_len - new_len, &uf_unmap);
if (ret && old_len != new_len)
goto out;
ret = addr;
goto out;
}
/*
* Ok, we need to grow..
*/
vma = vma_to_resize(addr, old_len, new_len, &charged);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto out;
}
/* old_len exactly to the end of the area..
*/
if (old_len == vma->vm_end - addr) {
/* can we just expand the current mapping? */
if (vma_expandable(vma, new_len - old_len)) {
int pages = (new_len - old_len) >> PAGE_SHIFT;
if (vma_adjust(vma, vma->vm_start, addr + new_len,
vma->vm_pgoff, NULL)) {
ret = -ENOMEM;
goto out;
}
vm_stat_account(mm, vma->vm_flags, pages);
if (vma->vm_flags & VM_LOCKED) {
mm->locked_vm += pages;
locked = true;
new_addr = addr;
}
ret = addr;
goto out;
}
}
/*
* We weren't able to just expand or shrink the area,
* we need to create a new one and move it..
*/
ret = -ENOMEM;
if (flags & MREMAP_MAYMOVE) {
unsigned long map_flags = 0;
if (vma->vm_flags & VM_MAYSHARE)
map_flags |= MAP_SHARED;
new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
vma->vm_pgoff +
((addr - vma->vm_start) >> PAGE_SHIFT),
map_flags);
if (offset_in_page(new_addr)) {
ret = new_addr;
goto out;
}
ret = move_vma(vma, addr, old_len, new_len, new_addr,
&locked, &uf, &uf_unmap);
}
out:
if (offset_in_page(ret)) {
vm_unacct_memory(charged);
locked = 0;
}
up_write(&current->mm->mmap_sem);
if (locked && new_len > old_len)
mm_populate(new_addr + old_len, new_len - old_len);
userfaultfd_unmap_complete(mm, &uf_unmap_early);
mremap_userfaultfd_complete(&uf, addr, new_addr, old_len);
userfaultfd_unmap_complete(mm, &uf_unmap);
return ret;
}