diff options
author | Srinivasarao P <spathi@codeaurora.org> | 2018-01-02 14:13:40 +0530 |
---|---|---|
committer | Srinivasarao P <spathi@codeaurora.org> | 2018-01-02 18:15:22 +0530 |
commit | b0e2559a3e52ae84a664bc83155f731bfd7a303d (patch) | |
tree | a0c5c426d6180e8fd863ddf7ad38075b0d4f658e | |
parent | 00a5539a807c39d9051cf71056350b7615e4af21 (diff) | |
parent | 9fbf3d73741260aafb8d84b08f92a86daa6fc026 (diff) |
Merge android-4.4.103 (9fbf3d7) into msm-4.4
* refs/heads/tmp-9fbf3d7
Linux 4.4.103
Revert "sctp: do not peel off an assoc from one netns to another one"
xen: xenbus driver must not accept invalid transaction ids
s390/kbuild: enable modversions for symbols exported from asm
ASoC: wm_adsp: Don't overrun firmware file buffer when reading region data
btrfs: return the actual error value from from btrfs_uuid_tree_iterate
ASoC: rsnd: don't double free kctrl
netfilter: nf_tables: fix oob access
netfilter: nft_queue: use raw_smp_processor_id()
spi: SPI_FSL_DSPI should depend on HAS_DMA
staging: iio: cdc: fix improper return value
iio: light: fix improper return value
mac80211: Suppress NEW_PEER_CANDIDATE event if no room
mac80211: Remove invalid flag operations in mesh TSF synchronization
drm: Apply range restriction after color adjustment when allocation
ALSA: hda - Apply ALC269_FIXUP_NO_SHUTUP on HDA_FIXUP_ACT_PROBE
ath10k: set CTS protection VDEV param only if VDEV is up
ath10k: fix potential memory leak in ath10k_wmi_tlv_op_pull_fw_stats()
ath10k: ignore configuring the incorrect board_id
ath10k: fix incorrect txpower set by P2P_DEVICE interface
drm/armada: Fix compile fail
net: 3com: typhoon: typhoon_init_one: fix incorrect return values
net: 3com: typhoon: typhoon_init_one: make return values more specific
net: Allow IP_MULTICAST_IF to set index to L3 slave
dmaengine: zx: set DMA_CYCLIC cap_mask bit
PCI: Apply _HPX settings only to relevant devices
RDS: RDMA: return appropriate error on rdma map failures
e1000e: Separate signaling for link check/link up
e1000e: Fix return value test
e1000e: Fix error path in link detection
PM / OPP: Add missing of_node_put(np)
net/9p: Switch to wait_event_killable()
fscrypt: lock mutex before checking for bounce page pool
sched/rt: Simplify the IPI based RT balancing logic
media: v4l2-ctrl: Fix flags field on Control events
cx231xx-cards: fix NULL-deref on missing association descriptor
media: rc: check for integer overflow
media: Don't do DMA on stack for firmware upload in the AS102 driver
powerpc/signal: Properly handle return value from uprobe_deny_signal()
parisc: Fix validity check of pointer size argument in new CAS implementation
ixgbe: Fix skb list corruption on Power systems
fm10k: Use smp_rmb rather than read_barrier_depends
i40evf: Use smp_rmb rather than read_barrier_depends
ixgbevf: Use smp_rmb rather than read_barrier_depends
igbvf: Use smp_rmb rather than read_barrier_depends
igb: Use smp_rmb rather than read_barrier_depends
i40e: Use smp_rmb rather than read_barrier_depends
NFC: fix device-allocation error return
IB/srp: Avoid that a cable pull can trigger a kernel crash
IB/srpt: Do not accept invalid initiator port names
libnvdimm, namespace: make 'resource' attribute only readable by root
libnvdimm, namespace: fix label initialization to use valid seq numbers
clk: ti: dra7-atl-clock: fix child-node lookups
clk: ti: dra7-atl-clock: Fix of_node reference counting
SUNRPC: Fix tracepoint storage issues with svc_recv and svc_rqst_status
KVM: SVM: obey guest PAT
KVM: nVMX: set IDTR and GDTR limits when loading L1 host state
target: Fix QUEUE_FULL + SCSI task attribute handling
iscsi-target: Fix non-immediate TMR reference leak
fs/9p: Compare qid.path in v9fs_test_inode
fix a page leak in vhost_scsi_iov_to_sgl() error recovery
ALSA: hda/realtek - Fix ALC700 family no sound issue
ALSA: timer: Remove kernel warning at compat ioctl error paths
ALSA: usb-audio: Add sanity checks in v2 clock parsers
ALSA: usb-audio: Fix potential out-of-bound access at parsing SU
ALSA: usb-audio: Add sanity checks to FE parser
ALSA: pcm: update tstamp only if audio_tstamp changed
ext4: fix interaction between i_size, fallocate, and delalloc after a crash
ata: fixes kernel crash while tracing ata_eh_link_autopsy event
rtlwifi: fix uninitialized rtlhal->last_suspend_sec time
rtlwifi: rtl8192ee: Fix memory leak when loading firmware
nfsd: deal with revoked delegations appropriately
nfs: Fix ugly referral attributes
NFS: Fix typo in nomigration mount option
isofs: fix timestamps beyond 2027
bcache: check ca->alloc_thread initialized before wake up it
eCryptfs: use after free in ecryptfs_release_messaging()
nilfs2: fix race condition that causes file system corruption
autofs: don't fail mount for transient error
MIPS: BCM47XX: Fix LED inversion for WRT54GSv1
MIPS: Fix an n32 core file generation regset support regression
dm: fix race between dm_get_from_kobject() and __dm_destroy()
dm bufio: fix integer overflow when limiting maximum cache size
ALSA: hda: Add Raven PCI ID
MIPS: ralink: Fix typo in mt7628 pinmux function
MIPS: ralink: Fix MT7628 pinmux
ARM: 8721/1: mm: dump: check hardware RO bit for LPAE
ARM: 8722/1: mm: make STRICT_KERNEL_RWX effective for LPAE
x86/decoder: Add new TEST instruction pattern
lib/mpi: call cond_resched() from mpi_powm() loop
sched: Make resched_cpu() unconditional
vsock: use new wait API for vsock_stream_sendmsg()
AF_VSOCK: Shrink the area influenced by prepare_to_wait
ipv6: only call ip6_route_dev_notify() once for NETDEV_UNREGISTER
s390/disassembler: increase show_code buffer size
s390/disassembler: add missing end marker for e7 table
s390/runtime instrumention: fix possible memory corruption
s390: fix transactional execution control register handling
BACKPORT: time: Clean up CLOCK_MONOTONIC_RAW time handling
BACKPORT: time: Fix CLOCK_MONOTONIC_RAW sub-nanosecond accounting
UPSTREAM: arm64: vdso: fix clock_getres for 4GiB-aligned res
f2fs: updates on 4.15-rc1
UPSTREAM: android: binder: fix type mismatch warning
Linux 4.4.102
mm, hwpoison: fixup "mm: check the return value of lookup_page_ext for all call sites"
Conflicts:
fs/ext4/crypto_key.c
mm/debug-pagealloc.c
Change-Id: Ibe35d78bd0397f3ff2049e0a1dda20fcb06f2f75
Signed-off-by: Srinivasarao P <spathi@codeaurora.org>
129 files changed, 2389 insertions, 1094 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 500c60403653..2baed1151eac 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -51,6 +51,18 @@ Description: Controls the dirty page count condition for the in-place-update policies. +What: /sys/fs/f2fs/<disk>/min_hot_blocks +Date: March 2017 +Contact: "Jaegeuk Kim" <jaegeuk@kernel.org> +Description: + Controls the dirty page count condition for redefining hot data. + +What: /sys/fs/f2fs/<disk>/min_ssr_sections +Date: October 2017 +Contact: "Chao Yu" <yuchao0@huawei.com> +Description: + Controls the fee section threshold to trigger SSR allocation. + What: /sys/fs/f2fs/<disk>/max_small_discards Date: November 2013 Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com> @@ -96,6 +108,18 @@ Contact: "Jaegeuk Kim" <jaegeuk@kernel.org> Description: Controls the checkpoint timing. +What: /sys/fs/f2fs/<disk>/idle_interval +Date: January 2016 +Contact: "Jaegeuk Kim" <jaegeuk@kernel.org> +Description: + Controls the idle timing. + +What: /sys/fs/f2fs/<disk>/iostat_enable +Date: August 2017 +Contact: "Chao Yu" <yuchao0@huawei.com> +Description: + Controls to enable/disable IO stat. + What: /sys/fs/f2fs/<disk>/ra_nid_pages Date: October 2015 Contact: "Chao Yu" <chao2.yu@samsung.com> @@ -116,6 +140,12 @@ Contact: "Shuoran Liu" <liushuoran@huawei.com> Description: Shows total written kbytes issued to disk. +What: /sys/fs/f2fs/<disk>/feature +Date: July 2017 +Contact: "Jaegeuk Kim" <jaegeuk@kernel.org> +Description: + Shows all enabled features in current device. + What: /sys/fs/f2fs/<disk>/inject_rate Date: May 2016 Contact: "Sheng Yong" <shengyong1@huawei.com> @@ -132,7 +162,18 @@ What: /sys/fs/f2fs/<disk>/reserved_blocks Date: June 2017 Contact: "Chao Yu" <yuchao0@huawei.com> Description: - Controls current reserved blocks in system. + Controls target reserved blocks in system, the threshold + is soft, it could exceed current available user space. + +What: /sys/fs/f2fs/<disk>/current_reserved_blocks +Date: October 2017 +Contact: "Yunlong Song" <yunlong.song@huawei.com> +Contact: "Chao Yu" <yuchao0@huawei.com> +Description: + Shows current reserved blocks in system, it may be temporarily + smaller than target_reserved_blocks, but will gradually + increase to target_reserved_blocks when more free blocks are + freed by user later. What: /sys/fs/f2fs/<disk>/gc_urgent Date: August 2017 @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 101 +SUBLEVEL = 103 EXTRAVERSION = NAME = Blurry Fish Butt diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c index 9fe8e241335c..e1f6f0daa847 100644 --- a/arch/arm/mm/dump.c +++ b/arch/arm/mm/dump.c @@ -126,8 +126,8 @@ static const struct prot_bits section_bits[] = { .val = PMD_SECT_USER, .set = "USR", }, { - .mask = L_PMD_SECT_RDONLY, - .val = L_PMD_SECT_RDONLY, + .mask = L_PMD_SECT_RDONLY | PMD_SECT_AP2, + .val = L_PMD_SECT_RDONLY | PMD_SECT_AP2, .set = "ro", .clear = "RW", #elif __LINUX_ARM_ARCH__ >= 6 diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index d3d718772381..4d58a6eca48e 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -669,8 +669,8 @@ static struct section_perm ro_perms[] = { .start = (unsigned long)_stext, .end = (unsigned long)__init_begin, #ifdef CONFIG_ARM_LPAE - .mask = ~L_PMD_SECT_RDONLY, - .prot = L_PMD_SECT_RDONLY, + .mask = ~(L_PMD_SECT_RDONLY | PMD_SECT_AP2), + .prot = L_PMD_SECT_RDONLY | PMD_SECT_AP2, #else .mask = ~(PMD_SECT_APX | PMD_SECT_AP_WRITE), .prot = PMD_SECT_APX | PMD_SECT_AP_WRITE, diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index b7730ba782dd..7e9dd94452bb 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -218,8 +218,8 @@ void update_vsyscall(struct timekeeper *tk) if (!use_syscall) { /* tkr_mono.cycle_last == tkr_raw.cycle_last */ vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; - vdso_data->raw_time_sec = tk->raw_time.tv_sec; - vdso_data->raw_time_nsec = tk->raw_time.tv_nsec; + vdso_data->raw_time_sec = tk->raw_sec; + vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec; vdso_data->xtime_clock_sec = tk->xtime_sec; vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; /* tkr_raw.xtime_nsec == 0 */ diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index e00b4671bd7c..c97ce91cf023 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -310,7 +310,7 @@ ENTRY(__kernel_clock_getres) b.ne 4f ldr x2, 6f 2: - cbz w1, 3f + cbz x1, 3f stp xzr, x2, [x1] 3: /* res == NULL. */ diff --git a/arch/mips/bcm47xx/leds.c b/arch/mips/bcm47xx/leds.c index d20ae63eb3c2..46abe9e4e0e0 100644 --- a/arch/mips/bcm47xx/leds.c +++ b/arch/mips/bcm47xx/leds.c @@ -330,7 +330,7 @@ bcm47xx_leds_linksys_wrt54g3gv2[] __initconst = { /* Verified on: WRT54GS V1.0 */ static const struct gpio_led bcm47xx_leds_linksys_wrt54g_type_0101[] __initconst = { - BCM47XX_GPIO_LED(0, "green", "wlan", 0, LEDS_GPIO_DEFSTATE_OFF), + BCM47XX_GPIO_LED(0, "green", "wlan", 1, LEDS_GPIO_DEFSTATE_OFF), BCM47XX_GPIO_LED(1, "green", "power", 0, LEDS_GPIO_DEFSTATE_ON), BCM47XX_GPIO_LED(7, "green", "dmz", 1, LEDS_GPIO_DEFSTATE_OFF), }; diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 24c115a0721a..a3f38e6b7ea1 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -650,6 +650,19 @@ static const struct user_regset_view user_mips64_view = { .n = ARRAY_SIZE(mips64_regsets), }; +#ifdef CONFIG_MIPS32_N32 + +static const struct user_regset_view user_mipsn32_view = { + .name = "mipsn32", + .e_flags = EF_MIPS_ABI2, + .e_machine = ELF_ARCH, + .ei_osabi = ELF_OSABI, + .regsets = mips64_regsets, + .n = ARRAY_SIZE(mips64_regsets), +}; + +#endif /* CONFIG_MIPS32_N32 */ + #endif /* CONFIG_64BIT */ const struct user_regset_view *task_user_regset_view(struct task_struct *task) @@ -661,6 +674,10 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) if (test_tsk_thread_flag(task, TIF_32BIT_REGS)) return &user_mips_view; #endif +#ifdef CONFIG_MIPS32_N32 + if (test_tsk_thread_flag(task, TIF_32BIT_ADDR)) + return &user_mipsn32_view; +#endif return &user_mips64_view; #endif } diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c index 48d6349fd9d7..c5f45fc96c74 100644 --- a/arch/mips/ralink/mt7620.c +++ b/arch/mips/ralink/mt7620.c @@ -141,8 +141,8 @@ static struct rt2880_pmx_func i2c_grp_mt7628[] = { FUNC("i2c", 0, 4, 2), }; -static struct rt2880_pmx_func refclk_grp_mt7628[] = { FUNC("reclk", 0, 36, 1) }; -static struct rt2880_pmx_func perst_grp_mt7628[] = { FUNC("perst", 0, 37, 1) }; +static struct rt2880_pmx_func refclk_grp_mt7628[] = { FUNC("refclk", 0, 37, 1) }; +static struct rt2880_pmx_func perst_grp_mt7628[] = { FUNC("perst", 0, 36, 1) }; static struct rt2880_pmx_func wdt_grp_mt7628[] = { FUNC("wdt", 0, 38, 1) }; static struct rt2880_pmx_func spi_grp_mt7628[] = { FUNC("spi", 0, 7, 4) }; diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index c6b855f7892c..9f22195b90ed 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -688,15 +688,15 @@ cas_action: /* ELF32 Process entry path */ lws_compare_and_swap_2: #ifdef CONFIG_64BIT - /* Clip the input registers */ + /* Clip the input registers. We don't need to clip %r23 as we + only use it for word operations */ depdi 0, 31, 32, %r26 depdi 0, 31, 32, %r25 depdi 0, 31, 32, %r24 - depdi 0, 31, 32, %r23 #endif /* Check the validity of the size pointer */ - subi,>>= 4, %r23, %r0 + subi,>>= 3, %r23, %r0 b,n lws_exit_nosys /* Jump to the functions which will load the old and new values into diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index cf8c7e4e0b21..984a54c85952 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -102,7 +102,7 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, static void do_signal(struct pt_regs *regs) { sigset_t *oldset = sigmask_to_save(); - struct ksignal ksig; + struct ksignal ksig = { .sig = 0 }; int ret; int is32 = is_32bit_task(); diff --git a/arch/s390/include/asm/asm-prototypes.h b/arch/s390/include/asm/asm-prototypes.h new file mode 100644 index 000000000000..2c3413b0ca52 --- /dev/null +++ b/arch/s390/include/asm/asm-prototypes.h @@ -0,0 +1,8 @@ +#ifndef _ASM_S390_PROTOTYPES_H + +#include <linux/kvm_host.h> +#include <linux/ftrace.h> +#include <asm/fpu/api.h> +#include <asm-generic/asm-prototypes.h> + +#endif /* _ASM_S390_PROTOTYPES_H */ diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index 12d45f0cfdd9..dde6b52359c5 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -34,8 +34,8 @@ static inline void restore_access_regs(unsigned int *acrs) save_access_regs(&prev->thread.acrs[0]); \ save_ri_cb(prev->thread.ri_cb); \ } \ + update_cr_regs(next); \ if (next->mm) { \ - update_cr_regs(next); \ set_cpu_flag(CIF_FPU); \ restore_access_regs(&next->thread.acrs[0]); \ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 6e72961608f0..07477ba392b7 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -1549,6 +1549,7 @@ static struct s390_insn opcode_e7[] = { { "vfsq", 0xce, INSTR_VRR_VV000MM }, { "vfs", 0xe2, INSTR_VRR_VVV00MM }, { "vftci", 0x4a, INSTR_VRI_VVIMM }, + { "", 0, INSTR_INVALID } }; static struct s390_insn opcode_eb[] = { @@ -1961,7 +1962,7 @@ void show_code(struct pt_regs *regs) { char *mode = user_mode(regs) ? "User" : "Krnl"; unsigned char code[64]; - char buffer[64], *ptr; + char buffer[128], *ptr; mm_segment_t old_fs; unsigned long addr; int start, end, opsize, hops, i; @@ -2024,7 +2025,7 @@ void show_code(struct pt_regs *regs) start += opsize; printk(buffer); ptr = buffer; - ptr += sprintf(ptr, "\n "); + ptr += sprintf(ptr, "\n\t "); hops++; } printk("\n"); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 3c31609df959..ee7b8e7ca4f8 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -325,8 +325,10 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE; if (test_facility(40)) S390_lowcore.machine_flags |= MACHINE_FLAG_LPP; - if (test_facility(50) && test_facility(73)) + if (test_facility(50) && test_facility(73)) { S390_lowcore.machine_flags |= MACHINE_FLAG_TE; + __ctl_set_bit(0, 55); + } if (test_facility(51)) S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC; if (test_facility(129)) { diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 114ee8b96f17..efa035a31b98 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -137,6 +137,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, memset(&p->thread.per_user, 0, sizeof(p->thread.per_user)); memset(&p->thread.per_event, 0, sizeof(p->thread.per_event)); clear_tsk_thread_flag(p, TIF_SINGLE_STEP); + p->thread.per_flags = 0; /* Initialize per thread user and system timer values */ ti = task_thread_info(p); ti->user_timer = 0; diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c index fffa0e5462af..70cdb03d4acd 100644 --- a/arch/s390/kernel/runtime_instr.c +++ b/arch/s390/kernel/runtime_instr.c @@ -47,11 +47,13 @@ void exit_thread_runtime_instr(void) { struct task_struct *task = current; + preempt_disable(); if (!task->thread.ri_cb) return; disable_runtime_instr(); kfree(task->thread.ri_cb); task->thread.ri_cb = NULL; + preempt_enable(); } SYSCALL_DEFINE1(s390_runtime_instr, int, command) @@ -62,9 +64,7 @@ SYSCALL_DEFINE1(s390_runtime_instr, int, command) return -EOPNOTSUPP; if (command == S390_RUNTIME_INSTR_STOP) { - preempt_disable(); exit_thread_runtime_instr(); - preempt_enable(); return 0; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 899c40f826dd..4e1b254c3695 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3114,6 +3114,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) u32 ecx = msr->index; u64 data = msr->data; switch (ecx) { + case MSR_IA32_CR_PAT: + if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) + return 1; + vcpu->arch.pat = data; + svm->vmcb->save.g_pat = data; + mark_dirty(svm->vmcb, VMCB_NPT); + break; case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr); break; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9114588e3e61..67ba0d8f87c7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10394,6 +10394,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip); vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base); vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base); + vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF); + vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF); /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */ if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS) diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index d388de72eaca..ec039f2a0c13 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -833,7 +833,7 @@ EndTable GrpTable: Grp3_1 0: TEST Eb,Ib -1: +1: TEST Eb,Ib 2: NOT Eb 3: NEG Eb 4: MUL AL,Eb diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 47ba64f1e70b..2106014f1ea8 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2468,7 +2468,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, debug_id, (u64)fda->num_fds); continue; } - fd_array = (u32 *)(parent_buffer + fda->parent_offset); + fd_array = (u32 *)(parent_buffer + (uintptr_t)fda->parent_offset); for (fd_index = 0; fd_index < fda->num_fds; fd_index++) task_close_fd(proc, fd_array[fd_index]); } break; @@ -2692,7 +2692,7 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, */ parent_buffer = parent->buffer - binder_alloc_get_user_buffer_offset(&target_proc->alloc); - fd_array = (u32 *)(parent_buffer + fda->parent_offset); + fd_array = (u32 *)(parent_buffer + (uintptr_t)fda->parent_offset); if (!IS_ALIGNED((unsigned long)fd_array, sizeof(u32))) { binder_user_error("%d:%d parent offset not aligned correctly.\n", proc->pid, thread->pid); @@ -2758,7 +2758,7 @@ static int binder_fixup_parent(struct binder_transaction *t, proc->pid, thread->pid); return -EINVAL; } - parent_buffer = (u8 *)(parent->buffer - + parent_buffer = (u8 *)((uintptr_t)parent->buffer - binder_alloc_get_user_buffer_offset( &target_proc->alloc)); *(binder_uintptr_t *)(parent_buffer + bp->parent_offset) = bp->buffer; diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 91a9e6af2ec4..75cced210b2a 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2245,8 +2245,8 @@ static void ata_eh_link_autopsy(struct ata_link *link) if (dev->flags & ATA_DFLAG_DUBIOUS_XFER) eflags |= ATA_EFLAG_DUBIOUS_XFER; ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask); + trace_ata_eh_link_autopsy(dev, ehc->i.action, all_err_mask); } - trace_ata_eh_link_autopsy(dev, ehc->i.action, all_err_mask); DPRINTK("EXIT\n"); } diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 433b60092972..e40f67b7d28b 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -1936,6 +1936,7 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np) if (ret) { dev_err(dev, "%s: Failed to add OPP, %d\n", __func__, ret); + of_node_put(np); goto free_table; } } diff --git a/drivers/clk/ti/clk-dra7-atl.c b/drivers/clk/ti/clk-dra7-atl.c index 2e14dfb588f4..7d060ffe8975 100644 --- a/drivers/clk/ti/clk-dra7-atl.c +++ b/drivers/clk/ti/clk-dra7-atl.c @@ -265,7 +265,7 @@ static int of_dra7_atl_clk_probe(struct platform_device *pdev) /* Get configuration for the ATL instances */ snprintf(prop, sizeof(prop), "atl%u", i); - cfg_node = of_find_node_by_name(node, prop); + cfg_node = of_get_child_by_name(node, prop); if (cfg_node) { ret = of_property_read_u32(cfg_node, "bws", &cdesc->bws); @@ -278,6 +278,7 @@ static int of_dra7_atl_clk_probe(struct platform_device *pdev) atl_write(cinfo, DRA7_ATL_AWSMUX_REG(i), cdesc->aws); } + of_node_put(cfg_node); } cdesc->probed = true; diff --git a/drivers/dma/zx296702_dma.c b/drivers/dma/zx296702_dma.c index 245d759d5ffc..6059d81e701a 100644 --- a/drivers/dma/zx296702_dma.c +++ b/drivers/dma/zx296702_dma.c @@ -813,6 +813,7 @@ static int zx_dma_probe(struct platform_device *op) INIT_LIST_HEAD(&d->slave.channels); dma_cap_set(DMA_SLAVE, d->slave.cap_mask); dma_cap_set(DMA_MEMCPY, d->slave.cap_mask); + dma_cap_set(DMA_CYCLIC, d->slave.cap_mask); dma_cap_set(DMA_PRIVATE, d->slave.cap_mask); d->slave.dev = &op->dev; d->slave.device_free_chan_resources = zx_dma_free_chan_resources; diff --git a/drivers/gpu/drm/armada/Makefile b/drivers/gpu/drm/armada/Makefile index ffd673615772..26412d2f8c98 100644 --- a/drivers/gpu/drm/armada/Makefile +++ b/drivers/gpu/drm/armada/Makefile @@ -4,3 +4,5 @@ armada-y += armada_510.o armada-$(CONFIG_DEBUG_FS) += armada_debugfs.o obj-$(CONFIG_DRM_ARMADA) := armada.o + +CFLAGS_armada_trace.o := -I$(src) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 6e4dd62d4ed9..7ee077aca5da 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -378,14 +378,12 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node, BUG_ON(!hole_node->hole_follows || node->allocated); - if (adj_start < start) - adj_start = start; - if (adj_end > end) - adj_end = end; - if (mm->color_adjust) mm->color_adjust(hole_node, color, &adj_start, &adj_end); + adj_start = max(adj_start, start); + adj_end = min(adj_end, end); + if (flags & DRM_MM_CREATE_TOP) adj_start = adj_end - size; @@ -650,17 +648,15 @@ static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_ adj_end = drm_mm_hole_node_end(entry); hole_size = adj_end - adj_start; - if (adj_start < start) - adj_start = start; - if (adj_end > end) - adj_end = end; - if (mm->color_adjust) { mm->color_adjust(entry, color, &adj_start, &adj_end); if (adj_end <= adj_start) continue; } + adj_start = max(adj_start, start); + adj_end = min(adj_end, end); + if (!check_free_hole(adj_start, adj_end, size, alignment)) continue; diff --git a/drivers/iio/light/cm3232.c b/drivers/iio/light/cm3232.c index fe89b6823217..263e97235ea0 100644 --- a/drivers/iio/light/cm3232.c +++ b/drivers/iio/light/cm3232.c @@ -119,7 +119,7 @@ static int cm3232_reg_init(struct cm3232_chip *chip) if (ret < 0) dev_err(&chip->client->dev, "Error writing reg_cmd\n"); - return 0; + return ret; } /** diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index e397f1b0af09..9a99cee2665a 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -670,12 +670,19 @@ static void srp_path_rec_completion(int status, static int srp_lookup_path(struct srp_rdma_ch *ch) { struct srp_target_port *target = ch->target; - int ret; + int ret = -ENODEV; ch->path.numb_path = 1; init_completion(&ch->done); + /* + * Avoid that the SCSI host can be removed by srp_remove_target() + * before srp_path_rec_completion() is called. + */ + if (!scsi_host_get(target->scsi_host)) + goto out; + ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client, target->srp_host->srp_dev->dev, target->srp_host->port, @@ -689,18 +696,24 @@ static int srp_lookup_path(struct srp_rdma_ch *ch) GFP_KERNEL, srp_path_rec_completion, ch, &ch->path_query); - if (ch->path_query_id < 0) - return ch->path_query_id; + ret = ch->path_query_id; + if (ret < 0) + goto put; ret = wait_for_completion_interruptible(&ch->done); if (ret < 0) - return ret; + goto put; - if (ch->status < 0) + ret = ch->status; + if (ret < 0) shost_printk(KERN_WARNING, target->scsi_host, PFX "Path record query failed\n"); - return ch->status; +put: + scsi_host_put(target->scsi_host); + +out: + return ret; } static int srp_send_req(struct srp_rdma_ch *ch, bool multich) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index eaabf3125846..c52131233ba7 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -3425,7 +3425,7 @@ static int srpt_parse_i_port_id(u8 i_port_id[16], const char *name) { const char *p; unsigned len, count, leading_zero_bytes; - int ret, rc; + int ret; p = name; if (strncasecmp(p, "0x", 2) == 0) @@ -3437,10 +3437,9 @@ static int srpt_parse_i_port_id(u8 i_port_id[16], const char *name) count = min(len / 2, 16U); leading_zero_bytes = 16 - count; memset(i_port_id, 0, leading_zero_bytes); - rc = hex2bin(i_port_id + leading_zero_bytes, p, count); - if (rc < 0) - pr_debug("hex2bin failed for srpt_parse_i_port_id: %d\n", rc); - ret = 0; + ret = hex2bin(i_port_id + leading_zero_bytes, p, count); + if (ret < 0) + pr_debug("hex2bin failed for srpt_parse_i_port_id: %d\n", ret); out: return ret; } diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 8eeab72b93e2..ea47980949ef 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -406,7 +406,8 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait) finish_wait(&ca->set->bucket_wait, &w); out: - wake_up_process(ca->alloc_thread); + if (ca->alloc_thread) + wake_up_process(ca->alloc_thread); trace_bcache_alloc(ca, reserve); diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index cdceefd0e57d..2ec7f90e3455 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -928,7 +928,8 @@ static void __get_memory_limit(struct dm_bufio_client *c, buffers = c->minimum_buffers; *limit_buffers = buffers; - *threshold_buffers = buffers * DM_BUFIO_WRITEBACK_PERCENT / 100; + *threshold_buffers = mult_frac(buffers, + DM_BUFIO_WRITEBACK_PERCENT, 100); } /* @@ -1829,19 +1830,15 @@ static int __init dm_bufio_init(void) memset(&dm_bufio_caches, 0, sizeof dm_bufio_caches); memset(&dm_bufio_cache_names, 0, sizeof dm_bufio_cache_names); - mem = (__u64)((totalram_pages - totalhigh_pages) * - DM_BUFIO_MEMORY_PERCENT / 100) << PAGE_SHIFT; + mem = (__u64)mult_frac(totalram_pages - totalhigh_pages, + DM_BUFIO_MEMORY_PERCENT, 100) << PAGE_SHIFT; if (mem > ULONG_MAX) mem = ULONG_MAX; #ifdef CONFIG_MMU - /* - * Get the size of vmalloc space the same way as VMALLOC_TOTAL - * in fs/proc/internal.h - */ - if (mem > (VMALLOC_END - VMALLOC_START) * DM_BUFIO_VMALLOC_PERCENT / 100) - mem = (VMALLOC_END - VMALLOC_START) * DM_BUFIO_VMALLOC_PERCENT / 100; + if (mem > mult_frac(VMALLOC_TOTAL, DM_BUFIO_VMALLOC_PERCENT, 100)) + mem = mult_frac(VMALLOC_TOTAL, DM_BUFIO_VMALLOC_PERCENT, 100); #endif dm_bufio_default_cache_size = mem; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 47ac131099d9..f7f560f5f056 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -3517,11 +3517,15 @@ struct mapped_device *dm_get_from_kobject(struct kobject *kobj) md = container_of(kobj, struct mapped_device, kobj_holder.kobj); - if (test_bit(DMF_FREEING, &md->flags) || - dm_deleting_md(md)) - return NULL; - + spin_lock(&_minor_lock); + if (test_bit(DMF_FREEING, &md->flags) || dm_deleting_md(md)) { + md = NULL; + goto out; + } dm_get(md); +out: + spin_unlock(&_minor_lock); + return md; } diff --git a/drivers/media/rc/ir-lirc-codec.c b/drivers/media/rc/ir-lirc-codec.c index efc21b1da211..ca107033e429 100644 --- a/drivers/media/rc/ir-lirc-codec.c +++ b/drivers/media/rc/ir-lirc-codec.c @@ -286,11 +286,14 @@ static long ir_lirc_ioctl(struct file *filep, unsigned int cmd, if (!dev->max_timeout) return -ENOSYS; + /* Check for multiply overflow */ + if (val > U32_MAX / 1000) + return -EINVAL; + tmp = val * 1000; - if (tmp < dev->min_timeout || - tmp > dev->max_timeout) - return -EINVAL; + if (tmp < dev->min_timeout || tmp > dev->max_timeout) + return -EINVAL; dev->timeout = tmp; break; diff --git a/drivers/media/usb/as102/as102_fw.c b/drivers/media/usb/as102/as102_fw.c index 07d08c49f4d4..b2e16bb67572 100644 --- a/drivers/media/usb/as102/as102_fw.c +++ b/drivers/media/usb/as102/as102_fw.c @@ -101,18 +101,23 @@ static int as102_firmware_upload(struct as10x_bus_adapter_t *bus_adap, unsigned char *cmd, const struct firmware *firmware) { - struct as10x_fw_pkt_t fw_pkt; + struct as10x_fw_pkt_t *fw_pkt; int total_read_bytes = 0, errno = 0; unsigned char addr_has_changed = 0; + fw_pkt = kmalloc(sizeof(*fw_pkt), GFP_KERNEL); + if (!fw_pkt) + return -ENOMEM; + + for (total_read_bytes = 0; total_read_bytes < firmware->size; ) { int read_bytes = 0, data_len = 0; /* parse intel hex line */ read_bytes = parse_hex_line( (u8 *) (firmware->data + total_read_bytes), - fw_pkt.raw.address, - fw_pkt.raw.data, + fw_pkt->raw.address, + fw_pkt->raw.data, &data_len, &addr_has_changed); @@ -122,28 +127,28 @@ static int as102_firmware_upload(struct as10x_bus_adapter_t *bus_adap, /* detect the end of file */ total_read_bytes += read_bytes; if (total_read_bytes == firmware->size) { - fw_pkt.u.request[0] = 0x00; - fw_pkt.u.request[1] = 0x03; + fw_pkt->u.request[0] = 0x00; + fw_pkt->u.request[1] = 0x03; /* send EOF command */ errno = bus_adap->ops->upload_fw_pkt(bus_adap, (uint8_t *) - &fw_pkt, 2, 0); + fw_pkt, 2, 0); if (errno < 0) goto error; } else { if (!addr_has_changed) { /* prepare command to send */ - fw_pkt.u.request[0] = 0x00; - fw_pkt.u.request[1] = 0x01; + fw_pkt->u.request[0] = 0x00; + fw_pkt->u.request[1] = 0x01; - data_len += sizeof(fw_pkt.u.request); - data_len += sizeof(fw_pkt.raw.address); + data_len += sizeof(fw_pkt->u.request); + data_len += sizeof(fw_pkt->raw.address); /* send cmd to device */ errno = bus_adap->ops->upload_fw_pkt(bus_adap, (uint8_t *) - &fw_pkt, + fw_pkt, data_len, 0); if (errno < 0) @@ -152,6 +157,7 @@ static int as102_firmware_upload(struct as10x_bus_adapter_t *bus_adap, } } error: + kfree(fw_pkt); return (errno == 0) ? total_read_bytes : errno; } diff --git a/drivers/media/usb/cx231xx/cx231xx-cards.c b/drivers/media/usb/cx231xx/cx231xx-cards.c index 2c5f76d588ac..04ae21278440 100644 --- a/drivers/media/usb/cx231xx/cx231xx-cards.c +++ b/drivers/media/usb/cx231xx/cx231xx-cards.c @@ -1672,7 +1672,7 @@ static int cx231xx_usb_probe(struct usb_interface *interface, nr = dev->devno; assoc_desc = udev->actconfig->intf_assoc[0]; - if (assoc_desc->bFirstInterface != ifnum) { + if (!assoc_desc || assoc_desc->bFirstInterface != ifnum) { dev_err(d, "Not found matching IAD interface\n"); retval = -ENODEV; goto err_if; diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c index bc45a225e710..3feaa9b154f0 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls.c +++ b/drivers/media/v4l2-core/v4l2-ctrls.c @@ -1205,6 +1205,16 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type, } EXPORT_SYMBOL(v4l2_ctrl_fill); +static u32 user_flags(const struct v4l2_ctrl *ctrl) +{ + u32 flags = ctrl->flags; + + if (ctrl->is_ptr) + flags |= V4L2_CTRL_FLAG_HAS_PAYLOAD; + + return flags; +} + static void fill_event(struct v4l2_event *ev, struct v4l2_ctrl *ctrl, u32 changes) { memset(ev->reserved, 0, sizeof(ev->reserved)); @@ -1212,7 +1222,7 @@ static void fill_event(struct v4l2_event *ev, struct v4l2_ctrl *ctrl, u32 change ev->id = ctrl->id; ev->u.ctrl.changes = changes; ev->u.ctrl.type = ctrl->type; - ev->u.ctrl.flags = ctrl->flags; + ev->u.ctrl.flags = user_flags(ctrl); if (ctrl->is_ptr) ev->u.ctrl.value64 = 0; else @@ -2541,10 +2551,8 @@ int v4l2_query_ext_ctrl(struct v4l2_ctrl_handler *hdl, struct v4l2_query_ext_ctr else qc->id = ctrl->id; strlcpy(qc->name, ctrl->name, sizeof(qc->name)); - qc->flags = ctrl->flags; + qc->flags = user_flags(ctrl); qc->type = ctrl->type; - if (ctrl->is_ptr) - qc->flags |= V4L2_CTRL_FLAG_HAS_PAYLOAD; qc->elem_size = ctrl->elem_size; qc->elems = ctrl->elems; qc->nr_of_dims = ctrl->nr_of_dims; diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index 8f8418d2ac4a..a0012c3cb4f6 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -2366,9 +2366,9 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) * 4) Get the hardware address. * 5) Put the card to sleep. */ - if (typhoon_reset(ioaddr, WaitSleep) < 0) { + err = typhoon_reset(ioaddr, WaitSleep); + if (err < 0) { err_msg = "could not reset 3XP"; - err = -EIO; goto error_out_dma; } @@ -2382,24 +2382,25 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) typhoon_init_interface(tp); typhoon_init_rings(tp); - if(typhoon_boot_3XP(tp, TYPHOON_STATUS_WAITING_FOR_HOST) < 0) { + err = typhoon_boot_3XP(tp, TYPHOON_STATUS_WAITING_FOR_HOST); + if (err < 0) { err_msg = "cannot boot 3XP sleep image"; - err = -EIO; goto error_out_reset; } INIT_COMMAND_WITH_RESPONSE(&xp_cmd, TYPHOON_CMD_READ_MAC_ADDRESS); - if(typhoon_issue_command(tp, 1, &xp_cmd, 1, xp_resp) < 0) { + err = typhoon_issue_command(tp, 1, &xp_cmd, 1, xp_resp); + if (err < 0) { err_msg = "cannot read MAC address"; - err = -EIO; goto error_out_reset; } *(__be16 *)&dev->dev_addr[0] = htons(le16_to_cpu(xp_resp[0].parm1)); *(__be32 *)&dev->dev_addr[2] = htonl(le32_to_cpu(xp_resp[0].parm2)); - if(!is_valid_ether_addr(dev->dev_addr)) { + if (!is_valid_ether_addr(dev->dev_addr)) { err_msg = "Could not obtain valid ethernet address, aborting"; + err = -EIO; goto error_out_reset; } @@ -2407,7 +2408,8 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) * later when we print out the version reported. */ INIT_COMMAND_WITH_RESPONSE(&xp_cmd, TYPHOON_CMD_READ_VERSIONS); - if(typhoon_issue_command(tp, 1, &xp_cmd, 3, xp_resp) < 0) { + err = typhoon_issue_command(tp, 1, &xp_cmd, 3, xp_resp); + if (err < 0) { err_msg = "Could not get Sleep Image version"; goto error_out_reset; } @@ -2424,9 +2426,9 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if(xp_resp[0].numDesc != 0) tp->capabilities |= TYPHOON_WAKEUP_NEEDS_RESET; - if(typhoon_sleep(tp, PCI_D3hot, 0) < 0) { + err = typhoon_sleep(tp, PCI_D3hot, 0); + if (err < 0) { err_msg = "cannot put adapter to sleep"; - err = -EIO; goto error_out_reset; } @@ -2449,7 +2451,8 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->features = dev->hw_features | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM; - if(register_netdev(dev) < 0) { + err = register_netdev(dev); + if (err < 0) { err_msg = "unable to register netdev"; goto error_out_reset; } diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c index e59d7c283cd4..645ace74429e 100644 --- a/drivers/net/ethernet/intel/e1000e/mac.c +++ b/drivers/net/ethernet/intel/e1000e/mac.c @@ -410,6 +410,9 @@ void e1000e_clear_hw_cntrs_base(struct e1000_hw *hw) * Checks to see of the link status of the hardware has changed. If a * change in link status has been detected, then we read the PHY registers * to get the current speed/duplex if link exists. + * + * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link + * up). **/ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) { @@ -423,7 +426,7 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) * Change or Rx Sequence Error interrupt. */ if (!mac->get_link_status) - return 0; + return 1; /* First we want to see if the MII Status Register reports * link. If so, then we want to get the current speed/duplex @@ -461,10 +464,12 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw) * different link partner. */ ret_val = e1000e_config_fc_after_link_up(hw); - if (ret_val) + if (ret_val) { e_dbg("Error configuring flow control\n"); + return ret_val; + } - return ret_val; + return 1; } /** diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 80ec587d510e..5205f1ebe381 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5017,7 +5017,7 @@ static bool e1000e_has_link(struct e1000_adapter *adapter) case e1000_media_type_copper: if (hw->mac.get_link_status) { ret_val = hw->mac.ops.check_for_link(hw); - link_active = !hw->mac.get_link_status; + link_active = ret_val > 0; } else { link_active = true; } @@ -5035,7 +5035,7 @@ static bool e1000e_has_link(struct e1000_adapter *adapter) break; } - if ((ret_val == E1000_ERR_PHY) && (hw->phy.type == e1000_phy_igp_3) && + if ((ret_val == -E1000_ERR_PHY) && (hw->phy.type == e1000_phy_igp_3) && (er32(CTRL) & E1000_PHY_CTRL_GBE_DISABLE)) { /* See e1000_kmrn_lock_loss_workaround_ich8lan() */ e_info("Gigabit has been disabled, downgrading speed\n"); diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index de13aeacae97..8e674a0988b0 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -1744,6 +1744,7 @@ s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, s32 ret_val = 0; u16 i, phy_status; + *success = false; for (i = 0; i < iterations; i++) { /* Some PHYs require the MII_BMSR register to be read * twice due to the link bit being sticky. No harm doing @@ -1763,16 +1764,16 @@ s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, ret_val = e1e_rphy(hw, MII_BMSR, &phy_status); if (ret_val) break; - if (phy_status & BMSR_LSTATUS) + if (phy_status & BMSR_LSTATUS) { + *success = true; break; + } if (usec_interval >= 1000) msleep(usec_interval / 1000); else udelay(usec_interval); } - *success = (i < iterations); - return ret_val; } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 09281558bfbc..c21fa56afd7c 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -1226,7 +1226,7 @@ static bool fm10k_clean_tx_irq(struct fm10k_q_vector *q_vector, break; /* prevent any other reads prior to eop_desc */ - read_barrier_depends(); + smp_rmb(); /* if DD is not set pending work has not been completed */ if (!(eop_desc->flags & FM10K_TXD_FLAG_DONE)) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 4edbab6ca7ef..b5b228c9a030 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3595,7 +3595,7 @@ static bool i40e_clean_fdir_tx_irq(struct i40e_ring *tx_ring, int budget) break; /* prevent any other reads prior to eop_desc */ - read_barrier_depends(); + smp_rmb(); /* if the descriptor isn't done, no work yet to do */ if (!(eop_desc->cmd_type_offset_bsz & diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 26c55bba4bf3..6dcc3854844d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -663,7 +663,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) break; /* prevent any other reads prior to eop_desc */ - read_barrier_depends(); + smp_rmb(); /* we have caught up to head, no work left to do */ if (tx_head == tx_desc) diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 39db70a597ed..1ed27fcd5031 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -172,7 +172,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) break; /* prevent any other reads prior to eop_desc */ - read_barrier_depends(); + smp_rmb(); /* we have caught up to head, no work left to do */ if (tx_head == tx_desc) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index ff6e57d788eb..c55552c3d2f9 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -6433,7 +6433,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) break; /* prevent any other reads prior to eop_desc */ - read_barrier_depends(); + smp_rmb(); /* if DD is not set pending work has not been completed */ if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD))) diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 297af801f051..519b72c41888 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -809,7 +809,7 @@ static bool igbvf_clean_tx_irq(struct igbvf_ring *tx_ring) break; /* prevent any other reads prior to eop_desc */ - read_barrier_depends(); + smp_rmb(); /* if DD is not set pending work has not been completed */ if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD))) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 83645d8503d4..a5b443171b8b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1114,7 +1114,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, break; /* prevent any other reads prior to eop_desc */ - read_barrier_depends(); + smp_rmb(); /* if DD is not set pending work has not been completed */ if (!(eop_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD))) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 592ff237d692..50bbad37d640 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -312,7 +312,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, break; /* prevent any other reads prior to eop_desc */ - read_barrier_depends(); + smp_rmb(); /* if DD is not set pending work has not been completed */ if (!(eop_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD))) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 55033aed6d6b..d89a2d8adc2a 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -706,8 +706,11 @@ static int ath10k_core_get_board_id_from_otp(struct ath10k *ar) "boot get otp board id result 0x%08x board_id %d chip_id %d\n", result, board_id, chip_id); - if ((result & ATH10K_BMI_BOARD_ID_STATUS_MASK) != 0) + if ((result & ATH10K_BMI_BOARD_ID_STATUS_MASK) != 0 || + (board_id == 0)) { + ath10k_warn(ar, "board id is not exist in otp, ignore it\n"); return -EOPNOTSUPP; + } ar->id.bmi_ids_valid = true; ar->id.bmi_board_id = board_id; diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 16a5c5fd3925..28042100ae0a 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -1237,6 +1237,36 @@ static int ath10k_monitor_recalc(struct ath10k *ar) return ath10k_monitor_stop(ar); } +static bool ath10k_mac_can_set_cts_prot(struct ath10k_vif *arvif) +{ + struct ath10k *ar = arvif->ar; + + lockdep_assert_held(&ar->conf_mutex); + + if (!arvif->is_started) { + ath10k_dbg(ar, ATH10K_DBG_MAC, "defer cts setup, vdev is not ready yet\n"); + return false; + } + + return true; +} + +static int ath10k_mac_set_cts_prot(struct ath10k_vif *arvif) +{ + struct ath10k *ar = arvif->ar; + u32 vdev_param; + + lockdep_assert_held(&ar->conf_mutex); + + vdev_param = ar->wmi.vdev_param->protection_mode; + + ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d cts_protection %d\n", + arvif->vdev_id, arvif->use_cts_prot); + + return ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, + arvif->use_cts_prot ? 1 : 0); +} + static int ath10k_recalc_rtscts_prot(struct ath10k_vif *arvif) { struct ath10k *ar = arvif->ar; @@ -5386,20 +5416,18 @@ static void ath10k_bss_info_changed(struct ieee80211_hw *hw, if (changed & BSS_CHANGED_ERP_CTS_PROT) { arvif->use_cts_prot = info->use_cts_prot; - ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d cts_prot %d\n", - arvif->vdev_id, info->use_cts_prot); ret = ath10k_recalc_rtscts_prot(arvif); if (ret) ath10k_warn(ar, "failed to recalculate rts/cts prot for vdev %d: %d\n", arvif->vdev_id, ret); - vdev_param = ar->wmi.vdev_param->protection_mode; - ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param, - info->use_cts_prot ? 1 : 0); - if (ret) - ath10k_warn(ar, "failed to set protection mode %d on vdev %i: %d\n", - info->use_cts_prot, arvif->vdev_id, ret); + if (ath10k_mac_can_set_cts_prot(arvif)) { + ret = ath10k_mac_set_cts_prot(arvif); + if (ret) + ath10k_warn(ar, "failed to set cts protection for vdev %d: %d\n", + arvif->vdev_id, ret); + } } if (changed & BSS_CHANGED_ERP_SLOT) { @@ -7463,6 +7491,13 @@ ath10k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw, arvif->is_up = true; } + if (ath10k_mac_can_set_cts_prot(arvif)) { + ret = ath10k_mac_set_cts_prot(arvif); + if (ret) + ath10k_warn(ar, "failed to set cts protection for vdev %d: %d\n", + arvif->vdev_id, ret); + } + mutex_unlock(&ar->conf_mutex); return 0; diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c index 5ce4fdfca724..ba411cba6fc9 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c @@ -1156,8 +1156,10 @@ static int ath10k_wmi_tlv_op_pull_fw_stats(struct ath10k *ar, struct ath10k_fw_stats_pdev *dst; src = data; - if (data_len < sizeof(*src)) + if (data_len < sizeof(*src)) { + kfree(tb); return -EPROTO; + } data += sizeof(*src); data_len -= sizeof(*src); @@ -1177,8 +1179,10 @@ static int ath10k_wmi_tlv_op_pull_fw_stats(struct ath10k *ar, struct ath10k_fw_stats_vdev *dst; src = data; - if (data_len < sizeof(*src)) + if (data_len < sizeof(*src)) { + kfree(tb); return -EPROTO; + } data += sizeof(*src); data_len -= sizeof(*src); @@ -1196,8 +1200,10 @@ static int ath10k_wmi_tlv_op_pull_fw_stats(struct ath10k *ar, struct ath10k_fw_stats_peer *dst; src = data; - if (data_len < sizeof(*src)) + if (data_len < sizeof(*src)) { + kfree(tb); return -EPROTO; + } data += sizeof(*src); data_len -= sizeof(*src); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c index 0708eedd9671..1c69e8140d9d 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c @@ -664,7 +664,7 @@ void rtl92ee_set_fw_rsvdpagepkt(struct ieee80211_hw *hw, bool b_dl_finished) struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); struct sk_buff *skb = NULL; - + bool rtstatus; u32 totalpacketlen; u8 u1rsvdpageloc[5] = { 0 }; bool b_dlok = false; @@ -727,7 +727,9 @@ void rtl92ee_set_fw_rsvdpagepkt(struct ieee80211_hw *hw, bool b_dl_finished) memcpy((u8 *)skb_put(skb, totalpacketlen), &reserved_page_packet, totalpacketlen); - b_dlok = true; + rtstatus = rtl_cmd_send_packet(hw, skb); + if (rtstatus) + b_dlok = true; if (b_dlok) { RT_TRACE(rtlpriv, COMP_POWER, DBG_LOUD , diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c index bbb789f8990b..738d541a2255 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c @@ -1377,6 +1377,7 @@ static void _rtl8821ae_get_wakeup_reason(struct ieee80211_hw *hw) ppsc->wakeup_reason = 0; + do_gettimeofday(&ts); rtlhal->last_suspend_sec = ts.tv_sec; switch (fw_reason) { diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 96526dcfdd37..ff7b9632ad61 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -823,7 +823,7 @@ static int init_labels(struct nd_mapping *nd_mapping, int num_labels) nsindex = to_namespace_index(ndd, 0); memset(nsindex, 0, ndd->nsarea.config_size); for (i = 0; i < 2; i++) { - int rc = nd_label_write_index(ndd, i, i*2, ND_NSINDEX_INIT); + int rc = nd_label_write_index(ndd, i, 3 - i, ND_NSINDEX_INIT); if (rc) return rc; diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index aae7379af4e4..c2184104b789 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -1305,7 +1305,7 @@ static umode_t namespace_visible(struct kobject *kobj, if (a == &dev_attr_resource.attr) { if (is_namespace_blk(dev)) return 0; - return a->mode; + return 0400; } if (is_namespace_pmem(dev) || is_namespace_blk(dev)) { diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index b83df942794f..193ac13de49b 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1414,8 +1414,16 @@ static void program_hpp_type0(struct pci_dev *dev, struct hpp_type0 *hpp) static void program_hpp_type1(struct pci_dev *dev, struct hpp_type1 *hpp) { - if (hpp) - dev_warn(&dev->dev, "PCI-X settings not supported\n"); + int pos; + + if (!hpp) + return; + + pos = pci_find_capability(dev, PCI_CAP_ID_PCIX); + if (!pos) + return; + + dev_warn(&dev->dev, "PCI-X settings not supported\n"); } static bool pcie_root_rcb_set(struct pci_dev *dev) @@ -1441,6 +1449,9 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp) if (!hpp) return; + if (!pci_is_pcie(dev)) + return; + if (hpp->revision > 1) { dev_warn(&dev->dev, "PCIe settings rev %d not supported\n", hpp->revision); diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index 1f4a1f02a2cd..fec1ef2b1748 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -315,6 +315,7 @@ config SPI_FSL_SPI config SPI_FSL_DSPI tristate "Freescale DSPI controller" select REGMAP_MMIO + depends on HAS_DMA depends on SOC_VF610 || SOC_LS1021A || ARCH_LAYERSCAPE || COMPILE_TEST help This enables support for the Freescale DSPI controller in master diff --git a/drivers/staging/iio/cdc/ad7150.c b/drivers/staging/iio/cdc/ad7150.c index e8d0ff2d5c9b..808d6ebf6c94 100644 --- a/drivers/staging/iio/cdc/ad7150.c +++ b/drivers/staging/iio/cdc/ad7150.c @@ -272,7 +272,7 @@ static int ad7150_write_event_config(struct iio_dev *indio_dev, error_ret: mutex_unlock(&chip->state_lock); - return 0; + return ret; } static int ad7150_read_event_value(struct iio_dev *indio_dev, diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index fd493412b172..bb73401f5761 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1923,12 +1923,14 @@ attach: if (!(hdr->opcode & ISCSI_OP_IMMEDIATE)) { int cmdsn_ret = iscsit_sequence_cmd(conn, cmd, buf, hdr->cmdsn); - if (cmdsn_ret == CMDSN_HIGHER_THAN_EXP) + if (cmdsn_ret == CMDSN_HIGHER_THAN_EXP) { out_of_order_cmdsn = 1; - else if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) + } else if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) { + target_put_sess_cmd(&cmd->se_cmd); return 0; - else if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) + } else if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) { return -1; + } } iscsit_ack_from_expstatsn(conn, be32_to_cpu(hdr->exp_statsn)); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index a42054edd427..37abf881ca75 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1970,6 +1970,8 @@ static void target_restart_delayed_cmds(struct se_device *dev) list_del(&cmd->se_delayed_node); spin_unlock(&dev->delayed_cmd_lock); + cmd->transport_state |= CMD_T_SENT; + __target_execute_cmd(cmd, true); if (cmd->sam_task_attr == TCM_ORDERED_TAG) @@ -2007,6 +2009,8 @@ static void transport_complete_task_attr(struct se_cmd *cmd) pr_debug("Incremented dev_cur_ordered_id: %u for ORDERED\n", dev->dev_cur_ordered_id); } + cmd->se_cmd_flags &= ~SCF_TASK_ATTR_SET; + restart: target_restart_delayed_cmds(dev); } diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index e4110d6de0b5..da6cc25baaef 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -703,6 +703,7 @@ vhost_scsi_iov_to_sgl(struct vhost_scsi_cmd *cmd, bool write, struct scatterlist *sg, int sg_count) { size_t off = iter->iov_offset; + struct scatterlist *p = sg; int i, ret; for (i = 0; i < iter->nr_segs; i++) { @@ -711,8 +712,8 @@ vhost_scsi_iov_to_sgl(struct vhost_scsi_cmd *cmd, bool write, ret = vhost_scsi_map_to_sgl(cmd, base, len, sg, write); if (ret < 0) { - for (i = 0; i < sg_count; i++) { - struct page *page = sg_page(&sg[i]); + while (p < sg) { + struct page *page = sg_page(p++); if (page) put_page(page); } diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 0e0eb10f82a0..816a0e08ef10 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -316,7 +316,7 @@ static int xenbus_write_transaction(unsigned msg_type, rc = -ENOMEM; goto out; } - } else if (msg_type == XS_TRANSACTION_END) { + } else if (u->u.msg.tx_id != 0) { list_for_each_entry(trans, &u->transactions, list) if (trans->handle.id == u->u.msg.tx_id) break; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 511078586fa1..73f1d1b3a51c 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -483,6 +483,9 @@ static int v9fs_test_inode(struct inode *inode, void *data) if (v9inode->qid.type != st->qid.type) return 0; + + if (v9inode->qid.path != st->qid.path) + return 0; return 1; } diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index cb899af1babc..0b88744c6446 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -87,6 +87,9 @@ static int v9fs_test_inode_dotl(struct inode *inode, void *data) if (v9inode->qid.type != st->qid.type) return 0; + + if (v9inode->qid.path != st->qid.path) + return 0; return 1; } diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 35b755e79c2d..fe6e7050fe50 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -87,7 +87,8 @@ static int autofs4_write(struct autofs_sb_info *sbi, spin_unlock_irqrestore(¤t->sighand->siglock, flags); } - return (bytes > 0); + /* if 'wr' returned 0 (impossible) we assume -EIO (safe) */ + return bytes == 0 ? 0 : wr < 0 ? wr : -EIO; } static void autofs4_notify_daemon(struct autofs_sb_info *sbi, @@ -101,6 +102,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, } pkt; struct file *pipe = NULL; size_t pktsz; + int ret; DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d", (unsigned long) wq->wait_queue_token, wq->name.len, wq->name.name, type); @@ -173,7 +175,18 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, mutex_unlock(&sbi->wq_mutex); if (autofs4_write(sbi, pipe, &pkt, pktsz)) + switch (ret = autofs4_write(sbi, pipe, &pkt, pktsz)) { + case 0: + break; + case -ENOMEM: + case -ERESTARTSYS: + /* Just fail this one */ + autofs4_wait_release(sbi, wq->wait_queue_token, ret); + break; + default: autofs4_catatonic_mode(sbi); + break; + } fput(pipe); } diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c index 778282944530..837a9a8d579e 100644 --- a/fs/btrfs/uuid-tree.c +++ b/fs/btrfs/uuid-tree.c @@ -348,7 +348,5 @@ skip: out: btrfs_free_path(path); - if (ret) - btrfs_warn(fs_info, "btrfs_uuid_tree_iterate failed %d", ret); - return 0; + return ret; } diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 286f10b0363b..4f457d5c4933 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -442,15 +442,16 @@ void ecryptfs_release_messaging(void) } if (ecryptfs_daemon_hash) { struct ecryptfs_daemon *daemon; + struct hlist_node *n; int i; mutex_lock(&ecryptfs_daemon_hash_mux); for (i = 0; i < (1 << ecryptfs_hash_bits); i++) { int rc; - hlist_for_each_entry(daemon, - &ecryptfs_daemon_hash[i], - euid_chain) { + hlist_for_each_entry_safe(daemon, n, + &ecryptfs_daemon_hash[i], + euid_chain) { rc = ecryptfs_exorcise_daemon(daemon); if (rc) printk(KERN_ERR "%s: Error whilst " diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c index 15ebac242288..d3d6b28ce9b9 100644 --- a/fs/ext4/crypto_key.c +++ b/fs/ext4/crypto_key.c @@ -220,11 +220,9 @@ int _ext4_get_encryption_info(struct inode *inode) int mode; int res; - if (!ext4_read_workqueue) { - res = ext4_init_crypto(); - if (res) - return res; - } + res = ext4_init_crypto(); + if (res) + return res; retry: crypt_info = ACCESS_ONCE(ei->i_crypt_info); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index fc496c646d12..468e7fe3616c 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4810,7 +4810,8 @@ static long ext4_zero_range(struct file *file, loff_t offset, } if (!(mode & FALLOC_FL_KEEP_SIZE) && - offset + len > i_size_read(inode)) { + (offset + len > i_size_read(inode) || + offset + len > EXT4_I(inode)->i_disksize)) { new_size = offset + len; ret = inode_newsize_ok(inode, new_size); if (ret) @@ -4986,7 +4987,8 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) } if (!(mode & FALLOC_FL_KEEP_SIZE) && - offset + len > i_size_read(inode)) { + (offset + len > i_size_read(inode) || + offset + len > EXT4_I(inode)->i_disksize)) { new_size = offset + len; ret = inode_newsize_ok(inode, new_size); if (ret) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 112f8e04c549..3f52efa0f94f 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -253,6 +253,9 @@ static int __f2fs_set_acl(struct inode *inode, int type, int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + return __f2fs_set_acl(inode, type, acl, NULL); } diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index e86f67ac96c6..2eb778174a9b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -29,7 +29,6 @@ struct kmem_cache *inode_entry_slab; void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) { set_ckpt_flags(sbi, CP_ERROR_FLAG); - sbi->sb->s_flags |= MS_RDONLY; if (!end_io) f2fs_flush_merged_writes(sbi); } @@ -402,24 +401,23 @@ const struct address_space_operations f2fs_meta_aops = { #endif }; -static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) +static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, + unsigned int devidx, int type) { struct inode_management *im = &sbi->im[type]; struct ino_entry *e, *tmp; tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS); -retry: + radix_tree_preload(GFP_NOFS | __GFP_NOFAIL); spin_lock(&im->ino_lock); e = radix_tree_lookup(&im->ino_root, ino); if (!e) { e = tmp; - if (radix_tree_insert(&im->ino_root, ino, e)) { - spin_unlock(&im->ino_lock); - radix_tree_preload_end(); - goto retry; - } + if (unlikely(radix_tree_insert(&im->ino_root, ino, e))) + f2fs_bug_on(sbi, 1); + memset(e, 0, sizeof(struct ino_entry)); e->ino = ino; @@ -427,6 +425,10 @@ retry: if (type != ORPHAN_INO) im->ino_num++; } + + if (type == FLUSH_INO) + f2fs_set_bit(devidx, (char *)&e->dirty_device); + spin_unlock(&im->ino_lock); radix_tree_preload_end(); @@ -455,7 +457,7 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { /* add new dirty ino entry into list */ - __add_ino_entry(sbi, ino, type); + __add_ino_entry(sbi, ino, 0, type); } void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) @@ -481,7 +483,7 @@ void release_ino_entry(struct f2fs_sb_info *sbi, bool all) struct ino_entry *e, *tmp; int i; - for (i = all ? ORPHAN_INO: APPEND_INO; i <= UPDATE_INO; i++) { + for (i = all ? ORPHAN_INO : APPEND_INO; i < MAX_INO_ENTRY; i++) { struct inode_management *im = &sbi->im[i]; spin_lock(&im->ino_lock); @@ -495,6 +497,27 @@ void release_ino_entry(struct f2fs_sb_info *sbi, bool all) } } +void set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, + unsigned int devidx, int type) +{ + __add_ino_entry(sbi, ino, devidx, type); +} + +bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, + unsigned int devidx, int type) +{ + struct inode_management *im = &sbi->im[type]; + struct ino_entry *e; + bool is_dirty = false; + + spin_lock(&im->ino_lock); + e = radix_tree_lookup(&im->ino_root, ino); + if (e && f2fs_test_bit(devidx, (char *)&e->dirty_device)) + is_dirty = true; + spin_unlock(&im->ino_lock); + return is_dirty; +} + int acquire_orphan_inode(struct f2fs_sb_info *sbi) { struct inode_management *im = &sbi->im[ORPHAN_INO]; @@ -531,7 +554,7 @@ void release_orphan_inode(struct f2fs_sb_info *sbi) void add_orphan_inode(struct inode *inode) { /* add new orphan ino entry into list */ - __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, ORPHAN_INO); + __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, 0, ORPHAN_INO); update_inode_page(inode); } @@ -555,7 +578,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) return err; } - __add_ino_entry(sbi, ino, ORPHAN_INO); + __add_ino_entry(sbi, ino, 0, ORPHAN_INO); inode = f2fs_iget_retry(sbi->sb, ino); if (IS_ERR(inode)) { @@ -591,6 +614,9 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) block_t start_blk, orphan_blocks, i, j; unsigned int s_flags = sbi->sb->s_flags; int err = 0; +#ifdef CONFIG_QUOTA + int quota_enabled; +#endif if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG)) return 0; @@ -603,8 +629,9 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) #ifdef CONFIG_QUOTA /* Needed for iput() to work correctly and not trash data */ sbi->sb->s_flags |= MS_ACTIVE; + /* Turn on quotas so that they are updated correctly */ - f2fs_enable_quota_files(sbi); + quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY); #endif start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); @@ -632,7 +659,8 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) out: #ifdef CONFIG_QUOTA /* Turn quotas off */ - f2fs_quota_off_umount(sbi->sb); + if (quota_enabled) + f2fs_quota_off_umount(sbi->sb); #endif sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ @@ -987,7 +1015,7 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi) update_inode_page(inode); iput(inode); } - }; + } return 0; } @@ -1147,6 +1175,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct super_block *sb = sbi->sb; struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); u64 kbytes_written; + int err; /* Flush all the NAT/SIT pages */ while (get_pages(sbi, F2FS_DIRTY_META)) { @@ -1240,6 +1269,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (unlikely(f2fs_cp_error(sbi))) return -EIO; + /* flush all device cache */ + err = f2fs_flush_device_cache(sbi); + if (err) + return err; + /* write out checkpoint buffer at block 0 */ update_meta_page(sbi, ckpt, start_blk++); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c8583d7a1845..cdccc429325b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -172,7 +172,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, { struct bio *bio; - bio = f2fs_bio_alloc(npages); + bio = f2fs_bio_alloc(sbi, npages, true); f2fs_target_device(sbi, blk_addr, bio); bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; @@ -417,8 +417,8 @@ next: bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; - /* set submitted = 1 as a return value */ - fio->submitted = 1; + /* set submitted = true as a return value */ + fio->submitted = true; inc_page_count(sbi, WB_DATA_TYPE(bio_page)); @@ -472,7 +472,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, f2fs_wait_on_block_writeback(sbi, blkaddr); } - bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, BIO_MAX_PAGES)); + bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false); if (!bio) { if (ctx) fscrypt_release_ctx(ctx); @@ -832,6 +832,13 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) struct f2fs_map_blocks map; int err = 0; + /* convert inline data for Direct I/O*/ + if (iocb->ki_flags & IOCB_DIRECT) { + err = f2fs_convert_inline_inode(inode); + if (err) + return err; + } + if (is_inode_flag_set(inode, FI_NO_PREALLOC)) return 0; @@ -844,15 +851,11 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) map.m_next_pgofs = NULL; - if (iocb->ki_flags & IOCB_DIRECT) { - err = f2fs_convert_inline_inode(inode); - if (err) - return err; + if (iocb->ki_flags & IOCB_DIRECT) return f2fs_map_blocks(inode, &map, 1, __force_buffered_io(inode, WRITE) ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO); - } if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) { err = f2fs_convert_inline_inode(inode); if (err) @@ -1332,7 +1335,7 @@ static int f2fs_read_data_pages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { - struct inode *inode = file->f_mapping->host; + struct inode *inode = mapping->host; struct page *page = list_last_entry(pages, struct page, lru); trace_f2fs_readpages(inode, page, nr_pages); @@ -1493,6 +1496,7 @@ static int __write_data_page(struct page *page, bool *submitted, int err = 0; struct f2fs_io_info fio = { .sbi = sbi, + .ino = inode->i_ino, .type = DATA, .op = REQ_OP_WRITE, .op_flags = wbc_to_write_flags(wbc), @@ -1564,8 +1568,11 @@ write: err = do_write_data_page(&fio); } } + + down_write(&F2FS_I(inode)->i_sem); if (F2FS_I(inode)->last_disk_size < psize) F2FS_I(inode)->last_disk_size = psize; + up_write(&F2FS_I(inode)->i_sem); done: if (err && err != -ENOENT) @@ -1945,6 +1952,12 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, } trace_f2fs_write_begin(inode, pos, len, flags); + if (f2fs_is_atomic_file(inode) && + !available_free_memory(sbi, INMEM_PAGES)) { + err = -ENOMEM; + goto fail; + } + /* * We should check this at this moment to avoid deadlock on inode page * and #0 page. The locking rule for inline_data conversion should be: @@ -1960,7 +1973,8 @@ repeat: * Do not use grab_cache_page_write_begin() to avoid deadlock due to * wait_for_stable_page. Will wait that below with our IO control. */ - page = grab_cache_page(mapping, index); + page = f2fs_pagecache_get_page(mapping, index, + FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS); if (!page) { err = -ENOMEM; goto fail; @@ -2021,6 +2035,8 @@ repeat: fail: f2fs_put_page(page, 1); f2fs_write_failed(mapping, pos + len); + if (f2fs_is_atomic_file(inode)) + drop_inmem_pages_all(sbi); return err; } diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 87f449845f5f..ecada8425268 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -45,9 +45,18 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); si->ndirty_data = get_pages(sbi, F2FS_DIRTY_DATA); + si->ndirty_qdata = get_pages(sbi, F2FS_DIRTY_QDATA); si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA); si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE]; si->ndirty_files = sbi->ndirty_inode[FILE_INODE]; + + si->nquota_files = 0; + if (f2fs_sb_has_quota_ino(sbi->sb)) { + for (i = 0; i < MAXQUOTAS; i++) { + if (f2fs_qf_ino(sbi->sb, i)) + si->nquota_files++; + } + } si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); si->aw_cnt = atomic_read(&sbi->aw_cnt); @@ -61,6 +70,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) atomic_read(&SM_I(sbi)->fcc_info->issued_flush); si->nr_flushing = atomic_read(&SM_I(sbi)->fcc_info->issing_flush); + si->flush_list_empty = + llist_empty(&SM_I(sbi)->fcc_info->issue_list); } if (SM_I(sbi) && SM_I(sbi)->dcc_info) { si->nr_discarded = @@ -96,9 +107,9 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->dirty_nats = NM_I(sbi)->dirty_nat_cnt; si->sits = MAIN_SEGS(sbi); si->dirty_sits = SIT_I(sbi)->dirty_sentries; - si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID_LIST]; + si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID]; si->avail_nids = NM_I(sbi)->available_nids; - si->alloc_nids = NM_I(sbi)->nid_cnt[ALLOC_NID_LIST]; + si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID]; si->bg_gc = sbi->bg_gc; si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) * 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg) @@ -231,14 +242,14 @@ get_cache: } /* free nids */ - si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] + - NM_I(sbi)->nid_cnt[ALLOC_NID_LIST]) * + si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID] + + NM_I(sbi)->nid_cnt[PREALLOC_NID]) * sizeof(struct free_nid); si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry); si->cache_mem += NM_I(sbi)->dirty_nat_cnt * sizeof(struct nat_entry_set); si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages); - for (i = 0; i <= ORPHAN_INO; i++) + for (i = 0; i < MAX_INO_ENTRY; i++) si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); si->cache_mem += atomic_read(&sbi->total_ext_tree) * sizeof(struct extent_tree); @@ -262,9 +273,10 @@ static int stat_show(struct seq_file *s, void *v) list_for_each_entry(si, &f2fs_stat_list, stat_list) { update_general_status(si->sbi); - seq_printf(s, "\n=====[ partition info(%pg). #%d, %s]=====\n", + seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n", si->sbi->sb->s_bdev, i++, - f2fs_readonly(si->sbi->sb) ? "RO": "RW"); + f2fs_readonly(si->sbi->sb) ? "RO": "RW", + f2fs_cp_error(si->sbi) ? "Error": "Good"); seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ", si->sit_area_segs, si->nat_area_segs); seq_printf(s, "[SSA: %d] [MAIN: %d", @@ -349,10 +361,11 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); - seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d), " + seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), " "Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n", si->nr_wb_cp_data, si->nr_wb_data, si->nr_flushing, si->nr_flushed, + si->flush_list_empty, si->nr_discarding, si->nr_discarded, si->nr_discard_cmd, si->undiscard_blks); seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d), " @@ -365,6 +378,8 @@ static int stat_show(struct seq_file *s, void *v) si->ndirty_dent, si->ndirty_dirs, si->ndirty_all); seq_printf(s, " - datas: %4d in files:%4d\n", si->ndirty_data, si->ndirty_files); + seq_printf(s, " - quota datas: %4d in quota files:%4d\n", + si->ndirty_qdata, si->nquota_files); seq_printf(s, " - meta: %4d in %4d\n", si->ndirty_meta, si->meta_pages); seq_printf(s, " - imeta: %4d\n", diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 4f2a8fedb313..1955707b138b 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -10,10 +10,12 @@ */ #include <linux/fs.h> #include <linux/f2fs_fs.h> +#include <linux/sched.h> #include "f2fs.h" #include "node.h" #include "acl.h" #include "xattr.h" +#include <trace/events/f2fs.h> static unsigned long dir_blocks(struct inode *inode) { @@ -847,6 +849,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) struct f2fs_dentry_block *dentry_blk = NULL; struct page *dentry_page = NULL; struct file_ra_state *ra = &file->f_ra; + loff_t start_pos = ctx->pos; unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); struct f2fs_dentry_ptr d; struct fscrypt_str fstr = FSTR_INIT(NULL, 0); @@ -855,24 +858,32 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) if (f2fs_encrypted_inode(inode)) { err = fscrypt_get_encryption_info(inode); if (err && err != -ENOKEY) - return err; + goto out; err = fscrypt_fname_alloc_buffer(inode, F2FS_NAME_LEN, &fstr); if (err < 0) - return err; + goto out; } if (f2fs_has_inline_dentry(inode)) { err = f2fs_read_inline_dir(file, ctx, &fstr); - goto out; + goto out_free; } - /* readahead for multi pages of dir */ - if (npages - n > 1 && !ra_has_index(ra, n)) - page_cache_sync_readahead(inode->i_mapping, ra, file, n, + for (; n < npages; n++, ctx->pos = n * NR_DENTRY_IN_BLOCK) { + + /* allow readdir() to be interrupted */ + if (fatal_signal_pending(current)) { + err = -ERESTARTSYS; + goto out_free; + } + cond_resched(); + + /* readahead for multi pages of dir */ + if (npages - n > 1 && !ra_has_index(ra, n)) + page_cache_sync_readahead(inode->i_mapping, ra, file, n, min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES)); - for (; n < npages; n++) { dentry_page = get_lock_data_page(inode, n, false); if (IS_ERR(dentry_page)) { err = PTR_ERR(dentry_page); @@ -880,7 +891,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) err = 0; continue; } else { - goto out; + goto out_free; } } @@ -896,12 +907,13 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) break; } - ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK; kunmap(dentry_page); f2fs_put_page(dentry_page, 1); } -out: +out_free: fscrypt_fname_free_buffer(&fstr); +out: + trace_f2fs_readdir(inode, start_pos, ctx->pos, err); return err < 0 ? err : 0; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c1a0aef8efc6..081ec493baae 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -47,6 +47,8 @@ enum { FAULT_KMALLOC, FAULT_PAGE_ALLOC, + FAULT_PAGE_GET, + FAULT_ALLOC_BIO, FAULT_ALLOC_NID, FAULT_ORPHAN, FAULT_BLOCK, @@ -94,6 +96,7 @@ extern char *fault_name[FAULT_MAX]; #define F2FS_MOUNT_GRPQUOTA 0x00100000 #define F2FS_MOUNT_PRJQUOTA 0x00200000 #define F2FS_MOUNT_QUOTA 0x00400000 +#define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000 #define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option) @@ -119,6 +122,8 @@ struct f2fs_mount_info { #define F2FS_FEATURE_EXTRA_ATTR 0x0008 #define F2FS_FEATURE_PRJQUOTA 0x0010 #define F2FS_FEATURE_INODE_CHKSUM 0x0020 +#define F2FS_FEATURE_FLEXIBLE_INLINE_XATTR 0x0040 +#define F2FS_FEATURE_QUOTA_INO 0x0080 #define F2FS_HAS_FEATURE(sb, mask) \ ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) @@ -214,7 +219,7 @@ enum { #define BATCHED_TRIM_BLOCKS(sbi) \ (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) #define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) -#define DISCARD_ISSUE_RATE 8 +#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */ #define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */ #define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */ #define DEF_CP_INTERVAL 60 /* 60 secs */ @@ -225,7 +230,6 @@ struct cp_control { __u64 trim_start; __u64 trim_end; __u64 trim_minlen; - __u64 trimmed; }; /* @@ -244,12 +248,14 @@ enum { ORPHAN_INO, /* for orphan ino list */ APPEND_INO, /* for append ino list */ UPDATE_INO, /* for update ino list */ + FLUSH_INO, /* for multiple device flushing */ MAX_INO_ENTRY, /* max. list */ }; struct ino_entry { - struct list_head list; /* list head */ - nid_t ino; /* inode number */ + struct list_head list; /* list head */ + nid_t ino; /* inode number */ + unsigned int dirty_device; /* dirty device bitmap */ }; /* for the list of inodes to be GCed */ @@ -273,10 +279,6 @@ struct discard_entry { #define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \ (MAX_PLIST_NUM - 1) : (blk_num - 1)) -#define P_ACTIVE 0x01 -#define P_TRIM 0x02 -#define plist_issue(tag) (((tag) & P_ACTIVE) || ((tag) & P_TRIM)) - enum { D_PREP, D_SUBMIT, @@ -308,12 +310,32 @@ struct discard_cmd { int error; /* bio error */ }; +enum { + DPOLICY_BG, + DPOLICY_FORCE, + DPOLICY_FSTRIM, + DPOLICY_UMOUNT, + MAX_DPOLICY, +}; + +struct discard_policy { + int type; /* type of discard */ + unsigned int min_interval; /* used for candidates exist */ + unsigned int max_interval; /* used for candidates not exist */ + unsigned int max_requests; /* # of discards issued per round */ + unsigned int io_aware_gran; /* minimum granularity discard not be aware of I/O */ + bool io_aware; /* issue discard in idle time */ + bool sync; /* submit discard with REQ_SYNC flag */ + unsigned int granularity; /* discard granularity */ +}; + struct discard_cmd_control { struct task_struct *f2fs_issue_discard; /* discard thread */ struct list_head entry_list; /* 4KB discard entry list */ struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */ unsigned char pend_list_tag[MAX_PLIST_NUM];/* tag for pending entries */ struct list_head wait_list; /* store on-flushing entries */ + struct list_head fstrim_list; /* in-flight discard from fstrim */ wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */ unsigned int discard_wake; /* to wake up discard thread */ struct mutex cmd_lock; @@ -443,11 +465,14 @@ struct f2fs_flush_device { /* for inline stuff */ #define DEF_INLINE_RESERVED_SIZE 1 +#define DEF_MIN_INLINE_SIZE 1 static inline int get_extra_isize(struct inode *inode); -#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \ - (CUR_ADDRS_PER_INODE(inode) - \ - DEF_INLINE_RESERVED_SIZE - \ - F2FS_INLINE_XATTR_ADDRS)) +static inline int get_inline_xattr_addrs(struct inode *inode); +#define F2FS_INLINE_XATTR_ADDRS(inode) get_inline_xattr_addrs(inode) +#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \ + (CUR_ADDRS_PER_INODE(inode) - \ + F2FS_INLINE_XATTR_ADDRS(inode) - \ + DEF_INLINE_RESERVED_SIZE)) /* for inline dir */ #define NR_INLINE_DENTRY(inode) (MAX_INLINE_DATA(inode) * BITS_PER_BYTE / \ @@ -647,6 +672,7 @@ struct f2fs_inode_info { #endif struct list_head dirty_list; /* dirty list for dirs and files */ struct list_head gdirty_list; /* linked in global dirty list */ + struct list_head inmem_ilist; /* list for inmem inodes */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct task_struct *inmem_task; /* store inmemory task */ struct mutex inmem_lock; /* lock for inmemory pages */ @@ -657,6 +683,7 @@ struct f2fs_inode_info { int i_extra_isize; /* size of extra space located in i_addr */ kprojid_t i_projid; /* id for project quota */ + int i_inline_xattr_size; /* inline xattr size */ }; static inline void get_extent_info(struct extent_info *ext, @@ -730,10 +757,13 @@ static inline void __try_update_largest_extent(struct inode *inode, } } -enum nid_list { - FREE_NID_LIST, - ALLOC_NID_LIST, - MAX_NID_LIST, +/* + * For free nid management + */ +enum nid_state { + FREE_NID, /* newly added to free nid list */ + PREALLOC_NID, /* it is preallocated */ + MAX_NID_STATE, }; struct f2fs_nm_info { @@ -756,8 +786,8 @@ struct f2fs_nm_info { /* free node ids management */ struct radix_tree_root free_nid_root;/* root of the free_nid cache */ - struct list_head nid_list[MAX_NID_LIST];/* lists for free nids */ - unsigned int nid_cnt[MAX_NID_LIST]; /* the number of free node id */ + struct list_head free_nid_list; /* list for free nids excluding preallocated nids */ + unsigned int nid_cnt[MAX_NID_STATE]; /* the number of free node id */ spinlock_t nid_list_lock; /* protect nid lists ops */ struct mutex build_lock; /* lock for build free nids */ unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE]; @@ -835,6 +865,7 @@ enum { struct flush_cmd { struct completion wait; struct llist_node llnode; + nid_t ino; int ret; }; @@ -853,6 +884,8 @@ struct f2fs_sm_info { struct dirty_seglist_info *dirty_info; /* dirty segment information */ struct curseg_info *curseg_array; /* active segment information */ + struct rw_semaphore curseg_lock; /* for preventing curseg change */ + block_t seg0_blkaddr; /* block address of 0'th segment */ block_t main_blkaddr; /* start block address of main area */ block_t ssa_blkaddr; /* start block address of SSA area */ @@ -874,6 +907,7 @@ struct f2fs_sm_info { unsigned int min_ipu_util; /* in-place-update threshold */ unsigned int min_fsync_blocks; /* threshold for fsync */ unsigned int min_hot_blocks; /* threshold for hot block allocation */ + unsigned int min_ssr_sections; /* threshold to trigger SSR allocation */ /* for flush command control */ struct flush_cmd_control *fcc_info; @@ -895,6 +929,7 @@ struct f2fs_sm_info { enum count_type { F2FS_DIRTY_DENTS, F2FS_DIRTY_DATA, + F2FS_DIRTY_QDATA, F2FS_DIRTY_NODES, F2FS_DIRTY_META, F2FS_INMEM_PAGES, @@ -943,6 +978,18 @@ enum need_lock_type { LOCK_RETRY, }; +enum cp_reason_type { + CP_NO_NEEDED, + CP_NON_REGULAR, + CP_HARDLINK, + CP_SB_NEED_CP, + CP_WRONG_PINO, + CP_NO_SPC_ROLL, + CP_NODE_NEED_CP, + CP_FASTBOOT_MODE, + CP_SPEC_LOG_NUM, +}; + enum iostat_type { APP_DIRECT_IO, /* app direct IOs */ APP_BUFFERED_IO, /* app buffered IOs */ @@ -962,6 +1009,7 @@ enum iostat_type { struct f2fs_io_info { struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ + nid_t ino; /* inode number */ enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ enum temp_type temp; /* contains HOT/WARM/COLD */ int op; /* contains REQ_OP_ */ @@ -1006,6 +1054,7 @@ enum inode_type { DIR_INODE, /* for dirty dir inode */ FILE_INODE, /* for dirty regular/symlink inode */ DIRTY_META, /* for all dirtied inode metadata */ + ATOMIC_FILE, /* for all atomic files */ NR_INODE_TYPE, }; @@ -1108,12 +1157,15 @@ struct f2fs_sb_info { loff_t max_file_blocks; /* max block index of file */ int active_logs; /* # of active logs */ int dir_level; /* directory level */ + int inline_xattr_size; /* inline xattr size */ + unsigned int trigger_ssr_threshold; /* threshold to trigger ssr */ block_t user_block_count; /* # of user blocks */ block_t total_valid_block_count; /* # of valid blocks */ block_t discard_blks; /* discard command candidats */ block_t last_valid_block_count; /* for recovery */ block_t reserved_blocks; /* configurable reserved blocks */ + block_t current_reserved_blocks; /* current reserved blocks */ u32 s_next_generation; /* for NFS support */ @@ -1179,6 +1231,8 @@ struct f2fs_sb_info { struct list_head s_list; int s_ndevs; /* number of devices */ struct f2fs_dev_info *devs; /* for device list */ + unsigned int dirty_device; /* for checkpoint data flush */ + spinlock_t dev_lock; /* protect dirty_device */ struct mutex umount_mutex; unsigned int shrinker_run_no; @@ -1242,8 +1296,7 @@ static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type) static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type) { - struct timespec ts = {sbi->interval_time[type], 0}; - unsigned long interval = timespec_to_jiffies(&ts); + unsigned long interval = sbi->interval_time[type] * HZ; return time_after(jiffies, sbi->last_time[type] + interval); } @@ -1410,6 +1463,13 @@ static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp) return le64_to_cpu(cp->checkpoint_ver); } +static inline unsigned long f2fs_qf_ino(struct super_block *sb, int type) +{ + if (type < F2FS_MAX_QUOTAS) + return le32_to_cpu(F2FS_SB(sb)->raw_super->qf_ino[type]); + return 0; +} + static inline __u64 cur_cp_crc(struct f2fs_checkpoint *cp) { size_t crc_offset = le32_to_cpu(cp->checksum_offset); @@ -1588,7 +1648,8 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, spin_lock(&sbi->stat_lock); sbi->total_valid_block_count += (block_t)(*count); - avail_user_block_count = sbi->user_block_count - sbi->reserved_blocks; + avail_user_block_count = sbi->user_block_count - + sbi->current_reserved_blocks; if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { diff = sbi->total_valid_block_count - avail_user_block_count; *count -= diff; @@ -1622,6 +1683,10 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count); f2fs_bug_on(sbi, inode->i_blocks < sectors); sbi->total_valid_block_count -= (block_t)count; + if (sbi->reserved_blocks && + sbi->current_reserved_blocks < sbi->reserved_blocks) + sbi->current_reserved_blocks = min(sbi->reserved_blocks, + sbi->current_reserved_blocks + count); spin_unlock(&sbi->stat_lock); f2fs_i_blocks_write(inode, count, false, true); } @@ -1642,6 +1707,8 @@ static inline void inode_inc_dirty_pages(struct inode *inode) atomic_inc(&F2FS_I(inode)->dirty_pages); inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); + if (IS_NOQUOTA(inode)) + inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_QDATA); } static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) @@ -1658,6 +1725,8 @@ static inline void inode_dec_dirty_pages(struct inode *inode) atomic_dec(&F2FS_I(inode)->dirty_pages); dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ? F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA); + if (IS_NOQUOTA(inode)) + dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_QDATA); } static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type) @@ -1765,10 +1834,17 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, return ret; } +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(sbi, FAULT_BLOCK)) { + f2fs_show_injection_info(FAULT_BLOCK); + goto enospc; + } +#endif + spin_lock(&sbi->stat_lock); valid_block_count = sbi->total_valid_block_count + 1; - if (unlikely(valid_block_count + sbi->reserved_blocks > + if (unlikely(valid_block_count + sbi->current_reserved_blocks > sbi->user_block_count)) { spin_unlock(&sbi->stat_lock); goto enospc; @@ -1811,6 +1887,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, sbi->total_valid_node_count--; sbi->total_valid_block_count--; + if (sbi->reserved_blocks && + sbi->current_reserved_blocks < sbi->reserved_blocks) + sbi->current_reserved_blocks++; spin_unlock(&sbi->stat_lock); @@ -1857,6 +1936,19 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); } +static inline struct page *f2fs_pagecache_get_page( + struct address_space *mapping, pgoff_t index, + int fgp_flags, gfp_t gfp_mask) +{ +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) { + f2fs_show_injection_info(FAULT_PAGE_GET); + return NULL; + } +#endif + return pagecache_get_page(mapping, index, fgp_flags, gfp_mask); +} + static inline void f2fs_copy_page(struct page *src, struct page *dst) { char *src_kaddr = kmap(src); @@ -1906,15 +1998,25 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, return entry; } -static inline struct bio *f2fs_bio_alloc(int npages) +static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, + int npages, bool no_fail) { struct bio *bio; - /* No failure on bio allocation */ - bio = bio_alloc(GFP_NOIO, npages); - if (!bio) - bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages); - return bio; + if (no_fail) { + /* No failure on bio allocation */ + bio = bio_alloc(GFP_NOIO, npages); + if (!bio) + bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages); + return bio; + } +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(sbi, FAULT_ALLOC_BIO)) { + f2fs_show_injection_info(FAULT_ALLOC_BIO); + return NULL; + } +#endif + return bio_alloc(GFP_KERNEL, npages); } static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, @@ -2224,25 +2326,20 @@ static inline int f2fs_has_inline_xattr(struct inode *inode) static inline unsigned int addrs_per_inode(struct inode *inode) { - if (f2fs_has_inline_xattr(inode)) - return CUR_ADDRS_PER_INODE(inode) - F2FS_INLINE_XATTR_ADDRS; - return CUR_ADDRS_PER_INODE(inode); + return CUR_ADDRS_PER_INODE(inode) - F2FS_INLINE_XATTR_ADDRS(inode); } -static inline void *inline_xattr_addr(struct page *page) +static inline void *inline_xattr_addr(struct inode *inode, struct page *page) { struct f2fs_inode *ri = F2FS_INODE(page); return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - - F2FS_INLINE_XATTR_ADDRS]); + F2FS_INLINE_XATTR_ADDRS(inode)]); } static inline int inline_xattr_size(struct inode *inode) { - if (f2fs_has_inline_xattr(inode)) - return F2FS_INLINE_XATTR_ADDRS << 2; - else - return 0; + return get_inline_xattr_addrs(inode) * sizeof(__le32); } static inline int f2fs_has_inline_data(struct inode *inode) @@ -2323,9 +2420,10 @@ static inline void clear_file(struct inode *inode, int type) static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) { + bool ret; + if (dsync) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - bool ret; spin_lock(&sbi->inode_lock[DIRTY_META]); ret = list_empty(&F2FS_I(inode)->gdirty_list); @@ -2336,9 +2434,15 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) file_keep_isize(inode) || i_size_read(inode) & PAGE_MASK) return false; - return F2FS_I(inode)->last_disk_size == i_size_read(inode); + + down_read(&F2FS_I(inode)->i_sem); + ret = F2FS_I(inode)->last_disk_size == i_size_read(inode); + up_read(&F2FS_I(inode)->i_sem); + + return ret; } +#define sb_rdonly f2fs_readonly static inline int f2fs_readonly(struct super_block *sb) { return sb->s_flags & MS_RDONLY; @@ -2406,6 +2510,12 @@ static inline int get_extra_isize(struct inode *inode) return F2FS_I(inode)->i_extra_isize / sizeof(__le32); } +static inline int f2fs_sb_has_flexible_inline_xattr(struct super_block *sb); +static inline int get_inline_xattr_addrs(struct inode *inode) +{ + return F2FS_I(inode)->i_inline_xattr_size; +} + #define get_inode_mode(i) \ ((is_inode_flag_set(i, FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) @@ -2534,7 +2644,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) */ int f2fs_inode_dirtied(struct inode *inode, bool sync); void f2fs_inode_synced(struct inode *inode); -void f2fs_enable_quota_files(struct f2fs_sb_info *sbi); +int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly); void f2fs_quota_off_umount(struct super_block *sb); int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); int f2fs_sync_fs(struct super_block *sb, int sync); @@ -2562,7 +2672,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni); pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs); int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode); int truncate_inode_blocks(struct inode *inode, pgoff_t from); -int truncate_xattr_node(struct inode *inode, struct page *page); +int truncate_xattr_node(struct inode *inode); int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino); int remove_inode_page(struct inode *inode); struct page *new_inode_page(struct inode *inode); @@ -2597,19 +2707,22 @@ void destroy_node_manager_caches(void); */ bool need_SSR(struct f2fs_sb_info *sbi); void register_inmem_page(struct inode *inode, struct page *page); +void drop_inmem_pages_all(struct f2fs_sb_info *sbi); void drop_inmem_pages(struct inode *inode); void drop_inmem_page(struct inode *inode, struct page *page); int commit_inmem_pages(struct inode *inode); void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need); void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi); -int f2fs_issue_flush(struct f2fs_sb_info *sbi); +int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino); int create_flush_cmd_control(struct f2fs_sb_info *sbi); +int f2fs_flush_device_cache(struct f2fs_sb_info *sbi); void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free); void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); -void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new); +void init_discard_policy(struct discard_policy *dpolicy, int discard_type, + unsigned int granularity); void stop_discard_thread(struct f2fs_sb_info *sbi); -void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount); +bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi); void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); void release_discard_addrs(struct f2fs_sb_info *sbi); int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); @@ -2664,6 +2777,10 @@ void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); void release_ino_entry(struct f2fs_sb_info *sbi, bool all); bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode); +void set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, + unsigned int devidx, int type); +bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino, + unsigned int devidx, int type); int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi); int acquire_orphan_inode(struct f2fs_sb_info *sbi); void release_orphan_inode(struct f2fs_sb_info *sbi); @@ -2751,14 +2868,16 @@ struct f2fs_stat_info { unsigned long long hit_largest, hit_cached, hit_rbtree; unsigned long long hit_total, total_ext; int ext_tree, zombie_tree, ext_node; - int ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta; + int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta; + int ndirty_data, ndirty_qdata; int inmem_pages; - unsigned int ndirty_dirs, ndirty_files, ndirty_all; + unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all; int nats, dirty_nats, sits, dirty_sits; int free_nids, avail_nids, alloc_nids; int total_count, utilization; int bg_gc, nr_wb_cp_data, nr_wb_data; - int nr_flushing, nr_flushed, nr_discarding, nr_discarded; + int nr_flushing, nr_flushed, flush_list_empty; + int nr_discarding, nr_discarded; int nr_discard_cmd; unsigned int undiscard_blks; int inline_xattr, inline_inode, inline_dir, append, update, orphans; @@ -3066,6 +3185,16 @@ static inline int f2fs_sb_has_inode_chksum(struct super_block *sb) return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_INODE_CHKSUM); } +static inline int f2fs_sb_has_flexible_inline_xattr(struct super_block *sb) +{ + return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_FLEXIBLE_INLINE_XATTR); +} + +static inline int f2fs_sb_has_quota_ino(struct super_block *sb) +{ + return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_QUOTA_INO); +} + #ifdef CONFIG_BLK_DEV_ZONED static inline int get_blkz_type(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkaddr) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a9e1655a6bf8..bfff53f658e1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -56,6 +56,11 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, struct dnode_of_data dn; int err; + if (unlikely(f2fs_cp_error(sbi))) { + err = -EIO; + goto err; + } + sb_start_pagefault(inode->i_sb); f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); @@ -117,6 +122,7 @@ out_sem: out: sb_end_pagefault(inode->i_sb); f2fs_update_time(sbi, REQ_TIME); +err: return block_page_mkwrite_return(err); } @@ -141,27 +147,29 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) return 1; } -static inline bool need_do_checkpoint(struct inode *inode) +static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - bool need_cp = false; + enum cp_reason_type cp_reason = CP_NO_NEEDED; - if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) - need_cp = true; + if (!S_ISREG(inode->i_mode)) + cp_reason = CP_NON_REGULAR; + else if (inode->i_nlink != 1) + cp_reason = CP_HARDLINK; else if (is_sbi_flag_set(sbi, SBI_NEED_CP)) - need_cp = true; + cp_reason = CP_SB_NEED_CP; else if (file_wrong_pino(inode)) - need_cp = true; + cp_reason = CP_WRONG_PINO; else if (!space_for_roll_forward(sbi)) - need_cp = true; + cp_reason = CP_NO_SPC_ROLL; else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) - need_cp = true; + cp_reason = CP_NODE_NEED_CP; else if (test_opt(sbi, FASTBOOT)) - need_cp = true; + cp_reason = CP_FASTBOOT_MODE; else if (sbi->active_logs == 2) - need_cp = true; + cp_reason = CP_SPEC_LOG_NUM; - return need_cp; + return cp_reason; } static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) @@ -196,7 +204,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t ino = inode->i_ino; int ret = 0; - bool need_cp = false; + enum cp_reason_type cp_reason = 0; struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, @@ -215,7 +223,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, clear_inode_flag(inode, FI_NEED_IPU); if (ret) { - trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); + trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); return ret; } @@ -246,10 +254,10 @@ go_write: * sudden-power-off. */ down_read(&F2FS_I(inode)->i_sem); - need_cp = need_do_checkpoint(inode); + cp_reason = need_do_checkpoint(inode); up_read(&F2FS_I(inode)->i_sem); - if (need_cp) { + if (cp_reason) { /* all the dirty node pages should be flushed for POR */ ret = f2fs_sync_fs(inode->i_sb, 1); @@ -297,19 +305,24 @@ sync_nodes: remove_ino_entry(sbi, ino, APPEND_INO); clear_inode_flag(inode, FI_APPEND_WRITE); flush_out: - remove_ino_entry(sbi, ino, UPDATE_INO); - clear_inode_flag(inode, FI_UPDATE_WRITE); if (!atomic) - ret = f2fs_issue_flush(sbi); + ret = f2fs_issue_flush(sbi, inode->i_ino); + if (!ret) { + remove_ino_entry(sbi, ino, UPDATE_INO); + clear_inode_flag(inode, FI_UPDATE_WRITE); + remove_ino_entry(sbi, ino, FLUSH_INO); + } f2fs_update_time(sbi, REQ_TIME); out: - trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); + trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret); f2fs_trace_ios(NULL, 1); return ret; } int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file))))) + return -EIO; return f2fs_do_sync_file(file, start, end, datasync, false); } @@ -446,6 +459,9 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) struct inode *inode = file_inode(file); int err; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + /* we don't need to use inline_data strictly */ err = f2fs_convert_inline_inode(inode); if (err) @@ -632,6 +648,9 @@ int f2fs_truncate(struct inode *inode) { int err; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) return 0; @@ -667,7 +686,8 @@ int f2fs_getattr(struct vfsmount *mnt, generic_fillattr(inode, stat); /* we need to show initial sectors used for inline_data/dentries */ - if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) + if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) || + f2fs_has_inline_dentry(inode)) stat->blocks += (stat->size + 511) >> 9; return 0; @@ -709,6 +729,9 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) int err; bool size_changed = false; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + err = inode_change_ok(inode, attr); if (err) return err; @@ -761,6 +784,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) inode->i_mtime = inode->i_ctime = current_time(inode); } + down_write(&F2FS_I(inode)->i_sem); + F2FS_I(inode)->last_disk_size = i_size_read(inode); + up_write(&F2FS_I(inode)->i_sem); + size_changed = true; } @@ -834,7 +861,7 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) err = get_dnode_of_data(&dn, pg_start, LOOKUP_NODE); if (err) { if (err == -ENOENT) { - pg_start++; + pg_start = get_next_page_offset(&dn, pg_start); continue; } return err; @@ -1149,11 +1176,14 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) if (ret) goto out; + /* avoid gc operation during block exchange */ + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); + truncate_pagecache(inode, offset); ret = f2fs_do_collapse(inode, pg_start, pg_end); if (ret) - goto out; + goto out_unlock; /* write out all moved pages, if possible */ filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); @@ -1165,7 +1195,8 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) ret = truncate_blocks(inode, new_size, true); if (!ret) f2fs_i_size_write(inode, new_size); - +out_unlock: + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); out: up_write(&F2FS_I(inode)->i_mmap_sem); return ret; @@ -1348,6 +1379,9 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) if (ret) goto out; + /* avoid gc operation during block exchange */ + down_write(&F2FS_I(inode)->dio_rwsem[WRITE]); + truncate_pagecache(inode, offset); pg_start = offset >> PAGE_SHIFT; @@ -1375,6 +1409,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) if (!ret) f2fs_i_size_write(inode, new_size); + + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); out: up_write(&F2FS_I(inode)->i_mmap_sem); return ret; @@ -1424,8 +1460,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset, new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end; } - if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) - f2fs_i_size_write(inode, new_size); + if (new_size > i_size_read(inode)) { + if (mode & FALLOC_FL_KEEP_SIZE) + file_set_keep_isize(inode); + else + f2fs_i_size_write(inode, new_size); + } return err; } @@ -1436,6 +1476,9 @@ static long f2fs_fallocate(struct file *file, int mode, struct inode *inode = file_inode(file); long ret = 0; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + /* f2fs only support ->fallocate for regular file */ if (!S_ISREG(inode->i_mode)) return -EINVAL; @@ -1469,8 +1512,6 @@ static long f2fs_fallocate(struct file *file, int mode, if (!ret) { inode->i_mtime = inode->i_ctime = current_time(inode); f2fs_mark_inode_dirty_sync(inode, false); - if (mode & FALLOC_FL_KEEP_SIZE) - file_set_keep_isize(inode); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); } @@ -1864,6 +1905,9 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); + if (!f2fs_sb_has_crypto(inode->i_sb)) + return -EOPNOTSUPP; + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return fscrypt_ioctl_set_policy(filp, (const void __user *)arg); @@ -1871,6 +1915,8 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg) { + if (!f2fs_sb_has_crypto(file_inode(filp)->i_sb)) + return -EOPNOTSUPP; return fscrypt_ioctl_get_policy(filp, (void __user *)arg); } @@ -2226,9 +2272,13 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, } inode_lock(src); + down_write(&F2FS_I(src)->dio_rwsem[WRITE]); if (src != dst) { - if (!inode_trylock(dst)) { - ret = -EBUSY; + ret = -EBUSY; + if (!inode_trylock(dst)) + goto out; + if (!down_write_trylock(&F2FS_I(dst)->dio_rwsem[WRITE])) { + inode_unlock(dst); goto out; } } @@ -2288,9 +2338,12 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, } f2fs_unlock_op(sbi); out_unlock: - if (src != dst) + if (src != dst) { + up_write(&F2FS_I(dst)->dio_rwsem[WRITE]); inode_unlock(dst); + } out: + up_write(&F2FS_I(src)->dio_rwsem[WRITE]); inode_unlock(src); return ret; } @@ -2412,6 +2465,9 @@ static int f2fs_ioc_get_features(struct file *filp, unsigned long arg) long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) + return -EIO; + switch (cmd) { case F2FS_IOC_GETFLAGS: return f2fs_ioc_getflags(filp, arg); @@ -2465,6 +2521,9 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct blk_plug plug; ssize_t ret; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + inode_lock(inode); ret = generic_write_checks(iocb, from); if (ret > 0) { @@ -2475,6 +2534,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) err = f2fs_preallocate_blocks(iocb, from); if (err) { + clear_inode_flag(inode, FI_NO_PREALLOC); inode_unlock(inode); return err; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index bd16e6631cf3..be9fd616736b 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -267,16 +267,6 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) return UINT_MAX - ((100 * (100 - u) * age) / (100 + u)); } -static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi, - unsigned int segno) -{ - unsigned int valid_blocks = - get_valid_blocks(sbi, segno, true); - - return IS_DATASEG(get_seg_entry(sbi, segno)->type) ? - valid_blocks * 2 : valid_blocks; -} - static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, unsigned int segno, struct victim_sel_policy *p) { @@ -285,7 +275,7 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, /* alloc_mode == LFS */ if (p->gc_mode == GC_GREEDY) - return get_greedy_cost(sbi, segno); + return get_valid_blocks(sbi, segno, true); else return get_cb_cost(sbi, segno); } @@ -466,10 +456,10 @@ static int check_valid_map(struct f2fs_sb_info *sbi, struct seg_entry *sentry; int ret; - mutex_lock(&sit_i->sentry_lock); + down_read(&sit_i->sentry_lock); sentry = get_seg_entry(sbi, segno); ret = f2fs_test_bit(offset, sentry->cur_valid_map); - mutex_unlock(&sit_i->sentry_lock); + up_read(&sit_i->sentry_lock); return ret; } @@ -608,6 +598,7 @@ static void move_data_block(struct inode *inode, block_t bidx, { struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), + .ino = inode->i_ino, .type = DATA, .temp = COLD, .op = REQ_OP_READ, @@ -659,8 +650,8 @@ static void move_data_block(struct inode *inode, block_t bidx, allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, &sum, CURSEG_COLD_DATA, NULL, false); - fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi), newaddr, - FGP_LOCK | FGP_CREAT, GFP_NOFS); + fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi), + newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); if (!fio.encrypted_page) { err = -ENOMEM; goto recover_block; @@ -738,6 +729,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type, } else { struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), + .ino = inode->i_ino, .type = DATA, .temp = COLD, .op = REQ_OP_WRITE, @@ -840,10 +832,17 @@ next_step: continue; } + if (!down_write_trylock( + &F2FS_I(inode)->dio_rwsem[WRITE])) { + iput(inode); + continue; + } + start_bidx = start_bidx_of_node(nofs, inode); data_page = get_read_data_page(inode, start_bidx + ofs_in_node, REQ_RAHEAD, true); + up_write(&F2FS_I(inode)->dio_rwsem[WRITE]); if (IS_ERR(data_page)) { iput(inode); continue; @@ -901,10 +900,10 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, struct sit_info *sit_i = SIT_I(sbi); int ret; - mutex_lock(&sit_i->sentry_lock); + down_write(&sit_i->sentry_lock); ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, NO_CHECK_TYPE, LFS); - mutex_unlock(&sit_i->sentry_lock); + up_write(&sit_i->sentry_lock); return ret; } @@ -952,8 +951,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, /* * this is to avoid deadlock: * - lock_page(sum_page) - f2fs_replace_block - * - check_valid_map() - mutex_lock(sentry_lock) - * - mutex_lock(sentry_lock) - change_curseg() + * - check_valid_map() - down_write(sentry_lock) + * - down_read(sentry_lock) - change_curseg() * - lock_page(sum_page) */ if (type == SUM_TYPE_NODE) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index fbf22b0f667f..91d5d831be72 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -130,6 +130,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) { struct f2fs_io_info fio = { .sbi = F2FS_I_SB(dn->inode), + .ino = dn->inode->i_ino, .type = DATA, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_NOIDLE | REQ_PRIO, diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 50c88e37ed66..9684d53563f1 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -232,6 +232,23 @@ static int do_read_inode(struct inode *inode) fi->i_extra_isize = f2fs_has_extra_attr(inode) ? le16_to_cpu(ri->i_extra_isize) : 0; + if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) { + f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode)); + fi->i_inline_xattr_size = le16_to_cpu(ri->i_inline_xattr_size); + } else if (f2fs_has_inline_xattr(inode) || + f2fs_has_inline_dentry(inode)) { + fi->i_inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; + } else { + + /* + * Previous inline data or directory always reserved 200 bytes + * in inode layout, even if inline_xattr is disabled. In order + * to keep inline_dentry's structure for backward compatibility, + * we get the space back only from inline_data. + */ + fi->i_inline_xattr_size = 0; + } + /* check data exist */ if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) __recover_inline_status(inode, node_page); @@ -384,6 +401,10 @@ int update_inode(struct inode *inode, struct page *node_page) if (f2fs_has_extra_attr(inode)) { ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize); + if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode)->sb)) + ri->i_inline_xattr_size = + cpu_to_le16(F2FS_I(inode)->i_inline_xattr_size); + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) && F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, i_projid)) { @@ -480,6 +501,7 @@ void f2fs_evict_inode(struct inode *inode) remove_ino_entry(sbi, inode->i_ino, APPEND_INO); remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); + remove_ino_entry(sbi, inode->i_ino, FLUSH_INO); sb_start_intwrite(inode->i_sb); set_inode_flag(inode, FI_NO_ALLOC); @@ -519,8 +541,10 @@ no_delete: stat_dec_inline_dir(inode); stat_dec_inline_inode(inode); - if (!is_set_ckpt_flags(sbi, CP_ERROR_FLAG)) + if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG))) f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE)); + else + f2fs_inode_synced(inode); /* ino == 0, if f2fs_new_inode() was failed t*/ if (inode->i_ino) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index d92b8e9064cb..cf8f4370d256 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -29,6 +29,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) nid_t ino; struct inode *inode; bool nid_free = false; + int xattr_size = 0; int err; inode = new_inode(dir->i_sb); @@ -86,11 +87,23 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (test_opt(sbi, INLINE_XATTR)) set_inode_flag(inode, FI_INLINE_XATTR); + if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode)) set_inode_flag(inode, FI_INLINE_DATA); if (f2fs_may_inline_dentry(inode)) set_inode_flag(inode, FI_INLINE_DENTRY); + if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) { + f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode)); + if (f2fs_has_inline_xattr(inode)) + xattr_size = sbi->inline_xattr_size; + /* Otherwise, will be 0 */ + } else if (f2fs_has_inline_xattr(inode) || + f2fs_has_inline_dentry(inode)) { + xattr_size = DEFAULT_INLINE_XATTR_ADDRS; + } + F2FS_I(inode)->i_inline_xattr_size = xattr_size; + f2fs_init_extent_tree(inode, NULL); stat_inc_inline_xattr(inode); @@ -177,6 +190,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, nid_t ino = 0; int err; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + err = dquot_initialize(dir); if (err) return err; @@ -221,6 +237,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, struct f2fs_sb_info *sbi = F2FS_I_SB(dir); int err; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + if (f2fs_encrypted_inode(dir) && !fscrypt_has_permitted_context(dir, inode)) return -EPERM; @@ -331,12 +350,15 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, struct inode *inode = NULL; struct f2fs_dir_entry *de; struct page *page; - nid_t ino; + struct dentry *new; + nid_t ino = -1; int err = 0; unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir)); + trace_f2fs_lookup_start(dir, dentry, flags); + if (f2fs_encrypted_inode(dir)) { - int res = fscrypt_get_encryption_info(dir); + err = fscrypt_get_encryption_info(dir); /* * DCACHE_ENCRYPTED_WITH_KEY is set if the dentry is @@ -346,18 +368,22 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, if (fscrypt_has_encryption_key(dir)) fscrypt_set_encrypted_dentry(dentry); fscrypt_set_d_op(dentry); - if (res && res != -ENOKEY) - return ERR_PTR(res); + if (err && err != -ENOKEY) + goto out; } - if (dentry->d_name.len > F2FS_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); + if (dentry->d_name.len > F2FS_NAME_LEN) { + err = -ENAMETOOLONG; + goto out; + } de = f2fs_find_entry(dir, &dentry->d_name, &page); if (!de) { - if (IS_ERR(page)) - return (struct dentry *)page; - return d_splice_alias(inode, dentry); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto out; + } + goto out_splice; } ino = le32_to_cpu(de->ino); @@ -365,19 +391,21 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, f2fs_put_page(page, 0); inode = f2fs_iget(dir->i_sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } if ((dir->i_ino == root_ino) && f2fs_has_inline_dots(dir)) { err = __recover_dot_dentries(dir, root_ino); if (err) - goto err_out; + goto out_iput; } if (f2fs_has_inline_dots(inode)) { err = __recover_dot_dentries(inode, dir->i_ino); if (err) - goto err_out; + goto out_iput; } if (f2fs_encrypted_inode(dir) && (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && @@ -386,12 +414,18 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, "Inconsistent encryption contexts: %lu/%lu", dir->i_ino, inode->i_ino); err = -EPERM; - goto err_out; + goto out_iput; } - return d_splice_alias(inode, dentry); - -err_out: +out_splice: + new = d_splice_alias(inode, dentry); + if (IS_ERR(new)) + err = PTR_ERR(new); + trace_f2fs_lookup_end(dir, dentry, ino, err); + return new; +out_iput: iput(inode); +out: + trace_f2fs_lookup_end(dir, dentry, ino, err); return ERR_PTR(err); } @@ -405,9 +439,15 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) trace_f2fs_unlink_enter(dir, dentry); + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + err = dquot_initialize(dir); if (err) return err; + err = dquot_initialize(inode); + if (err) + return err; de = f2fs_find_entry(dir, &dentry->d_name, &page); if (!de) { @@ -457,6 +497,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, struct fscrypt_symlink_data *sd = NULL; int err; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + if (f2fs_encrypted_inode(dir)) { err = fscrypt_get_encryption_info(dir); if (err) @@ -563,6 +606,9 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct inode *inode; int err; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + err = dquot_initialize(dir); if (err) return err; @@ -615,6 +661,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, struct inode *inode; int err = 0; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + err = dquot_initialize(dir); if (err) return err; @@ -709,6 +758,9 @@ out: static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(dir)))) + return -EIO; + if (f2fs_encrypted_inode(dir)) { int err = fscrypt_get_encryption_info(dir); if (err) @@ -720,6 +772,9 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) static int f2fs_create_whiteout(struct inode *dir, struct inode **whiteout) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(dir)))) + return -EIO; + return __f2fs_tmpfile(dir, NULL, S_IFCHR | WHITEOUT_MODE, whiteout); } @@ -739,6 +794,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, bool is_old_inline = f2fs_has_inline_dentry(old_dir); int err = -ENOENT; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + if ((f2fs_encrypted_inode(old_dir) && !fscrypt_has_encryption_key(old_dir)) || (f2fs_encrypted_inode(new_dir) && @@ -764,6 +822,12 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (err) goto out; + if (new_inode) { + err = dquot_initialize(new_inode); + if (err) + goto out; + } + old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { if (IS_ERR(old_page)) @@ -932,6 +996,9 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, int old_nlink = 0, new_nlink = 0; int err = -ENOENT; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + if ((f2fs_encrypted_inode(old_dir) && !fscrypt_has_encryption_key(old_dir)) || (f2fs_encrypted_inode(new_dir) && diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 32474db18ad9..964c99655942 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -46,7 +46,7 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) * give 25%, 25%, 50%, 50%, 50% memory for each components respectively */ if (type == FREE_NIDS) { - mem_size = (nm_i->nid_cnt[FREE_NID_LIST] * + mem_size = (nm_i->nid_cnt[FREE_NID] * sizeof(struct free_nid)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == NAT_ENTRIES) { @@ -63,7 +63,7 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) } else if (type == INO_ENTRIES) { int i; - for (i = 0; i <= UPDATE_INO; i++) + for (i = 0; i < MAX_INO_ENTRY; i++) mem_size += sbi->im[i].ino_num * sizeof(struct ino_entry); mem_size >>= PAGE_SHIFT; @@ -74,6 +74,10 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) atomic_read(&sbi->total_ext_node) * sizeof(struct extent_node)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); + } else if (type == INMEM_PAGES) { + /* it allows 20% / total_ram for inmemory pages */ + mem_size = get_pages(sbi, F2FS_INMEM_PAGES); + res = mem_size < (val.totalram / 5); } else { if (!sbi->sb->s_bdi->wb.dirty_exceeded) return true; @@ -134,6 +138,44 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) return dst_page; } +static struct nat_entry *__alloc_nat_entry(nid_t nid, bool no_fail) +{ + struct nat_entry *new; + + if (no_fail) + new = f2fs_kmem_cache_alloc(nat_entry_slab, + GFP_NOFS | __GFP_ZERO); + else + new = kmem_cache_alloc(nat_entry_slab, + GFP_NOFS | __GFP_ZERO); + if (new) { + nat_set_nid(new, nid); + nat_reset_flag(new); + } + return new; +} + +static void __free_nat_entry(struct nat_entry *e) +{ + kmem_cache_free(nat_entry_slab, e); +} + +/* must be locked by nat_tree_lock */ +static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i, + struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail) +{ + if (no_fail) + f2fs_radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne); + else if (radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne)) + return NULL; + + if (raw_ne) + node_info_from_raw_nat(&ne->ni, raw_ne); + list_add_tail(&ne->list, &nm_i->nat_entries); + nm_i->nat_cnt++; + return ne; +} + static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) { return radix_tree_lookup(&nm_i->nat_root, n); @@ -150,7 +192,7 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e) list_del(&e->list); radix_tree_delete(&nm_i->nat_root, nat_get_nid(e)); nm_i->nat_cnt--; - kmem_cache_free(nat_entry_slab, e); + __free_nat_entry(e); } static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, @@ -246,49 +288,29 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) return need_update; } -static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid, - bool no_fail) -{ - struct nat_entry *new; - - if (no_fail) { - new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS); - f2fs_radix_tree_insert(&nm_i->nat_root, nid, new); - } else { - new = kmem_cache_alloc(nat_entry_slab, GFP_NOFS); - if (!new) - return NULL; - if (radix_tree_insert(&nm_i->nat_root, nid, new)) { - kmem_cache_free(nat_entry_slab, new); - return NULL; - } - } - - memset(new, 0, sizeof(struct nat_entry)); - nat_set_nid(new, nid); - nat_reset_flag(new); - list_add_tail(&new->list, &nm_i->nat_entries); - nm_i->nat_cnt++; - return new; -} - +/* must be locked by nat_tree_lock */ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, struct f2fs_nat_entry *ne) { struct f2fs_nm_info *nm_i = NM_I(sbi); - struct nat_entry *e; + struct nat_entry *new, *e; + new = __alloc_nat_entry(nid, false); + if (!new) + return; + + down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); - if (!e) { - e = grab_nat_entry(nm_i, nid, false); - if (e) - node_info_from_raw_nat(&e->ni, ne); - } else { + if (!e) + e = __init_nat_entry(nm_i, new, ne, false); + else f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) || nat_get_blkaddr(e) != le32_to_cpu(ne->block_addr) || nat_get_version(e) != ne->version); - } + up_write(&nm_i->nat_tree_lock); + if (e != new) + __free_nat_entry(new); } static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, @@ -296,11 +318,12 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; + struct nat_entry *new = __alloc_nat_entry(ni->nid, true); down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { - e = grab_nat_entry(nm_i, ni->nid, true); + e = __init_nat_entry(nm_i, new, NULL, true); copy_node_info(&e->ni, ni); f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); } else if (new_blkaddr == NEW_ADDR) { @@ -312,6 +335,9 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, copy_node_info(&e->ni, ni); f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR); } + /* let's free early to reduce memory consumption */ + if (e != new) + __free_nat_entry(new); /* sanity check */ f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr); @@ -327,10 +353,6 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { unsigned char version = nat_get_version(e); nat_set_version(e, inc_node_version(version)); - - /* in order to reuse the nid */ - if (nm_i->next_scan_nid > ni->nid) - nm_i->next_scan_nid = ni->nid; } /* change address */ @@ -424,9 +446,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) f2fs_put_page(page, 1); cache: /* cache nat entry */ - down_write(&nm_i->nat_tree_lock); cache_nat_entry(sbi, nid, &ne); - up_write(&nm_i->nat_tree_lock); } /* @@ -962,7 +982,8 @@ fail: return err > 0 ? 0 : err; } -int truncate_xattr_node(struct inode *inode, struct page *page) +/* caller must lock inode page */ +int truncate_xattr_node(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t nid = F2FS_I(inode)->i_xattr_nid; @@ -978,10 +999,7 @@ int truncate_xattr_node(struct inode *inode, struct page *page) f2fs_i_xnid_write(inode, 0); - set_new_dnode(&dn, inode, page, npage, nid); - - if (page) - dn.inode_page_locked = true; + set_new_dnode(&dn, inode, NULL, npage, nid); truncate_node(&dn); return 0; } @@ -1000,7 +1018,7 @@ int remove_inode_page(struct inode *inode) if (err) return err; - err = truncate_xattr_node(inode, dn.inode_page); + err = truncate_xattr_node(inode); if (err) { f2fs_put_dnode(&dn); return err; @@ -1220,7 +1238,8 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) if (!inode) return; - page = pagecache_get_page(inode->i_mapping, 0, FGP_LOCK|FGP_NOWAIT, 0); + page = f2fs_pagecache_get_page(inode->i_mapping, 0, + FGP_LOCK|FGP_NOWAIT, 0); if (!page) goto iput_out; @@ -1244,37 +1263,6 @@ iput_out: iput(inode); } -void move_node_page(struct page *node_page, int gc_type) -{ - if (gc_type == FG_GC) { - struct f2fs_sb_info *sbi = F2FS_P_SB(node_page); - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = 1, - .for_reclaim = 0, - }; - - set_page_dirty(node_page); - f2fs_wait_on_page_writeback(node_page, NODE, true); - - f2fs_bug_on(sbi, PageWriteback(node_page)); - if (!clear_page_dirty_for_io(node_page)) - goto out_page; - - if (NODE_MAPPING(sbi)->a_ops->writepage(node_page, &wbc)) - unlock_page(node_page); - goto release_page; - } else { - /* set page dirty and write it */ - if (!PageWriteback(node_page)) - set_page_dirty(node_page); - } -out_page: - unlock_page(node_page); -release_page: - f2fs_put_page(node_page, 0); -} - static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) { pgoff_t index, end; @@ -1344,6 +1332,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, struct node_info ni; struct f2fs_io_info fio = { .sbi = sbi, + .ino = ino_of_node(page), .type = NODE, .op = REQ_OP_WRITE, .op_flags = wbc_to_write_flags(wbc), @@ -1416,6 +1405,37 @@ redirty_out: return AOP_WRITEPAGE_ACTIVATE; } +void move_node_page(struct page *node_page, int gc_type) +{ + if (gc_type == FG_GC) { + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = 1, + .for_reclaim = 0, + }; + + set_page_dirty(node_page); + f2fs_wait_on_page_writeback(node_page, NODE, true); + + f2fs_bug_on(F2FS_P_SB(node_page), PageWriteback(node_page)); + if (!clear_page_dirty_for_io(node_page)) + goto out_page; + + if (__write_node_page(node_page, false, NULL, + &wbc, false, FS_GC_NODE_IO)) + unlock_page(node_page); + goto release_page; + } else { + /* set page dirty and write it */ + if (!PageWriteback(node_page)) + set_page_dirty(node_page); + } +out_page: + unlock_page(node_page); +release_page: + f2fs_put_page(node_page, 0); +} + static int f2fs_write_node_page(struct page *page, struct writeback_control *wbc) { @@ -1764,35 +1784,54 @@ static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i, return radix_tree_lookup(&nm_i->free_nid_root, n); } -static int __insert_nid_to_list(struct f2fs_sb_info *sbi, - struct free_nid *i, enum nid_list list, bool new) +static int __insert_free_nid(struct f2fs_sb_info *sbi, + struct free_nid *i, enum nid_state state) { struct f2fs_nm_info *nm_i = NM_I(sbi); - if (new) { - int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i); - if (err) - return err; - } + int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i); + if (err) + return err; - f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW : - i->state != NID_ALLOC); - nm_i->nid_cnt[list]++; - list_add_tail(&i->list, &nm_i->nid_list[list]); + f2fs_bug_on(sbi, state != i->state); + nm_i->nid_cnt[state]++; + if (state == FREE_NID) + list_add_tail(&i->list, &nm_i->free_nid_list); return 0; } -static void __remove_nid_from_list(struct f2fs_sb_info *sbi, - struct free_nid *i, enum nid_list list, bool reuse) +static void __remove_free_nid(struct f2fs_sb_info *sbi, + struct free_nid *i, enum nid_state state) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + + f2fs_bug_on(sbi, state != i->state); + nm_i->nid_cnt[state]--; + if (state == FREE_NID) + list_del(&i->list); + radix_tree_delete(&nm_i->free_nid_root, i->nid); +} + +static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i, + enum nid_state org_state, enum nid_state dst_state) { struct f2fs_nm_info *nm_i = NM_I(sbi); - f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW : - i->state != NID_ALLOC); - nm_i->nid_cnt[list]--; - list_del(&i->list); - if (!reuse) - radix_tree_delete(&nm_i->free_nid_root, i->nid); + f2fs_bug_on(sbi, org_state != i->state); + i->state = dst_state; + nm_i->nid_cnt[org_state]--; + nm_i->nid_cnt[dst_state]++; + + switch (dst_state) { + case PREALLOC_NID: + list_del(&i->list); + break; + case FREE_NID: + list_add_tail(&i->list, &nm_i->free_nid_list); + break; + default: + BUG_ON(1); + } } /* return if the nid is recognized as free */ @@ -1810,7 +1849,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS); i->nid = nid; - i->state = NID_NEW; + i->state = FREE_NID; if (radix_tree_preload(GFP_NOFS)) goto err; @@ -1823,7 +1862,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) * - f2fs_create * - f2fs_new_inode * - alloc_nid - * - __insert_nid_to_list(ALLOC_NID_LIST) + * - __insert_nid_to_list(PREALLOC_NID) * - f2fs_balance_fs_bg * - build_free_nids * - __build_free_nids @@ -1836,8 +1875,8 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) * - new_node_page * - set_node_addr * - alloc_nid_done - * - __remove_nid_from_list(ALLOC_NID_LIST) - * - __insert_nid_to_list(FREE_NID_LIST) + * - __remove_nid_from_list(PREALLOC_NID) + * - __insert_nid_to_list(FREE_NID) */ ne = __lookup_nat_cache(nm_i, nid); if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || @@ -1846,13 +1885,13 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) e = __lookup_free_nid_list(nm_i, nid); if (e) { - if (e->state == NID_NEW) + if (e->state == FREE_NID) ret = true; goto err_out; } } ret = true; - err = __insert_nid_to_list(sbi, i, FREE_NID_LIST, true); + err = __insert_free_nid(sbi, i, FREE_NID); err_out: spin_unlock(&nm_i->nid_list_lock); radix_tree_preload_end(); @@ -1870,8 +1909,8 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) spin_lock(&nm_i->nid_list_lock); i = __lookup_free_nid_list(nm_i, nid); - if (i && i->state == NID_NEW) { - __remove_nid_from_list(sbi, i, FREE_NID_LIST, false); + if (i && i->state == FREE_NID) { + __remove_free_nid(sbi, i, FREE_NID); need_free = true; } spin_unlock(&nm_i->nid_list_lock); @@ -1890,15 +1929,18 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap)) return; - if (set) + if (set) { + if (test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs])) + return; __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); - else - __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); - - if (set) nm_i->free_nid_count[nat_ofs]++; - else if (!build) - nm_i->free_nid_count[nat_ofs]--; + } else { + if (!test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs])) + return; + __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); + if (!build) + nm_i->free_nid_count[nat_ofs]--; + } } static void scan_nat_page(struct f2fs_sb_info *sbi, @@ -1933,12 +1975,32 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, } } -static void scan_free_nid_bits(struct f2fs_sb_info *sbi) +static void scan_curseg_cache(struct f2fs_sb_info *sbi) { - struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); struct f2fs_journal *journal = curseg->journal; + int i; + + down_read(&curseg->journal_rwsem); + for (i = 0; i < nats_in_cursum(journal); i++) { + block_t addr; + nid_t nid; + + addr = le32_to_cpu(nat_in_journal(journal, i).block_addr); + nid = le32_to_cpu(nid_in_journal(journal, i)); + if (addr == NULL_ADDR) + add_free_nid(sbi, nid, true); + else + remove_free_nid(sbi, nid); + } + up_read(&curseg->journal_rwsem); +} + +static void scan_free_nid_bits(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned int i, idx; + nid_t nid; down_read(&nm_i->nat_tree_lock); @@ -1948,40 +2010,27 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi) if (!nm_i->free_nid_count[i]) continue; for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) { - nid_t nid; - - if (!test_bit_le(idx, nm_i->free_nid_bitmap[i])) - continue; + idx = find_next_bit_le(nm_i->free_nid_bitmap[i], + NAT_ENTRY_PER_BLOCK, idx); + if (idx >= NAT_ENTRY_PER_BLOCK) + break; nid = i * NAT_ENTRY_PER_BLOCK + idx; add_free_nid(sbi, nid, true); - if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS) + if (nm_i->nid_cnt[FREE_NID] >= MAX_FREE_NIDS) goto out; } } out: - down_read(&curseg->journal_rwsem); - for (i = 0; i < nats_in_cursum(journal); i++) { - block_t addr; - nid_t nid; + scan_curseg_cache(sbi); - addr = le32_to_cpu(nat_in_journal(journal, i).block_addr); - nid = le32_to_cpu(nid_in_journal(journal, i)); - if (addr == NULL_ADDR) - add_free_nid(sbi, nid, true); - else - remove_free_nid(sbi, nid); - } - up_read(&curseg->journal_rwsem); up_read(&nm_i->nat_tree_lock); } static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) { struct f2fs_nm_info *nm_i = NM_I(sbi); - struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); - struct f2fs_journal *journal = curseg->journal; int i = 0; nid_t nid = nm_i->next_scan_nid; @@ -1989,7 +2038,7 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) nid = 0; /* Enough entries */ - if (nm_i->nid_cnt[FREE_NID_LIST] >= NAT_ENTRY_PER_BLOCK) + if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK) return; if (!sync && !available_free_memory(sbi, FREE_NIDS)) @@ -1999,7 +2048,7 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) /* try to find free nids in free_nid_bitmap */ scan_free_nid_bits(sbi); - if (nm_i->nid_cnt[FREE_NID_LIST]) + if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK) return; } @@ -2027,18 +2076,8 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) nm_i->next_scan_nid = nid; /* find free nids from current sum_pages */ - down_read(&curseg->journal_rwsem); - for (i = 0; i < nats_in_cursum(journal); i++) { - block_t addr; + scan_curseg_cache(sbi); - addr = le32_to_cpu(nat_in_journal(journal, i).block_addr); - nid = le32_to_cpu(nid_in_journal(journal, i)); - if (addr == NULL_ADDR) - add_free_nid(sbi, nid, true); - else - remove_free_nid(sbi, nid); - } - up_read(&curseg->journal_rwsem); up_read(&nm_i->nat_tree_lock); ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), @@ -2076,15 +2115,13 @@ retry: } /* We should not use stale free nids created by build_free_nids */ - if (nm_i->nid_cnt[FREE_NID_LIST] && !on_build_free_nids(nm_i)) { - f2fs_bug_on(sbi, list_empty(&nm_i->nid_list[FREE_NID_LIST])); - i = list_first_entry(&nm_i->nid_list[FREE_NID_LIST], + if (nm_i->nid_cnt[FREE_NID] && !on_build_free_nids(nm_i)) { + f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list)); + i = list_first_entry(&nm_i->free_nid_list, struct free_nid, list); *nid = i->nid; - __remove_nid_from_list(sbi, i, FREE_NID_LIST, true); - i->state = NID_ALLOC; - __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false); + __move_free_nid(sbi, i, FREE_NID, PREALLOC_NID); nm_i->available_nids--; update_free_nid_bitmap(sbi, *nid, false, false); @@ -2110,7 +2147,7 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) spin_lock(&nm_i->nid_list_lock); i = __lookup_free_nid_list(nm_i, nid); f2fs_bug_on(sbi, !i); - __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false); + __remove_free_nid(sbi, i, PREALLOC_NID); spin_unlock(&nm_i->nid_list_lock); kmem_cache_free(free_nid_slab, i); @@ -2133,12 +2170,10 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) f2fs_bug_on(sbi, !i); if (!available_free_memory(sbi, FREE_NIDS)) { - __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false); + __remove_free_nid(sbi, i, PREALLOC_NID); need_free = true; } else { - __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, true); - i->state = NID_NEW; - __insert_nid_to_list(sbi, i, FREE_NID_LIST, false); + __move_free_nid(sbi, i, PREALLOC_NID, FREE_NID); } nm_i->available_nids++; @@ -2157,20 +2192,19 @@ int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) struct free_nid *i, *next; int nr = nr_shrink; - if (nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS) + if (nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS) return 0; if (!mutex_trylock(&nm_i->build_lock)) return 0; spin_lock(&nm_i->nid_list_lock); - list_for_each_entry_safe(i, next, &nm_i->nid_list[FREE_NID_LIST], - list) { + list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) { if (nr_shrink <= 0 || - nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS) + nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS) break; - __remove_nid_from_list(sbi, i, FREE_NID_LIST, false); + __remove_free_nid(sbi, i, FREE_NID); kmem_cache_free(free_nid_slab, i); nr_shrink--; } @@ -2196,8 +2230,8 @@ void recover_inline_xattr(struct inode *inode, struct page *page) goto update_inode; } - dst_addr = inline_xattr_addr(ipage); - src_addr = inline_xattr_addr(page); + dst_addr = inline_xattr_addr(inode, ipage); + src_addr = inline_xattr_addr(inode, page); inline_size = inline_xattr_size(inode); f2fs_wait_on_page_writeback(ipage, NODE, true); @@ -2286,6 +2320,12 @@ retry: dst->i_inline = src->i_inline & (F2FS_INLINE_XATTR | F2FS_EXTRA_ATTR); if (dst->i_inline & F2FS_EXTRA_ATTR) { dst->i_extra_isize = src->i_extra_isize; + + if (f2fs_sb_has_flexible_inline_xattr(sbi->sb) && + F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), + i_inline_xattr_size)) + dst->i_inline_xattr_size = src->i_inline_xattr_size; + if (f2fs_sb_has_project_quota(sbi->sb) && F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), i_projid)) @@ -2357,8 +2397,8 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) ne = __lookup_nat_cache(nm_i, nid); if (!ne) { - ne = grab_nat_entry(nm_i, nid, true); - node_info_from_raw_nat(&ne->ni, &raw_ne); + ne = __alloc_nat_entry(nid, true); + __init_nat_entry(nm_i, ne, &raw_ne, true); } /* @@ -2404,15 +2444,17 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK; struct f2fs_nat_block *nat_blk = page_address(page); int valid = 0; - int i; + int i = 0; if (!enabled_nat_bits(sbi, NULL)) return; - for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) { - if (start_nid == 0 && i == 0) - valid++; - if (nat_blk->entries[i].block_addr) + if (nat_index == 0) { + valid = 1; + i = 1; + } + for (; i < NAT_ENTRY_PER_BLOCK; i++) { + if (nat_blk->entries[i].block_addr != NULL_ADDR) valid++; } if (valid == 0) { @@ -2607,7 +2649,7 @@ static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi) __set_bit_le(i, nm_i->nat_block_bitmap); nid = i * NAT_ENTRY_PER_BLOCK; - last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK; + last_nid = nid + NAT_ENTRY_PER_BLOCK; spin_lock(&NM_I(sbi)->nid_list_lock); for (; nid < last_nid; nid++) @@ -2642,16 +2684,15 @@ static int init_node_manager(struct f2fs_sb_info *sbi) /* not used nids: 0, node, meta, (and root counted as valid node) */ nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count - F2FS_RESERVED_NODE_NUM; - nm_i->nid_cnt[FREE_NID_LIST] = 0; - nm_i->nid_cnt[ALLOC_NID_LIST] = 0; + nm_i->nid_cnt[FREE_NID] = 0; + nm_i->nid_cnt[PREALLOC_NID] = 0; nm_i->nat_cnt = 0; nm_i->ram_thresh = DEF_RAM_THRESHOLD; nm_i->ra_nid_pages = DEF_RA_NID_PAGES; nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD; INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); - INIT_LIST_HEAD(&nm_i->nid_list[FREE_NID_LIST]); - INIT_LIST_HEAD(&nm_i->nid_list[ALLOC_NID_LIST]); + INIT_LIST_HEAD(&nm_i->free_nid_list); INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO); INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO); INIT_LIST_HEAD(&nm_i->nat_entries); @@ -2743,16 +2784,15 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) /* destroy free nid list */ spin_lock(&nm_i->nid_list_lock); - list_for_each_entry_safe(i, next_i, &nm_i->nid_list[FREE_NID_LIST], - list) { - __remove_nid_from_list(sbi, i, FREE_NID_LIST, false); + list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { + __remove_free_nid(sbi, i, FREE_NID); spin_unlock(&nm_i->nid_list_lock); kmem_cache_free(free_nid_slab, i); spin_lock(&nm_i->nid_list_lock); } - f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID_LIST]); - f2fs_bug_on(sbi, nm_i->nid_cnt[ALLOC_NID_LIST]); - f2fs_bug_on(sbi, !list_empty(&nm_i->nid_list[ALLOC_NID_LIST])); + f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID]); + f2fs_bug_on(sbi, nm_i->nid_cnt[PREALLOC_NID]); + f2fs_bug_on(sbi, !list_empty(&nm_i->free_nid_list)); spin_unlock(&nm_i->nid_list_lock); /* destroy nat cache */ diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index bb53e9955ff2..0ee3e5ff49a3 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -140,6 +140,7 @@ enum mem_type { DIRTY_DENTS, /* indicates dirty dentry pages */ INO_ENTRIES, /* indicates inode entries */ EXTENT_CACHE, /* indicates extent cache */ + INMEM_PAGES, /* indicates inmemory pages */ BASE_CHECK, /* check kernel status */ }; @@ -150,18 +151,10 @@ struct nat_entry_set { unsigned int entry_cnt; /* the # of nat entries in set */ }; -/* - * For free nid mangement - */ -enum nid_state { - NID_NEW, /* newly added to free nid list */ - NID_ALLOC /* it is allocated */ -}; - struct free_nid { struct list_head list; /* for free node id list */ nid_t nid; /* node id */ - int state; /* in use or not: NID_NEW or NID_ALLOC */ + int state; /* in use or not: FREE_NID or PREALLOC_NID */ }; static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) @@ -170,12 +163,11 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) struct free_nid *fnid; spin_lock(&nm_i->nid_list_lock); - if (nm_i->nid_cnt[FREE_NID_LIST] <= 0) { + if (nm_i->nid_cnt[FREE_NID] <= 0) { spin_unlock(&nm_i->nid_list_lock); return; } - fnid = list_first_entry(&nm_i->nid_list[FREE_NID_LIST], - struct free_nid, list); + fnid = list_first_entry(&nm_i->free_nid_list, struct free_nid, list); *nid = fnid->nid; spin_unlock(&nm_i->nid_list_lock); } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 9626758bc762..92c57ace1939 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -594,6 +594,9 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) int ret = 0; unsigned long s_flags = sbi->sb->s_flags; bool need_writecp = false; +#ifdef CONFIG_QUOTA + int quota_enabled; +#endif if (s_flags & MS_RDONLY) { f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); @@ -604,7 +607,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) /* Needed for iput() to work correctly and not trash data */ sbi->sb->s_flags |= MS_ACTIVE; /* Turn on quotas so that they are updated correctly */ - f2fs_enable_quota_files(sbi); + quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY); #endif fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", @@ -665,7 +668,8 @@ skip: out: #ifdef CONFIG_QUOTA /* Turn quotas off */ - f2fs_quota_off_umount(sbi->sb); + if (quota_enabled) + f2fs_quota_off_umount(sbi->sb); #endif sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f5c494389483..94939a5a96c8 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -181,11 +181,12 @@ bool need_SSR(struct f2fs_sb_info *sbi) return true; return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs + - 2 * reserved_sections(sbi)); + SM_I(sbi)->min_ssr_sections + reserved_sections(sbi)); } void register_inmem_page(struct inode *inode, struct page *page) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); struct inmem_pages *new; @@ -204,6 +205,10 @@ void register_inmem_page(struct inode *inode, struct page *page) mutex_lock(&fi->inmem_lock); get_page(page); list_add_tail(&new->list, &fi->inmem_pages); + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (list_empty(&fi->inmem_ilist)) + list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); mutex_unlock(&fi->inmem_lock); @@ -262,12 +267,41 @@ next: return err; } +void drop_inmem_pages_all(struct f2fs_sb_info *sbi) +{ + struct list_head *head = &sbi->inode_list[ATOMIC_FILE]; + struct inode *inode; + struct f2fs_inode_info *fi; +next: + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (list_empty(head)) { + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + return; + } + fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist); + inode = igrab(&fi->vfs_inode); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + + if (inode) { + drop_inmem_pages(inode); + iput(inode); + } + congestion_wait(BLK_RW_ASYNC, HZ/50); + cond_resched(); + goto next; +} + void drop_inmem_pages(struct inode *inode) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); mutex_lock(&fi->inmem_lock); __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (!list_empty(&fi->inmem_ilist)) + list_del_init(&fi->inmem_ilist); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); mutex_unlock(&fi->inmem_lock); clear_inode_flag(inode, FI_ATOMIC_FILE); @@ -313,6 +347,7 @@ static int __commit_inmem_pages(struct inode *inode, struct inmem_pages *cur, *tmp; struct f2fs_io_info fio = { .sbi = sbi, + .ino = inode->i_ino, .type = DATA, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_PRIO, @@ -398,6 +433,10 @@ int commit_inmem_pages(struct inode *inode) /* drop all uncommitted pages */ __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); } + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (!list_empty(&fi->inmem_ilist)) + list_del_init(&fi->inmem_ilist); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); mutex_unlock(&fi->inmem_lock); clear_inode_flag(inode, FI_ATOMIC_COMMIT); @@ -472,7 +511,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) static int __submit_flush_wait(struct f2fs_sb_info *sbi, struct block_device *bdev) { - struct bio *bio = f2fs_bio_alloc(0); + struct bio *bio = f2fs_bio_alloc(sbi, 0, true); int ret; bio->bi_rw = REQ_OP_WRITE; @@ -485,15 +524,17 @@ static int __submit_flush_wait(struct f2fs_sb_info *sbi, return ret; } -static int submit_flush_wait(struct f2fs_sb_info *sbi) +static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino) { - int ret = __submit_flush_wait(sbi, sbi->sb->s_bdev); + int ret = 0; int i; - if (!sbi->s_ndevs || ret) - return ret; + if (!sbi->s_ndevs) + return __submit_flush_wait(sbi, sbi->sb->s_bdev); - for (i = 1; i < sbi->s_ndevs; i++) { + for (i = 0; i < sbi->s_ndevs; i++) { + if (!is_dirty_device(sbi, ino, i, FLUSH_INO)) + continue; ret = __submit_flush_wait(sbi, FDEV(i).bdev); if (ret) break; @@ -519,7 +560,9 @@ repeat: fcc->dispatch_list = llist_del_all(&fcc->issue_list); fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); - ret = submit_flush_wait(sbi); + cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode); + + ret = submit_flush_wait(sbi, cmd->ino); atomic_inc(&fcc->issued_flush); llist_for_each_entry_safe(cmd, next, @@ -537,7 +580,7 @@ repeat: goto repeat; } -int f2fs_issue_flush(struct f2fs_sb_info *sbi) +int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino) { struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; struct flush_cmd cmd; @@ -547,19 +590,20 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) return 0; if (!test_opt(sbi, FLUSH_MERGE)) { - ret = submit_flush_wait(sbi); + ret = submit_flush_wait(sbi, ino); atomic_inc(&fcc->issued_flush); return ret; } - if (atomic_inc_return(&fcc->issing_flush) == 1) { - ret = submit_flush_wait(sbi); + if (atomic_inc_return(&fcc->issing_flush) == 1 || sbi->s_ndevs > 1) { + ret = submit_flush_wait(sbi, ino); atomic_dec(&fcc->issing_flush); atomic_inc(&fcc->issued_flush); return ret; } + cmd.ino = ino; init_completion(&cmd.wait); llist_add(&cmd.llnode, &fcc->issue_list); @@ -583,7 +627,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) } else { struct flush_cmd *tmp, *next; - ret = submit_flush_wait(sbi); + ret = submit_flush_wait(sbi, ino); llist_for_each_entry_safe(tmp, next, list, llnode) { if (tmp == &cmd) { @@ -653,6 +697,28 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free) } } +int f2fs_flush_device_cache(struct f2fs_sb_info *sbi) +{ + int ret = 0, i; + + if (!sbi->s_ndevs) + return 0; + + for (i = 1; i < sbi->s_ndevs; i++) { + if (!f2fs_test_bit(i, (char *)&sbi->dirty_device)) + continue; + ret = __submit_flush_wait(sbi, FDEV(i).bdev); + if (ret) + break; + + spin_lock(&sbi->dev_lock); + f2fs_clear_bit(i, (char *)&sbi->dirty_device); + spin_unlock(&sbi->dev_lock); + } + + return ret; +} + static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, enum dirty_type dirty_type) { @@ -794,6 +860,8 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len); + f2fs_bug_on(sbi, dc->ref); if (dc->error == -EOPNOTSUPP) @@ -875,7 +943,7 @@ static int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, if (ret) return ret; } - bio = f2fs_bio_alloc(1); + bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, 1); bio->bi_iter.bi_sector = sector; bio->bi_bdev = bdev; bio_set_op_attrs(bio, op, 0); @@ -926,10 +994,14 @@ void __check_sit_bitmap(struct f2fs_sb_info *sbi, /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, - struct discard_cmd *dc) + struct discard_policy *dpolicy, + struct discard_cmd *dc) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ? + &(dcc->fstrim_list) : &(dcc->wait_list); struct bio *bio = NULL; + int flag = dpolicy->sync ? REQ_SYNC : 0; if (dc->state != D_PREP) return; @@ -948,8 +1020,8 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, if (bio) { bio->bi_private = dc; bio->bi_end_io = f2fs_submit_discard_endio; - submit_bio(REQ_SYNC, bio); - list_move_tail(&dc->list, &dcc->wait_list); + submit_bio(flag, bio); + list_move_tail(&dc->list, wait_list); __check_sit_bitmap(sbi, dc->start, dc->start + dc->len); f2fs_update_iostat(sbi, FS_DISCARD, 1); @@ -966,7 +1038,7 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, struct rb_node *insert_parent) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct rb_node **p = &dcc->root.rb_node; + struct rb_node **p; struct rb_node *parent = NULL; struct discard_cmd *dc = NULL; @@ -1134,58 +1206,107 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, return 0; } -static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) +static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy, + unsigned int start, unsigned int end) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct discard_cmd *prev_dc = NULL, *next_dc = NULL; + struct rb_node **insert_p = NULL, *insert_parent = NULL; + struct discard_cmd *dc; + struct blk_plug plug; + int issued; + +next: + issued = 0; + + mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); + + dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root, + NULL, start, + (struct rb_entry **)&prev_dc, + (struct rb_entry **)&next_dc, + &insert_p, &insert_parent, true); + if (!dc) + dc = next_dc; + + blk_start_plug(&plug); + + while (dc && dc->lstart <= end) { + struct rb_node *node; + + if (dc->len < dpolicy->granularity) + goto skip; + + if (dc->state != D_PREP) { + list_move_tail(&dc->list, &dcc->fstrim_list); + goto skip; + } + + __submit_discard_cmd(sbi, dpolicy, dc); + + if (++issued >= dpolicy->max_requests) { + start = dc->lstart + dc->len; + + blk_finish_plug(&plug); + mutex_unlock(&dcc->cmd_lock); + + schedule(); + + goto next; + } +skip: + node = rb_next(&dc->rb_node); + dc = rb_entry_safe(node, struct discard_cmd, rb_node); + + if (fatal_signal_pending(current)) + break; + } + + blk_finish_plug(&plug); + mutex_unlock(&dcc->cmd_lock); +} + +static int __issue_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *pend_list; struct discard_cmd *dc, *tmp; struct blk_plug plug; - int iter = 0, issued = 0; - int i; + int i, iter = 0, issued = 0; bool io_interrupted = false; - mutex_lock(&dcc->cmd_lock); - f2fs_bug_on(sbi, - !__check_rb_tree_consistence(sbi, &dcc->root)); - blk_start_plug(&plug); - for (i = MAX_PLIST_NUM - 1; - i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) { + for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { + if (i + 1 < dpolicy->granularity) + break; pend_list = &dcc->pend_list[i]; + + mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); + blk_start_plug(&plug); list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); - /* Hurry up to finish fstrim */ - if (dcc->pend_list_tag[i] & P_TRIM) { - __submit_discard_cmd(sbi, dc); - issued++; - - if (fatal_signal_pending(current)) - break; - continue; - } - - if (!issue_cond) { - __submit_discard_cmd(sbi, dc); - issued++; - continue; - } - - if (is_idle(sbi)) { - __submit_discard_cmd(sbi, dc); - issued++; - } else { + if (dpolicy->io_aware && i < dpolicy->io_aware_gran && + !is_idle(sbi)) { io_interrupted = true; + goto skip; } - if (++iter >= DISCARD_ISSUE_RATE) - goto out; + __submit_discard_cmd(sbi, dpolicy, dc); + issued++; +skip: + if (++iter >= dpolicy->max_requests) + break; } - if (list_empty(pend_list) && dcc->pend_list_tag[i] & P_TRIM) - dcc->pend_list_tag[i] &= (~P_TRIM); + blk_finish_plug(&plug); + mutex_unlock(&dcc->cmd_lock); + + if (iter >= dpolicy->max_requests) + break; } -out: - blk_finish_plug(&plug); - mutex_unlock(&dcc->cmd_lock); if (!issued && io_interrupted) issued = -1; @@ -1193,12 +1314,13 @@ out: return issued; } -static void __drop_discard_cmd(struct f2fs_sb_info *sbi) +static bool __drop_discard_cmd(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *pend_list; struct discard_cmd *dc, *tmp; int i; + bool dropped = false; mutex_lock(&dcc->cmd_lock); for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { @@ -1206,39 +1328,58 @@ static void __drop_discard_cmd(struct f2fs_sb_info *sbi) list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); __remove_discard_cmd(sbi, dc); + dropped = true; } } mutex_unlock(&dcc->cmd_lock); + + return dropped; } -static void __wait_one_discard_bio(struct f2fs_sb_info *sbi, +static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi, struct discard_cmd *dc) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + unsigned int len = 0; wait_for_completion_io(&dc->wait); mutex_lock(&dcc->cmd_lock); f2fs_bug_on(sbi, dc->state != D_DONE); dc->ref--; - if (!dc->ref) + if (!dc->ref) { + if (!dc->error) + len = dc->len; __remove_discard_cmd(sbi, dc); + } mutex_unlock(&dcc->cmd_lock); + + return len; } -static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) +static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy, + block_t start, block_t end) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *wait_list = &(dcc->wait_list); + struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ? + &(dcc->fstrim_list) : &(dcc->wait_list); struct discard_cmd *dc, *tmp; bool need_wait; + unsigned int trimmed = 0; next: need_wait = false; mutex_lock(&dcc->cmd_lock); list_for_each_entry_safe(dc, tmp, wait_list, list) { - if (!wait_cond || (dc->state == D_DONE && !dc->ref)) { + if (dc->lstart + dc->len <= start || end <= dc->lstart) + continue; + if (dc->len < dpolicy->granularity) + continue; + if (dc->state == D_DONE && !dc->ref) { wait_for_completion_io(&dc->wait); + if (!dc->error) + trimmed += dc->len; __remove_discard_cmd(sbi, dc); } else { dc->ref++; @@ -1249,9 +1390,17 @@ next: mutex_unlock(&dcc->cmd_lock); if (need_wait) { - __wait_one_discard_bio(sbi, dc); + trimmed += __wait_one_discard_bio(sbi, dc); goto next; } + + return trimmed; +} + +static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_policy *dpolicy) +{ + __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX); } /* This should be covered by global mutex, &sit_i->sentry_lock */ @@ -1289,23 +1438,19 @@ void stop_discard_thread(struct f2fs_sb_info *sbi) } } -/* This comes from f2fs_put_super and f2fs_trim_fs */ -void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount) -{ - __issue_discard_cmd(sbi, false); - __drop_discard_cmd(sbi); - __wait_discard_cmd(sbi, !umount); -} - -static void mark_discard_range_all(struct f2fs_sb_info *sbi) +/* This comes from f2fs_put_super */ +bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - int i; + struct discard_policy dpolicy; + bool dropped; - mutex_lock(&dcc->cmd_lock); - for (i = 0; i < MAX_PLIST_NUM; i++) - dcc->pend_list_tag[i] |= P_TRIM; - mutex_unlock(&dcc->cmd_lock); + init_discard_policy(&dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity); + __issue_discard_cmd(sbi, &dpolicy); + dropped = __drop_discard_cmd(sbi); + __wait_all_discard_cmd(sbi, &dpolicy); + + return dropped; } static int issue_discard_thread(void *data) @@ -1313,12 +1458,16 @@ static int issue_discard_thread(void *data) struct f2fs_sb_info *sbi = data; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; + struct discard_policy dpolicy; unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; int issued; set_freezable(); do { + init_discard_policy(&dpolicy, DPOLICY_BG, + dcc->discard_granularity); + wait_event_interruptible_timeout(*q, kthread_should_stop() || freezing(current) || dcc->discard_wake, @@ -1331,17 +1480,18 @@ static int issue_discard_thread(void *data) if (dcc->discard_wake) { dcc->discard_wake = 0; if (sbi->gc_thread && sbi->gc_thread->gc_urgent) - mark_discard_range_all(sbi); + init_discard_policy(&dpolicy, + DPOLICY_FORCE, 1); } sb_start_intwrite(sbi->sb); - issued = __issue_discard_cmd(sbi, true); + issued = __issue_discard_cmd(sbi, &dpolicy); if (issued) { - __wait_discard_cmd(sbi, true); - wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; + __wait_all_discard_cmd(sbi, &dpolicy); + wait_ms = dpolicy.min_interval; } else { - wait_ms = DEF_MAX_DISCARD_ISSUE_TIME; + wait_ms = dpolicy.max_interval; } sb_end_intwrite(sbi->sb); @@ -1605,7 +1755,6 @@ find_next: f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos, len); - cpc->trimmed += len; total_len += len; } else { next_pos = find_next_bit_le(entry->discard_map, @@ -1626,6 +1775,37 @@ skip: wake_up_discard_thread(sbi, false); } +void init_discard_policy(struct discard_policy *dpolicy, + int discard_type, unsigned int granularity) +{ + /* common policy */ + dpolicy->type = discard_type; + dpolicy->sync = true; + dpolicy->granularity = granularity; + + if (discard_type == DPOLICY_BG) { + dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; + dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; + dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; + dpolicy->io_aware_gran = MAX_PLIST_NUM; + dpolicy->io_aware = true; + } else if (discard_type == DPOLICY_FORCE) { + dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; + dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; + dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; + dpolicy->io_aware_gran = MAX_PLIST_NUM; + dpolicy->io_aware = true; + } else if (discard_type == DPOLICY_FSTRIM) { + dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; + dpolicy->io_aware_gran = MAX_PLIST_NUM; + dpolicy->io_aware = false; + } else if (discard_type == DPOLICY_UMOUNT) { + dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; + dpolicy->io_aware_gran = MAX_PLIST_NUM; + dpolicy->io_aware = false; + } +} + static int create_discard_cmd_control(struct f2fs_sb_info *sbi) { dev_t dev = sbi->sb->s_bdev->bd_dev; @@ -1643,12 +1823,10 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY; INIT_LIST_HEAD(&dcc->entry_list); - for (i = 0; i < MAX_PLIST_NUM; i++) { + for (i = 0; i < MAX_PLIST_NUM; i++) INIT_LIST_HEAD(&dcc->pend_list[i]); - if (i >= dcc->discard_granularity - 1) - dcc->pend_list_tag[i] |= P_ACTIVE; - } INIT_LIST_HEAD(&dcc->wait_list); + INIT_LIST_HEAD(&dcc->fstrim_list); mutex_init(&dcc->cmd_lock); atomic_set(&dcc->issued_discard, 0); atomic_set(&dcc->issing_discard, 0); @@ -1796,16 +1974,6 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) get_sec_entry(sbi, segno)->valid_blocks += del; } -void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new) -{ - update_sit_entry(sbi, new, 1); - if (GET_SEGNO(sbi, old) != NULL_SEGNO) - update_sit_entry(sbi, old, -1); - - locate_dirty_segment(sbi, GET_SEGNO(sbi, old)); - locate_dirty_segment(sbi, GET_SEGNO(sbi, new)); -} - void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) { unsigned int segno = GET_SEGNO(sbi, addr); @@ -1816,14 +1984,14 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) return; /* add it into sit main buffer */ - mutex_lock(&sit_i->sentry_lock); + down_write(&sit_i->sentry_lock); update_sit_entry(sbi, addr, -1); /* add it into dirty seglist */ locate_dirty_segment(sbi, segno); - mutex_unlock(&sit_i->sentry_lock); + up_write(&sit_i->sentry_lock); } bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) @@ -1836,7 +2004,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) return true; - mutex_lock(&sit_i->sentry_lock); + down_read(&sit_i->sentry_lock); segno = GET_SEGNO(sbi, blkaddr); se = get_seg_entry(sbi, segno); @@ -1845,7 +2013,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr) if (f2fs_test_bit(offset, se->ckpt_valid_map)) is_cp = true; - mutex_unlock(&sit_i->sentry_lock); + up_read(&sit_i->sentry_lock); return is_cp; } @@ -1903,12 +2071,8 @@ struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr) { struct page *page = grab_meta_page(sbi, blk_addr); - void *dst = page_address(page); - if (src) - memcpy(dst, src, PAGE_SIZE); - else - memset(dst, 0, PAGE_SIZE); + memcpy(page_address(page), src, PAGE_SIZE); set_page_dirty(page); f2fs_put_page(page, 1); } @@ -2007,7 +2171,6 @@ find_other_zone: } secno = left_start; skip_left: - hint = secno; segno = GET_SEG_FROM_SEC(sbi, secno); zoneno = GET_ZONE_FROM_SEC(sbi, secno); @@ -2242,12 +2405,16 @@ void allocate_new_segments(struct f2fs_sb_info *sbi) unsigned int old_segno; int i; + down_write(&SIT_I(sbi)->sentry_lock); + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { curseg = CURSEG_I(sbi, i); old_segno = curseg->segno; SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); locate_dirty_segment(sbi, old_segno); } + + up_write(&SIT_I(sbi)->sentry_lock); } static const struct segment_allocation default_salloc_ops = { @@ -2259,14 +2426,14 @@ bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc) __u64 trim_start = cpc->trim_start; bool has_candidate = false; - mutex_lock(&SIT_I(sbi)->sentry_lock); + down_write(&SIT_I(sbi)->sentry_lock); for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) { if (add_discard_addrs(sbi, cpc, true)) { has_candidate = true; break; } } - mutex_unlock(&SIT_I(sbi)->sentry_lock); + up_write(&SIT_I(sbi)->sentry_lock); cpc->trim_start = trim_start; return has_candidate; @@ -2276,14 +2443,16 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) { __u64 start = F2FS_BYTES_TO_BLK(range->start); __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1; - unsigned int start_segno, end_segno; + unsigned int start_segno, end_segno, cur_segno; + block_t start_block, end_block; struct cp_control cpc; + struct discard_policy dpolicy; + unsigned long long trimmed = 0; int err = 0; if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize) return -EINVAL; - cpc.trimmed = 0; if (end <= MAIN_BLKADDR(sbi)) goto out; @@ -2297,12 +2466,14 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start); end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : GET_SEGNO(sbi, end); + cpc.reason = CP_DISCARD; cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen)); /* do checkpoint to issue discard commands safely */ - for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) { - cpc.trim_start = start_segno; + for (cur_segno = start_segno; cur_segno <= end_segno; + cur_segno = cpc.trim_end + 1) { + cpc.trim_start = cur_segno; if (sbi->discard_blks == 0) break; @@ -2310,7 +2481,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) cpc.trim_end = end_segno; else cpc.trim_end = min_t(unsigned int, - rounddown(start_segno + + rounddown(cur_segno + BATCHED_TRIM_SEGMENTS(sbi), sbi->segs_per_sec) - 1, end_segno); @@ -2322,11 +2493,16 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) schedule(); } - /* It's time to issue all the filed discards */ - mark_discard_range_all(sbi); - f2fs_wait_discard_bios(sbi, false); + + start_block = START_BLOCK(sbi, start_segno); + end_block = START_BLOCK(sbi, min(cur_segno, end_segno) + 1); + + init_discard_policy(&dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen); + __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block); + trimmed = __wait_discard_cmd_range(sbi, &dpolicy, + start_block, end_block); out: - range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); + range->len = F2FS_BLK_TO_BYTES(trimmed); return err; } @@ -2338,6 +2514,20 @@ static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type) return false; } +#if 0 +int rw_hint_to_seg_type(enum rw_hint hint) +{ + switch (hint) { + case WRITE_LIFE_SHORT: + return CURSEG_HOT_DATA; + case WRITE_LIFE_EXTREME: + return CURSEG_COLD_DATA; + default: + return CURSEG_WARM_DATA; + } +} +#endif + static int __get_segment_type_2(struct f2fs_io_info *fio) { if (fio->type == DATA) @@ -2372,6 +2562,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) return CURSEG_COLD_DATA; if (is_inode_flag_set(inode, FI_HOT_DATA)) return CURSEG_HOT_DATA; + /* rw_hint_to_seg_type(inode->i_write_hint); */ return CURSEG_WARM_DATA; } else { if (IS_DNODE(fio->page)) @@ -2416,8 +2607,10 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); + down_read(&SM_I(sbi)->curseg_lock); + mutex_lock(&curseg->curseg_mutex); - mutex_lock(&sit_i->sentry_lock); + down_write(&sit_i->sentry_lock); *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); @@ -2434,15 +2627,26 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, stat_inc_block_count(sbi, curseg); + /* + * SIT information should be updated before segment allocation, + * since SSR needs latest valid block information. + */ + update_sit_entry(sbi, *new_blkaddr, 1); + if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) + update_sit_entry(sbi, old_blkaddr, -1); + if (!__has_curseg_space(sbi, type)) sit_i->s_ops->allocate_segment(sbi, type, false); + /* - * SIT information should be updated after segment allocation, - * since we need to keep dirty segments precisely under SSR. + * segment dirty status should be updated after segment allocation, + * so we just need to update status only one time after previous + * segment being closed. */ - refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); + locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); + locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr)); - mutex_unlock(&sit_i->sentry_lock); + up_write(&sit_i->sentry_lock); if (page && IS_NODESEG(type)) { fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); @@ -2462,6 +2666,29 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, } mutex_unlock(&curseg->curseg_mutex); + + up_read(&SM_I(sbi)->curseg_lock); +} + +static void update_device_state(struct f2fs_io_info *fio) +{ + struct f2fs_sb_info *sbi = fio->sbi; + unsigned int devidx; + + if (!sbi->s_ndevs) + return; + + devidx = f2fs_target_device_index(sbi, fio->new_blkaddr); + + /* update device state for fsync */ + set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO); + + /* update device state for checkpoint */ + if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) { + spin_lock(&sbi->dev_lock); + f2fs_set_bit(devidx, (char *)&sbi->dirty_device); + spin_unlock(&sbi->dev_lock); + } } static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) @@ -2478,6 +2705,8 @@ reallocate: if (err == -EAGAIN) { fio->old_blkaddr = fio->new_blkaddr; goto reallocate; + } else if (!err) { + update_device_state(fio); } } @@ -2538,12 +2767,26 @@ int rewrite_data_page(struct f2fs_io_info *fio) stat_inc_inplace_blocks(fio->sbi); err = f2fs_submit_page_bio(fio); + if (!err) + update_device_state(fio); f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); return err; } +static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + int i; + + for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) { + if (CURSEG_I(sbi, i)->segno == segno) + break; + } + return i; +} + void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, block_t old_blkaddr, block_t new_blkaddr, bool recover_curseg, bool recover_newaddr) @@ -2559,6 +2802,8 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, se = get_seg_entry(sbi, segno); type = se->type; + down_write(&SM_I(sbi)->curseg_lock); + if (!recover_curseg) { /* for recovery flow */ if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) { @@ -2568,14 +2813,19 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, type = CURSEG_WARM_DATA; } } else { - if (!IS_CURSEG(sbi, segno)) + if (IS_CURSEG(sbi, segno)) { + /* se->type is volatile as SSR allocation */ + type = __f2fs_get_curseg(sbi, segno); + f2fs_bug_on(sbi, type == NO_CHECK_TYPE); + } else { type = CURSEG_WARM_DATA; + } } curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); - mutex_lock(&sit_i->sentry_lock); + down_write(&sit_i->sentry_lock); old_cursegno = curseg->segno; old_blkoff = curseg->next_blkoff; @@ -2607,8 +2857,9 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, curseg->next_blkoff = old_blkoff; } - mutex_unlock(&sit_i->sentry_lock); + up_write(&sit_i->sentry_lock); mutex_unlock(&curseg->curseg_mutex); + up_write(&SM_I(sbi)->curseg_lock); } void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, @@ -3062,7 +3313,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) bool to_journal = true; struct seg_entry *se; - mutex_lock(&sit_i->sentry_lock); + down_write(&sit_i->sentry_lock); if (!sit_i->dirty_sentries) goto out; @@ -3156,7 +3407,7 @@ out: cpc->trim_start = trim_start; } - mutex_unlock(&sit_i->sentry_lock); + up_write(&sit_i->sentry_lock); set_prefree_as_free_segments(sbi); } @@ -3249,7 +3500,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi) sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK; sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time); sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec; - mutex_init(&sit_i->sentry_lock); + init_rwsem(&sit_i->sentry_lock); return 0; } @@ -3490,7 +3741,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi) struct sit_info *sit_i = SIT_I(sbi); unsigned int segno; - mutex_lock(&sit_i->sentry_lock); + down_write(&sit_i->sentry_lock); sit_i->min_mtime = LLONG_MAX; @@ -3507,7 +3758,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi) sit_i->min_mtime = mtime; } sit_i->max_mtime = get_mtime(sbi); - mutex_unlock(&sit_i->sentry_lock); + up_write(&sit_i->sentry_lock); } int build_segment_manager(struct f2fs_sb_info *sbi) @@ -3540,11 +3791,14 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS; + sm_info->min_ssr_sections = reserved_sections(sbi); sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; INIT_LIST_HEAD(&sm_info->sit_entry_set); + init_rwsem(&sm_info->curseg_lock); + if (!f2fs_readonly(sbi->sb)) { err = create_flush_cmd_control(sbi); if (err) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index ffa11274b0ce..5264b6ed120c 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -231,7 +231,7 @@ struct sit_info { unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */ unsigned int dirty_sentries; /* # of dirty sentries */ unsigned int sents_per_block; /* # of SIT entries per block */ - struct mutex sentry_lock; /* to protect SIT cache */ + struct rw_semaphore sentry_lock; /* to protect SIT cache */ struct seg_entry *sentries; /* SIT segment-level cache */ struct sec_entry *sec_entries; /* SIT section-level cache */ @@ -497,6 +497,33 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi) return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi)); } +static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi) +{ + unsigned int node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) + + get_pages(sbi, F2FS_DIRTY_DENTS); + unsigned int dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS); + unsigned int segno, left_blocks; + int i; + + /* check current node segment */ + for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) { + segno = CURSEG_I(sbi, i)->segno; + left_blocks = sbi->blocks_per_seg - + get_seg_entry(sbi, segno)->ckpt_valid_blocks; + + if (node_blocks > left_blocks) + return false; + } + + /* check current data segment */ + segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno; + left_blocks = sbi->blocks_per_seg - + get_seg_entry(sbi, segno)->ckpt_valid_blocks; + if (dent_blocks > left_blocks) + return false; + return true; +} + static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed, int needed) { @@ -507,6 +534,9 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) return false; + if (free_sections(sbi) + freed == reserved_sections(sbi) + needed && + has_curseg_enough_space(sbi)) + return false; return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + imeta_secs + reserved_sections(sbi) + needed); @@ -730,7 +760,7 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi, unsigned int secno) { - if (get_valid_blocks(sbi, GET_SEG_FROM_SEC(sbi, secno), true) >= + if (get_valid_blocks(sbi, GET_SEG_FROM_SEC(sbi, secno), true) > sbi->fggc_threshold) return true; return false; @@ -795,8 +825,9 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force) goto wake_up; mutex_lock(&dcc->cmd_lock); - for (i = MAX_PLIST_NUM - 1; - i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) { + for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { + if (i + 1 < dcc->discard_granularity) + break; if (!list_empty(&dcc->pend_list[i])) { wakeup = true; break; diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index 5c60fc28ec75..0b5664a1a6cc 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -28,7 +28,7 @@ static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi) static unsigned long __count_free_nids(struct f2fs_sb_info *sbi) { - long count = NM_I(sbi)->nid_cnt[FREE_NID_LIST] - MAX_FREE_NIDS; + long count = NM_I(sbi)->nid_cnt[FREE_NID] - MAX_FREE_NIDS; return count > 0 ? count : 0; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 482bb0333806..76e2f1518224 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -44,6 +44,8 @@ static struct kmem_cache *f2fs_inode_cachep; char *fault_name[FAULT_MAX] = { [FAULT_KMALLOC] = "kmalloc", [FAULT_PAGE_ALLOC] = "page alloc", + [FAULT_PAGE_GET] = "page get", + [FAULT_ALLOC_BIO] = "alloc bio", [FAULT_ALLOC_NID] = "alloc nid", [FAULT_ORPHAN] = "orphan", [FAULT_BLOCK] = "no more block", @@ -92,6 +94,7 @@ enum { Opt_disable_ext_identify, Opt_inline_xattr, Opt_noinline_xattr, + Opt_inline_xattr_size, Opt_inline_data, Opt_inline_dentry, Opt_noinline_dentry, @@ -141,6 +144,7 @@ static match_table_t f2fs_tokens = { {Opt_disable_ext_identify, "disable_ext_identify"}, {Opt_inline_xattr, "inline_xattr"}, {Opt_noinline_xattr, "noinline_xattr"}, + {Opt_inline_xattr_size, "inline_xattr_size=%u"}, {Opt_inline_data, "inline_data"}, {Opt_inline_dentry, "inline_dentry"}, {Opt_noinline_dentry, "noinline_dentry"}, @@ -209,6 +213,12 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, "quota options when quota turned on"); return -EINVAL; } + if (f2fs_sb_has_quota_ino(sb)) { + f2fs_msg(sb, KERN_INFO, + "QUOTA feature is enabled, so ignore qf_name"); + return 0; + } + qname = match_strdup(args); if (!qname) { f2fs_msg(sb, KERN_ERR, @@ -287,6 +297,18 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) return -1; } } + + if (f2fs_sb_has_quota_ino(sbi->sb) && sbi->s_jquota_fmt) { + f2fs_msg(sbi->sb, KERN_INFO, + "QUOTA feature is enabled, so ignore jquota_fmt"); + sbi->s_jquota_fmt = 0; + } + if (f2fs_sb_has_quota_ino(sbi->sb) && sb_rdonly(sbi->sb)) { + f2fs_msg(sbi->sb, KERN_INFO, + "Filesystem with quota feature cannot be mounted RDWR " + "without CONFIG_QUOTA"); + return -1; + } return 0; } #endif @@ -383,6 +405,12 @@ static int parse_options(struct super_block *sb, char *options) case Opt_noinline_xattr: clear_opt(sbi, INLINE_XATTR); break; + case Opt_inline_xattr_size: + if (args->from && match_int(args, &arg)) + return -EINVAL; + set_opt(sbi, INLINE_XATTR_SIZE); + sbi->inline_xattr_size = arg; + break; #else case Opt_user_xattr: f2fs_msg(sb, KERN_INFO, @@ -604,6 +632,24 @@ static int parse_options(struct super_block *sb, char *options) F2FS_IO_SIZE_KB(sbi)); return -EINVAL; } + + if (test_opt(sbi, INLINE_XATTR_SIZE)) { + if (!test_opt(sbi, INLINE_XATTR)) { + f2fs_msg(sb, KERN_ERR, + "inline_xattr_size option should be " + "set with inline_xattr option"); + return -EINVAL; + } + if (!sbi->inline_xattr_size || + sbi->inline_xattr_size >= DEF_ADDRS_PER_INODE - + F2FS_TOTAL_EXTRA_ATTR_SIZE - + DEF_INLINE_RESERVED_SIZE - + DEF_MIN_INLINE_SIZE) { + f2fs_msg(sb, KERN_ERR, + "inline xattr size is out of range"); + return -EINVAL; + } + } return 0; } @@ -618,13 +664,13 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) init_once((void *) fi); /* Initialize f2fs-specific inode info */ - fi->vfs_inode.i_version = 1; atomic_set(&fi->dirty_pages, 0); fi->i_current_depth = 1; fi->i_advise = 0; init_rwsem(&fi->i_sem); INIT_LIST_HEAD(&fi->dirty_list); INIT_LIST_HEAD(&fi->gdirty_list); + INIT_LIST_HEAD(&fi->inmem_ilist); INIT_LIST_HEAD(&fi->inmem_pages); mutex_init(&fi->inmem_lock); init_rwsem(&fi->dio_rwsem[READ]); @@ -673,7 +719,6 @@ static int f2fs_drop_inode(struct inode *inode) sb_end_intwrite(inode->i_sb); - fscrypt_put_encryption_info(inode, NULL); spin_lock(&inode->i_lock); atomic_dec(&inode->i_count); } @@ -781,6 +826,7 @@ static void f2fs_put_super(struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); int i; + bool dropped; f2fs_quota_off_umount(sb); @@ -801,9 +847,9 @@ static void f2fs_put_super(struct super_block *sb) } /* be sure to wait for any on-going discard commands */ - f2fs_wait_discard_bios(sbi, true); + dropped = f2fs_wait_discard_bios(sbi); - if (f2fs_discard_en(sbi) && !sbi->discard_blks) { + if (f2fs_discard_en(sbi) && !sbi->discard_blks && !dropped) { struct cp_control cpc = { .reason = CP_UMOUNT | CP_TRIMMED, }; @@ -859,6 +905,9 @@ int f2fs_sync_fs(struct super_block *sb, int sync) struct f2fs_sb_info *sbi = F2FS_SB(sb); int err = 0; + if (unlikely(f2fs_cp_error(sbi))) + return 0; + trace_f2fs_sync_fs(sb, sync); if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) @@ -958,7 +1007,7 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = total_count - start_count; buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count; buf->f_bavail = user_block_count - valid_user_blocks(sbi) - - sbi->reserved_blocks; + sbi->current_reserved_blocks; avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; @@ -1047,6 +1096,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",inline_xattr"); else seq_puts(seq, ",noinline_xattr"); + if (test_opt(sbi, INLINE_XATTR_SIZE)) + seq_printf(seq, ",inline_xattr_size=%u", + sbi->inline_xattr_size); #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL if (test_opt(sbi, POSIX_ACL)) @@ -1109,6 +1161,7 @@ static void default_options(struct f2fs_sb_info *sbi) { /* init some FS parameters */ sbi->active_logs = NR_CURSEG_TYPE; + sbi->inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; set_opt(sbi, BG_GC); set_opt(sbi, INLINE_XATTR); @@ -1137,6 +1190,9 @@ static void default_options(struct f2fs_sb_info *sbi) #endif } +#ifdef CONFIG_QUOTA +static int f2fs_enable_quotas(struct super_block *sb); +#endif static int f2fs_remount(struct super_block *sb, int *flags, char *data) { struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -1203,6 +1259,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) if (f2fs_readonly(sb) && (*flags & MS_RDONLY)) goto skip; +#ifdef CONFIG_QUOTA if (!f2fs_readonly(sb) && (*flags & MS_RDONLY)) { err = dquot_suspend(sb, -1); if (err < 0) @@ -1210,9 +1267,15 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) } else { /* dquot_resume needs RW */ sb->s_flags &= ~MS_RDONLY; - dquot_resume(sb, -1); + if (sb_any_quota_suspended(sb)) { + dquot_resume(sb, -1); + } else if (f2fs_sb_has_quota_ino(sb)) { + err = f2fs_enable_quotas(sb); + if (err) + goto restore_opts; + } } - +#endif /* disallow enable/disable extent_cache dynamically */ if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) { err = -EINVAL; @@ -1321,8 +1384,13 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data, tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread); repeat: page = read_mapping_page(mapping, blkidx, NULL); - if (IS_ERR(page)) + if (IS_ERR(page)) { + if (PTR_ERR(page) == -ENOMEM) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto repeat; + } return PTR_ERR(page); + } lock_page(page); @@ -1365,11 +1433,16 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, while (towrite > 0) { tocopy = min_t(unsigned long, sb->s_blocksize - offset, towrite); - +retry: err = a_ops->write_begin(NULL, mapping, off, tocopy, 0, &page, NULL); - if (unlikely(err)) + if (unlikely(err)) { + if (err == -ENOMEM) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; + } break; + } kaddr = kmap_atomic(page); memcpy(kaddr + offset, data, tocopy); @@ -1386,8 +1459,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, } if (len == towrite) - return 0; - inode->i_version++; + return err; inode->i_mtime = inode->i_ctime = current_time(inode); f2fs_mark_inode_dirty_sync(inode, false); return len - towrite; @@ -1409,19 +1481,91 @@ static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type) sbi->s_jquota_fmt, type); } -void f2fs_enable_quota_files(struct f2fs_sb_info *sbi) +int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly) { - int i, ret; + int enabled = 0; + int i, err; + + if (f2fs_sb_has_quota_ino(sbi->sb) && rdonly) { + err = f2fs_enable_quotas(sbi->sb); + if (err) { + f2fs_msg(sbi->sb, KERN_ERR, + "Cannot turn on quota_ino: %d", err); + return 0; + } + return 1; + } for (i = 0; i < MAXQUOTAS; i++) { if (sbi->s_qf_names[i]) { - ret = f2fs_quota_on_mount(sbi, i); - if (ret < 0) - f2fs_msg(sbi->sb, KERN_ERR, - "Cannot turn on journaled " - "quota: error %d", ret); + err = f2fs_quota_on_mount(sbi, i); + if (!err) { + enabled = 1; + continue; + } + f2fs_msg(sbi->sb, KERN_ERR, + "Cannot turn on quotas: %d on %d", err, i); + } + } + return enabled; +} + +static int f2fs_quota_enable(struct super_block *sb, int type, int format_id, + unsigned int flags) +{ + struct inode *qf_inode; + unsigned long qf_inum; + int err; + + BUG_ON(!f2fs_sb_has_quota_ino(sb)); + + qf_inum = f2fs_qf_ino(sb, type); + if (!qf_inum) + return -EPERM; + + qf_inode = f2fs_iget(sb, qf_inum); + if (IS_ERR(qf_inode)) { + f2fs_msg(sb, KERN_ERR, + "Bad quota inode %u:%lu", type, qf_inum); + return PTR_ERR(qf_inode); + } + + /* Don't account quota for quota files to avoid recursion */ + qf_inode->i_flags |= S_NOQUOTA; + err = dquot_enable(qf_inode, type, format_id, flags); + iput(qf_inode); + return err; +} + +static int f2fs_enable_quotas(struct super_block *sb) +{ + int type, err = 0; + unsigned long qf_inum; + bool quota_mopt[MAXQUOTAS] = { + test_opt(F2FS_SB(sb), USRQUOTA), + test_opt(F2FS_SB(sb), GRPQUOTA), + test_opt(F2FS_SB(sb), PRJQUOTA), + }; + + sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; + for (type = 0; type < MAXQUOTAS; type++) { + qf_inum = f2fs_qf_ino(sb, type); + if (qf_inum) { + err = f2fs_quota_enable(sb, type, QFMT_VFS_V1, + DQUOT_USAGE_ENABLED | + (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0)); + if (err) { + f2fs_msg(sb, KERN_ERR, + "Failed to enable quota tracking " + "(type=%d, err=%d). Please run " + "fsck to fix.", type, err); + for (type--; type >= 0; type--) + dquot_quota_off(sb, type); + return err; + } } } + return 0; } static int f2fs_quota_sync(struct super_block *sb, int type) @@ -1492,7 +1636,7 @@ static int f2fs_quota_off(struct super_block *sb, int type) f2fs_quota_sync(sb, type); err = dquot_quota_off(sb, type); - if (err) + if (err || f2fs_sb_has_quota_ino(sb)) goto out_put; inode_lock(inode); @@ -1660,7 +1804,7 @@ static loff_t max_file_blocks(void) /* * note: previously, result is equal to (DEF_ADDRS_PER_INODE - - * F2FS_INLINE_XATTR_ADDRS), but now f2fs try to reserve more + * DEFAULT_INLINE_XATTR_ADDRS), but now f2fs try to reserve more * space in inode.i_addr, it will be more safe to reassign * result as zero. */ @@ -1969,6 +2113,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi) for (j = HOT; j < NR_TEMP_TYPE; j++) mutex_init(&sbi->wio_mutex[i][j]); spin_lock_init(&sbi->cp_lock); + + sbi->dirty_device = 0; + spin_lock_init(&sbi->dev_lock); } static int init_percpu_info(struct f2fs_sb_info *sbi) @@ -2323,7 +2470,10 @@ try_onemore: #ifdef CONFIG_QUOTA sb->dq_op = &f2fs_quota_operations; - sb->s_qcop = &f2fs_quotactl_ops; + if (f2fs_sb_has_quota_ino(sb)) + sb->s_qcop = &dquot_quotactl_sysfile_ops; + else + sb->s_qcop = &f2fs_quotactl_ops; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; #endif @@ -2419,6 +2569,7 @@ try_onemore: le64_to_cpu(sbi->ckpt->valid_block_count); sbi->last_valid_block_count = sbi->total_valid_block_count; sbi->reserved_blocks = 0; + sbi->current_reserved_blocks = 0; for (i = 0; i < NR_INODE_TYPE; i++) { INIT_LIST_HEAD(&sbi->inode_list[i]); @@ -2493,10 +2644,24 @@ try_onemore: if (err) goto free_root_inode; +#ifdef CONFIG_QUOTA + /* + * Turn on quotas which were not enabled for read-only mounts if + * filesystem has quota feature, so that they are updated correctly. + */ + if (f2fs_sb_has_quota_ino(sb) && !sb_rdonly(sb)) { + err = f2fs_enable_quotas(sb); + if (err) { + f2fs_msg(sb, KERN_ERR, + "Cannot turn on quotas: error %d", err); + goto free_sysfs; + } + } +#endif /* if there are nt orphan nodes free them */ err = recover_orphan_inodes(sbi); if (err) - goto free_sysfs; + goto free_meta; /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { @@ -2530,7 +2695,7 @@ try_onemore: err = -EINVAL; f2fs_msg(sb, KERN_ERR, "Need to recover fsync data"); - goto free_sysfs; + goto free_meta; } } skip_recovery: @@ -2564,6 +2729,10 @@ skip_recovery: return 0; free_meta: +#ifdef CONFIG_QUOTA + if (f2fs_sb_has_quota_ino(sb) && !sb_rdonly(sb)) + f2fs_quota_off_umount(sbi->sb); +#endif f2fs_sync_inode_meta(sbi); /* * Some dirty meta pages can be produced by recover_orphan_inodes() @@ -2572,7 +2741,9 @@ free_meta: * falls into an infinite loop in sync_meta_pages(). */ truncate_inode_pages_final(META_MAPPING(sbi)); +#ifdef CONFIG_QUOTA free_sysfs: +#endif f2fs_unregister_sysfs(sbi); free_root_inode: dput(sb->s_root); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index e2c258f717cd..9835348b6e5d 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -30,7 +30,7 @@ enum { FAULT_INFO_RATE, /* struct f2fs_fault_info */ FAULT_INFO_TYPE, /* struct f2fs_fault_info */ #endif - RESERVED_BLOCKS, + RESERVED_BLOCKS, /* struct f2fs_sb_info */ }; struct f2fs_attr { @@ -63,6 +63,13 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) return NULL; } +static ssize_t dirty_segments_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)(dirty_segments(sbi))); +} + static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -100,10 +107,22 @@ static ssize_t features_show(struct f2fs_attr *a, if (f2fs_sb_has_inode_chksum(sb)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "inode_checksum"); + if (f2fs_sb_has_flexible_inline_xattr(sb)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "flexible_inline_xattr"); + if (f2fs_sb_has_quota_ino(sb)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "quota_ino"); len += snprintf(buf + len, PAGE_SIZE - len, "\n"); return len; } +static ssize_t current_reserved_blocks_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", sbi->current_reserved_blocks); +} + static ssize_t f2fs_sbi_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -143,34 +162,22 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, #endif if (a->struct_type == RESERVED_BLOCKS) { spin_lock(&sbi->stat_lock); - if ((unsigned long)sbi->total_valid_block_count + t > - (unsigned long)sbi->user_block_count) { + if (t > (unsigned long)sbi->user_block_count) { spin_unlock(&sbi->stat_lock); return -EINVAL; } *ui = t; + sbi->current_reserved_blocks = min(sbi->reserved_blocks, + sbi->user_block_count - valid_user_blocks(sbi)); spin_unlock(&sbi->stat_lock); return count; } if (!strcmp(a->attr.name, "discard_granularity")) { - struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - int i; - if (t == 0 || t > MAX_PLIST_NUM) return -EINVAL; if (t == *ui) return count; - - mutex_lock(&dcc->cmd_lock); - for (i = 0; i < MAX_PLIST_NUM; i++) { - if (i >= t - 1) - dcc->pend_list_tag[i] |= P_ACTIVE; - else - dcc->pend_list_tag[i] &= (~P_ACTIVE); - } - mutex_unlock(&dcc->cmd_lock); - *ui = t; return count; } @@ -222,6 +229,8 @@ enum feat_id { FEAT_EXTRA_ATTR, FEAT_PROJECT_QUOTA, FEAT_INODE_CHECKSUM, + FEAT_FLEXIBLE_INLINE_XATTR, + FEAT_QUOTA_INO, }; static ssize_t f2fs_feature_show(struct f2fs_attr *a, @@ -234,6 +243,8 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a, case FEAT_EXTRA_ATTR: case FEAT_PROJECT_QUOTA: case FEAT_INODE_CHECKSUM: + case FEAT_FLEXIBLE_INLINE_XATTR: + case FEAT_QUOTA_INO: return snprintf(buf, PAGE_SIZE, "supported\n"); } return 0; @@ -279,6 +290,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio); @@ -291,8 +303,10 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); #endif +F2FS_GENERAL_RO_ATTR(dirty_segments); F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); F2FS_GENERAL_RO_ATTR(features); +F2FS_GENERAL_RO_ATTR(current_reserved_blocks); #ifdef CONFIG_F2FS_FS_ENCRYPTION F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO); @@ -304,6 +318,8 @@ F2FS_FEATURE_RO_ATTR(atomic_write, FEAT_ATOMIC_WRITE); F2FS_FEATURE_RO_ATTR(extra_attr, FEAT_EXTRA_ATTR); F2FS_FEATURE_RO_ATTR(project_quota, FEAT_PROJECT_QUOTA); F2FS_FEATURE_RO_ATTR(inode_checksum, FEAT_INODE_CHECKSUM); +F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR); +F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -321,6 +337,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(min_ipu_util), ATTR_LIST(min_fsync_blocks), ATTR_LIST(min_hot_blocks), + ATTR_LIST(min_ssr_sections), ATTR_LIST(max_victim_search), ATTR_LIST(dir_level), ATTR_LIST(ram_thresh), @@ -333,9 +350,11 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(inject_rate), ATTR_LIST(inject_type), #endif + ATTR_LIST(dirty_segments), ATTR_LIST(lifetime_write_kbytes), ATTR_LIST(features), ATTR_LIST(reserved_blocks), + ATTR_LIST(current_reserved_blocks), NULL, }; @@ -350,6 +369,8 @@ static struct attribute *f2fs_feat_attrs[] = { ATTR_LIST(extra_attr), ATTR_LIST(project_quota), ATTR_LIST(inode_checksum), + ATTR_LIST(flexible_inline_xattr), + ATTR_LIST(quota_ino), NULL, }; diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index ab658419552b..7acf56ebda65 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -264,12 +264,12 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index, return entry; } -static struct f2fs_xattr_entry *__find_inline_xattr(void *base_addr, - void **last_addr, int index, - size_t len, const char *name) +static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode, + void *base_addr, void **last_addr, int index, + size_t len, const char *name) { struct f2fs_xattr_entry *entry; - unsigned int inline_size = F2FS_INLINE_XATTR_ADDRS << 2; + unsigned int inline_size = inline_xattr_size(inode); list_for_each_xattr(entry, base_addr) { if ((void *)entry + sizeof(__u32) > base_addr + inline_size || @@ -288,12 +288,54 @@ static struct f2fs_xattr_entry *__find_inline_xattr(void *base_addr, return entry; } +static int read_inline_xattr(struct inode *inode, struct page *ipage, + void *txattr_addr) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + unsigned int inline_size = inline_xattr_size(inode); + struct page *page = NULL; + void *inline_addr; + + if (ipage) { + inline_addr = inline_xattr_addr(inode, ipage); + } else { + page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(page)) + return PTR_ERR(page); + + inline_addr = inline_xattr_addr(inode, page); + } + memcpy(txattr_addr, inline_addr, inline_size); + f2fs_put_page(page, 1); + + return 0; +} + +static int read_xattr_block(struct inode *inode, void *txattr_addr) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + nid_t xnid = F2FS_I(inode)->i_xattr_nid; + unsigned int inline_size = inline_xattr_size(inode); + struct page *xpage; + void *xattr_addr; + + /* The inode already has an extended attribute block. */ + xpage = get_node_page(sbi, xnid); + if (IS_ERR(xpage)) + return PTR_ERR(xpage); + + xattr_addr = page_address(xpage); + memcpy(txattr_addr + inline_size, xattr_addr, VALID_XATTR_BLOCK_SIZE); + f2fs_put_page(xpage, 1); + + return 0; +} + static int lookup_all_xattrs(struct inode *inode, struct page *ipage, unsigned int index, unsigned int len, const char *name, struct f2fs_xattr_entry **xe, void **base_addr) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); void *cur_addr, *txattr_addr, *last_addr = NULL; nid_t xnid = F2FS_I(inode)->i_xattr_nid; unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0; @@ -310,23 +352,11 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, /* read from inline xattr */ if (inline_size) { - struct page *page = NULL; - void *inline_addr; - - if (ipage) { - inline_addr = inline_xattr_addr(ipage); - } else { - page = get_node_page(sbi, inode->i_ino); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto out; - } - inline_addr = inline_xattr_addr(page); - } - memcpy(txattr_addr, inline_addr, inline_size); - f2fs_put_page(page, 1); + err = read_inline_xattr(inode, ipage, txattr_addr); + if (err) + goto out; - *xe = __find_inline_xattr(txattr_addr, &last_addr, + *xe = __find_inline_xattr(inode, txattr_addr, &last_addr, index, len, name); if (*xe) goto check; @@ -334,19 +364,9 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, /* read from xattr node block */ if (xnid) { - struct page *xpage; - void *xattr_addr; - - /* The inode already has an extended attribute block. */ - xpage = get_node_page(sbi, xnid); - if (IS_ERR(xpage)) { - err = PTR_ERR(xpage); + err = read_xattr_block(inode, txattr_addr); + if (err) goto out; - } - - xattr_addr = page_address(xpage); - memcpy(txattr_addr + inline_size, xattr_addr, size); - f2fs_put_page(xpage, 1); } if (last_addr) @@ -371,7 +391,6 @@ out: static int read_all_xattrs(struct inode *inode, struct page *ipage, void **base_addr) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_xattr_header *header; nid_t xnid = F2FS_I(inode)->i_xattr_nid; unsigned int size = VALID_XATTR_BLOCK_SIZE; @@ -386,38 +405,16 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage, /* read from inline xattr */ if (inline_size) { - struct page *page = NULL; - void *inline_addr; - - if (ipage) { - inline_addr = inline_xattr_addr(ipage); - } else { - page = get_node_page(sbi, inode->i_ino); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto fail; - } - inline_addr = inline_xattr_addr(page); - } - memcpy(txattr_addr, inline_addr, inline_size); - f2fs_put_page(page, 1); + err = read_inline_xattr(inode, ipage, txattr_addr); + if (err) + goto fail; } /* read from xattr node block */ if (xnid) { - struct page *xpage; - void *xattr_addr; - - /* The inode already has an extended attribute block. */ - xpage = get_node_page(sbi, xnid); - if (IS_ERR(xpage)) { - err = PTR_ERR(xpage); + err = read_xattr_block(inode, txattr_addr); + if (err) goto fail; - } - - xattr_addr = page_address(xpage); - memcpy(txattr_addr + inline_size, xattr_addr, size); - f2fs_put_page(xpage, 1); } header = XATTR_HDR(txattr_addr); @@ -439,10 +436,12 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); size_t inline_size = inline_xattr_size(inode); + struct page *in_page = NULL; void *xattr_addr; + void *inline_addr = NULL; struct page *xpage; nid_t new_nid = 0; - int err; + int err = 0; if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid) if (!alloc_nid(sbi, &new_nid)) @@ -450,30 +449,30 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, /* write to inline xattr */ if (inline_size) { - struct page *page = NULL; - void *inline_addr; - if (ipage) { - inline_addr = inline_xattr_addr(ipage); - f2fs_wait_on_page_writeback(ipage, NODE, true); - set_page_dirty(ipage); + inline_addr = inline_xattr_addr(inode, ipage); } else { - page = get_node_page(sbi, inode->i_ino); - if (IS_ERR(page)) { + in_page = get_node_page(sbi, inode->i_ino); + if (IS_ERR(in_page)) { alloc_nid_failed(sbi, new_nid); - return PTR_ERR(page); + return PTR_ERR(in_page); } - inline_addr = inline_xattr_addr(page); - f2fs_wait_on_page_writeback(page, NODE, true); + inline_addr = inline_xattr_addr(inode, in_page); } - memcpy(inline_addr, txattr_addr, inline_size); - f2fs_put_page(page, 1); + f2fs_wait_on_page_writeback(ipage ? ipage : in_page, + NODE, true); /* no need to use xattr node block */ if (hsize <= inline_size) { - err = truncate_xattr_node(inode, ipage); + err = truncate_xattr_node(inode); alloc_nid_failed(sbi, new_nid); - return err; + if (err) { + f2fs_put_page(in_page, 1); + return err; + } + memcpy(inline_addr, txattr_addr, inline_size); + set_page_dirty(ipage ? ipage : in_page); + goto in_page_out; } } @@ -482,7 +481,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); if (IS_ERR(xpage)) { alloc_nid_failed(sbi, new_nid); - return PTR_ERR(xpage); + goto in_page_out; } f2fs_bug_on(sbi, new_nid); f2fs_wait_on_page_writeback(xpage, NODE, true); @@ -492,17 +491,24 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, xpage = new_node_page(&dn, XATTR_NODE_OFFSET); if (IS_ERR(xpage)) { alloc_nid_failed(sbi, new_nid); - return PTR_ERR(xpage); + goto in_page_out; } alloc_nid_done(sbi, new_nid); } - xattr_addr = page_address(xpage); + + if (inline_size) + memcpy(inline_addr, txattr_addr, inline_size); memcpy(xattr_addr, txattr_addr + inline_size, VALID_XATTR_BLOCK_SIZE); + + if (inline_size) + set_page_dirty(ipage ? ipage : in_page); set_page_dirty(xpage); - f2fs_put_page(xpage, 1); - return 0; + f2fs_put_page(xpage, 1); +in_page_out: + f2fs_put_page(in_page, 1); + return err; } int f2fs_getxattr(struct inode *inode, int index, const char *name, @@ -721,6 +727,10 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err; + err = dquot_initialize(inode); + if (err) + return err; + /* this case is only from init_inode_metadata */ if (ipage) return __f2fs_setxattr(inode, index, name, value, diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index 0ac4c1f73fbd..25177e6bd603 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h @@ -103,7 +103,7 @@ static inline unsigned int isonum_733(char *p) /* Ignore bigendian datum due to broken mastering programs */ return get_unaligned_le32(p); } -extern int iso_date(char *, int); +extern int iso_date(u8 *, int); struct inode; /* To make gcc happy */ diff --git a/fs/isofs/rock.h b/fs/isofs/rock.h index ed09e2b08637..f835976ce033 100644 --- a/fs/isofs/rock.h +++ b/fs/isofs/rock.h @@ -65,7 +65,7 @@ struct RR_PL_s { }; struct stamp { - char time[7]; + __u8 time[7]; /* actually 6 unsigned, 1 signed */ } __attribute__ ((packed)); struct RR_TF_s { diff --git a/fs/isofs/util.c b/fs/isofs/util.c index 005a15cfd30a..37860fea364d 100644 --- a/fs/isofs/util.c +++ b/fs/isofs/util.c @@ -15,7 +15,7 @@ * to GMT. Thus we should always be correct. */ -int iso_date(char * p, int flag) +int iso_date(u8 *p, int flag) { int year, month, day, hour, minute, second, tz; int crtime; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8e425f2c5ddd..6fef53f18dcf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -242,15 +242,12 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE }; const u32 nfs4_fs_locations_bitmap[3] = { - FATTR4_WORD0_TYPE - | FATTR4_WORD0_CHANGE + FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE | FATTR4_WORD0_FSID | FATTR4_WORD0_FILEID | FATTR4_WORD0_FS_LOCATIONS, - FATTR4_WORD1_MODE - | FATTR4_WORD1_NUMLINKS - | FATTR4_WORD1_OWNER + FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV | FATTR4_WORD1_SPACE_USED @@ -6351,9 +6348,7 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, struct page *page) { struct nfs_server *server = NFS_SERVER(dir); - u32 bitmask[3] = { - [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, - }; + u32 bitmask[3]; struct nfs4_fs_locations_arg args = { .dir_fh = NFS_FH(dir), .name = name, @@ -6372,12 +6367,15 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, dprintk("%s: start\n", __func__); + bitmask[0] = nfs4_fattr_bitmap[0] | FATTR4_WORD0_FS_LOCATIONS; + bitmask[1] = nfs4_fattr_bitmap[1]; + /* Ask for the fileid of the absent filesystem if mounted_on_fileid * is not supported */ if (NFS_SERVER(dir)->attr_bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) - bitmask[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID; + bitmask[0] &= ~FATTR4_WORD0_FILEID; else - bitmask[0] |= FATTR4_WORD0_FILEID; + bitmask[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; nfs_fattr_init(&fs_locations->fattr); fs_locations->server = server; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f1268280244e..3149f7e58d6f 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1322,7 +1322,7 @@ static int nfs_parse_mount_options(char *raw, mnt->options |= NFS_OPTION_MIGRATION; break; case Opt_nomigration: - mnt->options &= NFS_OPTION_MIGRATION; + mnt->options &= ~NFS_OPTION_MIGRATION; break; /* diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ca9ebc3242d3..421935f3d909 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3829,7 +3829,8 @@ static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, statei { struct nfs4_stid *ret; - ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID); + ret = find_stateid_by_type(cl, s, + NFS4_DELEG_STID|NFS4_REVOKED_DELEG_STID); if (!ret) return NULL; return delegstateid(ret); @@ -3852,6 +3853,12 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open, deleg = find_deleg_stateid(cl, &open->op_delegate_stateid); if (deleg == NULL) goto out; + if (deleg->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) { + nfs4_put_stid(&deleg->dl_stid); + if (cl->cl_minorversion) + status = nfserr_deleg_revoked; + goto out; + } flags = share_access_to_flags(open->op_share_access); status = nfs4_check_delegmode(deleg, flags); if (status) { @@ -4696,6 +4703,16 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, struct nfs4_stid **s, struct nfsd_net *nn) { __be32 status; + bool return_revoked = false; + + /* + * only return revoked delegations if explicitly asked. + * otherwise we report revoked or bad_stateid status. + */ + if (typemask & NFS4_REVOKED_DELEG_STID) + return_revoked = true; + else if (typemask & NFS4_DELEG_STID) + typemask |= NFS4_REVOKED_DELEG_STID; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return nfserr_bad_stateid; @@ -4710,6 +4727,12 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, *s = find_stateid_by_type(cstate->clp, stateid, typemask); if (!*s) return nfserr_bad_stateid; + if (((*s)->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) { + nfs4_put_stid(*s); + if (cstate->minorversion) + return nfserr_deleg_revoked; + return nfserr_bad_stateid; + } return nfs_ok; } diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 2f27c935bd57..34c22fe4eca0 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1945,8 +1945,6 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci, "failed to get inode block.\n"); return err; } - mark_buffer_dirty(ibh); - nilfs_mdt_mark_dirty(ifile); spin_lock(&nilfs->ns_inode_lock); if (likely(!ii->i_bh)) ii->i_bh = ibh; @@ -1955,6 +1953,10 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci, goto retry; } + // Always redirty the buffer to avoid race condition + mark_buffer_dirty(ii->i_bh); + nilfs_mdt_mark_dirty(ifile); + clear_bit(NILFS_I_QUEUED, &ii->i_state); set_bit(NILFS_I_BUSY, &ii->i_state); list_move_tail(&ii->i_dirty, &sci->sc_dirty_files); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index c2a975e4a711..fef1caeddf54 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -36,6 +36,8 @@ #define F2FS_NODE_INO(sbi) (sbi->node_ino_num) #define F2FS_META_INO(sbi) (sbi->meta_ino_num) +#define F2FS_MAX_QUOTAS 3 + #define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */ #define F2FS_IO_SIZE_KB(sbi) (1 << ((sbi)->write_io_size_bits + 2)) /* KB */ #define F2FS_IO_SIZE_BYTES(sbi) (1 << ((sbi)->write_io_size_bits + 12)) /* B */ @@ -108,7 +110,8 @@ struct f2fs_super_block { __u8 encryption_level; /* versioning level for encryption */ __u8 encrypt_pw_salt[16]; /* Salt used for string2key algorithm */ struct f2fs_device devs[MAX_DEVICES]; /* device list */ - __u8 reserved[327]; /* valid reserved region */ + __le32 qf_ino[F2FS_MAX_QUOTAS]; /* quota inode numbers */ + __u8 reserved[315]; /* valid reserved region */ } __packed; /* @@ -184,7 +187,8 @@ struct f2fs_extent { } __packed; #define F2FS_NAME_LEN 255 -#define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */ +/* 200 bytes for inline xattrs by default */ +#define DEFAULT_INLINE_XATTR_ADDRS 50 #define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ #define CUR_ADDRS_PER_INODE(inode) (DEF_ADDRS_PER_INODE - \ get_extra_isize(inode)) @@ -238,7 +242,7 @@ struct f2fs_inode { union { struct { __le16 i_extra_isize; /* extra inode attribute size */ - __le16 i_padding; /* padding */ + __le16 i_inline_xattr_size; /* inline xattr size, unit: 4 bytes */ __le32 i_projid; /* project id */ __le32 i_inode_checksum;/* inode meta checksum */ __le32 i_extra_end[0]; /* for attribute size calculation */ diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index f0f1793cfa49..3a5af09af18b 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -50,13 +50,13 @@ struct tk_read_base { * @tai_offset: The current UTC to TAI offset in seconds * @clock_was_set_seq: The sequence number of clock was set events * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second - * @raw_time: Monotonic raw base time in timespec64 format + * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds * @cycle_interval: Number of clock cycles in one NTP interval * @xtime_interval: Number of clock shifted nano seconds in one NTP * interval. * @xtime_remainder: Shifted nano seconds left over when rounding * @cycle_interval - * @raw_interval: Raw nano seconds accumulated per NTP interval. + * @raw_interval: Shifted raw nano seconds accumulated per NTP interval. * @ntp_error: Difference between accumulated time and NTP time in ntp * shifted nano seconds. * @ntp_error_shift: Shift conversion between clock shifted nano seconds and @@ -91,13 +91,13 @@ struct timekeeper { s32 tai_offset; unsigned int clock_was_set_seq; ktime_t next_leap_ktime; - struct timespec64 raw_time; + u64 raw_sec; /* The following members are for timekeeping internal use */ cycle_t cycle_interval; u64 xtime_interval; s64 xtime_remainder; - u32 raw_interval; + u64 raw_interval; /* The ntp_tick_length() value currently being used. * This cached copy ensures we consistently apply the tick * length for an entire tick, as ntp_tick_length may change diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 7063bbcca03b..589df6f73789 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -128,6 +128,18 @@ TRACE_DEFINE_ENUM(CP_TRIMMED); { CP_DISCARD, "Discard" }, \ { CP_UMOUNT | CP_TRIMMED, "Umount,Trimmed" }) +#define show_fsync_cpreason(type) \ + __print_symbolic(type, \ + { CP_NO_NEEDED, "no needed" }, \ + { CP_NON_REGULAR, "non regular" }, \ + { CP_HARDLINK, "hardlink" }, \ + { CP_SB_NEED_CP, "sb needs cp" }, \ + { CP_WRONG_PINO, "wrong pino" }, \ + { CP_NO_SPC_ROLL, "no space roll forward" }, \ + { CP_NODE_NEED_CP, "node needs cp" }, \ + { CP_FASTBOOT_MODE, "fastboot mode" }, \ + { CP_SPEC_LOG_NUM, "log type is 2" }) + struct victim_sel_policy; struct f2fs_map_blocks; @@ -202,14 +214,14 @@ DEFINE_EVENT(f2fs__inode, f2fs_sync_file_enter, TRACE_EVENT(f2fs_sync_file_exit, - TP_PROTO(struct inode *inode, int need_cp, int datasync, int ret), + TP_PROTO(struct inode *inode, int cp_reason, int datasync, int ret), - TP_ARGS(inode, need_cp, datasync, ret), + TP_ARGS(inode, cp_reason, datasync, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(int, need_cp) + __field(int, cp_reason) __field(int, datasync) __field(int, ret) ), @@ -217,15 +229,15 @@ TRACE_EVENT(f2fs_sync_file_exit, TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; - __entry->need_cp = need_cp; + __entry->cp_reason = cp_reason; __entry->datasync = datasync; __entry->ret = ret; ), - TP_printk("dev = (%d,%d), ino = %lu, checkpoint is %s, " + TP_printk("dev = (%d,%d), ino = %lu, cp_reason: %s, " "datasync = %d, ret = %d", show_dev_ino(__entry), - __entry->need_cp ? "needed" : "not needed", + show_fsync_cpreason(__entry->cp_reason), __entry->datasync, __entry->ret) ); @@ -716,6 +728,91 @@ TRACE_EVENT(f2fs_get_victim, __entry->free) ); +TRACE_EVENT(f2fs_lookup_start, + + TP_PROTO(struct inode *dir, struct dentry *dentry, unsigned int flags), + + TP_ARGS(dir, dentry, flags), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(const char *, name) + __field(unsigned int, flags) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->ino = dir->i_ino; + __entry->name = dentry->d_name.name; + __entry->flags = flags; + ), + + TP_printk("dev = (%d,%d), pino = %lu, name:%s, flags:%u", + show_dev_ino(__entry), + __entry->name, + __entry->flags) +); + +TRACE_EVENT(f2fs_lookup_end, + + TP_PROTO(struct inode *dir, struct dentry *dentry, nid_t ino, + int err), + + TP_ARGS(dir, dentry, ino, err), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(const char *, name) + __field(nid_t, cino) + __field(int, err) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->ino = dir->i_ino; + __entry->name = dentry->d_name.name; + __entry->cino = ino; + __entry->err = err; + ), + + TP_printk("dev = (%d,%d), pino = %lu, name:%s, ino:%u, err:%d", + show_dev_ino(__entry), + __entry->name, + __entry->cino, + __entry->err) +); + +TRACE_EVENT(f2fs_readdir, + + TP_PROTO(struct inode *dir, loff_t start_pos, loff_t end_pos, int err), + + TP_ARGS(dir, start_pos, end_pos, err), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, start) + __field(loff_t, end) + __field(int, err) + ), + + TP_fast_assign( + __entry->dev = dir->i_sb->s_dev; + __entry->ino = dir->i_ino; + __entry->start = start_pos; + __entry->end = end_pos; + __entry->err = err; + ), + + TP_printk("dev = (%d,%d), ino = %lu, start_pos:%llu, end_pos:%llu, err:%d", + show_dev_ino(__entry), + __entry->start, + __entry->end, + __entry->err) +); + TRACE_EVENT(f2fs_fallocate, TP_PROTO(struct inode *inode, int mode, @@ -1274,6 +1371,13 @@ DEFINE_EVENT(f2fs_discard, f2fs_issue_discard, TP_ARGS(dev, blkstart, blklen) ); +DEFINE_EVENT(f2fs_discard, f2fs_remove_discard, + + TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen), + + TP_ARGS(dev, blkstart, blklen) +); + TRACE_EVENT(f2fs_issue_reset_zone, TP_PROTO(struct block_device *dev, block_t blkstart), diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 5664ca07c9c7..a01a076ea060 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -455,20 +455,22 @@ TRACE_EVENT(svc_recv, TP_ARGS(rqst, status), TP_STRUCT__entry( - __field(struct sockaddr *, addr) __field(__be32, xid) __field(int, status) __field(unsigned long, flags) + __dynamic_array(unsigned char, addr, rqst->rq_addrlen) ), TP_fast_assign( - __entry->addr = (struct sockaddr *)&rqst->rq_addr; __entry->xid = status > 0 ? rqst->rq_xid : 0; __entry->status = status; __entry->flags = rqst->rq_flags; + memcpy(__get_dynamic_array(addr), + &rqst->rq_addr, rqst->rq_addrlen); ), - TP_printk("addr=%pIScp xid=0x%x status=%d flags=%s", __entry->addr, + TP_printk("addr=%pIScp xid=0x%x status=%d flags=%s", + (struct sockaddr *)__get_dynamic_array(addr), be32_to_cpu(__entry->xid), __entry->status, show_rqstp_flags(__entry->flags)) ); @@ -480,22 +482,23 @@ DECLARE_EVENT_CLASS(svc_rqst_status, TP_ARGS(rqst, status), TP_STRUCT__entry( - __field(struct sockaddr *, addr) __field(__be32, xid) - __field(int, dropme) __field(int, status) __field(unsigned long, flags) + __dynamic_array(unsigned char, addr, rqst->rq_addrlen) ), TP_fast_assign( - __entry->addr = (struct sockaddr *)&rqst->rq_addr; __entry->xid = rqst->rq_xid; __entry->status = status; __entry->flags = rqst->rq_flags; + memcpy(__get_dynamic_array(addr), + &rqst->rq_addr, rqst->rq_addrlen); ), TP_printk("addr=%pIScp rq_xid=0x%x status=%d flags=%s", - __entry->addr, be32_to_cpu(__entry->xid), + (struct sockaddr *)__get_dynamic_array(addr), + be32_to_cpu(__entry->xid), __entry->status, show_rqstp_flags(__entry->flags)) ); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1d91c012b5d8..c6c1a2a20474 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6632,6 +6632,12 @@ static int init_rootdomain(struct root_domain *rd) if (!zalloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) goto free_dlo_mask; +#ifdef HAVE_RT_PUSH_IPI + rd->rto_cpu = -1; + raw_spin_lock_init(&rd->rto_lock); + init_irq_work(&rd->rto_push_work, rto_push_irq_work_func); +#endif + init_dl_bw(&rd->dl_bw); if (cpudl_init(&rd->cpudl) != 0) goto free_dlo_mask; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index af6a7f424d94..c290db7f289a 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -68,10 +68,6 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) raw_spin_unlock(&rt_b->rt_runtime_lock); } -#if defined(CONFIG_SMP) && defined(HAVE_RT_PUSH_IPI) -static void push_irq_work_func(struct irq_work *work); -#endif - void init_rt_rq(struct rt_rq *rt_rq) { struct rt_prio_array *array; @@ -91,13 +87,6 @@ void init_rt_rq(struct rt_rq *rt_rq) rt_rq->rt_nr_migratory = 0; rt_rq->overloaded = 0; plist_head_init(&rt_rq->pushable_tasks); - -#ifdef HAVE_RT_PUSH_IPI - rt_rq->push_flags = 0; - rt_rq->push_cpu = nr_cpu_ids; - raw_spin_lock_init(&rt_rq->push_lock); - init_irq_work(&rt_rq->push_work, push_irq_work_func); -#endif #endif /* CONFIG_SMP */ /* We start is dequeued state, because no RT tasks are queued */ rt_rq->rt_queued = 0; @@ -2104,160 +2093,166 @@ static void push_rt_tasks(struct rq *rq) } #ifdef HAVE_RT_PUSH_IPI + /* - * The search for the next cpu always starts at rq->cpu and ends - * when we reach rq->cpu again. It will never return rq->cpu. - * This returns the next cpu to check, or nr_cpu_ids if the loop - * is complete. + * When a high priority task schedules out from a CPU and a lower priority + * task is scheduled in, a check is made to see if there's any RT tasks + * on other CPUs that are waiting to run because a higher priority RT task + * is currently running on its CPU. In this case, the CPU with multiple RT + * tasks queued on it (overloaded) needs to be notified that a CPU has opened + * up that may be able to run one of its non-running queued RT tasks. + * + * All CPUs with overloaded RT tasks need to be notified as there is currently + * no way to know which of these CPUs have the highest priority task waiting + * to run. Instead of trying to take a spinlock on each of these CPUs, + * which has shown to cause large latency when done on machines with many + * CPUs, sending an IPI to the CPUs to have them push off the overloaded + * RT tasks waiting to run. + * + * Just sending an IPI to each of the CPUs is also an issue, as on large + * count CPU machines, this can cause an IPI storm on a CPU, especially + * if its the only CPU with multiple RT tasks queued, and a large number + * of CPUs scheduling a lower priority task at the same time. + * + * Each root domain has its own irq work function that can iterate over + * all CPUs with RT overloaded tasks. Since all CPUs with overloaded RT + * tassk must be checked if there's one or many CPUs that are lowering + * their priority, there's a single irq work iterator that will try to + * push off RT tasks that are waiting to run. + * + * When a CPU schedules a lower priority task, it will kick off the + * irq work iterator that will jump to each CPU with overloaded RT tasks. + * As it only takes the first CPU that schedules a lower priority task + * to start the process, the rto_start variable is incremented and if + * the atomic result is one, then that CPU will try to take the rto_lock. + * This prevents high contention on the lock as the process handles all + * CPUs scheduling lower priority tasks. + * + * All CPUs that are scheduling a lower priority task will increment the + * rt_loop_next variable. This will make sure that the irq work iterator + * checks all RT overloaded CPUs whenever a CPU schedules a new lower + * priority task, even if the iterator is in the middle of a scan. Incrementing + * the rt_loop_next will cause the iterator to perform another scan. * - * rq->rt.push_cpu holds the last cpu returned by this function, - * or if this is the first instance, it must hold rq->cpu. */ static int rto_next_cpu(struct rq *rq) { - int prev_cpu = rq->rt.push_cpu; + struct root_domain *rd = rq->rd; + int next; int cpu; - cpu = cpumask_next(prev_cpu, rq->rd->rto_mask); - /* - * If the previous cpu is less than the rq's CPU, then it already - * passed the end of the mask, and has started from the beginning. - * We end if the next CPU is greater or equal to rq's CPU. + * When starting the IPI RT pushing, the rto_cpu is set to -1, + * rt_next_cpu() will simply return the first CPU found in + * the rto_mask. + * + * If rto_next_cpu() is called with rto_cpu is a valid cpu, it + * will return the next CPU found in the rto_mask. + * + * If there are no more CPUs left in the rto_mask, then a check is made + * against rto_loop and rto_loop_next. rto_loop is only updated with + * the rto_lock held, but any CPU may increment the rto_loop_next + * without any locking. */ - if (prev_cpu < rq->cpu) { - if (cpu >= rq->cpu) - return nr_cpu_ids; + for (;;) { - } else if (cpu >= nr_cpu_ids) { - /* - * We passed the end of the mask, start at the beginning. - * If the result is greater or equal to the rq's CPU, then - * the loop is finished. - */ - cpu = cpumask_first(rq->rd->rto_mask); - if (cpu >= rq->cpu) - return nr_cpu_ids; - } - rq->rt.push_cpu = cpu; + /* When rto_cpu is -1 this acts like cpumask_first() */ + cpu = cpumask_next(rd->rto_cpu, rd->rto_mask); - /* Return cpu to let the caller know if the loop is finished or not */ - return cpu; -} + rd->rto_cpu = cpu; -static int find_next_push_cpu(struct rq *rq) -{ - struct rq *next_rq; - int cpu; + if (cpu < nr_cpu_ids) + return cpu; - while (1) { - cpu = rto_next_cpu(rq); - if (cpu >= nr_cpu_ids) - break; - next_rq = cpu_rq(cpu); + rd->rto_cpu = -1; - /* Make sure the next rq can push to this rq */ - if (next_rq->rt.highest_prio.next < rq->rt.highest_prio.curr) + /* + * ACQUIRE ensures we see the @rto_mask changes + * made prior to the @next value observed. + * + * Matches WMB in rt_set_overload(). + */ + next = atomic_read_acquire(&rd->rto_loop_next); + + if (rd->rto_loop == next) break; + + rd->rto_loop = next; } - return cpu; + return -1; +} + +static inline bool rto_start_trylock(atomic_t *v) +{ + return !atomic_cmpxchg_acquire(v, 0, 1); } -#define RT_PUSH_IPI_EXECUTING 1 -#define RT_PUSH_IPI_RESTART 2 +static inline void rto_start_unlock(atomic_t *v) +{ + atomic_set_release(v, 0); +} static void tell_cpu_to_push(struct rq *rq) { - int cpu; + int cpu = -1; - if (rq->rt.push_flags & RT_PUSH_IPI_EXECUTING) { - raw_spin_lock(&rq->rt.push_lock); - /* Make sure it's still executing */ - if (rq->rt.push_flags & RT_PUSH_IPI_EXECUTING) { - /* - * Tell the IPI to restart the loop as things have - * changed since it started. - */ - rq->rt.push_flags |= RT_PUSH_IPI_RESTART; - raw_spin_unlock(&rq->rt.push_lock); - return; - } - raw_spin_unlock(&rq->rt.push_lock); - } + /* Keep the loop going if the IPI is currently active */ + atomic_inc(&rq->rd->rto_loop_next); - /* When here, there's no IPI going around */ - - rq->rt.push_cpu = rq->cpu; - cpu = find_next_push_cpu(rq); - if (cpu >= nr_cpu_ids) + /* Only one CPU can initiate a loop at a time */ + if (!rto_start_trylock(&rq->rd->rto_loop_start)) return; - rq->rt.push_flags = RT_PUSH_IPI_EXECUTING; + raw_spin_lock(&rq->rd->rto_lock); + + /* + * The rto_cpu is updated under the lock, if it has a valid cpu + * then the IPI is still running and will continue due to the + * update to loop_next, and nothing needs to be done here. + * Otherwise it is finishing up and an ipi needs to be sent. + */ + if (rq->rd->rto_cpu < 0) + cpu = rto_next_cpu(rq); + + raw_spin_unlock(&rq->rd->rto_lock); - irq_work_queue_on(&rq->rt.push_work, cpu); + rto_start_unlock(&rq->rd->rto_loop_start); + + if (cpu >= 0) + irq_work_queue_on(&rq->rd->rto_push_work, cpu); } /* Called from hardirq context */ -static void try_to_push_tasks(void *arg) +void rto_push_irq_work_func(struct irq_work *work) { - struct rt_rq *rt_rq = arg; - struct rq *rq, *src_rq; - int this_cpu; + struct rq *rq; int cpu; - this_cpu = rt_rq->push_cpu; + rq = this_rq(); - /* Paranoid check */ - BUG_ON(this_cpu != smp_processor_id()); - - rq = cpu_rq(this_cpu); - src_rq = rq_of_rt_rq(rt_rq); - -again: + /* + * We do not need to grab the lock to check for has_pushable_tasks. + * When it gets updated, a check is made if a push is possible. + */ if (has_pushable_tasks(rq)) { raw_spin_lock(&rq->lock); - push_rt_task(rq); + push_rt_tasks(rq); raw_spin_unlock(&rq->lock); } - /* Pass the IPI to the next rt overloaded queue */ - raw_spin_lock(&rt_rq->push_lock); - /* - * If the source queue changed since the IPI went out, - * we need to restart the search from that CPU again. - */ - if (rt_rq->push_flags & RT_PUSH_IPI_RESTART) { - rt_rq->push_flags &= ~RT_PUSH_IPI_RESTART; - rt_rq->push_cpu = src_rq->cpu; - } + raw_spin_lock(&rq->rd->rto_lock); - cpu = find_next_push_cpu(src_rq); + /* Pass the IPI to the next rt overloaded queue */ + cpu = rto_next_cpu(rq); - if (cpu >= nr_cpu_ids) - rt_rq->push_flags &= ~RT_PUSH_IPI_EXECUTING; - raw_spin_unlock(&rt_rq->push_lock); + raw_spin_unlock(&rq->rd->rto_lock); - if (cpu >= nr_cpu_ids) + if (cpu < 0) return; - /* - * It is possible that a restart caused this CPU to be - * chosen again. Don't bother with an IPI, just see if we - * have more to push. - */ - if (unlikely(cpu == rq->cpu)) - goto again; - /* Try the next RT overloaded CPU */ - irq_work_queue_on(&rt_rq->push_work, cpu); -} - -static void push_irq_work_func(struct irq_work *work) -{ - struct rt_rq *rt_rq = container_of(work, struct rt_rq, push_work); - - try_to_push_tasks(rt_rq); + irq_work_queue_on(&rq->rd->rto_push_work, cpu); } #endif /* HAVE_RT_PUSH_IPI */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 7426ae4dced3..10c978fab1a3 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -534,7 +534,7 @@ static inline int rt_bandwidth_enabled(void) } /* RT IPI pull logic requires IRQ_WORK */ -#ifdef CONFIG_IRQ_WORK +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_SMP) # define HAVE_RT_PUSH_IPI #endif @@ -555,12 +555,6 @@ struct rt_rq { unsigned long rt_nr_total; int overloaded; struct plist_head pushable_tasks; -#ifdef HAVE_RT_PUSH_IPI - int push_flags; - int push_cpu; - struct irq_work push_work; - raw_spinlock_t push_lock; -#endif #endif /* CONFIG_SMP */ int rt_queued; @@ -653,6 +647,19 @@ struct root_domain { struct dl_bw dl_bw; struct cpudl cpudl; +#ifdef HAVE_RT_PUSH_IPI + /* + * For IPI pull requests, loop across the rto_mask. + */ + struct irq_work rto_push_work; + raw_spinlock_t rto_lock; + /* These are only updated and read within rto_lock */ + int rto_loop; + int rto_cpu; + /* These atomics are updated outside of a lock */ + atomic_t rto_loop_next; + atomic_t rto_loop_start; +#endif /* * The "RT overload" flag: it gets set if a CPU has more than * one runnable RT task. @@ -669,6 +676,9 @@ struct root_domain { extern struct root_domain def_root_domain; +#ifdef HAVE_RT_PUSH_IPI +extern void rto_push_irq_work_func(struct irq_work *work); +#endif #endif /* CONFIG_SMP */ /* diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 738f3467d169..fc86fdcce932 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -70,6 +70,10 @@ static inline void tk_normalize_xtime(struct timekeeper *tk) tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift; tk->xtime_sec++; } + while (tk->tkr_raw.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_raw.shift)) { + tk->tkr_raw.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_raw.shift; + tk->raw_sec++; + } } static inline struct timespec64 tk_xtime(struct timekeeper *tk) @@ -277,18 +281,19 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) /* Go back from cycles -> shifted ns */ tk->xtime_interval = (u64) interval * clock->mult; tk->xtime_remainder = ntpinterval - tk->xtime_interval; - tk->raw_interval = - ((u64) interval * clock->mult) >> clock->shift; + tk->raw_interval = interval * clock->mult; /* if changing clocks, convert xtime_nsec shift units */ if (old_clock) { int shift_change = clock->shift - old_clock->shift; - if (shift_change < 0) + if (shift_change < 0) { tk->tkr_mono.xtime_nsec >>= -shift_change; - else + tk->tkr_raw.xtime_nsec >>= -shift_change; + } else { tk->tkr_mono.xtime_nsec <<= shift_change; + tk->tkr_raw.xtime_nsec <<= shift_change; + } } - tk->tkr_raw.xtime_nsec = 0; tk->tkr_mono.shift = clock->shift; tk->tkr_raw.shift = clock->shift; @@ -617,9 +622,6 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) nsec = (u32) tk->wall_to_monotonic.tv_nsec; tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); - /* Update the monotonic raw base */ - tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time); - /* * The sum of the nanoseconds portions of xtime and * wall_to_monotonic can be greater/equal one second. Take @@ -629,6 +631,11 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) if (nsec >= NSEC_PER_SEC) seconds++; tk->ktime_sec = seconds; + + /* Update the monotonic raw base */ + seconds = tk->raw_sec; + nsec = (u32)(tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift); + tk->tkr_raw.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); } /* must hold timekeeper_lock */ @@ -670,7 +677,6 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) static void timekeeping_forward_now(struct timekeeper *tk) { cycle_t cycle_now, delta; - s64 nsec; cycle_now = tk_clock_read(&tk->tkr_mono); delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask); @@ -682,10 +688,13 @@ static void timekeeping_forward_now(struct timekeeper *tk) /* If arch requires, add in get_arch_timeoffset() */ tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift; - tk_normalize_xtime(tk); - nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift); - timespec64_add_ns(&tk->raw_time, nsec); + tk->tkr_raw.xtime_nsec += delta * tk->tkr_raw.mult; + + /* If arch requires, add in get_arch_timeoffset() */ + tk->tkr_raw.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_raw.shift; + + tk_normalize_xtime(tk); } /** @@ -1179,19 +1188,18 @@ int timekeeping_notify(struct clocksource *clock) void getrawmonotonic64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; - struct timespec64 ts64; unsigned long seq; s64 nsecs; do { seq = read_seqcount_begin(&tk_core.seq); + ts->tv_sec = tk->raw_sec; nsecs = timekeeping_get_ns(&tk->tkr_raw); - ts64 = tk->raw_time; } while (read_seqcount_retry(&tk_core.seq, seq)); - timespec64_add_ns(&ts64, nsecs); - *ts = ts64; + ts->tv_nsec = 0; + timespec64_add_ns(ts, nsecs); } EXPORT_SYMBOL(getrawmonotonic64); @@ -1315,8 +1323,7 @@ void __init timekeeping_init(void) tk_setup_internals(tk, clock); tk_set_xtime(tk, &now); - tk->raw_time.tv_sec = 0; - tk->raw_time.tv_nsec = 0; + tk->raw_sec = 0; if (boot.tv_sec == 0 && boot.tv_nsec == 0) boot = tk_xtime(tk); @@ -1796,7 +1803,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, unsigned int *clock_set) { cycle_t interval = tk->cycle_interval << shift; - u64 raw_nsecs; + u64 snsec_per_sec; /* If the offset is smaller than a shifted interval, do nothing */ if (offset < interval) @@ -1811,14 +1818,12 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, *clock_set |= accumulate_nsecs_to_secs(tk); /* Accumulate raw time */ - raw_nsecs = (u64)tk->raw_interval << shift; - raw_nsecs += tk->raw_time.tv_nsec; - if (raw_nsecs >= NSEC_PER_SEC) { - u64 raw_secs = raw_nsecs; - raw_nsecs = do_div(raw_secs, NSEC_PER_SEC); - tk->raw_time.tv_sec += raw_secs; + tk->tkr_raw.xtime_nsec += tk->raw_interval << shift; + snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift; + while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) { + tk->tkr_raw.xtime_nsec -= snsec_per_sec; + tk->raw_sec++; } - tk->raw_time.tv_nsec = raw_nsecs; /* Accumulate error between NTP and clock interval */ tk->ntp_error += tk->ntp_tick << shift; diff --git a/lib/mpi/mpi-pow.c b/lib/mpi/mpi-pow.c index e24388a863a7..468fb7cd1221 100644 --- a/lib/mpi/mpi-pow.c +++ b/lib/mpi/mpi-pow.c @@ -26,6 +26,7 @@ * however I decided to publish this code under the plain GPL. */ +#include <linux/sched.h> #include <linux/string.h> #include "mpi-internal.h" #include "longlong.h" @@ -256,6 +257,7 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod) } e <<= 1; c--; + cond_resched(); } i--; diff --git a/net/9p/client.c b/net/9p/client.c index f5feac4ff4ec..3ff26eb1ea20 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -749,8 +749,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) } again: /* Wait for the response */ - err = wait_event_interruptible(*req->wq, - req->status >= REQ_STATUS_RCVD); + err = wait_event_killable(*req->wq, req->status >= REQ_STATUS_RCVD); /* * Make sure our req is coherent with regard to updates in other diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 6e70ddb158b4..2ddeecca5b12 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -290,8 +290,8 @@ req_retry: if (err == -ENOSPC) { chan->ring_bufs_avail = 0; spin_unlock_irqrestore(&chan->lock, flags); - err = wait_event_interruptible(*chan->vc_wq, - chan->ring_bufs_avail); + err = wait_event_killable(*chan->vc_wq, + chan->ring_bufs_avail); if (err == -ERESTARTSYS) return err; @@ -331,7 +331,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan, * Other zc request to finish here */ if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { - err = wait_event_interruptible(vp_wq, + err = wait_event_killable(vp_wq, (atomic_read(&vp_pinned) < chan->p9_max_pages)); if (err == -ERESTARTSYS) return err; @@ -475,8 +475,8 @@ req_retry_pinned: if (err == -ENOSPC) { chan->ring_bufs_avail = 0; spin_unlock_irqrestore(&chan->lock, flags); - err = wait_event_interruptible(*chan->vc_wq, - chan->ring_bufs_avail); + err = wait_event_killable(*chan->vc_wq, + chan->ring_bufs_avail); if (err == -ERESTARTSYS) goto err_out; @@ -493,8 +493,7 @@ req_retry_pinned: virtqueue_kick(chan->vq); spin_unlock_irqrestore(&chan->lock, flags); p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n"); - err = wait_event_interruptible(*req->wq, - req->status >= REQ_STATUS_RCVD); + err = wait_event_killable(*req->wq, req->status >= REQ_STATUS_RCVD); /* * Non kernel buffers are pinned, unpin them */ diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index f300d1cbfa91..097a1243c16c 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -808,6 +808,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, { struct ip_mreqn mreq; struct net_device *dev = NULL; + int midx; if (sk->sk_type == SOCK_STREAM) goto e_inval; @@ -852,11 +853,15 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = -EADDRNOTAVAIL; if (!dev) break; + + midx = l3mdev_master_ifindex(dev); + dev_put(dev); err = -EINVAL; if (sk->sk_bound_dev_if && - mreq.imr_ifindex != sk->sk_bound_dev_if) + mreq.imr_ifindex != sk->sk_bound_dev_if && + (!midx || midx != sk->sk_bound_dev_if)) break; inet->mc_index = mreq.imr_ifindex; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 4449ad1f8114..a4a30d2ca66f 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -583,16 +583,24 @@ done: if (val) { struct net_device *dev; + int midx; - if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val) - goto e_inval; + rcu_read_lock(); - dev = dev_get_by_index(net, val); + dev = dev_get_by_index_rcu(net, val); if (!dev) { + rcu_read_unlock(); retv = -ENODEV; break; } - dev_put(dev); + midx = l3mdev_master_ifindex_rcu(dev); + + rcu_read_unlock(); + + if (sk->sk_bound_dev_if && + sk->sk_bound_dev_if != val && + (!midx || midx != sk->sk_bound_dev_if)) + goto e_inval; } np->mcast_oif = val; retv = 0; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5c710f78163e..e367ce026db3 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3380,7 +3380,11 @@ static int ip6_route_dev_notify(struct notifier_block *this, net->ipv6.ip6_blk_hole_entry->dst.dev = dev; net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); #endif - } else if (event == NETDEV_UNREGISTER) { + } else if (event == NETDEV_UNREGISTER && + dev->reg_state != NETREG_UNREGISTERED) { + /* NETDEV_UNREGISTER could be fired for multiple times by + * netdev_wait_allrefs(). Make sure we only call this once. + */ in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 67fede656ea5..424aca76a192 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -682,7 +682,6 @@ struct ieee80211_if_mesh { const struct ieee80211_mesh_sync_ops *sync_ops; s64 sync_offset_clockdrift_max; spinlock_t sync_offset_lock; - bool adjusting_tbtt; /* mesh power save */ enum nl80211_mesh_power_mode nonpeer_pm; int ps_peers_light_sleep; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 9063e8e736ad..9e1ded80a992 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -295,8 +295,6 @@ int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata, /* Mesh PS mode. See IEEE802.11-2012 8.4.2.100.8 */ *pos |= ifmsh->ps_peers_deep_sleep ? IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL : 0x00; - *pos++ |= ifmsh->adjusting_tbtt ? - IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING : 0x00; *pos++ = 0x00; return 0; @@ -866,7 +864,6 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) ifmsh->mesh_cc_id = 0; /* Disabled */ /* register sync ops from extensible synchronization framework */ ifmsh->sync_ops = ieee80211_mesh_sync_ops_get(ifmsh->mesh_sp_id); - ifmsh->adjusting_tbtt = false; ifmsh->sync_offset_clockdrift_max = 0; set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags); ieee80211_mesh_root_setup(ifmsh); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index bd3d55eb21d4..9f02e54ad2a5 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -495,12 +495,14 @@ mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *addr, /* Userspace handles station allocation */ if (sdata->u.mesh.user_mpm || - sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED) - cfg80211_notify_new_peer_candidate(sdata->dev, addr, - elems->ie_start, - elems->total_len, - GFP_KERNEL); - else + sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED) { + if (mesh_peer_accepts_plinks(elems) && + mesh_plink_availables(sdata)) + cfg80211_notify_new_peer_candidate(sdata->dev, addr, + elems->ie_start, + elems->total_len, + GFP_KERNEL); + } else sta = __mesh_sta_info_alloc(sdata, addr); return sta; diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index 64bc22ad9496..16ed43fe4841 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -119,7 +119,6 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, */ if (elems->mesh_config && mesh_peer_tbtt_adjusting(elems)) { - clear_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN); msync_dbg(sdata, "STA %pM : is adjusting TBTT\n", sta->sta.addr); goto no_sync; @@ -168,11 +167,9 @@ static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata, struct beacon_data *beacon) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - u8 cap; WARN_ON(ifmsh->mesh_sp_id != IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET); WARN_ON(!rcu_read_lock_held()); - cap = beacon->meshconf->meshconf_cap; spin_lock_bh(&ifmsh->sync_offset_lock); @@ -186,21 +183,13 @@ static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata, "TBTT : kicking off TBTT adjustment with clockdrift_max=%lld\n", ifmsh->sync_offset_clockdrift_max); set_bit(MESH_WORK_DRIFT_ADJUST, &ifmsh->wrkq_flags); - - ifmsh->adjusting_tbtt = true; } else { msync_dbg(sdata, "TBTT : max clockdrift=%lld; too small to adjust\n", (long long)ifmsh->sync_offset_clockdrift_max); ifmsh->sync_offset_clockdrift_max = 0; - - ifmsh->adjusting_tbtt = false; } spin_unlock_bh(&ifmsh->sync_offset_lock); - - beacon->meshconf->meshconf_cap = ifmsh->adjusting_tbtt ? - IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING | cap : - ~IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING & cap; } static const struct sync_method sync_methods[] = { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2cb429d34c03..120e9ae04db3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1996,7 +1996,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, * is called on error from nf_tables_newrule(). */ expr = nft_expr_first(rule); - while (expr->ops && expr != nft_expr_last(rule)) { + while (expr != nft_expr_last(rule) && expr->ops) { nf_tables_expr_destroy(ctx, expr); expr = nft_expr_next(expr); } diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index 61d216eb7917..5d189c11d208 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -37,7 +37,7 @@ static void nft_queue_eval(const struct nft_expr *expr, if (priv->queues_total > 1) { if (priv->flags & NFT_QUEUE_FLAG_CPU_FANOUT) { - int cpu = smp_processor_id(); + int cpu = raw_smp_processor_id(); queue = priv->queuenum + cpu % priv->queues_total; } else { diff --git a/net/nfc/core.c b/net/nfc/core.c index c5a2c7e733b3..1471e4b0aa2c 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -1093,7 +1093,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, err_free_dev: kfree(dev); - return ERR_PTR(rc); + return NULL; } EXPORT_SYMBOL(nfc_allocate_device); diff --git a/net/rds/send.c b/net/rds/send.c index 6815f03324d7..1a3c6acdd3f8 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -959,6 +959,11 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, ret = rds_cmsg_rdma_map(rs, rm, cmsg); if (!ret) *allocated_mr = 1; + else if (ret == -ENODEV) + /* Accommodate the get_mr() case which can fail + * if connection isn't established yet. + */ + ret = -EAGAIN; break; case RDS_CMSG_ATOMIC_CSWP: case RDS_CMSG_ATOMIC_FADD: @@ -1072,8 +1077,12 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) /* Parse any control messages the user may have included. */ ret = rds_cmsg_send(rs, rm, msg, &allocated_mr); - if (ret) + if (ret) { + /* Trigger connection so that its ready for the next retry */ + if (ret == -EAGAIN) + rds_conn_connect_if_down(conn); goto out; + } if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) { printk_ratelimited(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 272edd7748a0..7f0f689b8d2b 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4457,10 +4457,6 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) if (!net_eq(current->nsproxy->net_ns, sock_net(sk))) return -EINVAL; - /* Do not peel off from one netns to another one. */ - if (!net_eq(current->nsproxy->net_ns, sock_net(sk))) - return -EINVAL; - if (!asoc) return -EINVAL; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 9b5bd6d142dc..60324f7c72bd 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1209,10 +1209,14 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, if (signal_pending(current)) { err = sock_intr_errno(timeout); - goto out_wait_error; + sk->sk_state = SS_UNCONNECTED; + sock->state = SS_UNCONNECTED; + goto out_wait; } else if (timeout == 0) { err = -ETIMEDOUT; - goto out_wait_error; + sk->sk_state = SS_UNCONNECTED; + sock->state = SS_UNCONNECTED; + goto out_wait; } prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); @@ -1220,20 +1224,17 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, if (sk->sk_err) { err = -sk->sk_err; - goto out_wait_error; - } else + sk->sk_state = SS_UNCONNECTED; + sock->state = SS_UNCONNECTED; + } else { err = 0; + } out_wait: finish_wait(sk_sleep(sk), &wait); out: release_sock(sk); return err; - -out_wait_error: - sk->sk_state = SS_UNCONNECTED; - sock->state = SS_UNCONNECTED; - goto out_wait; } static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) @@ -1270,18 +1271,20 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) listener->sk_err == 0) { release_sock(listener); timeout = schedule_timeout(timeout); + finish_wait(sk_sleep(listener), &wait); lock_sock(listener); if (signal_pending(current)) { err = sock_intr_errno(timeout); - goto out_wait; + goto out; } else if (timeout == 0) { err = -EAGAIN; - goto out_wait; + goto out; } prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); } + finish_wait(sk_sleep(listener), &wait); if (listener->sk_err) err = -listener->sk_err; @@ -1301,19 +1304,15 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) */ if (err) { vconnected->rejected = true; - release_sock(connected); - sock_put(connected); - goto out_wait; + } else { + newsock->state = SS_CONNECTED; + sock_graft(connected, newsock); } - newsock->state = SS_CONNECTED; - sock_graft(connected, newsock); release_sock(connected); sock_put(connected); } -out_wait: - finish_wait(sk_sleep(listener), &wait); out: release_sock(listener); return err; @@ -1513,8 +1512,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, long timeout; int err; struct vsock_transport_send_notify_data send_data; - - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); sk = sock->sk; vsk = vsock_sk(sk); @@ -1557,11 +1555,10 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, if (err < 0) goto out; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - while (total_written < len) { ssize_t written; + add_wait_queue(sk_sleep(sk), &wait); while (vsock_stream_has_space(vsk) == 0 && sk->sk_err == 0 && !(sk->sk_shutdown & SEND_SHUTDOWN) && @@ -1570,27 +1567,30 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, /* Don't wait for non-blocking sockets. */ if (timeout == 0) { err = -EAGAIN; - goto out_wait; + remove_wait_queue(sk_sleep(sk), &wait); + goto out_err; } err = transport->notify_send_pre_block(vsk, &send_data); - if (err < 0) - goto out_wait; + if (err < 0) { + remove_wait_queue(sk_sleep(sk), &wait); + goto out_err; + } release_sock(sk); - timeout = schedule_timeout(timeout); + timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout); lock_sock(sk); if (signal_pending(current)) { err = sock_intr_errno(timeout); - goto out_wait; + remove_wait_queue(sk_sleep(sk), &wait); + goto out_err; } else if (timeout == 0) { err = -EAGAIN; - goto out_wait; + remove_wait_queue(sk_sleep(sk), &wait); + goto out_err; } - - prepare_to_wait(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); } + remove_wait_queue(sk_sleep(sk), &wait); /* These checks occur both as part of and after the loop * conditional since we need to check before and after @@ -1598,16 +1598,16 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, */ if (sk->sk_err) { err = -sk->sk_err; - goto out_wait; + goto out_err; } else if ((sk->sk_shutdown & SEND_SHUTDOWN) || (vsk->peer_shutdown & RCV_SHUTDOWN)) { err = -EPIPE; - goto out_wait; + goto out_err; } err = transport->notify_send_pre_enqueue(vsk, &send_data); if (err < 0) - goto out_wait; + goto out_err; /* Note that enqueue will only write as many bytes as are free * in the produce queue, so we don't need to ensure len is @@ -1620,7 +1620,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, len - total_written); if (written < 0) { err = -ENOMEM; - goto out_wait; + goto out_err; } total_written += written; @@ -1628,14 +1628,13 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, err = transport->notify_send_post_enqueue( vsk, written, &send_data); if (err < 0) - goto out_wait; + goto out_err; } -out_wait: +out_err: if (total_written > 0) err = total_written; - finish_wait(sk_sleep(sk), &wait); out: release_sock(sk); return err; @@ -1716,21 +1715,61 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (err < 0) goto out; - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); while (1) { - s64 ready = vsock_stream_has_data(vsk); + s64 ready; - if (ready < 0) { - /* Invalid queue pair content. XXX This should be - * changed to a connection reset in a later change. - */ + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + ready = vsock_stream_has_data(vsk); - err = -ENOMEM; - goto out_wait; - } else if (ready > 0) { + if (ready == 0) { + if (sk->sk_err != 0 || + (sk->sk_shutdown & RCV_SHUTDOWN) || + (vsk->peer_shutdown & SEND_SHUTDOWN)) { + finish_wait(sk_sleep(sk), &wait); + break; + } + /* Don't wait for non-blocking sockets. */ + if (timeout == 0) { + err = -EAGAIN; + finish_wait(sk_sleep(sk), &wait); + break; + } + + err = transport->notify_recv_pre_block( + vsk, target, &recv_data); + if (err < 0) { + finish_wait(sk_sleep(sk), &wait); + break; + } + release_sock(sk); + timeout = schedule_timeout(timeout); + lock_sock(sk); + + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + finish_wait(sk_sleep(sk), &wait); + break; + } else if (timeout == 0) { + err = -EAGAIN; + finish_wait(sk_sleep(sk), &wait); + break; + } + } else { ssize_t read; + finish_wait(sk_sleep(sk), &wait); + + if (ready < 0) { + /* Invalid queue pair content. XXX This should + * be changed to a connection reset in a later + * change. + */ + + err = -ENOMEM; + goto out; + } + err = transport->notify_recv_pre_dequeue( vsk, target, &recv_data); if (err < 0) @@ -1750,42 +1789,12 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, vsk, target, read, !(flags & MSG_PEEK), &recv_data); if (err < 0) - goto out_wait; + goto out; if (read >= target || flags & MSG_PEEK) break; target -= read; - } else { - if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN) - || (vsk->peer_shutdown & SEND_SHUTDOWN)) { - break; - } - /* Don't wait for non-blocking sockets. */ - if (timeout == 0) { - err = -EAGAIN; - break; - } - - err = transport->notify_recv_pre_block( - vsk, target, &recv_data); - if (err < 0) - break; - - release_sock(sk); - timeout = schedule_timeout(timeout); - lock_sock(sk); - - if (signal_pending(current)) { - err = sock_intr_errno(timeout); - break; - } else if (timeout == 0) { - err = -EAGAIN; - break; - } - - prepare_to_wait(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); } } @@ -1797,8 +1806,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (copied > 0) err = copied; -out_wait: - finish_wait(sk_sleep(sk), &wait); out: release_sock(sk); return err; diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index 2530669e2f94..cdbe90b37ae7 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -267,8 +267,10 @@ static void update_audio_tstamp(struct snd_pcm_substream *substream, runtime->rate); *audio_tstamp = ns_to_timespec(audio_nsecs); } - runtime->status->audio_tstamp = *audio_tstamp; - runtime->status->tstamp = *curr_tstamp; + if (!timespec_equal(&runtime->status->audio_tstamp, audio_tstamp)) { + runtime->status->audio_tstamp = *audio_tstamp; + runtime->status->tstamp = *curr_tstamp; + } /* * re-take a driver timestamp to let apps detect if the reference tstamp diff --git a/sound/core/timer_compat.c b/sound/core/timer_compat.c index 0b4b028e8e98..de9155eed727 100644 --- a/sound/core/timer_compat.c +++ b/sound/core/timer_compat.c @@ -40,11 +40,11 @@ static int snd_timer_user_info_compat(struct file *file, struct snd_timer *t; tu = file->private_data; - if (snd_BUG_ON(!tu->timeri)) - return -ENXIO; + if (!tu->timeri) + return -EBADFD; t = tu->timeri->timer; - if (snd_BUG_ON(!t)) - return -ENXIO; + if (!t) + return -EBADFD; memset(&info, 0, sizeof(info)); info.card = t->card ? t->card->number : -1; if (t->hw.flags & SNDRV_TIMER_HW_SLAVE) @@ -73,8 +73,8 @@ static int snd_timer_user_status_compat(struct file *file, struct snd_timer_status32 status; tu = file->private_data; - if (snd_BUG_ON(!tu->timeri)) - return -ENXIO; + if (!tu->timeri) + return -EBADFD; memset(&status, 0, sizeof(status)); status.tstamp.tv_sec = tu->tstamp.tv_sec; status.tstamp.tv_nsec = tu->tstamp.tv_nsec; diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index e6de496bffbe..e2e08fc73b50 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2316,6 +2316,9 @@ static const struct pci_device_id azx_ids[] = { /* AMD Hudson */ { PCI_DEVICE(0x1022, 0x780d), .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB }, + /* AMD Raven */ + { PCI_DEVICE(0x1022, 0x15e3), + .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB }, /* ATI HDMI */ { PCI_DEVICE(0x1002, 0x0002), .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS }, diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index af0962307b7f..e5730a7d0480 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4404,7 +4404,7 @@ static void alc_no_shutup(struct hda_codec *codec) static void alc_fixup_no_shutup(struct hda_codec *codec, const struct hda_fixup *fix, int action) { - if (action == HDA_FIXUP_ACT_PRE_PROBE) { + if (action == HDA_FIXUP_ACT_PROBE) { struct alc_spec *spec = codec->spec; spec->shutup = alc_no_shutup; } @@ -6254,7 +6254,7 @@ static int patch_alc269(struct hda_codec *codec) case 0x10ec0703: spec->codec_variant = ALC269_TYPE_ALC700; spec->gen.mixer_nid = 0; /* ALC700 does not have any loopback mixer path */ - alc_update_coef_idx(codec, 0x4a, 0, 1 << 15); /* Combo jack auto trigger control */ + alc_update_coef_idx(codec, 0x4a, 1 << 15, 0); /* Combo jack auto trigger control */ break; } diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 0bb415a28723..f1f990b325ad 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1060,7 +1060,7 @@ static int wm_adsp_load(struct wm_adsp *dsp) const struct wmfw_region *region; const struct wm_adsp_region *mem; const char *region_name; - char *file, *text; + char *file, *text = NULL; struct wm_adsp_buf *buf; unsigned int reg; int regions = 0; @@ -1221,10 +1221,21 @@ static int wm_adsp_load(struct wm_adsp *dsp) regions, le32_to_cpu(region->len), offset, region_name); + if ((pos + le32_to_cpu(region->len) + sizeof(*region)) > + firmware->size) { + adsp_err(dsp, + "%s.%d: %s region len %d bytes exceeds file length %zu\n", + file, regions, region_name, + le32_to_cpu(region->len), firmware->size); + ret = -EINVAL; + goto out_fw; + } + if (text) { memcpy(text, region->data, le32_to_cpu(region->len)); adsp_info(dsp, "%s: %s\n", file, text); kfree(text); + text = NULL; } if (reg) { @@ -1269,6 +1280,7 @@ out_fw: regmap_async_complete(regmap); wm_adsp_buf_free(&buf_list); release_firmware(firmware); + kfree(text); out: kfree(file); @@ -1730,6 +1742,17 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp) } if (reg) { + if ((pos + le32_to_cpu(blk->len) + sizeof(*blk)) > + firmware->size) { + adsp_err(dsp, + "%s.%d: %s region len %d bytes exceeds file length %zu\n", + file, blocks, region_name, + le32_to_cpu(blk->len), + firmware->size); + ret = -EINVAL; + goto out_fw; + } + buf = wm_adsp_buf_alloc(blk->data, le32_to_cpu(blk->len), &buf_list); diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index 362446c36c9e..e00dfbec22c5 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -1049,10 +1049,8 @@ static int __rsnd_kctrl_new(struct rsnd_mod *mod, return -ENOMEM; ret = snd_ctl_add(card, kctrl); - if (ret < 0) { - snd_ctl_free_one(kctrl); + if (ret < 0) return ret; - } cfg->update = update; cfg->card = card; diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 2cd09ceba5e9..2899797610e8 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -43,7 +43,7 @@ static struct uac_clock_source_descriptor * while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra, ctrl_iface->extralen, cs, UAC2_CLOCK_SOURCE))) { - if (cs->bClockID == clock_id) + if (cs->bLength >= sizeof(*cs) && cs->bClockID == clock_id) return cs; } @@ -59,8 +59,11 @@ static struct uac_clock_selector_descriptor * while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra, ctrl_iface->extralen, cs, UAC2_CLOCK_SELECTOR))) { - if (cs->bClockID == clock_id) + if (cs->bLength >= sizeof(*cs) && cs->bClockID == clock_id) { + if (cs->bLength < 5 + cs->bNrInPins) + return NULL; return cs; + } } return NULL; @@ -75,7 +78,7 @@ static struct uac_clock_multiplier_descriptor * while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra, ctrl_iface->extralen, cs, UAC2_CLOCK_MULTIPLIER))) { - if (cs->bClockID == clock_id) + if (cs->bLength >= sizeof(*cs) && cs->bClockID == clock_id) return cs; } diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 413824566102..9d864648c901 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1541,6 +1541,12 @@ static int parse_audio_feature_unit(struct mixer_build *state, int unitid, __u8 *bmaControls; if (state->mixer->protocol == UAC_VERSION_1) { + if (hdr->bLength < 7) { + usb_audio_err(state->chip, + "unit %u: invalid UAC_FEATURE_UNIT descriptor\n", + unitid); + return -EINVAL; + } csize = hdr->bControlSize; if (!csize) { usb_audio_dbg(state->chip, @@ -1558,6 +1564,12 @@ static int parse_audio_feature_unit(struct mixer_build *state, int unitid, } } else if (state->mixer->protocol == UAC_VERSION_2) { struct uac2_feature_unit_descriptor *ftr = _ftr; + if (hdr->bLength < 6) { + usb_audio_err(state->chip, + "unit %u: invalid UAC_FEATURE_UNIT descriptor\n", + unitid); + return -EINVAL; + } csize = 4; channels = (hdr->bLength - 6) / 4 - 1; bmaControls = ftr->bmaControls; @@ -2277,7 +2289,8 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid, const struct usbmix_name_map *map; char **namelist; - if (!desc->bNrInPins || desc->bLength < 5 + desc->bNrInPins) { + if (desc->bLength < 5 || !desc->bNrInPins || + desc->bLength < 5 + desc->bNrInPins) { usb_audio_err(state->chip, "invalid SELECTOR UNIT descriptor %d\n", unitid); return -EINVAL; |