summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-fs-f2fs43
-rw-r--r--Makefile2
-rw-r--r--arch/arm/mm/dump.c4
-rw-r--r--arch/arm/mm/init.c4
-rw-r--r--arch/arm64/kernel/vdso.c4
-rw-r--r--arch/arm64/kernel/vdso/gettimeofday.S2
-rw-r--r--arch/mips/bcm47xx/leds.c2
-rw-r--r--arch/mips/kernel/ptrace.c17
-rw-r--r--arch/mips/ralink/mt7620.c4
-rw-r--r--arch/parisc/kernel/syscall.S6
-rw-r--r--arch/powerpc/kernel/signal.c2
-rw-r--r--arch/s390/include/asm/asm-prototypes.h8
-rw-r--r--arch/s390/include/asm/switch_to.h2
-rw-r--r--arch/s390/kernel/dis.c5
-rw-r--r--arch/s390/kernel/early.c4
-rw-r--r--arch/s390/kernel/process.c1
-rw-r--r--arch/s390/kernel/runtime_instr.c4
-rw-r--r--arch/x86/kvm/svm.c7
-rw-r--r--arch/x86/kvm/vmx.c2
-rw-r--r--arch/x86/lib/x86-opcode-map.txt2
-rw-r--r--drivers/android/binder.c6
-rw-r--r--drivers/ata/libata-eh.c2
-rw-r--r--drivers/base/power/opp/core.c1
-rw-r--r--drivers/clk/ti/clk-dra7-atl.c3
-rw-r--r--drivers/dma/zx296702_dma.c1
-rw-r--r--drivers/gpu/drm/armada/Makefile2
-rw-r--r--drivers/gpu/drm/drm_mm.c16
-rw-r--r--drivers/iio/light/cm3232.c2
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c25
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c9
-rw-r--r--drivers/md/bcache/alloc.c3
-rw-r--r--drivers/md/dm-bufio.c15
-rw-r--r--drivers/md/dm.c12
-rw-r--r--drivers/media/rc/ir-lirc-codec.c9
-rw-r--r--drivers/media/usb/as102/as102_fw.c28
-rw-r--r--drivers/media/usb/cx231xx/cx231xx-cards.c2
-rw-r--r--drivers/media/v4l2-core/v4l2-ctrls.c16
-rw-r--r--drivers/net/ethernet/3com/typhoon.c25
-rw-r--r--drivers/net/ethernet/intel/e1000e/mac.c11
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c4
-rw-r--r--drivers/net/ethernet/intel/e1000e/phy.c7
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_main.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c2
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.c2
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c2
-rw-r--r--drivers/net/ethernet/intel/igbvf/netdev.c2
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c2
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c2
-rw-r--r--drivers/net/wireless/ath/ath10k/core.c5
-rw-r--r--drivers/net/wireless/ath/ath10k/mac.c51
-rw-r--r--drivers/net/wireless/ath/ath10k/wmi-tlv.c12
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c6
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c1
-rw-r--r--drivers/nvdimm/label.c2
-rw-r--r--drivers/nvdimm/namespace_devs.c2
-rw-r--r--drivers/pci/probe.c15
-rw-r--r--drivers/spi/Kconfig1
-rw-r--r--drivers/staging/iio/cdc/ad7150.c2
-rw-r--r--drivers/target/iscsi/iscsi_target.c8
-rw-r--r--drivers/target/target_core_transport.c4
-rw-r--r--drivers/vhost/scsi.c5
-rw-r--r--drivers/xen/xenbus/xenbus_dev_frontend.c2
-rw-r--r--fs/9p/vfs_inode.c3
-rw-r--r--fs/9p/vfs_inode_dotl.c3
-rw-r--r--fs/autofs4/waitq.c15
-rw-r--r--fs/btrfs/uuid-tree.c4
-rw-r--r--fs/ecryptfs/messaging.c7
-rw-r--r--fs/ext4/crypto_key.c8
-rw-r--r--fs/ext4/extents.c6
-rw-r--r--fs/f2fs/acl.c3
-rw-r--r--fs/f2fs/checkpoint.c64
-rw-r--r--fs/f2fs/data.c38
-rw-r--r--fs/f2fs/debug.c31
-rw-r--r--fs/f2fs/dir.c32
-rw-r--r--fs/f2fs/f2fs.h223
-rw-r--r--fs/f2fs/file.c120
-rw-r--r--fs/f2fs/gc.c37
-rw-r--r--fs/f2fs/inline.c1
-rw-r--r--fs/f2fs/inode.c26
-rw-r--r--fs/f2fs/namei.c101
-rw-r--r--fs/f2fs/node.c410
-rw-r--r--fs/f2fs/node.h16
-rw-r--r--fs/f2fs/recovery.c8
-rw-r--r--fs/f2fs/segment.c512
-rw-r--r--fs/f2fs/segment.h39
-rw-r--r--fs/f2fs/shrinker.c2
-rw-r--r--fs/f2fs/super.c219
-rw-r--r--fs/f2fs/sysfs.c53
-rw-r--r--fs/f2fs/xattr.c174
-rw-r--r--fs/isofs/isofs.h2
-rw-r--r--fs/isofs/rock.h2
-rw-r--r--fs/isofs/util.c2
-rw-r--r--fs/nfs/nfs4proc.c18
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/nfsd/nfs4state.c25
-rw-r--r--fs/nilfs2/segment.c6
-rw-r--r--include/linux/f2fs_fs.h10
-rw-r--r--include/linux/timekeeper_internal.h8
-rw-r--r--include/trace/events/f2fs.h116
-rw-r--r--include/trace/events/sunrpc.h17
-rw-r--r--kernel/sched/core.c6
-rw-r--r--kernel/sched/rt.c235
-rw-r--r--kernel/sched/sched.h24
-rw-r--r--kernel/time/timekeeping.c57
-rw-r--r--lib/mpi/mpi-pow.c2
-rw-r--r--net/9p/client.c3
-rw-r--r--net/9p/trans_virtio.c13
-rw-r--r--net/ipv4/ip_sockglue.c7
-rw-r--r--net/ipv6/ipv6_sockglue.c16
-rw-r--r--net/ipv6/route.c6
-rw-r--r--net/mac80211/ieee80211_i.h1
-rw-r--r--net/mac80211/mesh.c3
-rw-r--r--net/mac80211/mesh_plink.c14
-rw-r--r--net/mac80211/mesh_sync.c11
-rw-r--r--net/netfilter/nf_tables_api.c2
-rw-r--r--net/netfilter/nft_queue.c2
-rw-r--r--net/nfc/core.c2
-rw-r--r--net/rds/send.c11
-rw-r--r--net/sctp/socket.c4
-rw-r--r--net/vmw_vsock/af_vsock.c167
-rw-r--r--sound/core/pcm_lib.c6
-rw-r--r--sound/core/timer_compat.c12
-rw-r--r--sound/pci/hda/hda_intel.c3
-rw-r--r--sound/pci/hda/patch_realtek.c4
-rw-r--r--sound/soc/codecs/wm_adsp.c25
-rw-r--r--sound/soc/sh/rcar/core.c4
-rw-r--r--sound/usb/clock.c9
-rw-r--r--sound/usb/mixer.c15
129 files changed, 2389 insertions, 1094 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 500c60403653..2baed1151eac 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -51,6 +51,18 @@ Description:
Controls the dirty page count condition for the in-place-update
policies.
+What: /sys/fs/f2fs/<disk>/min_hot_blocks
+Date: March 2017
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:
+ Controls the dirty page count condition for redefining hot data.
+
+What: /sys/fs/f2fs/<disk>/min_ssr_sections
+Date: October 2017
+Contact: "Chao Yu" <yuchao0@huawei.com>
+Description:
+ Controls the fee section threshold to trigger SSR allocation.
+
What: /sys/fs/f2fs/<disk>/max_small_discards
Date: November 2013
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
@@ -96,6 +108,18 @@ Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description:
Controls the checkpoint timing.
+What: /sys/fs/f2fs/<disk>/idle_interval
+Date: January 2016
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:
+ Controls the idle timing.
+
+What: /sys/fs/f2fs/<disk>/iostat_enable
+Date: August 2017
+Contact: "Chao Yu" <yuchao0@huawei.com>
+Description:
+ Controls to enable/disable IO stat.
+
What: /sys/fs/f2fs/<disk>/ra_nid_pages
Date: October 2015
Contact: "Chao Yu" <chao2.yu@samsung.com>
@@ -116,6 +140,12 @@ Contact: "Shuoran Liu" <liushuoran@huawei.com>
Description:
Shows total written kbytes issued to disk.
+What: /sys/fs/f2fs/<disk>/feature
+Date: July 2017
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:
+ Shows all enabled features in current device.
+
What: /sys/fs/f2fs/<disk>/inject_rate
Date: May 2016
Contact: "Sheng Yong" <shengyong1@huawei.com>
@@ -132,7 +162,18 @@ What: /sys/fs/f2fs/<disk>/reserved_blocks
Date: June 2017
Contact: "Chao Yu" <yuchao0@huawei.com>
Description:
- Controls current reserved blocks in system.
+ Controls target reserved blocks in system, the threshold
+ is soft, it could exceed current available user space.
+
+What: /sys/fs/f2fs/<disk>/current_reserved_blocks
+Date: October 2017
+Contact: "Yunlong Song" <yunlong.song@huawei.com>
+Contact: "Chao Yu" <yuchao0@huawei.com>
+Description:
+ Shows current reserved blocks in system, it may be temporarily
+ smaller than target_reserved_blocks, but will gradually
+ increase to target_reserved_blocks when more free blocks are
+ freed by user later.
What: /sys/fs/f2fs/<disk>/gc_urgent
Date: August 2017
diff --git a/Makefile b/Makefile
index 3e0cc5a42f71..beeaecc25859 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
VERSION = 4
PATCHLEVEL = 4
-SUBLEVEL = 101
+SUBLEVEL = 103
EXTRAVERSION =
NAME = Blurry Fish Butt
diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c
index 9fe8e241335c..e1f6f0daa847 100644
--- a/arch/arm/mm/dump.c
+++ b/arch/arm/mm/dump.c
@@ -126,8 +126,8 @@ static const struct prot_bits section_bits[] = {
.val = PMD_SECT_USER,
.set = "USR",
}, {
- .mask = L_PMD_SECT_RDONLY,
- .val = L_PMD_SECT_RDONLY,
+ .mask = L_PMD_SECT_RDONLY | PMD_SECT_AP2,
+ .val = L_PMD_SECT_RDONLY | PMD_SECT_AP2,
.set = "ro",
.clear = "RW",
#elif __LINUX_ARM_ARCH__ >= 6
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index d3d718772381..4d58a6eca48e 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -669,8 +669,8 @@ static struct section_perm ro_perms[] = {
.start = (unsigned long)_stext,
.end = (unsigned long)__init_begin,
#ifdef CONFIG_ARM_LPAE
- .mask = ~L_PMD_SECT_RDONLY,
- .prot = L_PMD_SECT_RDONLY,
+ .mask = ~(L_PMD_SECT_RDONLY | PMD_SECT_AP2),
+ .prot = L_PMD_SECT_RDONLY | PMD_SECT_AP2,
#else
.mask = ~(PMD_SECT_APX | PMD_SECT_AP_WRITE),
.prot = PMD_SECT_APX | PMD_SECT_AP_WRITE,
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index b7730ba782dd..7e9dd94452bb 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -218,8 +218,8 @@ void update_vsyscall(struct timekeeper *tk)
if (!use_syscall) {
/* tkr_mono.cycle_last == tkr_raw.cycle_last */
vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
- vdso_data->raw_time_sec = tk->raw_time.tv_sec;
- vdso_data->raw_time_nsec = tk->raw_time.tv_nsec;
+ vdso_data->raw_time_sec = tk->raw_sec;
+ vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec;
vdso_data->xtime_clock_sec = tk->xtime_sec;
vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
/* tkr_raw.xtime_nsec == 0 */
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
index e00b4671bd7c..c97ce91cf023 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -310,7 +310,7 @@ ENTRY(__kernel_clock_getres)
b.ne 4f
ldr x2, 6f
2:
- cbz w1, 3f
+ cbz x1, 3f
stp xzr, x2, [x1]
3: /* res == NULL. */
diff --git a/arch/mips/bcm47xx/leds.c b/arch/mips/bcm47xx/leds.c
index d20ae63eb3c2..46abe9e4e0e0 100644
--- a/arch/mips/bcm47xx/leds.c
+++ b/arch/mips/bcm47xx/leds.c
@@ -330,7 +330,7 @@ bcm47xx_leds_linksys_wrt54g3gv2[] __initconst = {
/* Verified on: WRT54GS V1.0 */
static const struct gpio_led
bcm47xx_leds_linksys_wrt54g_type_0101[] __initconst = {
- BCM47XX_GPIO_LED(0, "green", "wlan", 0, LEDS_GPIO_DEFSTATE_OFF),
+ BCM47XX_GPIO_LED(0, "green", "wlan", 1, LEDS_GPIO_DEFSTATE_OFF),
BCM47XX_GPIO_LED(1, "green", "power", 0, LEDS_GPIO_DEFSTATE_ON),
BCM47XX_GPIO_LED(7, "green", "dmz", 1, LEDS_GPIO_DEFSTATE_OFF),
};
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 24c115a0721a..a3f38e6b7ea1 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -650,6 +650,19 @@ static const struct user_regset_view user_mips64_view = {
.n = ARRAY_SIZE(mips64_regsets),
};
+#ifdef CONFIG_MIPS32_N32
+
+static const struct user_regset_view user_mipsn32_view = {
+ .name = "mipsn32",
+ .e_flags = EF_MIPS_ABI2,
+ .e_machine = ELF_ARCH,
+ .ei_osabi = ELF_OSABI,
+ .regsets = mips64_regsets,
+ .n = ARRAY_SIZE(mips64_regsets),
+};
+
+#endif /* CONFIG_MIPS32_N32 */
+
#endif /* CONFIG_64BIT */
const struct user_regset_view *task_user_regset_view(struct task_struct *task)
@@ -661,6 +674,10 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
if (test_tsk_thread_flag(task, TIF_32BIT_REGS))
return &user_mips_view;
#endif
+#ifdef CONFIG_MIPS32_N32
+ if (test_tsk_thread_flag(task, TIF_32BIT_ADDR))
+ return &user_mipsn32_view;
+#endif
return &user_mips64_view;
#endif
}
diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c
index 48d6349fd9d7..c5f45fc96c74 100644
--- a/arch/mips/ralink/mt7620.c
+++ b/arch/mips/ralink/mt7620.c
@@ -141,8 +141,8 @@ static struct rt2880_pmx_func i2c_grp_mt7628[] = {
FUNC("i2c", 0, 4, 2),
};
-static struct rt2880_pmx_func refclk_grp_mt7628[] = { FUNC("reclk", 0, 36, 1) };
-static struct rt2880_pmx_func perst_grp_mt7628[] = { FUNC("perst", 0, 37, 1) };
+static struct rt2880_pmx_func refclk_grp_mt7628[] = { FUNC("refclk", 0, 37, 1) };
+static struct rt2880_pmx_func perst_grp_mt7628[] = { FUNC("perst", 0, 36, 1) };
static struct rt2880_pmx_func wdt_grp_mt7628[] = { FUNC("wdt", 0, 38, 1) };
static struct rt2880_pmx_func spi_grp_mt7628[] = { FUNC("spi", 0, 7, 4) };
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index c6b855f7892c..9f22195b90ed 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -688,15 +688,15 @@ cas_action:
/* ELF32 Process entry path */
lws_compare_and_swap_2:
#ifdef CONFIG_64BIT
- /* Clip the input registers */
+ /* Clip the input registers. We don't need to clip %r23 as we
+ only use it for word operations */
depdi 0, 31, 32, %r26
depdi 0, 31, 32, %r25
depdi 0, 31, 32, %r24
- depdi 0, 31, 32, %r23
#endif
/* Check the validity of the size pointer */
- subi,>>= 4, %r23, %r0
+ subi,>>= 3, %r23, %r0
b,n lws_exit_nosys
/* Jump to the functions which will load the old and new values into
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index cf8c7e4e0b21..984a54c85952 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -102,7 +102,7 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka,
static void do_signal(struct pt_regs *regs)
{
sigset_t *oldset = sigmask_to_save();
- struct ksignal ksig;
+ struct ksignal ksig = { .sig = 0 };
int ret;
int is32 = is_32bit_task();
diff --git a/arch/s390/include/asm/asm-prototypes.h b/arch/s390/include/asm/asm-prototypes.h
new file mode 100644
index 000000000000..2c3413b0ca52
--- /dev/null
+++ b/arch/s390/include/asm/asm-prototypes.h
@@ -0,0 +1,8 @@
+#ifndef _ASM_S390_PROTOTYPES_H
+
+#include <linux/kvm_host.h>
+#include <linux/ftrace.h>
+#include <asm/fpu/api.h>
+#include <asm-generic/asm-prototypes.h>
+
+#endif /* _ASM_S390_PROTOTYPES_H */
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index 12d45f0cfdd9..dde6b52359c5 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -34,8 +34,8 @@ static inline void restore_access_regs(unsigned int *acrs)
save_access_regs(&prev->thread.acrs[0]); \
save_ri_cb(prev->thread.ri_cb); \
} \
+ update_cr_regs(next); \
if (next->mm) { \
- update_cr_regs(next); \
set_cpu_flag(CIF_FPU); \
restore_access_regs(&next->thread.acrs[0]); \
restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 6e72961608f0..07477ba392b7 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -1549,6 +1549,7 @@ static struct s390_insn opcode_e7[] = {
{ "vfsq", 0xce, INSTR_VRR_VV000MM },
{ "vfs", 0xe2, INSTR_VRR_VVV00MM },
{ "vftci", 0x4a, INSTR_VRI_VVIMM },
+ { "", 0, INSTR_INVALID }
};
static struct s390_insn opcode_eb[] = {
@@ -1961,7 +1962,7 @@ void show_code(struct pt_regs *regs)
{
char *mode = user_mode(regs) ? "User" : "Krnl";
unsigned char code[64];
- char buffer[64], *ptr;
+ char buffer[128], *ptr;
mm_segment_t old_fs;
unsigned long addr;
int start, end, opsize, hops, i;
@@ -2024,7 +2025,7 @@ void show_code(struct pt_regs *regs)
start += opsize;
printk(buffer);
ptr = buffer;
- ptr += sprintf(ptr, "\n ");
+ ptr += sprintf(ptr, "\n\t ");
hops++;
}
printk("\n");
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 3c31609df959..ee7b8e7ca4f8 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -325,8 +325,10 @@ static __init void detect_machine_facilities(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE;
if (test_facility(40))
S390_lowcore.machine_flags |= MACHINE_FLAG_LPP;
- if (test_facility(50) && test_facility(73))
+ if (test_facility(50) && test_facility(73)) {
S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
+ __ctl_set_bit(0, 55);
+ }
if (test_facility(51))
S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
if (test_facility(129)) {
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 114ee8b96f17..efa035a31b98 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -137,6 +137,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
memset(&p->thread.per_user, 0, sizeof(p->thread.per_user));
memset(&p->thread.per_event, 0, sizeof(p->thread.per_event));
clear_tsk_thread_flag(p, TIF_SINGLE_STEP);
+ p->thread.per_flags = 0;
/* Initialize per thread user and system timer values */
ti = task_thread_info(p);
ti->user_timer = 0;
diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c
index fffa0e5462af..70cdb03d4acd 100644
--- a/arch/s390/kernel/runtime_instr.c
+++ b/arch/s390/kernel/runtime_instr.c
@@ -47,11 +47,13 @@ void exit_thread_runtime_instr(void)
{
struct task_struct *task = current;
+ preempt_disable();
if (!task->thread.ri_cb)
return;
disable_runtime_instr();
kfree(task->thread.ri_cb);
task->thread.ri_cb = NULL;
+ preempt_enable();
}
SYSCALL_DEFINE1(s390_runtime_instr, int, command)
@@ -62,9 +64,7 @@ SYSCALL_DEFINE1(s390_runtime_instr, int, command)
return -EOPNOTSUPP;
if (command == S390_RUNTIME_INSTR_STOP) {
- preempt_disable();
exit_thread_runtime_instr();
- preempt_enable();
return 0;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 899c40f826dd..4e1b254c3695 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3114,6 +3114,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
u32 ecx = msr->index;
u64 data = msr->data;
switch (ecx) {
+ case MSR_IA32_CR_PAT:
+ if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+ return 1;
+ vcpu->arch.pat = data;
+ svm->vmcb->save.g_pat = data;
+ mark_dirty(svm->vmcb, VMCB_NPT);
+ break;
case MSR_IA32_TSC:
kvm_write_tsc(vcpu, msr);
break;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9114588e3e61..67ba0d8f87c7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10394,6 +10394,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);
vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
+ vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF);
+ vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF);
/* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */
if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index d388de72eaca..ec039f2a0c13 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -833,7 +833,7 @@ EndTable
GrpTable: Grp3_1
0: TEST Eb,Ib
-1:
+1: TEST Eb,Ib
2: NOT Eb
3: NEG Eb
4: MUL AL,Eb
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 47ba64f1e70b..2106014f1ea8 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2468,7 +2468,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
debug_id, (u64)fda->num_fds);
continue;
}
- fd_array = (u32 *)(parent_buffer + fda->parent_offset);
+ fd_array = (u32 *)(parent_buffer + (uintptr_t)fda->parent_offset);
for (fd_index = 0; fd_index < fda->num_fds; fd_index++)
task_close_fd(proc, fd_array[fd_index]);
} break;
@@ -2692,7 +2692,7 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda,
*/
parent_buffer = parent->buffer -
binder_alloc_get_user_buffer_offset(&target_proc->alloc);
- fd_array = (u32 *)(parent_buffer + fda->parent_offset);
+ fd_array = (u32 *)(parent_buffer + (uintptr_t)fda->parent_offset);
if (!IS_ALIGNED((unsigned long)fd_array, sizeof(u32))) {
binder_user_error("%d:%d parent offset not aligned correctly.\n",
proc->pid, thread->pid);
@@ -2758,7 +2758,7 @@ static int binder_fixup_parent(struct binder_transaction *t,
proc->pid, thread->pid);
return -EINVAL;
}
- parent_buffer = (u8 *)(parent->buffer -
+ parent_buffer = (u8 *)((uintptr_t)parent->buffer -
binder_alloc_get_user_buffer_offset(
&target_proc->alloc));
*(binder_uintptr_t *)(parent_buffer + bp->parent_offset) = bp->buffer;
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 91a9e6af2ec4..75cced210b2a 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -2245,8 +2245,8 @@ static void ata_eh_link_autopsy(struct ata_link *link)
if (dev->flags & ATA_DFLAG_DUBIOUS_XFER)
eflags |= ATA_EFLAG_DUBIOUS_XFER;
ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask);
+ trace_ata_eh_link_autopsy(dev, ehc->i.action, all_err_mask);
}
- trace_ata_eh_link_autopsy(dev, ehc->i.action, all_err_mask);
DPRINTK("EXIT\n");
}
diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index 433b60092972..e40f67b7d28b 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -1936,6 +1936,7 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np)
if (ret) {
dev_err(dev, "%s: Failed to add OPP, %d\n", __func__,
ret);
+ of_node_put(np);
goto free_table;
}
}
diff --git a/drivers/clk/ti/clk-dra7-atl.c b/drivers/clk/ti/clk-dra7-atl.c
index 2e14dfb588f4..7d060ffe8975 100644
--- a/drivers/clk/ti/clk-dra7-atl.c
+++ b/drivers/clk/ti/clk-dra7-atl.c
@@ -265,7 +265,7 @@ static int of_dra7_atl_clk_probe(struct platform_device *pdev)
/* Get configuration for the ATL instances */
snprintf(prop, sizeof(prop), "atl%u", i);
- cfg_node = of_find_node_by_name(node, prop);
+ cfg_node = of_get_child_by_name(node, prop);
if (cfg_node) {
ret = of_property_read_u32(cfg_node, "bws",
&cdesc->bws);
@@ -278,6 +278,7 @@ static int of_dra7_atl_clk_probe(struct platform_device *pdev)
atl_write(cinfo, DRA7_ATL_AWSMUX_REG(i),
cdesc->aws);
}
+ of_node_put(cfg_node);
}
cdesc->probed = true;
diff --git a/drivers/dma/zx296702_dma.c b/drivers/dma/zx296702_dma.c
index 245d759d5ffc..6059d81e701a 100644
--- a/drivers/dma/zx296702_dma.c
+++ b/drivers/dma/zx296702_dma.c
@@ -813,6 +813,7 @@ static int zx_dma_probe(struct platform_device *op)
INIT_LIST_HEAD(&d->slave.channels);
dma_cap_set(DMA_SLAVE, d->slave.cap_mask);
dma_cap_set(DMA_MEMCPY, d->slave.cap_mask);
+ dma_cap_set(DMA_CYCLIC, d->slave.cap_mask);
dma_cap_set(DMA_PRIVATE, d->slave.cap_mask);
d->slave.dev = &op->dev;
d->slave.device_free_chan_resources = zx_dma_free_chan_resources;
diff --git a/drivers/gpu/drm/armada/Makefile b/drivers/gpu/drm/armada/Makefile
index ffd673615772..26412d2f8c98 100644
--- a/drivers/gpu/drm/armada/Makefile
+++ b/drivers/gpu/drm/armada/Makefile
@@ -4,3 +4,5 @@ armada-y += armada_510.o
armada-$(CONFIG_DEBUG_FS) += armada_debugfs.o
obj-$(CONFIG_DRM_ARMADA) := armada.o
+
+CFLAGS_armada_trace.o := -I$(src)
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 6e4dd62d4ed9..7ee077aca5da 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -378,14 +378,12 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
BUG_ON(!hole_node->hole_follows || node->allocated);
- if (adj_start < start)
- adj_start = start;
- if (adj_end > end)
- adj_end = end;
-
if (mm->color_adjust)
mm->color_adjust(hole_node, color, &adj_start, &adj_end);
+ adj_start = max(adj_start, start);
+ adj_end = min(adj_end, end);
+
if (flags & DRM_MM_CREATE_TOP)
adj_start = adj_end - size;
@@ -650,17 +648,15 @@ static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_
adj_end = drm_mm_hole_node_end(entry);
hole_size = adj_end - adj_start;
- if (adj_start < start)
- adj_start = start;
- if (adj_end > end)
- adj_end = end;
-
if (mm->color_adjust) {
mm->color_adjust(entry, color, &adj_start, &adj_end);
if (adj_end <= adj_start)
continue;
}
+ adj_start = max(adj_start, start);
+ adj_end = min(adj_end, end);
+
if (!check_free_hole(adj_start, adj_end, size, alignment))
continue;
diff --git a/drivers/iio/light/cm3232.c b/drivers/iio/light/cm3232.c
index fe89b6823217..263e97235ea0 100644
--- a/drivers/iio/light/cm3232.c
+++ b/drivers/iio/light/cm3232.c
@@ -119,7 +119,7 @@ static int cm3232_reg_init(struct cm3232_chip *chip)
if (ret < 0)
dev_err(&chip->client->dev, "Error writing reg_cmd\n");
- return 0;
+ return ret;
}
/**
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index e397f1b0af09..9a99cee2665a 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -670,12 +670,19 @@ static void srp_path_rec_completion(int status,
static int srp_lookup_path(struct srp_rdma_ch *ch)
{
struct srp_target_port *target = ch->target;
- int ret;
+ int ret = -ENODEV;
ch->path.numb_path = 1;
init_completion(&ch->done);
+ /*
+ * Avoid that the SCSI host can be removed by srp_remove_target()
+ * before srp_path_rec_completion() is called.
+ */
+ if (!scsi_host_get(target->scsi_host))
+ goto out;
+
ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
target->srp_host->srp_dev->dev,
target->srp_host->port,
@@ -689,18 +696,24 @@ static int srp_lookup_path(struct srp_rdma_ch *ch)
GFP_KERNEL,
srp_path_rec_completion,
ch, &ch->path_query);
- if (ch->path_query_id < 0)
- return ch->path_query_id;
+ ret = ch->path_query_id;
+ if (ret < 0)
+ goto put;
ret = wait_for_completion_interruptible(&ch->done);
if (ret < 0)
- return ret;
+ goto put;
- if (ch->status < 0)
+ ret = ch->status;
+ if (ret < 0)
shost_printk(KERN_WARNING, target->scsi_host,
PFX "Path record query failed\n");
- return ch->status;
+put:
+ scsi_host_put(target->scsi_host);
+
+out:
+ return ret;
}
static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index eaabf3125846..c52131233ba7 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -3425,7 +3425,7 @@ static int srpt_parse_i_port_id(u8 i_port_id[16], const char *name)
{
const char *p;
unsigned len, count, leading_zero_bytes;
- int ret, rc;
+ int ret;
p = name;
if (strncasecmp(p, "0x", 2) == 0)
@@ -3437,10 +3437,9 @@ static int srpt_parse_i_port_id(u8 i_port_id[16], const char *name)
count = min(len / 2, 16U);
leading_zero_bytes = 16 - count;
memset(i_port_id, 0, leading_zero_bytes);
- rc = hex2bin(i_port_id + leading_zero_bytes, p, count);
- if (rc < 0)
- pr_debug("hex2bin failed for srpt_parse_i_port_id: %d\n", rc);
- ret = 0;
+ ret = hex2bin(i_port_id + leading_zero_bytes, p, count);
+ if (ret < 0)
+ pr_debug("hex2bin failed for srpt_parse_i_port_id: %d\n", ret);
out:
return ret;
}
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 8eeab72b93e2..ea47980949ef 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -406,7 +406,8 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
finish_wait(&ca->set->bucket_wait, &w);
out:
- wake_up_process(ca->alloc_thread);
+ if (ca->alloc_thread)
+ wake_up_process(ca->alloc_thread);
trace_bcache_alloc(ca, reserve);
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index cdceefd0e57d..2ec7f90e3455 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -928,7 +928,8 @@ static void __get_memory_limit(struct dm_bufio_client *c,
buffers = c->minimum_buffers;
*limit_buffers = buffers;
- *threshold_buffers = buffers * DM_BUFIO_WRITEBACK_PERCENT / 100;
+ *threshold_buffers = mult_frac(buffers,
+ DM_BUFIO_WRITEBACK_PERCENT, 100);
}
/*
@@ -1829,19 +1830,15 @@ static int __init dm_bufio_init(void)
memset(&dm_bufio_caches, 0, sizeof dm_bufio_caches);
memset(&dm_bufio_cache_names, 0, sizeof dm_bufio_cache_names);
- mem = (__u64)((totalram_pages - totalhigh_pages) *
- DM_BUFIO_MEMORY_PERCENT / 100) << PAGE_SHIFT;
+ mem = (__u64)mult_frac(totalram_pages - totalhigh_pages,
+ DM_BUFIO_MEMORY_PERCENT, 100) << PAGE_SHIFT;
if (mem > ULONG_MAX)
mem = ULONG_MAX;
#ifdef CONFIG_MMU
- /*
- * Get the size of vmalloc space the same way as VMALLOC_TOTAL
- * in fs/proc/internal.h
- */
- if (mem > (VMALLOC_END - VMALLOC_START) * DM_BUFIO_VMALLOC_PERCENT / 100)
- mem = (VMALLOC_END - VMALLOC_START) * DM_BUFIO_VMALLOC_PERCENT / 100;
+ if (mem > mult_frac(VMALLOC_TOTAL, DM_BUFIO_VMALLOC_PERCENT, 100))
+ mem = mult_frac(VMALLOC_TOTAL, DM_BUFIO_VMALLOC_PERCENT, 100);
#endif
dm_bufio_default_cache_size = mem;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 47ac131099d9..f7f560f5f056 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -3517,11 +3517,15 @@ struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
md = container_of(kobj, struct mapped_device, kobj_holder.kobj);
- if (test_bit(DMF_FREEING, &md->flags) ||
- dm_deleting_md(md))
- return NULL;
-
+ spin_lock(&_minor_lock);
+ if (test_bit(DMF_FREEING, &md->flags) || dm_deleting_md(md)) {
+ md = NULL;
+ goto out;
+ }
dm_get(md);
+out:
+ spin_unlock(&_minor_lock);
+
return md;
}
diff --git a/drivers/media/rc/ir-lirc-codec.c b/drivers/media/rc/ir-lirc-codec.c
index efc21b1da211..ca107033e429 100644
--- a/drivers/media/rc/ir-lirc-codec.c
+++ b/drivers/media/rc/ir-lirc-codec.c
@@ -286,11 +286,14 @@ static long ir_lirc_ioctl(struct file *filep, unsigned int cmd,
if (!dev->max_timeout)
return -ENOSYS;
+ /* Check for multiply overflow */
+ if (val > U32_MAX / 1000)
+ return -EINVAL;
+
tmp = val * 1000;
- if (tmp < dev->min_timeout ||
- tmp > dev->max_timeout)
- return -EINVAL;
+ if (tmp < dev->min_timeout || tmp > dev->max_timeout)
+ return -EINVAL;
dev->timeout = tmp;
break;
diff --git a/drivers/media/usb/as102/as102_fw.c b/drivers/media/usb/as102/as102_fw.c
index 07d08c49f4d4..b2e16bb67572 100644
--- a/drivers/media/usb/as102/as102_fw.c
+++ b/drivers/media/usb/as102/as102_fw.c
@@ -101,18 +101,23 @@ static int as102_firmware_upload(struct as10x_bus_adapter_t *bus_adap,
unsigned char *cmd,
const struct firmware *firmware) {
- struct as10x_fw_pkt_t fw_pkt;
+ struct as10x_fw_pkt_t *fw_pkt;
int total_read_bytes = 0, errno = 0;
unsigned char addr_has_changed = 0;
+ fw_pkt = kmalloc(sizeof(*fw_pkt), GFP_KERNEL);
+ if (!fw_pkt)
+ return -ENOMEM;
+
+
for (total_read_bytes = 0; total_read_bytes < firmware->size; ) {
int read_bytes = 0, data_len = 0;
/* parse intel hex line */
read_bytes = parse_hex_line(
(u8 *) (firmware->data + total_read_bytes),
- fw_pkt.raw.address,
- fw_pkt.raw.data,
+ fw_pkt->raw.address,
+ fw_pkt->raw.data,
&data_len,
&addr_has_changed);
@@ -122,28 +127,28 @@ static int as102_firmware_upload(struct as10x_bus_adapter_t *bus_adap,
/* detect the end of file */
total_read_bytes += read_bytes;
if (total_read_bytes == firmware->size) {
- fw_pkt.u.request[0] = 0x00;
- fw_pkt.u.request[1] = 0x03;
+ fw_pkt->u.request[0] = 0x00;
+ fw_pkt->u.request[1] = 0x03;
/* send EOF command */
errno = bus_adap->ops->upload_fw_pkt(bus_adap,
(uint8_t *)
- &fw_pkt, 2, 0);
+ fw_pkt, 2, 0);
if (errno < 0)
goto error;
} else {
if (!addr_has_changed) {
/* prepare command to send */
- fw_pkt.u.request[0] = 0x00;
- fw_pkt.u.request[1] = 0x01;
+ fw_pkt->u.request[0] = 0x00;
+ fw_pkt->u.request[1] = 0x01;
- data_len += sizeof(fw_pkt.u.request);
- data_len += sizeof(fw_pkt.raw.address);
+ data_len += sizeof(fw_pkt->u.request);
+ data_len += sizeof(fw_pkt->raw.address);
/* send cmd to device */
errno = bus_adap->ops->upload_fw_pkt(bus_adap,
(uint8_t *)
- &fw_pkt,
+ fw_pkt,
data_len,
0);
if (errno < 0)
@@ -152,6 +157,7 @@ static int as102_firmware_upload(struct as10x_bus_adapter_t *bus_adap,
}
}
error:
+ kfree(fw_pkt);
return (errno == 0) ? total_read_bytes : errno;
}
diff --git a/drivers/media/usb/cx231xx/cx231xx-cards.c b/drivers/media/usb/cx231xx/cx231xx-cards.c
index 2c5f76d588ac..04ae21278440 100644
--- a/drivers/media/usb/cx231xx/cx231xx-cards.c
+++ b/drivers/media/usb/cx231xx/cx231xx-cards.c
@@ -1672,7 +1672,7 @@ static int cx231xx_usb_probe(struct usb_interface *interface,
nr = dev->devno;
assoc_desc = udev->actconfig->intf_assoc[0];
- if (assoc_desc->bFirstInterface != ifnum) {
+ if (!assoc_desc || assoc_desc->bFirstInterface != ifnum) {
dev_err(d, "Not found matching IAD interface\n");
retval = -ENODEV;
goto err_if;
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index bc45a225e710..3feaa9b154f0 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -1205,6 +1205,16 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
}
EXPORT_SYMBOL(v4l2_ctrl_fill);
+static u32 user_flags(const struct v4l2_ctrl *ctrl)
+{
+ u32 flags = ctrl->flags;
+
+ if (ctrl->is_ptr)
+ flags |= V4L2_CTRL_FLAG_HAS_PAYLOAD;
+
+ return flags;
+}
+
static void fill_event(struct v4l2_event *ev, struct v4l2_ctrl *ctrl, u32 changes)
{
memset(ev->reserved, 0, sizeof(ev->reserved));
@@ -1212,7 +1222,7 @@ static void fill_event(struct v4l2_event *ev, struct v4l2_ctrl *ctrl, u32 change
ev->id = ctrl->id;
ev->u.ctrl.changes = changes;
ev->u.ctrl.type = ctrl->type;
- ev->u.ctrl.flags = ctrl->flags;
+ ev->u.ctrl.flags = user_flags(ctrl);
if (ctrl->is_ptr)
ev->u.ctrl.value64 = 0;
else
@@ -2541,10 +2551,8 @@ int v4l2_query_ext_ctrl(struct v4l2_ctrl_handler *hdl, struct v4l2_query_ext_ctr
else
qc->id = ctrl->id;
strlcpy(qc->name, ctrl->name, sizeof(qc->name));
- qc->flags = ctrl->flags;
+ qc->flags = user_flags(ctrl);
qc->type = ctrl->type;
- if (ctrl->is_ptr)
- qc->flags |= V4L2_CTRL_FLAG_HAS_PAYLOAD;
qc->elem_size = ctrl->elem_size;
qc->elems = ctrl->elems;
qc->nr_of_dims = ctrl->nr_of_dims;
diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c
index 8f8418d2ac4a..a0012c3cb4f6 100644
--- a/drivers/net/ethernet/3com/typhoon.c
+++ b/drivers/net/ethernet/3com/typhoon.c
@@ -2366,9 +2366,9 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
* 4) Get the hardware address.
* 5) Put the card to sleep.
*/
- if (typhoon_reset(ioaddr, WaitSleep) < 0) {
+ err = typhoon_reset(ioaddr, WaitSleep);
+ if (err < 0) {
err_msg = "could not reset 3XP";
- err = -EIO;
goto error_out_dma;
}
@@ -2382,24 +2382,25 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
typhoon_init_interface(tp);
typhoon_init_rings(tp);
- if(typhoon_boot_3XP(tp, TYPHOON_STATUS_WAITING_FOR_HOST) < 0) {
+ err = typhoon_boot_3XP(tp, TYPHOON_STATUS_WAITING_FOR_HOST);
+ if (err < 0) {
err_msg = "cannot boot 3XP sleep image";
- err = -EIO;
goto error_out_reset;
}
INIT_COMMAND_WITH_RESPONSE(&xp_cmd, TYPHOON_CMD_READ_MAC_ADDRESS);
- if(typhoon_issue_command(tp, 1, &xp_cmd, 1, xp_resp) < 0) {
+ err = typhoon_issue_command(tp, 1, &xp_cmd, 1, xp_resp);
+ if (err < 0) {
err_msg = "cannot read MAC address";
- err = -EIO;
goto error_out_reset;
}
*(__be16 *)&dev->dev_addr[0] = htons(le16_to_cpu(xp_resp[0].parm1));
*(__be32 *)&dev->dev_addr[2] = htonl(le32_to_cpu(xp_resp[0].parm2));
- if(!is_valid_ether_addr(dev->dev_addr)) {
+ if (!is_valid_ether_addr(dev->dev_addr)) {
err_msg = "Could not obtain valid ethernet address, aborting";
+ err = -EIO;
goto error_out_reset;
}
@@ -2407,7 +2408,8 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
* later when we print out the version reported.
*/
INIT_COMMAND_WITH_RESPONSE(&xp_cmd, TYPHOON_CMD_READ_VERSIONS);
- if(typhoon_issue_command(tp, 1, &xp_cmd, 3, xp_resp) < 0) {
+ err = typhoon_issue_command(tp, 1, &xp_cmd, 3, xp_resp);
+ if (err < 0) {
err_msg = "Could not get Sleep Image version";
goto error_out_reset;
}
@@ -2424,9 +2426,9 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if(xp_resp[0].numDesc != 0)
tp->capabilities |= TYPHOON_WAKEUP_NEEDS_RESET;
- if(typhoon_sleep(tp, PCI_D3hot, 0) < 0) {
+ err = typhoon_sleep(tp, PCI_D3hot, 0);
+ if (err < 0) {
err_msg = "cannot put adapter to sleep";
- err = -EIO;
goto error_out_reset;
}
@@ -2449,7 +2451,8 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
dev->features = dev->hw_features |
NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM;
- if(register_netdev(dev) < 0) {
+ err = register_netdev(dev);
+ if (err < 0) {
err_msg = "unable to register netdev";
goto error_out_reset;
}
diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c
index e59d7c283cd4..645ace74429e 100644
--- a/drivers/net/ethernet/intel/e1000e/mac.c
+++ b/drivers/net/ethernet/intel/e1000e/mac.c
@@ -410,6 +410,9 @@ void e1000e_clear_hw_cntrs_base(struct e1000_hw *hw)
* Checks to see of the link status of the hardware has changed. If a
* change in link status has been detected, then we read the PHY registers
* to get the current speed/duplex if link exists.
+ *
+ * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link
+ * up).
**/
s32 e1000e_check_for_copper_link(struct e1000_hw *hw)
{
@@ -423,7 +426,7 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw)
* Change or Rx Sequence Error interrupt.
*/
if (!mac->get_link_status)
- return 0;
+ return 1;
/* First we want to see if the MII Status Register reports
* link. If so, then we want to get the current speed/duplex
@@ -461,10 +464,12 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw)
* different link partner.
*/
ret_val = e1000e_config_fc_after_link_up(hw);
- if (ret_val)
+ if (ret_val) {
e_dbg("Error configuring flow control\n");
+ return ret_val;
+ }
- return ret_val;
+ return 1;
}
/**
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 80ec587d510e..5205f1ebe381 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -5017,7 +5017,7 @@ static bool e1000e_has_link(struct e1000_adapter *adapter)
case e1000_media_type_copper:
if (hw->mac.get_link_status) {
ret_val = hw->mac.ops.check_for_link(hw);
- link_active = !hw->mac.get_link_status;
+ link_active = ret_val > 0;
} else {
link_active = true;
}
@@ -5035,7 +5035,7 @@ static bool e1000e_has_link(struct e1000_adapter *adapter)
break;
}
- if ((ret_val == E1000_ERR_PHY) && (hw->phy.type == e1000_phy_igp_3) &&
+ if ((ret_val == -E1000_ERR_PHY) && (hw->phy.type == e1000_phy_igp_3) &&
(er32(CTRL) & E1000_PHY_CTRL_GBE_DISABLE)) {
/* See e1000_kmrn_lock_loss_workaround_ich8lan() */
e_info("Gigabit has been disabled, downgrading speed\n");
diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c
index de13aeacae97..8e674a0988b0 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.c
+++ b/drivers/net/ethernet/intel/e1000e/phy.c
@@ -1744,6 +1744,7 @@ s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations,
s32 ret_val = 0;
u16 i, phy_status;
+ *success = false;
for (i = 0; i < iterations; i++) {
/* Some PHYs require the MII_BMSR register to be read
* twice due to the link bit being sticky. No harm doing
@@ -1763,16 +1764,16 @@ s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations,
ret_val = e1e_rphy(hw, MII_BMSR, &phy_status);
if (ret_val)
break;
- if (phy_status & BMSR_LSTATUS)
+ if (phy_status & BMSR_LSTATUS) {
+ *success = true;
break;
+ }
if (usec_interval >= 1000)
msleep(usec_interval / 1000);
else
udelay(usec_interval);
}
- *success = (i < iterations);
-
return ret_val;
}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index 09281558bfbc..c21fa56afd7c 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -1226,7 +1226,7 @@ static bool fm10k_clean_tx_irq(struct fm10k_q_vector *q_vector,
break;
/* prevent any other reads prior to eop_desc */
- read_barrier_depends();
+ smp_rmb();
/* if DD is not set pending work has not been completed */
if (!(eop_desc->flags & FM10K_TXD_FLAG_DONE))
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 4edbab6ca7ef..b5b228c9a030 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -3595,7 +3595,7 @@ static bool i40e_clean_fdir_tx_irq(struct i40e_ring *tx_ring, int budget)
break;
/* prevent any other reads prior to eop_desc */
- read_barrier_depends();
+ smp_rmb();
/* if the descriptor isn't done, no work yet to do */
if (!(eop_desc->cmd_type_offset_bsz &
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 26c55bba4bf3..6dcc3854844d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -663,7 +663,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
break;
/* prevent any other reads prior to eop_desc */
- read_barrier_depends();
+ smp_rmb();
/* we have caught up to head, no work left to do */
if (tx_head == tx_desc)
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 39db70a597ed..1ed27fcd5031 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -172,7 +172,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
break;
/* prevent any other reads prior to eop_desc */
- read_barrier_depends();
+ smp_rmb();
/* we have caught up to head, no work left to do */
if (tx_head == tx_desc)
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index ff6e57d788eb..c55552c3d2f9 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -6433,7 +6433,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
break;
/* prevent any other reads prior to eop_desc */
- read_barrier_depends();
+ smp_rmb();
/* if DD is not set pending work has not been completed */
if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index 297af801f051..519b72c41888 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -809,7 +809,7 @@ static bool igbvf_clean_tx_irq(struct igbvf_ring *tx_ring)
break;
/* prevent any other reads prior to eop_desc */
- read_barrier_depends();
+ smp_rmb();
/* if DD is not set pending work has not been completed */
if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 83645d8503d4..a5b443171b8b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1114,7 +1114,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
break;
/* prevent any other reads prior to eop_desc */
- read_barrier_depends();
+ smp_rmb();
/* if DD is not set pending work has not been completed */
if (!(eop_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 592ff237d692..50bbad37d640 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -312,7 +312,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
break;
/* prevent any other reads prior to eop_desc */
- read_barrier_depends();
+ smp_rmb();
/* if DD is not set pending work has not been completed */
if (!(eop_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index 55033aed6d6b..d89a2d8adc2a 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -706,8 +706,11 @@ static int ath10k_core_get_board_id_from_otp(struct ath10k *ar)
"boot get otp board id result 0x%08x board_id %d chip_id %d\n",
result, board_id, chip_id);
- if ((result & ATH10K_BMI_BOARD_ID_STATUS_MASK) != 0)
+ if ((result & ATH10K_BMI_BOARD_ID_STATUS_MASK) != 0 ||
+ (board_id == 0)) {
+ ath10k_warn(ar, "board id is not exist in otp, ignore it\n");
return -EOPNOTSUPP;
+ }
ar->id.bmi_ids_valid = true;
ar->id.bmi_board_id = board_id;
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 16a5c5fd3925..28042100ae0a 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -1237,6 +1237,36 @@ static int ath10k_monitor_recalc(struct ath10k *ar)
return ath10k_monitor_stop(ar);
}
+static bool ath10k_mac_can_set_cts_prot(struct ath10k_vif *arvif)
+{
+ struct ath10k *ar = arvif->ar;
+
+ lockdep_assert_held(&ar->conf_mutex);
+
+ if (!arvif->is_started) {
+ ath10k_dbg(ar, ATH10K_DBG_MAC, "defer cts setup, vdev is not ready yet\n");
+ return false;
+ }
+
+ return true;
+}
+
+static int ath10k_mac_set_cts_prot(struct ath10k_vif *arvif)
+{
+ struct ath10k *ar = arvif->ar;
+ u32 vdev_param;
+
+ lockdep_assert_held(&ar->conf_mutex);
+
+ vdev_param = ar->wmi.vdev_param->protection_mode;
+
+ ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d cts_protection %d\n",
+ arvif->vdev_id, arvif->use_cts_prot);
+
+ return ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param,
+ arvif->use_cts_prot ? 1 : 0);
+}
+
static int ath10k_recalc_rtscts_prot(struct ath10k_vif *arvif)
{
struct ath10k *ar = arvif->ar;
@@ -5386,20 +5416,18 @@ static void ath10k_bss_info_changed(struct ieee80211_hw *hw,
if (changed & BSS_CHANGED_ERP_CTS_PROT) {
arvif->use_cts_prot = info->use_cts_prot;
- ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vdev %d cts_prot %d\n",
- arvif->vdev_id, info->use_cts_prot);
ret = ath10k_recalc_rtscts_prot(arvif);
if (ret)
ath10k_warn(ar, "failed to recalculate rts/cts prot for vdev %d: %d\n",
arvif->vdev_id, ret);
- vdev_param = ar->wmi.vdev_param->protection_mode;
- ret = ath10k_wmi_vdev_set_param(ar, arvif->vdev_id, vdev_param,
- info->use_cts_prot ? 1 : 0);
- if (ret)
- ath10k_warn(ar, "failed to set protection mode %d on vdev %i: %d\n",
- info->use_cts_prot, arvif->vdev_id, ret);
+ if (ath10k_mac_can_set_cts_prot(arvif)) {
+ ret = ath10k_mac_set_cts_prot(arvif);
+ if (ret)
+ ath10k_warn(ar, "failed to set cts protection for vdev %d: %d\n",
+ arvif->vdev_id, ret);
+ }
}
if (changed & BSS_CHANGED_ERP_SLOT) {
@@ -7463,6 +7491,13 @@ ath10k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw,
arvif->is_up = true;
}
+ if (ath10k_mac_can_set_cts_prot(arvif)) {
+ ret = ath10k_mac_set_cts_prot(arvif);
+ if (ret)
+ ath10k_warn(ar, "failed to set cts protection for vdev %d: %d\n",
+ arvif->vdev_id, ret);
+ }
+
mutex_unlock(&ar->conf_mutex);
return 0;
diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
index 5ce4fdfca724..ba411cba6fc9 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
@@ -1156,8 +1156,10 @@ static int ath10k_wmi_tlv_op_pull_fw_stats(struct ath10k *ar,
struct ath10k_fw_stats_pdev *dst;
src = data;
- if (data_len < sizeof(*src))
+ if (data_len < sizeof(*src)) {
+ kfree(tb);
return -EPROTO;
+ }
data += sizeof(*src);
data_len -= sizeof(*src);
@@ -1177,8 +1179,10 @@ static int ath10k_wmi_tlv_op_pull_fw_stats(struct ath10k *ar,
struct ath10k_fw_stats_vdev *dst;
src = data;
- if (data_len < sizeof(*src))
+ if (data_len < sizeof(*src)) {
+ kfree(tb);
return -EPROTO;
+ }
data += sizeof(*src);
data_len -= sizeof(*src);
@@ -1196,8 +1200,10 @@ static int ath10k_wmi_tlv_op_pull_fw_stats(struct ath10k *ar,
struct ath10k_fw_stats_peer *dst;
src = data;
- if (data_len < sizeof(*src))
+ if (data_len < sizeof(*src)) {
+ kfree(tb);
return -EPROTO;
+ }
data += sizeof(*src);
data_len -= sizeof(*src);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c
index 0708eedd9671..1c69e8140d9d 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c
@@ -664,7 +664,7 @@ void rtl92ee_set_fw_rsvdpagepkt(struct ieee80211_hw *hw, bool b_dl_finished)
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_mac *mac = rtl_mac(rtl_priv(hw));
struct sk_buff *skb = NULL;
-
+ bool rtstatus;
u32 totalpacketlen;
u8 u1rsvdpageloc[5] = { 0 };
bool b_dlok = false;
@@ -727,7 +727,9 @@ void rtl92ee_set_fw_rsvdpagepkt(struct ieee80211_hw *hw, bool b_dl_finished)
memcpy((u8 *)skb_put(skb, totalpacketlen),
&reserved_page_packet, totalpacketlen);
- b_dlok = true;
+ rtstatus = rtl_cmd_send_packet(hw, skb);
+ if (rtstatus)
+ b_dlok = true;
if (b_dlok) {
RT_TRACE(rtlpriv, COMP_POWER, DBG_LOUD ,
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
index bbb789f8990b..738d541a2255 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
@@ -1377,6 +1377,7 @@ static void _rtl8821ae_get_wakeup_reason(struct ieee80211_hw *hw)
ppsc->wakeup_reason = 0;
+ do_gettimeofday(&ts);
rtlhal->last_suspend_sec = ts.tv_sec;
switch (fw_reason) {
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index 96526dcfdd37..ff7b9632ad61 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -823,7 +823,7 @@ static int init_labels(struct nd_mapping *nd_mapping, int num_labels)
nsindex = to_namespace_index(ndd, 0);
memset(nsindex, 0, ndd->nsarea.config_size);
for (i = 0; i < 2; i++) {
- int rc = nd_label_write_index(ndd, i, i*2, ND_NSINDEX_INIT);
+ int rc = nd_label_write_index(ndd, i, 3 - i, ND_NSINDEX_INIT);
if (rc)
return rc;
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index aae7379af4e4..c2184104b789 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1305,7 +1305,7 @@ static umode_t namespace_visible(struct kobject *kobj,
if (a == &dev_attr_resource.attr) {
if (is_namespace_blk(dev))
return 0;
- return a->mode;
+ return 0400;
}
if (is_namespace_pmem(dev) || is_namespace_blk(dev)) {
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index b83df942794f..193ac13de49b 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1414,8 +1414,16 @@ static void program_hpp_type0(struct pci_dev *dev, struct hpp_type0 *hpp)
static void program_hpp_type1(struct pci_dev *dev, struct hpp_type1 *hpp)
{
- if (hpp)
- dev_warn(&dev->dev, "PCI-X settings not supported\n");
+ int pos;
+
+ if (!hpp)
+ return;
+
+ pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
+ if (!pos)
+ return;
+
+ dev_warn(&dev->dev, "PCI-X settings not supported\n");
}
static bool pcie_root_rcb_set(struct pci_dev *dev)
@@ -1441,6 +1449,9 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp)
if (!hpp)
return;
+ if (!pci_is_pcie(dev))
+ return;
+
if (hpp->revision > 1) {
dev_warn(&dev->dev, "PCIe settings rev %d not supported\n",
hpp->revision);
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 1f4a1f02a2cd..fec1ef2b1748 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -315,6 +315,7 @@ config SPI_FSL_SPI
config SPI_FSL_DSPI
tristate "Freescale DSPI controller"
select REGMAP_MMIO
+ depends on HAS_DMA
depends on SOC_VF610 || SOC_LS1021A || ARCH_LAYERSCAPE || COMPILE_TEST
help
This enables support for the Freescale DSPI controller in master
diff --git a/drivers/staging/iio/cdc/ad7150.c b/drivers/staging/iio/cdc/ad7150.c
index e8d0ff2d5c9b..808d6ebf6c94 100644
--- a/drivers/staging/iio/cdc/ad7150.c
+++ b/drivers/staging/iio/cdc/ad7150.c
@@ -272,7 +272,7 @@ static int ad7150_write_event_config(struct iio_dev *indio_dev,
error_ret:
mutex_unlock(&chip->state_lock);
- return 0;
+ return ret;
}
static int ad7150_read_event_value(struct iio_dev *indio_dev,
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index fd493412b172..bb73401f5761 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -1923,12 +1923,14 @@ attach:
if (!(hdr->opcode & ISCSI_OP_IMMEDIATE)) {
int cmdsn_ret = iscsit_sequence_cmd(conn, cmd, buf, hdr->cmdsn);
- if (cmdsn_ret == CMDSN_HIGHER_THAN_EXP)
+ if (cmdsn_ret == CMDSN_HIGHER_THAN_EXP) {
out_of_order_cmdsn = 1;
- else if (cmdsn_ret == CMDSN_LOWER_THAN_EXP)
+ } else if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) {
+ target_put_sess_cmd(&cmd->se_cmd);
return 0;
- else if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER)
+ } else if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) {
return -1;
+ }
}
iscsit_ack_from_expstatsn(conn, be32_to_cpu(hdr->exp_statsn));
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index a42054edd427..37abf881ca75 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1970,6 +1970,8 @@ static void target_restart_delayed_cmds(struct se_device *dev)
list_del(&cmd->se_delayed_node);
spin_unlock(&dev->delayed_cmd_lock);
+ cmd->transport_state |= CMD_T_SENT;
+
__target_execute_cmd(cmd, true);
if (cmd->sam_task_attr == TCM_ORDERED_TAG)
@@ -2007,6 +2009,8 @@ static void transport_complete_task_attr(struct se_cmd *cmd)
pr_debug("Incremented dev_cur_ordered_id: %u for ORDERED\n",
dev->dev_cur_ordered_id);
}
+ cmd->se_cmd_flags &= ~SCF_TASK_ATTR_SET;
+
restart:
target_restart_delayed_cmds(dev);
}
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index e4110d6de0b5..da6cc25baaef 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -703,6 +703,7 @@ vhost_scsi_iov_to_sgl(struct vhost_scsi_cmd *cmd, bool write,
struct scatterlist *sg, int sg_count)
{
size_t off = iter->iov_offset;
+ struct scatterlist *p = sg;
int i, ret;
for (i = 0; i < iter->nr_segs; i++) {
@@ -711,8 +712,8 @@ vhost_scsi_iov_to_sgl(struct vhost_scsi_cmd *cmd, bool write,
ret = vhost_scsi_map_to_sgl(cmd, base, len, sg, write);
if (ret < 0) {
- for (i = 0; i < sg_count; i++) {
- struct page *page = sg_page(&sg[i]);
+ while (p < sg) {
+ struct page *page = sg_page(p++);
if (page)
put_page(page);
}
diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index 0e0eb10f82a0..816a0e08ef10 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -316,7 +316,7 @@ static int xenbus_write_transaction(unsigned msg_type,
rc = -ENOMEM;
goto out;
}
- } else if (msg_type == XS_TRANSACTION_END) {
+ } else if (u->u.msg.tx_id != 0) {
list_for_each_entry(trans, &u->transactions, list)
if (trans->handle.id == u->u.msg.tx_id)
break;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 511078586fa1..73f1d1b3a51c 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -483,6 +483,9 @@ static int v9fs_test_inode(struct inode *inode, void *data)
if (v9inode->qid.type != st->qid.type)
return 0;
+
+ if (v9inode->qid.path != st->qid.path)
+ return 0;
return 1;
}
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index cb899af1babc..0b88744c6446 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -87,6 +87,9 @@ static int v9fs_test_inode_dotl(struct inode *inode, void *data)
if (v9inode->qid.type != st->qid.type)
return 0;
+
+ if (v9inode->qid.path != st->qid.path)
+ return 0;
return 1;
}
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 35b755e79c2d..fe6e7050fe50 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -87,7 +87,8 @@ static int autofs4_write(struct autofs_sb_info *sbi,
spin_unlock_irqrestore(&current->sighand->siglock, flags);
}
- return (bytes > 0);
+ /* if 'wr' returned 0 (impossible) we assume -EIO (safe) */
+ return bytes == 0 ? 0 : wr < 0 ? wr : -EIO;
}
static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
@@ -101,6 +102,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
} pkt;
struct file *pipe = NULL;
size_t pktsz;
+ int ret;
DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d",
(unsigned long) wq->wait_queue_token, wq->name.len, wq->name.name, type);
@@ -173,7 +175,18 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
mutex_unlock(&sbi->wq_mutex);
if (autofs4_write(sbi, pipe, &pkt, pktsz))
+ switch (ret = autofs4_write(sbi, pipe, &pkt, pktsz)) {
+ case 0:
+ break;
+ case -ENOMEM:
+ case -ERESTARTSYS:
+ /* Just fail this one */
+ autofs4_wait_release(sbi, wq->wait_queue_token, ret);
+ break;
+ default:
autofs4_catatonic_mode(sbi);
+ break;
+ }
fput(pipe);
}
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index 778282944530..837a9a8d579e 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -348,7 +348,5 @@ skip:
out:
btrfs_free_path(path);
- if (ret)
- btrfs_warn(fs_info, "btrfs_uuid_tree_iterate failed %d", ret);
- return 0;
+ return ret;
}
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 286f10b0363b..4f457d5c4933 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -442,15 +442,16 @@ void ecryptfs_release_messaging(void)
}
if (ecryptfs_daemon_hash) {
struct ecryptfs_daemon *daemon;
+ struct hlist_node *n;
int i;
mutex_lock(&ecryptfs_daemon_hash_mux);
for (i = 0; i < (1 << ecryptfs_hash_bits); i++) {
int rc;
- hlist_for_each_entry(daemon,
- &ecryptfs_daemon_hash[i],
- euid_chain) {
+ hlist_for_each_entry_safe(daemon, n,
+ &ecryptfs_daemon_hash[i],
+ euid_chain) {
rc = ecryptfs_exorcise_daemon(daemon);
if (rc)
printk(KERN_ERR "%s: Error whilst "
diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c
index 15ebac242288..d3d6b28ce9b9 100644
--- a/fs/ext4/crypto_key.c
+++ b/fs/ext4/crypto_key.c
@@ -220,11 +220,9 @@ int _ext4_get_encryption_info(struct inode *inode)
int mode;
int res;
- if (!ext4_read_workqueue) {
- res = ext4_init_crypto();
- if (res)
- return res;
- }
+ res = ext4_init_crypto();
+ if (res)
+ return res;
retry:
crypt_info = ACCESS_ONCE(ei->i_crypt_info);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index fc496c646d12..468e7fe3616c 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4810,7 +4810,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
}
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
- offset + len > i_size_read(inode)) {
+ (offset + len > i_size_read(inode) ||
+ offset + len > EXT4_I(inode)->i_disksize)) {
new_size = offset + len;
ret = inode_newsize_ok(inode, new_size);
if (ret)
@@ -4986,7 +4987,8 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
}
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
- offset + len > i_size_read(inode)) {
+ (offset + len > i_size_read(inode) ||
+ offset + len > EXT4_I(inode)->i_disksize)) {
new_size = offset + len;
ret = inode_newsize_ok(inode, new_size);
if (ret)
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 112f8e04c549..3f52efa0f94f 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -253,6 +253,9 @@ static int __f2fs_set_acl(struct inode *inode, int type,
int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
+ return -EIO;
+
return __f2fs_set_acl(inode, type, acl, NULL);
}
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index e86f67ac96c6..2eb778174a9b 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -29,7 +29,6 @@ struct kmem_cache *inode_entry_slab;
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
{
set_ckpt_flags(sbi, CP_ERROR_FLAG);
- sbi->sb->s_flags |= MS_RDONLY;
if (!end_io)
f2fs_flush_merged_writes(sbi);
}
@@ -402,24 +401,23 @@ const struct address_space_operations f2fs_meta_aops = {
#endif
};
-static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
+static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino,
+ unsigned int devidx, int type)
{
struct inode_management *im = &sbi->im[type];
struct ino_entry *e, *tmp;
tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);
-retry:
+
radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
spin_lock(&im->ino_lock);
e = radix_tree_lookup(&im->ino_root, ino);
if (!e) {
e = tmp;
- if (radix_tree_insert(&im->ino_root, ino, e)) {
- spin_unlock(&im->ino_lock);
- radix_tree_preload_end();
- goto retry;
- }
+ if (unlikely(radix_tree_insert(&im->ino_root, ino, e)))
+ f2fs_bug_on(sbi, 1);
+
memset(e, 0, sizeof(struct ino_entry));
e->ino = ino;
@@ -427,6 +425,10 @@ retry:
if (type != ORPHAN_INO)
im->ino_num++;
}
+
+ if (type == FLUSH_INO)
+ f2fs_set_bit(devidx, (char *)&e->dirty_device);
+
spin_unlock(&im->ino_lock);
radix_tree_preload_end();
@@ -455,7 +457,7 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
/* add new dirty ino entry into list */
- __add_ino_entry(sbi, ino, type);
+ __add_ino_entry(sbi, ino, 0, type);
}
void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
@@ -481,7 +483,7 @@ void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
struct ino_entry *e, *tmp;
int i;
- for (i = all ? ORPHAN_INO: APPEND_INO; i <= UPDATE_INO; i++) {
+ for (i = all ? ORPHAN_INO : APPEND_INO; i < MAX_INO_ENTRY; i++) {
struct inode_management *im = &sbi->im[i];
spin_lock(&im->ino_lock);
@@ -495,6 +497,27 @@ void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
}
}
+void set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
+ unsigned int devidx, int type)
+{
+ __add_ino_entry(sbi, ino, devidx, type);
+}
+
+bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
+ unsigned int devidx, int type)
+{
+ struct inode_management *im = &sbi->im[type];
+ struct ino_entry *e;
+ bool is_dirty = false;
+
+ spin_lock(&im->ino_lock);
+ e = radix_tree_lookup(&im->ino_root, ino);
+ if (e && f2fs_test_bit(devidx, (char *)&e->dirty_device))
+ is_dirty = true;
+ spin_unlock(&im->ino_lock);
+ return is_dirty;
+}
+
int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
struct inode_management *im = &sbi->im[ORPHAN_INO];
@@ -531,7 +554,7 @@ void release_orphan_inode(struct f2fs_sb_info *sbi)
void add_orphan_inode(struct inode *inode)
{
/* add new orphan ino entry into list */
- __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, ORPHAN_INO);
+ __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, 0, ORPHAN_INO);
update_inode_page(inode);
}
@@ -555,7 +578,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
return err;
}
- __add_ino_entry(sbi, ino, ORPHAN_INO);
+ __add_ino_entry(sbi, ino, 0, ORPHAN_INO);
inode = f2fs_iget_retry(sbi->sb, ino);
if (IS_ERR(inode)) {
@@ -591,6 +614,9 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
block_t start_blk, orphan_blocks, i, j;
unsigned int s_flags = sbi->sb->s_flags;
int err = 0;
+#ifdef CONFIG_QUOTA
+ int quota_enabled;
+#endif
if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
return 0;
@@ -603,8 +629,9 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
#ifdef CONFIG_QUOTA
/* Needed for iput() to work correctly and not trash data */
sbi->sb->s_flags |= MS_ACTIVE;
+
/* Turn on quotas so that they are updated correctly */
- f2fs_enable_quota_files(sbi);
+ quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY);
#endif
start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
@@ -632,7 +659,8 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
out:
#ifdef CONFIG_QUOTA
/* Turn quotas off */
- f2fs_quota_off_umount(sbi->sb);
+ if (quota_enabled)
+ f2fs_quota_off_umount(sbi->sb);
#endif
sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
@@ -987,7 +1015,7 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
update_inode_page(inode);
iput(inode);
}
- };
+ }
return 0;
}
@@ -1147,6 +1175,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
struct super_block *sb = sbi->sb;
struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
u64 kbytes_written;
+ int err;
/* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META)) {
@@ -1240,6 +1269,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
+ /* flush all device cache */
+ err = f2fs_flush_device_cache(sbi);
+ if (err)
+ return err;
+
/* write out checkpoint buffer at block 0 */
update_meta_page(sbi, ckpt, start_blk++);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index c8583d7a1845..cdccc429325b 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -172,7 +172,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
{
struct bio *bio;
- bio = f2fs_bio_alloc(npages);
+ bio = f2fs_bio_alloc(sbi, npages, true);
f2fs_target_device(sbi, blk_addr, bio);
bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
@@ -417,8 +417,8 @@ next:
bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
- /* set submitted = 1 as a return value */
- fio->submitted = 1;
+ /* set submitted = true as a return value */
+ fio->submitted = true;
inc_page_count(sbi, WB_DATA_TYPE(bio_page));
@@ -472,7 +472,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
f2fs_wait_on_block_writeback(sbi, blkaddr);
}
- bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, BIO_MAX_PAGES));
+ bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
if (!bio) {
if (ctx)
fscrypt_release_ctx(ctx);
@@ -832,6 +832,13 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
struct f2fs_map_blocks map;
int err = 0;
+ /* convert inline data for Direct I/O*/
+ if (iocb->ki_flags & IOCB_DIRECT) {
+ err = f2fs_convert_inline_inode(inode);
+ if (err)
+ return err;
+ }
+
if (is_inode_flag_set(inode, FI_NO_PREALLOC))
return 0;
@@ -844,15 +851,11 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
map.m_next_pgofs = NULL;
- if (iocb->ki_flags & IOCB_DIRECT) {
- err = f2fs_convert_inline_inode(inode);
- if (err)
- return err;
+ if (iocb->ki_flags & IOCB_DIRECT)
return f2fs_map_blocks(inode, &map, 1,
__force_buffered_io(inode, WRITE) ?
F2FS_GET_BLOCK_PRE_AIO :
F2FS_GET_BLOCK_PRE_DIO);
- }
if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
err = f2fs_convert_inline_inode(inode);
if (err)
@@ -1332,7 +1335,7 @@ static int f2fs_read_data_pages(struct file *file,
struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
- struct inode *inode = file->f_mapping->host;
+ struct inode *inode = mapping->host;
struct page *page = list_last_entry(pages, struct page, lru);
trace_f2fs_readpages(inode, page, nr_pages);
@@ -1493,6 +1496,7 @@ static int __write_data_page(struct page *page, bool *submitted,
int err = 0;
struct f2fs_io_info fio = {
.sbi = sbi,
+ .ino = inode->i_ino,
.type = DATA,
.op = REQ_OP_WRITE,
.op_flags = wbc_to_write_flags(wbc),
@@ -1564,8 +1568,11 @@ write:
err = do_write_data_page(&fio);
}
}
+
+ down_write(&F2FS_I(inode)->i_sem);
if (F2FS_I(inode)->last_disk_size < psize)
F2FS_I(inode)->last_disk_size = psize;
+ up_write(&F2FS_I(inode)->i_sem);
done:
if (err && err != -ENOENT)
@@ -1945,6 +1952,12 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
}
trace_f2fs_write_begin(inode, pos, len, flags);
+ if (f2fs_is_atomic_file(inode) &&
+ !available_free_memory(sbi, INMEM_PAGES)) {
+ err = -ENOMEM;
+ goto fail;
+ }
+
/*
* We should check this at this moment to avoid deadlock on inode page
* and #0 page. The locking rule for inline_data conversion should be:
@@ -1960,7 +1973,8 @@ repeat:
* Do not use grab_cache_page_write_begin() to avoid deadlock due to
* wait_for_stable_page. Will wait that below with our IO control.
*/
- page = grab_cache_page(mapping, index);
+ page = f2fs_pagecache_get_page(mapping, index,
+ FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
if (!page) {
err = -ENOMEM;
goto fail;
@@ -2021,6 +2035,8 @@ repeat:
fail:
f2fs_put_page(page, 1);
f2fs_write_failed(mapping, pos + len);
+ if (f2fs_is_atomic_file(inode))
+ drop_inmem_pages_all(sbi);
return err;
}
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 87f449845f5f..ecada8425268 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -45,9 +45,18 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
si->ndirty_data = get_pages(sbi, F2FS_DIRTY_DATA);
+ si->ndirty_qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
+
+ si->nquota_files = 0;
+ if (f2fs_sb_has_quota_ino(sbi->sb)) {
+ for (i = 0; i < MAXQUOTAS; i++) {
+ if (f2fs_qf_ino(sbi->sb, i))
+ si->nquota_files++;
+ }
+ }
si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
si->aw_cnt = atomic_read(&sbi->aw_cnt);
@@ -61,6 +70,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
atomic_read(&SM_I(sbi)->fcc_info->issued_flush);
si->nr_flushing =
atomic_read(&SM_I(sbi)->fcc_info->issing_flush);
+ si->flush_list_empty =
+ llist_empty(&SM_I(sbi)->fcc_info->issue_list);
}
if (SM_I(sbi) && SM_I(sbi)->dcc_info) {
si->nr_discarded =
@@ -96,9 +107,9 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
si->sits = MAIN_SEGS(sbi);
si->dirty_sits = SIT_I(sbi)->dirty_sentries;
- si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID_LIST];
+ si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID];
si->avail_nids = NM_I(sbi)->available_nids;
- si->alloc_nids = NM_I(sbi)->nid_cnt[ALLOC_NID_LIST];
+ si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID];
si->bg_gc = sbi->bg_gc;
si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
@@ -231,14 +242,14 @@ get_cache:
}
/* free nids */
- si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] +
- NM_I(sbi)->nid_cnt[ALLOC_NID_LIST]) *
+ si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID] +
+ NM_I(sbi)->nid_cnt[PREALLOC_NID]) *
sizeof(struct free_nid);
si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry);
si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
sizeof(struct nat_entry_set);
si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages);
- for (i = 0; i <= ORPHAN_INO; i++)
+ for (i = 0; i < MAX_INO_ENTRY; i++)
si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
si->cache_mem += atomic_read(&sbi->total_ext_tree) *
sizeof(struct extent_tree);
@@ -262,9 +273,10 @@ static int stat_show(struct seq_file *s, void *v)
list_for_each_entry(si, &f2fs_stat_list, stat_list) {
update_general_status(si->sbi);
- seq_printf(s, "\n=====[ partition info(%pg). #%d, %s]=====\n",
+ seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n",
si->sbi->sb->s_bdev, i++,
- f2fs_readonly(si->sbi->sb) ? "RO": "RW");
+ f2fs_readonly(si->sbi->sb) ? "RO": "RW",
+ f2fs_cp_error(si->sbi) ? "Error": "Good");
seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
si->sit_area_segs, si->nat_area_segs);
seq_printf(s, "[SSA: %d] [MAIN: %d",
@@ -349,10 +361,11 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
si->ext_tree, si->zombie_tree, si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n");
- seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d), "
+ seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), "
"Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n",
si->nr_wb_cp_data, si->nr_wb_data,
si->nr_flushing, si->nr_flushed,
+ si->flush_list_empty,
si->nr_discarding, si->nr_discarded,
si->nr_discard_cmd, si->undiscard_blks);
seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d), "
@@ -365,6 +378,8 @@ static int stat_show(struct seq_file *s, void *v)
si->ndirty_dent, si->ndirty_dirs, si->ndirty_all);
seq_printf(s, " - datas: %4d in files:%4d\n",
si->ndirty_data, si->ndirty_files);
+ seq_printf(s, " - quota datas: %4d in quota files:%4d\n",
+ si->ndirty_qdata, si->nquota_files);
seq_printf(s, " - meta: %4d in %4d\n",
si->ndirty_meta, si->meta_pages);
seq_printf(s, " - imeta: %4d\n",
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 4f2a8fedb313..1955707b138b 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -10,10 +10,12 @@
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
+#include <linux/sched.h>
#include "f2fs.h"
#include "node.h"
#include "acl.h"
#include "xattr.h"
+#include <trace/events/f2fs.h>
static unsigned long dir_blocks(struct inode *inode)
{
@@ -847,6 +849,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
struct f2fs_dentry_block *dentry_blk = NULL;
struct page *dentry_page = NULL;
struct file_ra_state *ra = &file->f_ra;
+ loff_t start_pos = ctx->pos;
unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK);
struct f2fs_dentry_ptr d;
struct fscrypt_str fstr = FSTR_INIT(NULL, 0);
@@ -855,24 +858,32 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
if (f2fs_encrypted_inode(inode)) {
err = fscrypt_get_encryption_info(inode);
if (err && err != -ENOKEY)
- return err;
+ goto out;
err = fscrypt_fname_alloc_buffer(inode, F2FS_NAME_LEN, &fstr);
if (err < 0)
- return err;
+ goto out;
}
if (f2fs_has_inline_dentry(inode)) {
err = f2fs_read_inline_dir(file, ctx, &fstr);
- goto out;
+ goto out_free;
}
- /* readahead for multi pages of dir */
- if (npages - n > 1 && !ra_has_index(ra, n))
- page_cache_sync_readahead(inode->i_mapping, ra, file, n,
+ for (; n < npages; n++, ctx->pos = n * NR_DENTRY_IN_BLOCK) {
+
+ /* allow readdir() to be interrupted */
+ if (fatal_signal_pending(current)) {
+ err = -ERESTARTSYS;
+ goto out_free;
+ }
+ cond_resched();
+
+ /* readahead for multi pages of dir */
+ if (npages - n > 1 && !ra_has_index(ra, n))
+ page_cache_sync_readahead(inode->i_mapping, ra, file, n,
min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES));
- for (; n < npages; n++) {
dentry_page = get_lock_data_page(inode, n, false);
if (IS_ERR(dentry_page)) {
err = PTR_ERR(dentry_page);
@@ -880,7 +891,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
err = 0;
continue;
} else {
- goto out;
+ goto out_free;
}
}
@@ -896,12 +907,13 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
break;
}
- ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK;
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
}
-out:
+out_free:
fscrypt_fname_free_buffer(&fstr);
+out:
+ trace_f2fs_readdir(inode, start_pos, ctx->pos, err);
return err < 0 ? err : 0;
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index c1a0aef8efc6..081ec493baae 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -47,6 +47,8 @@
enum {
FAULT_KMALLOC,
FAULT_PAGE_ALLOC,
+ FAULT_PAGE_GET,
+ FAULT_ALLOC_BIO,
FAULT_ALLOC_NID,
FAULT_ORPHAN,
FAULT_BLOCK,
@@ -94,6 +96,7 @@ extern char *fault_name[FAULT_MAX];
#define F2FS_MOUNT_GRPQUOTA 0x00100000
#define F2FS_MOUNT_PRJQUOTA 0x00200000
#define F2FS_MOUNT_QUOTA 0x00400000
+#define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000
#define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -119,6 +122,8 @@ struct f2fs_mount_info {
#define F2FS_FEATURE_EXTRA_ATTR 0x0008
#define F2FS_FEATURE_PRJQUOTA 0x0010
#define F2FS_FEATURE_INODE_CHKSUM 0x0020
+#define F2FS_FEATURE_FLEXIBLE_INLINE_XATTR 0x0040
+#define F2FS_FEATURE_QUOTA_INO 0x0080
#define F2FS_HAS_FEATURE(sb, mask) \
((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
@@ -214,7 +219,7 @@ enum {
#define BATCHED_TRIM_BLOCKS(sbi) \
(BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi)
-#define DISCARD_ISSUE_RATE 8
+#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */
#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */
#define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */
#define DEF_CP_INTERVAL 60 /* 60 secs */
@@ -225,7 +230,6 @@ struct cp_control {
__u64 trim_start;
__u64 trim_end;
__u64 trim_minlen;
- __u64 trimmed;
};
/*
@@ -244,12 +248,14 @@ enum {
ORPHAN_INO, /* for orphan ino list */
APPEND_INO, /* for append ino list */
UPDATE_INO, /* for update ino list */
+ FLUSH_INO, /* for multiple device flushing */
MAX_INO_ENTRY, /* max. list */
};
struct ino_entry {
- struct list_head list; /* list head */
- nid_t ino; /* inode number */
+ struct list_head list; /* list head */
+ nid_t ino; /* inode number */
+ unsigned int dirty_device; /* dirty device bitmap */
};
/* for the list of inodes to be GCed */
@@ -273,10 +279,6 @@ struct discard_entry {
#define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \
(MAX_PLIST_NUM - 1) : (blk_num - 1))
-#define P_ACTIVE 0x01
-#define P_TRIM 0x02
-#define plist_issue(tag) (((tag) & P_ACTIVE) || ((tag) & P_TRIM))
-
enum {
D_PREP,
D_SUBMIT,
@@ -308,12 +310,32 @@ struct discard_cmd {
int error; /* bio error */
};
+enum {
+ DPOLICY_BG,
+ DPOLICY_FORCE,
+ DPOLICY_FSTRIM,
+ DPOLICY_UMOUNT,
+ MAX_DPOLICY,
+};
+
+struct discard_policy {
+ int type; /* type of discard */
+ unsigned int min_interval; /* used for candidates exist */
+ unsigned int max_interval; /* used for candidates not exist */
+ unsigned int max_requests; /* # of discards issued per round */
+ unsigned int io_aware_gran; /* minimum granularity discard not be aware of I/O */
+ bool io_aware; /* issue discard in idle time */
+ bool sync; /* submit discard with REQ_SYNC flag */
+ unsigned int granularity; /* discard granularity */
+};
+
struct discard_cmd_control {
struct task_struct *f2fs_issue_discard; /* discard thread */
struct list_head entry_list; /* 4KB discard entry list */
struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */
unsigned char pend_list_tag[MAX_PLIST_NUM];/* tag for pending entries */
struct list_head wait_list; /* store on-flushing entries */
+ struct list_head fstrim_list; /* in-flight discard from fstrim */
wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */
unsigned int discard_wake; /* to wake up discard thread */
struct mutex cmd_lock;
@@ -443,11 +465,14 @@ struct f2fs_flush_device {
/* for inline stuff */
#define DEF_INLINE_RESERVED_SIZE 1
+#define DEF_MIN_INLINE_SIZE 1
static inline int get_extra_isize(struct inode *inode);
-#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \
- (CUR_ADDRS_PER_INODE(inode) - \
- DEF_INLINE_RESERVED_SIZE - \
- F2FS_INLINE_XATTR_ADDRS))
+static inline int get_inline_xattr_addrs(struct inode *inode);
+#define F2FS_INLINE_XATTR_ADDRS(inode) get_inline_xattr_addrs(inode)
+#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \
+ (CUR_ADDRS_PER_INODE(inode) - \
+ F2FS_INLINE_XATTR_ADDRS(inode) - \
+ DEF_INLINE_RESERVED_SIZE))
/* for inline dir */
#define NR_INLINE_DENTRY(inode) (MAX_INLINE_DATA(inode) * BITS_PER_BYTE / \
@@ -647,6 +672,7 @@ struct f2fs_inode_info {
#endif
struct list_head dirty_list; /* dirty list for dirs and files */
struct list_head gdirty_list; /* linked in global dirty list */
+ struct list_head inmem_ilist; /* list for inmem inodes */
struct list_head inmem_pages; /* inmemory pages managed by f2fs */
struct task_struct *inmem_task; /* store inmemory task */
struct mutex inmem_lock; /* lock for inmemory pages */
@@ -657,6 +683,7 @@ struct f2fs_inode_info {
int i_extra_isize; /* size of extra space located in i_addr */
kprojid_t i_projid; /* id for project quota */
+ int i_inline_xattr_size; /* inline xattr size */
};
static inline void get_extent_info(struct extent_info *ext,
@@ -730,10 +757,13 @@ static inline void __try_update_largest_extent(struct inode *inode,
}
}
-enum nid_list {
- FREE_NID_LIST,
- ALLOC_NID_LIST,
- MAX_NID_LIST,
+/*
+ * For free nid management
+ */
+enum nid_state {
+ FREE_NID, /* newly added to free nid list */
+ PREALLOC_NID, /* it is preallocated */
+ MAX_NID_STATE,
};
struct f2fs_nm_info {
@@ -756,8 +786,8 @@ struct f2fs_nm_info {
/* free node ids management */
struct radix_tree_root free_nid_root;/* root of the free_nid cache */
- struct list_head nid_list[MAX_NID_LIST];/* lists for free nids */
- unsigned int nid_cnt[MAX_NID_LIST]; /* the number of free node id */
+ struct list_head free_nid_list; /* list for free nids excluding preallocated nids */
+ unsigned int nid_cnt[MAX_NID_STATE]; /* the number of free node id */
spinlock_t nid_list_lock; /* protect nid lists ops */
struct mutex build_lock; /* lock for build free nids */
unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE];
@@ -835,6 +865,7 @@ enum {
struct flush_cmd {
struct completion wait;
struct llist_node llnode;
+ nid_t ino;
int ret;
};
@@ -853,6 +884,8 @@ struct f2fs_sm_info {
struct dirty_seglist_info *dirty_info; /* dirty segment information */
struct curseg_info *curseg_array; /* active segment information */
+ struct rw_semaphore curseg_lock; /* for preventing curseg change */
+
block_t seg0_blkaddr; /* block address of 0'th segment */
block_t main_blkaddr; /* start block address of main area */
block_t ssa_blkaddr; /* start block address of SSA area */
@@ -874,6 +907,7 @@ struct f2fs_sm_info {
unsigned int min_ipu_util; /* in-place-update threshold */
unsigned int min_fsync_blocks; /* threshold for fsync */
unsigned int min_hot_blocks; /* threshold for hot block allocation */
+ unsigned int min_ssr_sections; /* threshold to trigger SSR allocation */
/* for flush command control */
struct flush_cmd_control *fcc_info;
@@ -895,6 +929,7 @@ struct f2fs_sm_info {
enum count_type {
F2FS_DIRTY_DENTS,
F2FS_DIRTY_DATA,
+ F2FS_DIRTY_QDATA,
F2FS_DIRTY_NODES,
F2FS_DIRTY_META,
F2FS_INMEM_PAGES,
@@ -943,6 +978,18 @@ enum need_lock_type {
LOCK_RETRY,
};
+enum cp_reason_type {
+ CP_NO_NEEDED,
+ CP_NON_REGULAR,
+ CP_HARDLINK,
+ CP_SB_NEED_CP,
+ CP_WRONG_PINO,
+ CP_NO_SPC_ROLL,
+ CP_NODE_NEED_CP,
+ CP_FASTBOOT_MODE,
+ CP_SPEC_LOG_NUM,
+};
+
enum iostat_type {
APP_DIRECT_IO, /* app direct IOs */
APP_BUFFERED_IO, /* app buffered IOs */
@@ -962,6 +1009,7 @@ enum iostat_type {
struct f2fs_io_info {
struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */
+ nid_t ino; /* inode number */
enum page_type type; /* contains DATA/NODE/META/META_FLUSH */
enum temp_type temp; /* contains HOT/WARM/COLD */
int op; /* contains REQ_OP_ */
@@ -1006,6 +1054,7 @@ enum inode_type {
DIR_INODE, /* for dirty dir inode */
FILE_INODE, /* for dirty regular/symlink inode */
DIRTY_META, /* for all dirtied inode metadata */
+ ATOMIC_FILE, /* for all atomic files */
NR_INODE_TYPE,
};
@@ -1108,12 +1157,15 @@ struct f2fs_sb_info {
loff_t max_file_blocks; /* max block index of file */
int active_logs; /* # of active logs */
int dir_level; /* directory level */
+ int inline_xattr_size; /* inline xattr size */
+ unsigned int trigger_ssr_threshold; /* threshold to trigger ssr */
block_t user_block_count; /* # of user blocks */
block_t total_valid_block_count; /* # of valid blocks */
block_t discard_blks; /* discard command candidats */
block_t last_valid_block_count; /* for recovery */
block_t reserved_blocks; /* configurable reserved blocks */
+ block_t current_reserved_blocks; /* current reserved blocks */
u32 s_next_generation; /* for NFS support */
@@ -1179,6 +1231,8 @@ struct f2fs_sb_info {
struct list_head s_list;
int s_ndevs; /* number of devices */
struct f2fs_dev_info *devs; /* for device list */
+ unsigned int dirty_device; /* for checkpoint data flush */
+ spinlock_t dev_lock; /* protect dirty_device */
struct mutex umount_mutex;
unsigned int shrinker_run_no;
@@ -1242,8 +1296,7 @@ static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
{
- struct timespec ts = {sbi->interval_time[type], 0};
- unsigned long interval = timespec_to_jiffies(&ts);
+ unsigned long interval = sbi->interval_time[type] * HZ;
return time_after(jiffies, sbi->last_time[type] + interval);
}
@@ -1410,6 +1463,13 @@ static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp)
return le64_to_cpu(cp->checkpoint_ver);
}
+static inline unsigned long f2fs_qf_ino(struct super_block *sb, int type)
+{
+ if (type < F2FS_MAX_QUOTAS)
+ return le32_to_cpu(F2FS_SB(sb)->raw_super->qf_ino[type]);
+ return 0;
+}
+
static inline __u64 cur_cp_crc(struct f2fs_checkpoint *cp)
{
size_t crc_offset = le32_to_cpu(cp->checksum_offset);
@@ -1588,7 +1648,8 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
spin_lock(&sbi->stat_lock);
sbi->total_valid_block_count += (block_t)(*count);
- avail_user_block_count = sbi->user_block_count - sbi->reserved_blocks;
+ avail_user_block_count = sbi->user_block_count -
+ sbi->current_reserved_blocks;
if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
diff = sbi->total_valid_block_count - avail_user_block_count;
*count -= diff;
@@ -1622,6 +1683,10 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count);
f2fs_bug_on(sbi, inode->i_blocks < sectors);
sbi->total_valid_block_count -= (block_t)count;
+ if (sbi->reserved_blocks &&
+ sbi->current_reserved_blocks < sbi->reserved_blocks)
+ sbi->current_reserved_blocks = min(sbi->reserved_blocks,
+ sbi->current_reserved_blocks + count);
spin_unlock(&sbi->stat_lock);
f2fs_i_blocks_write(inode, count, false, true);
}
@@ -1642,6 +1707,8 @@ static inline void inode_inc_dirty_pages(struct inode *inode)
atomic_inc(&F2FS_I(inode)->dirty_pages);
inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
+ if (IS_NOQUOTA(inode))
+ inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_QDATA);
}
static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
@@ -1658,6 +1725,8 @@ static inline void inode_dec_dirty_pages(struct inode *inode)
atomic_dec(&F2FS_I(inode)->dirty_pages);
dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
+ if (IS_NOQUOTA(inode))
+ dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_QDATA);
}
static inline s64 get_pages(struct f2fs_sb_info *sbi, int count_type)
@@ -1765,10 +1834,17 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
return ret;
}
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ if (time_to_inject(sbi, FAULT_BLOCK)) {
+ f2fs_show_injection_info(FAULT_BLOCK);
+ goto enospc;
+ }
+#endif
+
spin_lock(&sbi->stat_lock);
valid_block_count = sbi->total_valid_block_count + 1;
- if (unlikely(valid_block_count + sbi->reserved_blocks >
+ if (unlikely(valid_block_count + sbi->current_reserved_blocks >
sbi->user_block_count)) {
spin_unlock(&sbi->stat_lock);
goto enospc;
@@ -1811,6 +1887,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
sbi->total_valid_node_count--;
sbi->total_valid_block_count--;
+ if (sbi->reserved_blocks &&
+ sbi->current_reserved_blocks < sbi->reserved_blocks)
+ sbi->current_reserved_blocks++;
spin_unlock(&sbi->stat_lock);
@@ -1857,6 +1936,19 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
}
+static inline struct page *f2fs_pagecache_get_page(
+ struct address_space *mapping, pgoff_t index,
+ int fgp_flags, gfp_t gfp_mask)
+{
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) {
+ f2fs_show_injection_info(FAULT_PAGE_GET);
+ return NULL;
+ }
+#endif
+ return pagecache_get_page(mapping, index, fgp_flags, gfp_mask);
+}
+
static inline void f2fs_copy_page(struct page *src, struct page *dst)
{
char *src_kaddr = kmap(src);
@@ -1906,15 +1998,25 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
return entry;
}
-static inline struct bio *f2fs_bio_alloc(int npages)
+static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi,
+ int npages, bool no_fail)
{
struct bio *bio;
- /* No failure on bio allocation */
- bio = bio_alloc(GFP_NOIO, npages);
- if (!bio)
- bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages);
- return bio;
+ if (no_fail) {
+ /* No failure on bio allocation */
+ bio = bio_alloc(GFP_NOIO, npages);
+ if (!bio)
+ bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages);
+ return bio;
+ }
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ if (time_to_inject(sbi, FAULT_ALLOC_BIO)) {
+ f2fs_show_injection_info(FAULT_ALLOC_BIO);
+ return NULL;
+ }
+#endif
+ return bio_alloc(GFP_KERNEL, npages);
}
static inline void f2fs_radix_tree_insert(struct radix_tree_root *root,
@@ -2224,25 +2326,20 @@ static inline int f2fs_has_inline_xattr(struct inode *inode)
static inline unsigned int addrs_per_inode(struct inode *inode)
{
- if (f2fs_has_inline_xattr(inode))
- return CUR_ADDRS_PER_INODE(inode) - F2FS_INLINE_XATTR_ADDRS;
- return CUR_ADDRS_PER_INODE(inode);
+ return CUR_ADDRS_PER_INODE(inode) - F2FS_INLINE_XATTR_ADDRS(inode);
}
-static inline void *inline_xattr_addr(struct page *page)
+static inline void *inline_xattr_addr(struct inode *inode, struct page *page)
{
struct f2fs_inode *ri = F2FS_INODE(page);
return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE -
- F2FS_INLINE_XATTR_ADDRS]);
+ F2FS_INLINE_XATTR_ADDRS(inode)]);
}
static inline int inline_xattr_size(struct inode *inode)
{
- if (f2fs_has_inline_xattr(inode))
- return F2FS_INLINE_XATTR_ADDRS << 2;
- else
- return 0;
+ return get_inline_xattr_addrs(inode) * sizeof(__le32);
}
static inline int f2fs_has_inline_data(struct inode *inode)
@@ -2323,9 +2420,10 @@ static inline void clear_file(struct inode *inode, int type)
static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync)
{
+ bool ret;
+
if (dsync) {
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- bool ret;
spin_lock(&sbi->inode_lock[DIRTY_META]);
ret = list_empty(&F2FS_I(inode)->gdirty_list);
@@ -2336,9 +2434,15 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync)
file_keep_isize(inode) ||
i_size_read(inode) & PAGE_MASK)
return false;
- return F2FS_I(inode)->last_disk_size == i_size_read(inode);
+
+ down_read(&F2FS_I(inode)->i_sem);
+ ret = F2FS_I(inode)->last_disk_size == i_size_read(inode);
+ up_read(&F2FS_I(inode)->i_sem);
+
+ return ret;
}
+#define sb_rdonly f2fs_readonly
static inline int f2fs_readonly(struct super_block *sb)
{
return sb->s_flags & MS_RDONLY;
@@ -2406,6 +2510,12 @@ static inline int get_extra_isize(struct inode *inode)
return F2FS_I(inode)->i_extra_isize / sizeof(__le32);
}
+static inline int f2fs_sb_has_flexible_inline_xattr(struct super_block *sb);
+static inline int get_inline_xattr_addrs(struct inode *inode)
+{
+ return F2FS_I(inode)->i_inline_xattr_size;
+}
+
#define get_inode_mode(i) \
((is_inode_flag_set(i, FI_ACL_MODE)) ? \
(F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
@@ -2534,7 +2644,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
*/
int f2fs_inode_dirtied(struct inode *inode, bool sync);
void f2fs_inode_synced(struct inode *inode);
-void f2fs_enable_quota_files(struct f2fs_sb_info *sbi);
+int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
void f2fs_quota_off_umount(struct super_block *sb);
int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
int f2fs_sync_fs(struct super_block *sb, int sync);
@@ -2562,7 +2672,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni);
pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs);
int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode);
int truncate_inode_blocks(struct inode *inode, pgoff_t from);
-int truncate_xattr_node(struct inode *inode, struct page *page);
+int truncate_xattr_node(struct inode *inode);
int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino);
int remove_inode_page(struct inode *inode);
struct page *new_inode_page(struct inode *inode);
@@ -2597,19 +2707,22 @@ void destroy_node_manager_caches(void);
*/
bool need_SSR(struct f2fs_sb_info *sbi);
void register_inmem_page(struct inode *inode, struct page *page);
+void drop_inmem_pages_all(struct f2fs_sb_info *sbi);
void drop_inmem_pages(struct inode *inode);
void drop_inmem_page(struct inode *inode, struct page *page);
int commit_inmem_pages(struct inode *inode);
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need);
void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi);
-int f2fs_issue_flush(struct f2fs_sb_info *sbi);
+int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino);
int create_flush_cmd_control(struct f2fs_sb_info *sbi);
+int f2fs_flush_device_cache(struct f2fs_sb_info *sbi);
void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free);
void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
-void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new);
+void init_discard_policy(struct discard_policy *dpolicy, int discard_type,
+ unsigned int granularity);
void stop_discard_thread(struct f2fs_sb_info *sbi);
-void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount);
+bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc);
void release_discard_addrs(struct f2fs_sb_info *sbi);
int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
@@ -2664,6 +2777,10 @@ void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type);
void release_ino_entry(struct f2fs_sb_info *sbi, bool all);
bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode);
+void set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
+ unsigned int devidx, int type);
+bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
+ unsigned int devidx, int type);
int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi);
int acquire_orphan_inode(struct f2fs_sb_info *sbi);
void release_orphan_inode(struct f2fs_sb_info *sbi);
@@ -2751,14 +2868,16 @@ struct f2fs_stat_info {
unsigned long long hit_largest, hit_cached, hit_rbtree;
unsigned long long hit_total, total_ext;
int ext_tree, zombie_tree, ext_node;
- int ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta;
+ int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
+ int ndirty_data, ndirty_qdata;
int inmem_pages;
- unsigned int ndirty_dirs, ndirty_files, ndirty_all;
+ unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
int nats, dirty_nats, sits, dirty_sits;
int free_nids, avail_nids, alloc_nids;
int total_count, utilization;
int bg_gc, nr_wb_cp_data, nr_wb_data;
- int nr_flushing, nr_flushed, nr_discarding, nr_discarded;
+ int nr_flushing, nr_flushed, flush_list_empty;
+ int nr_discarding, nr_discarded;
int nr_discard_cmd;
unsigned int undiscard_blks;
int inline_xattr, inline_inode, inline_dir, append, update, orphans;
@@ -3066,6 +3185,16 @@ static inline int f2fs_sb_has_inode_chksum(struct super_block *sb)
return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_INODE_CHKSUM);
}
+static inline int f2fs_sb_has_flexible_inline_xattr(struct super_block *sb)
+{
+ return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_FLEXIBLE_INLINE_XATTR);
+}
+
+static inline int f2fs_sb_has_quota_ino(struct super_block *sb)
+{
+ return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_QUOTA_INO);
+}
+
#ifdef CONFIG_BLK_DEV_ZONED
static inline int get_blkz_type(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t blkaddr)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index a9e1655a6bf8..bfff53f658e1 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -56,6 +56,11 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
struct dnode_of_data dn;
int err;
+ if (unlikely(f2fs_cp_error(sbi))) {
+ err = -EIO;
+ goto err;
+ }
+
sb_start_pagefault(inode->i_sb);
f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
@@ -117,6 +122,7 @@ out_sem:
out:
sb_end_pagefault(inode->i_sb);
f2fs_update_time(sbi, REQ_TIME);
+err:
return block_page_mkwrite_return(err);
}
@@ -141,27 +147,29 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
return 1;
}
-static inline bool need_do_checkpoint(struct inode *inode)
+static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- bool need_cp = false;
+ enum cp_reason_type cp_reason = CP_NO_NEEDED;
- if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
- need_cp = true;
+ if (!S_ISREG(inode->i_mode))
+ cp_reason = CP_NON_REGULAR;
+ else if (inode->i_nlink != 1)
+ cp_reason = CP_HARDLINK;
else if (is_sbi_flag_set(sbi, SBI_NEED_CP))
- need_cp = true;
+ cp_reason = CP_SB_NEED_CP;
else if (file_wrong_pino(inode))
- need_cp = true;
+ cp_reason = CP_WRONG_PINO;
else if (!space_for_roll_forward(sbi))
- need_cp = true;
+ cp_reason = CP_NO_SPC_ROLL;
else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
- need_cp = true;
+ cp_reason = CP_NODE_NEED_CP;
else if (test_opt(sbi, FASTBOOT))
- need_cp = true;
+ cp_reason = CP_FASTBOOT_MODE;
else if (sbi->active_logs == 2)
- need_cp = true;
+ cp_reason = CP_SPEC_LOG_NUM;
- return need_cp;
+ return cp_reason;
}
static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino)
@@ -196,7 +204,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
nid_t ino = inode->i_ino;
int ret = 0;
- bool need_cp = false;
+ enum cp_reason_type cp_reason = 0;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
@@ -215,7 +223,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
clear_inode_flag(inode, FI_NEED_IPU);
if (ret) {
- trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
+ trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
return ret;
}
@@ -246,10 +254,10 @@ go_write:
* sudden-power-off.
*/
down_read(&F2FS_I(inode)->i_sem);
- need_cp = need_do_checkpoint(inode);
+ cp_reason = need_do_checkpoint(inode);
up_read(&F2FS_I(inode)->i_sem);
- if (need_cp) {
+ if (cp_reason) {
/* all the dirty node pages should be flushed for POR */
ret = f2fs_sync_fs(inode->i_sb, 1);
@@ -297,19 +305,24 @@ sync_nodes:
remove_ino_entry(sbi, ino, APPEND_INO);
clear_inode_flag(inode, FI_APPEND_WRITE);
flush_out:
- remove_ino_entry(sbi, ino, UPDATE_INO);
- clear_inode_flag(inode, FI_UPDATE_WRITE);
if (!atomic)
- ret = f2fs_issue_flush(sbi);
+ ret = f2fs_issue_flush(sbi, inode->i_ino);
+ if (!ret) {
+ remove_ino_entry(sbi, ino, UPDATE_INO);
+ clear_inode_flag(inode, FI_UPDATE_WRITE);
+ remove_ino_entry(sbi, ino, FLUSH_INO);
+ }
f2fs_update_time(sbi, REQ_TIME);
out:
- trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
+ trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
f2fs_trace_ios(NULL, 1);
return ret;
}
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file)))))
+ return -EIO;
return f2fs_do_sync_file(file, start, end, datasync, false);
}
@@ -446,6 +459,9 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
struct inode *inode = file_inode(file);
int err;
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
+ return -EIO;
+
/* we don't need to use inline_data strictly */
err = f2fs_convert_inline_inode(inode);
if (err)
@@ -632,6 +648,9 @@ int f2fs_truncate(struct inode *inode)
{
int err;
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
+ return -EIO;
+
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)))
return 0;
@@ -667,7 +686,8 @@ int f2fs_getattr(struct vfsmount *mnt,
generic_fillattr(inode, stat);
/* we need to show initial sectors used for inline_data/dentries */
- if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
+ if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) ||
+ f2fs_has_inline_dentry(inode))
stat->blocks += (stat->size + 511) >> 9;
return 0;
@@ -709,6 +729,9 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
int err;
bool size_changed = false;
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
+ return -EIO;
+
err = inode_change_ok(inode, attr);
if (err)
return err;
@@ -761,6 +784,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
inode->i_mtime = inode->i_ctime = current_time(inode);
}
+ down_write(&F2FS_I(inode)->i_sem);
+ F2FS_I(inode)->last_disk_size = i_size_read(inode);
+ up_write(&F2FS_I(inode)->i_sem);
+
size_changed = true;
}
@@ -834,7 +861,7 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
err = get_dnode_of_data(&dn, pg_start, LOOKUP_NODE);
if (err) {
if (err == -ENOENT) {
- pg_start++;
+ pg_start = get_next_page_offset(&dn, pg_start);
continue;
}
return err;
@@ -1149,11 +1176,14 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
if (ret)
goto out;
+ /* avoid gc operation during block exchange */
+ down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
+
truncate_pagecache(inode, offset);
ret = f2fs_do_collapse(inode, pg_start, pg_end);
if (ret)
- goto out;
+ goto out_unlock;
/* write out all moved pages, if possible */
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
@@ -1165,7 +1195,8 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
ret = truncate_blocks(inode, new_size, true);
if (!ret)
f2fs_i_size_write(inode, new_size);
-
+out_unlock:
+ up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
out:
up_write(&F2FS_I(inode)->i_mmap_sem);
return ret;
@@ -1348,6 +1379,9 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
if (ret)
goto out;
+ /* avoid gc operation during block exchange */
+ down_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
+
truncate_pagecache(inode, offset);
pg_start = offset >> PAGE_SHIFT;
@@ -1375,6 +1409,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
if (!ret)
f2fs_i_size_write(inode, new_size);
+
+ up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
out:
up_write(&F2FS_I(inode)->i_mmap_sem);
return ret;
@@ -1424,8 +1460,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end;
}
- if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size)
- f2fs_i_size_write(inode, new_size);
+ if (new_size > i_size_read(inode)) {
+ if (mode & FALLOC_FL_KEEP_SIZE)
+ file_set_keep_isize(inode);
+ else
+ f2fs_i_size_write(inode, new_size);
+ }
return err;
}
@@ -1436,6 +1476,9 @@ static long f2fs_fallocate(struct file *file, int mode,
struct inode *inode = file_inode(file);
long ret = 0;
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
+ return -EIO;
+
/* f2fs only support ->fallocate for regular file */
if (!S_ISREG(inode->i_mode))
return -EINVAL;
@@ -1469,8 +1512,6 @@ static long f2fs_fallocate(struct file *file, int mode,
if (!ret) {
inode->i_mtime = inode->i_ctime = current_time(inode);
f2fs_mark_inode_dirty_sync(inode, false);
- if (mode & FALLOC_FL_KEEP_SIZE)
- file_set_keep_isize(inode);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
}
@@ -1864,6 +1905,9 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
+ if (!f2fs_sb_has_crypto(inode->i_sb))
+ return -EOPNOTSUPP;
+
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
@@ -1871,6 +1915,8 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
{
+ if (!f2fs_sb_has_crypto(file_inode(filp)->i_sb))
+ return -EOPNOTSUPP;
return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
}
@@ -2226,9 +2272,13 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
}
inode_lock(src);
+ down_write(&F2FS_I(src)->dio_rwsem[WRITE]);
if (src != dst) {
- if (!inode_trylock(dst)) {
- ret = -EBUSY;
+ ret = -EBUSY;
+ if (!inode_trylock(dst))
+ goto out;
+ if (!down_write_trylock(&F2FS_I(dst)->dio_rwsem[WRITE])) {
+ inode_unlock(dst);
goto out;
}
}
@@ -2288,9 +2338,12 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
}
f2fs_unlock_op(sbi);
out_unlock:
- if (src != dst)
+ if (src != dst) {
+ up_write(&F2FS_I(dst)->dio_rwsem[WRITE]);
inode_unlock(dst);
+ }
out:
+ up_write(&F2FS_I(src)->dio_rwsem[WRITE]);
inode_unlock(src);
return ret;
}
@@ -2412,6 +2465,9 @@ static int f2fs_ioc_get_features(struct file *filp, unsigned long arg)
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
+ return -EIO;
+
switch (cmd) {
case F2FS_IOC_GETFLAGS:
return f2fs_ioc_getflags(filp, arg);
@@ -2465,6 +2521,9 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct blk_plug plug;
ssize_t ret;
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
+ return -EIO;
+
inode_lock(inode);
ret = generic_write_checks(iocb, from);
if (ret > 0) {
@@ -2475,6 +2534,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
err = f2fs_preallocate_blocks(iocb, from);
if (err) {
+ clear_inode_flag(inode, FI_NO_PREALLOC);
inode_unlock(inode);
return err;
}
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index bd16e6631cf3..be9fd616736b 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -267,16 +267,6 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
}
-static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi,
- unsigned int segno)
-{
- unsigned int valid_blocks =
- get_valid_blocks(sbi, segno, true);
-
- return IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
- valid_blocks * 2 : valid_blocks;
-}
-
static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
unsigned int segno, struct victim_sel_policy *p)
{
@@ -285,7 +275,7 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
/* alloc_mode == LFS */
if (p->gc_mode == GC_GREEDY)
- return get_greedy_cost(sbi, segno);
+ return get_valid_blocks(sbi, segno, true);
else
return get_cb_cost(sbi, segno);
}
@@ -466,10 +456,10 @@ static int check_valid_map(struct f2fs_sb_info *sbi,
struct seg_entry *sentry;
int ret;
- mutex_lock(&sit_i->sentry_lock);
+ down_read(&sit_i->sentry_lock);
sentry = get_seg_entry(sbi, segno);
ret = f2fs_test_bit(offset, sentry->cur_valid_map);
- mutex_unlock(&sit_i->sentry_lock);
+ up_read(&sit_i->sentry_lock);
return ret;
}
@@ -608,6 +598,7 @@ static void move_data_block(struct inode *inode, block_t bidx,
{
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(inode),
+ .ino = inode->i_ino,
.type = DATA,
.temp = COLD,
.op = REQ_OP_READ,
@@ -659,8 +650,8 @@ static void move_data_block(struct inode *inode, block_t bidx,
allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
&sum, CURSEG_COLD_DATA, NULL, false);
- fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi), newaddr,
- FGP_LOCK | FGP_CREAT, GFP_NOFS);
+ fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
+ newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS);
if (!fio.encrypted_page) {
err = -ENOMEM;
goto recover_block;
@@ -738,6 +729,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
} else {
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(inode),
+ .ino = inode->i_ino,
.type = DATA,
.temp = COLD,
.op = REQ_OP_WRITE,
@@ -840,10 +832,17 @@ next_step:
continue;
}
+ if (!down_write_trylock(
+ &F2FS_I(inode)->dio_rwsem[WRITE])) {
+ iput(inode);
+ continue;
+ }
+
start_bidx = start_bidx_of_node(nofs, inode);
data_page = get_read_data_page(inode,
start_bidx + ofs_in_node, REQ_RAHEAD,
true);
+ up_write(&F2FS_I(inode)->dio_rwsem[WRITE]);
if (IS_ERR(data_page)) {
iput(inode);
continue;
@@ -901,10 +900,10 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
struct sit_info *sit_i = SIT_I(sbi);
int ret;
- mutex_lock(&sit_i->sentry_lock);
+ down_write(&sit_i->sentry_lock);
ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type,
NO_CHECK_TYPE, LFS);
- mutex_unlock(&sit_i->sentry_lock);
+ up_write(&sit_i->sentry_lock);
return ret;
}
@@ -952,8 +951,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
/*
* this is to avoid deadlock:
* - lock_page(sum_page) - f2fs_replace_block
- * - check_valid_map() - mutex_lock(sentry_lock)
- * - mutex_lock(sentry_lock) - change_curseg()
+ * - check_valid_map() - down_write(sentry_lock)
+ * - down_read(sentry_lock) - change_curseg()
* - lock_page(sum_page)
*/
if (type == SUM_TYPE_NODE)
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index fbf22b0f667f..91d5d831be72 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -130,6 +130,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
{
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(dn->inode),
+ .ino = dn->inode->i_ino,
.type = DATA,
.op = REQ_OP_WRITE,
.op_flags = REQ_SYNC | REQ_NOIDLE | REQ_PRIO,
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 50c88e37ed66..9684d53563f1 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -232,6 +232,23 @@ static int do_read_inode(struct inode *inode)
fi->i_extra_isize = f2fs_has_extra_attr(inode) ?
le16_to_cpu(ri->i_extra_isize) : 0;
+ if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) {
+ f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode));
+ fi->i_inline_xattr_size = le16_to_cpu(ri->i_inline_xattr_size);
+ } else if (f2fs_has_inline_xattr(inode) ||
+ f2fs_has_inline_dentry(inode)) {
+ fi->i_inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
+ } else {
+
+ /*
+ * Previous inline data or directory always reserved 200 bytes
+ * in inode layout, even if inline_xattr is disabled. In order
+ * to keep inline_dentry's structure for backward compatibility,
+ * we get the space back only from inline_data.
+ */
+ fi->i_inline_xattr_size = 0;
+ }
+
/* check data exist */
if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
__recover_inline_status(inode, node_page);
@@ -384,6 +401,10 @@ int update_inode(struct inode *inode, struct page *node_page)
if (f2fs_has_extra_attr(inode)) {
ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize);
+ if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode)->sb))
+ ri->i_inline_xattr_size =
+ cpu_to_le16(F2FS_I(inode)->i_inline_xattr_size);
+
if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) &&
F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
i_projid)) {
@@ -480,6 +501,7 @@ void f2fs_evict_inode(struct inode *inode)
remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
+ remove_ino_entry(sbi, inode->i_ino, FLUSH_INO);
sb_start_intwrite(inode->i_sb);
set_inode_flag(inode, FI_NO_ALLOC);
@@ -519,8 +541,10 @@ no_delete:
stat_dec_inline_dir(inode);
stat_dec_inline_inode(inode);
- if (!is_set_ckpt_flags(sbi, CP_ERROR_FLAG))
+ if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG)))
f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE));
+ else
+ f2fs_inode_synced(inode);
/* ino == 0, if f2fs_new_inode() was failed t*/
if (inode->i_ino)
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index d92b8e9064cb..cf8f4370d256 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -29,6 +29,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
nid_t ino;
struct inode *inode;
bool nid_free = false;
+ int xattr_size = 0;
int err;
inode = new_inode(dir->i_sb);
@@ -86,11 +87,23 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
if (test_opt(sbi, INLINE_XATTR))
set_inode_flag(inode, FI_INLINE_XATTR);
+
if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode))
set_inode_flag(inode, FI_INLINE_DATA);
if (f2fs_may_inline_dentry(inode))
set_inode_flag(inode, FI_INLINE_DENTRY);
+ if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) {
+ f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode));
+ if (f2fs_has_inline_xattr(inode))
+ xattr_size = sbi->inline_xattr_size;
+ /* Otherwise, will be 0 */
+ } else if (f2fs_has_inline_xattr(inode) ||
+ f2fs_has_inline_dentry(inode)) {
+ xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
+ }
+ F2FS_I(inode)->i_inline_xattr_size = xattr_size;
+
f2fs_init_extent_tree(inode, NULL);
stat_inc_inline_xattr(inode);
@@ -177,6 +190,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
nid_t ino = 0;
int err;
+ if (unlikely(f2fs_cp_error(sbi)))
+ return -EIO;
+
err = dquot_initialize(dir);
if (err)
return err;
@@ -221,6 +237,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
int err;
+ if (unlikely(f2fs_cp_error(sbi)))
+ return -EIO;
+
if (f2fs_encrypted_inode(dir) &&
!fscrypt_has_permitted_context(dir, inode))
return -EPERM;
@@ -331,12 +350,15 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
struct inode *inode = NULL;
struct f2fs_dir_entry *de;
struct page *page;
- nid_t ino;
+ struct dentry *new;
+ nid_t ino = -1;
int err = 0;
unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir));
+ trace_f2fs_lookup_start(dir, dentry, flags);
+
if (f2fs_encrypted_inode(dir)) {
- int res = fscrypt_get_encryption_info(dir);
+ err = fscrypt_get_encryption_info(dir);
/*
* DCACHE_ENCRYPTED_WITH_KEY is set if the dentry is
@@ -346,18 +368,22 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
if (fscrypt_has_encryption_key(dir))
fscrypt_set_encrypted_dentry(dentry);
fscrypt_set_d_op(dentry);
- if (res && res != -ENOKEY)
- return ERR_PTR(res);
+ if (err && err != -ENOKEY)
+ goto out;
}
- if (dentry->d_name.len > F2FS_NAME_LEN)
- return ERR_PTR(-ENAMETOOLONG);
+ if (dentry->d_name.len > F2FS_NAME_LEN) {
+ err = -ENAMETOOLONG;
+ goto out;
+ }
de = f2fs_find_entry(dir, &dentry->d_name, &page);
if (!de) {
- if (IS_ERR(page))
- return (struct dentry *)page;
- return d_splice_alias(inode, dentry);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto out;
+ }
+ goto out_splice;
}
ino = le32_to_cpu(de->ino);
@@ -365,19 +391,21 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
f2fs_put_page(page, 0);
inode = f2fs_iget(dir->i_sb, ino);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto out;
+ }
if ((dir->i_ino == root_ino) && f2fs_has_inline_dots(dir)) {
err = __recover_dot_dentries(dir, root_ino);
if (err)
- goto err_out;
+ goto out_iput;
}
if (f2fs_has_inline_dots(inode)) {
err = __recover_dot_dentries(inode, dir->i_ino);
if (err)
- goto err_out;
+ goto out_iput;
}
if (f2fs_encrypted_inode(dir) &&
(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
@@ -386,12 +414,18 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
"Inconsistent encryption contexts: %lu/%lu",
dir->i_ino, inode->i_ino);
err = -EPERM;
- goto err_out;
+ goto out_iput;
}
- return d_splice_alias(inode, dentry);
-
-err_out:
+out_splice:
+ new = d_splice_alias(inode, dentry);
+ if (IS_ERR(new))
+ err = PTR_ERR(new);
+ trace_f2fs_lookup_end(dir, dentry, ino, err);
+ return new;
+out_iput:
iput(inode);
+out:
+ trace_f2fs_lookup_end(dir, dentry, ino, err);
return ERR_PTR(err);
}
@@ -405,9 +439,15 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
trace_f2fs_unlink_enter(dir, dentry);
+ if (unlikely(f2fs_cp_error(sbi)))
+ return -EIO;
+
err = dquot_initialize(dir);
if (err)
return err;
+ err = dquot_initialize(inode);
+ if (err)
+ return err;
de = f2fs_find_entry(dir, &dentry->d_name, &page);
if (!de) {
@@ -457,6 +497,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
struct fscrypt_symlink_data *sd = NULL;
int err;
+ if (unlikely(f2fs_cp_error(sbi)))
+ return -EIO;
+
if (f2fs_encrypted_inode(dir)) {
err = fscrypt_get_encryption_info(dir);
if (err)
@@ -563,6 +606,9 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct inode *inode;
int err;
+ if (unlikely(f2fs_cp_error(sbi)))
+ return -EIO;
+
err = dquot_initialize(dir);
if (err)
return err;
@@ -615,6 +661,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
struct inode *inode;
int err = 0;
+ if (unlikely(f2fs_cp_error(sbi)))
+ return -EIO;
+
err = dquot_initialize(dir);
if (err)
return err;
@@ -709,6 +758,9 @@ out:
static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
{
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(dir))))
+ return -EIO;
+
if (f2fs_encrypted_inode(dir)) {
int err = fscrypt_get_encryption_info(dir);
if (err)
@@ -720,6 +772,9 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
static int f2fs_create_whiteout(struct inode *dir, struct inode **whiteout)
{
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(dir))))
+ return -EIO;
+
return __f2fs_tmpfile(dir, NULL, S_IFCHR | WHITEOUT_MODE, whiteout);
}
@@ -739,6 +794,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
bool is_old_inline = f2fs_has_inline_dentry(old_dir);
int err = -ENOENT;
+ if (unlikely(f2fs_cp_error(sbi)))
+ return -EIO;
+
if ((f2fs_encrypted_inode(old_dir) &&
!fscrypt_has_encryption_key(old_dir)) ||
(f2fs_encrypted_inode(new_dir) &&
@@ -764,6 +822,12 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (err)
goto out;
+ if (new_inode) {
+ err = dquot_initialize(new_inode);
+ if (err)
+ goto out;
+ }
+
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_entry) {
if (IS_ERR(old_page))
@@ -932,6 +996,9 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
int old_nlink = 0, new_nlink = 0;
int err = -ENOENT;
+ if (unlikely(f2fs_cp_error(sbi)))
+ return -EIO;
+
if ((f2fs_encrypted_inode(old_dir) &&
!fscrypt_has_encryption_key(old_dir)) ||
(f2fs_encrypted_inode(new_dir) &&
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 32474db18ad9..964c99655942 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -46,7 +46,7 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
* give 25%, 25%, 50%, 50%, 50% memory for each components respectively
*/
if (type == FREE_NIDS) {
- mem_size = (nm_i->nid_cnt[FREE_NID_LIST] *
+ mem_size = (nm_i->nid_cnt[FREE_NID] *
sizeof(struct free_nid)) >> PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
} else if (type == NAT_ENTRIES) {
@@ -63,7 +63,7 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
} else if (type == INO_ENTRIES) {
int i;
- for (i = 0; i <= UPDATE_INO; i++)
+ for (i = 0; i < MAX_INO_ENTRY; i++)
mem_size += sbi->im[i].ino_num *
sizeof(struct ino_entry);
mem_size >>= PAGE_SHIFT;
@@ -74,6 +74,10 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
atomic_read(&sbi->total_ext_node) *
sizeof(struct extent_node)) >> PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
+ } else if (type == INMEM_PAGES) {
+ /* it allows 20% / total_ram for inmemory pages */
+ mem_size = get_pages(sbi, F2FS_INMEM_PAGES);
+ res = mem_size < (val.totalram / 5);
} else {
if (!sbi->sb->s_bdi->wb.dirty_exceeded)
return true;
@@ -134,6 +138,44 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
return dst_page;
}
+static struct nat_entry *__alloc_nat_entry(nid_t nid, bool no_fail)
+{
+ struct nat_entry *new;
+
+ if (no_fail)
+ new = f2fs_kmem_cache_alloc(nat_entry_slab,
+ GFP_NOFS | __GFP_ZERO);
+ else
+ new = kmem_cache_alloc(nat_entry_slab,
+ GFP_NOFS | __GFP_ZERO);
+ if (new) {
+ nat_set_nid(new, nid);
+ nat_reset_flag(new);
+ }
+ return new;
+}
+
+static void __free_nat_entry(struct nat_entry *e)
+{
+ kmem_cache_free(nat_entry_slab, e);
+}
+
+/* must be locked by nat_tree_lock */
+static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
+ struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail)
+{
+ if (no_fail)
+ f2fs_radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne);
+ else if (radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne))
+ return NULL;
+
+ if (raw_ne)
+ node_info_from_raw_nat(&ne->ni, raw_ne);
+ list_add_tail(&ne->list, &nm_i->nat_entries);
+ nm_i->nat_cnt++;
+ return ne;
+}
+
static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
{
return radix_tree_lookup(&nm_i->nat_root, n);
@@ -150,7 +192,7 @@ static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
list_del(&e->list);
radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
nm_i->nat_cnt--;
- kmem_cache_free(nat_entry_slab, e);
+ __free_nat_entry(e);
}
static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
@@ -246,49 +288,29 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
return need_update;
}
-static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
- bool no_fail)
-{
- struct nat_entry *new;
-
- if (no_fail) {
- new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
- f2fs_radix_tree_insert(&nm_i->nat_root, nid, new);
- } else {
- new = kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
- if (!new)
- return NULL;
- if (radix_tree_insert(&nm_i->nat_root, nid, new)) {
- kmem_cache_free(nat_entry_slab, new);
- return NULL;
- }
- }
-
- memset(new, 0, sizeof(struct nat_entry));
- nat_set_nid(new, nid);
- nat_reset_flag(new);
- list_add_tail(&new->list, &nm_i->nat_entries);
- nm_i->nat_cnt++;
- return new;
-}
-
+/* must be locked by nat_tree_lock */
static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
struct f2fs_nat_entry *ne)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- struct nat_entry *e;
+ struct nat_entry *new, *e;
+ new = __alloc_nat_entry(nid, false);
+ if (!new)
+ return;
+
+ down_write(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
- if (!e) {
- e = grab_nat_entry(nm_i, nid, false);
- if (e)
- node_info_from_raw_nat(&e->ni, ne);
- } else {
+ if (!e)
+ e = __init_nat_entry(nm_i, new, ne, false);
+ else
f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) ||
nat_get_blkaddr(e) !=
le32_to_cpu(ne->block_addr) ||
nat_get_version(e) != ne->version);
- }
+ up_write(&nm_i->nat_tree_lock);
+ if (e != new)
+ __free_nat_entry(new);
}
static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
@@ -296,11 +318,12 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
+ struct nat_entry *new = __alloc_nat_entry(ni->nid, true);
down_write(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, ni->nid);
if (!e) {
- e = grab_nat_entry(nm_i, ni->nid, true);
+ e = __init_nat_entry(nm_i, new, NULL, true);
copy_node_info(&e->ni, ni);
f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
} else if (new_blkaddr == NEW_ADDR) {
@@ -312,6 +335,9 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
copy_node_info(&e->ni, ni);
f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR);
}
+ /* let's free early to reduce memory consumption */
+ if (e != new)
+ __free_nat_entry(new);
/* sanity check */
f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr);
@@ -327,10 +353,6 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
unsigned char version = nat_get_version(e);
nat_set_version(e, inc_node_version(version));
-
- /* in order to reuse the nid */
- if (nm_i->next_scan_nid > ni->nid)
- nm_i->next_scan_nid = ni->nid;
}
/* change address */
@@ -424,9 +446,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
f2fs_put_page(page, 1);
cache:
/* cache nat entry */
- down_write(&nm_i->nat_tree_lock);
cache_nat_entry(sbi, nid, &ne);
- up_write(&nm_i->nat_tree_lock);
}
/*
@@ -962,7 +982,8 @@ fail:
return err > 0 ? 0 : err;
}
-int truncate_xattr_node(struct inode *inode, struct page *page)
+/* caller must lock inode page */
+int truncate_xattr_node(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
nid_t nid = F2FS_I(inode)->i_xattr_nid;
@@ -978,10 +999,7 @@ int truncate_xattr_node(struct inode *inode, struct page *page)
f2fs_i_xnid_write(inode, 0);
- set_new_dnode(&dn, inode, page, npage, nid);
-
- if (page)
- dn.inode_page_locked = true;
+ set_new_dnode(&dn, inode, NULL, npage, nid);
truncate_node(&dn);
return 0;
}
@@ -1000,7 +1018,7 @@ int remove_inode_page(struct inode *inode)
if (err)
return err;
- err = truncate_xattr_node(inode, dn.inode_page);
+ err = truncate_xattr_node(inode);
if (err) {
f2fs_put_dnode(&dn);
return err;
@@ -1220,7 +1238,8 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
if (!inode)
return;
- page = pagecache_get_page(inode->i_mapping, 0, FGP_LOCK|FGP_NOWAIT, 0);
+ page = f2fs_pagecache_get_page(inode->i_mapping, 0,
+ FGP_LOCK|FGP_NOWAIT, 0);
if (!page)
goto iput_out;
@@ -1244,37 +1263,6 @@ iput_out:
iput(inode);
}
-void move_node_page(struct page *node_page, int gc_type)
-{
- if (gc_type == FG_GC) {
- struct f2fs_sb_info *sbi = F2FS_P_SB(node_page);
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = 1,
- .for_reclaim = 0,
- };
-
- set_page_dirty(node_page);
- f2fs_wait_on_page_writeback(node_page, NODE, true);
-
- f2fs_bug_on(sbi, PageWriteback(node_page));
- if (!clear_page_dirty_for_io(node_page))
- goto out_page;
-
- if (NODE_MAPPING(sbi)->a_ops->writepage(node_page, &wbc))
- unlock_page(node_page);
- goto release_page;
- } else {
- /* set page dirty and write it */
- if (!PageWriteback(node_page))
- set_page_dirty(node_page);
- }
-out_page:
- unlock_page(node_page);
-release_page:
- f2fs_put_page(node_page, 0);
-}
-
static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
{
pgoff_t index, end;
@@ -1344,6 +1332,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
struct node_info ni;
struct f2fs_io_info fio = {
.sbi = sbi,
+ .ino = ino_of_node(page),
.type = NODE,
.op = REQ_OP_WRITE,
.op_flags = wbc_to_write_flags(wbc),
@@ -1416,6 +1405,37 @@ redirty_out:
return AOP_WRITEPAGE_ACTIVATE;
}
+void move_node_page(struct page *node_page, int gc_type)
+{
+ if (gc_type == FG_GC) {
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = 1,
+ .for_reclaim = 0,
+ };
+
+ set_page_dirty(node_page);
+ f2fs_wait_on_page_writeback(node_page, NODE, true);
+
+ f2fs_bug_on(F2FS_P_SB(node_page), PageWriteback(node_page));
+ if (!clear_page_dirty_for_io(node_page))
+ goto out_page;
+
+ if (__write_node_page(node_page, false, NULL,
+ &wbc, false, FS_GC_NODE_IO))
+ unlock_page(node_page);
+ goto release_page;
+ } else {
+ /* set page dirty and write it */
+ if (!PageWriteback(node_page))
+ set_page_dirty(node_page);
+ }
+out_page:
+ unlock_page(node_page);
+release_page:
+ f2fs_put_page(node_page, 0);
+}
+
static int f2fs_write_node_page(struct page *page,
struct writeback_control *wbc)
{
@@ -1764,35 +1784,54 @@ static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
return radix_tree_lookup(&nm_i->free_nid_root, n);
}
-static int __insert_nid_to_list(struct f2fs_sb_info *sbi,
- struct free_nid *i, enum nid_list list, bool new)
+static int __insert_free_nid(struct f2fs_sb_info *sbi,
+ struct free_nid *i, enum nid_state state)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- if (new) {
- int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i);
- if (err)
- return err;
- }
+ int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i);
+ if (err)
+ return err;
- f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW :
- i->state != NID_ALLOC);
- nm_i->nid_cnt[list]++;
- list_add_tail(&i->list, &nm_i->nid_list[list]);
+ f2fs_bug_on(sbi, state != i->state);
+ nm_i->nid_cnt[state]++;
+ if (state == FREE_NID)
+ list_add_tail(&i->list, &nm_i->free_nid_list);
return 0;
}
-static void __remove_nid_from_list(struct f2fs_sb_info *sbi,
- struct free_nid *i, enum nid_list list, bool reuse)
+static void __remove_free_nid(struct f2fs_sb_info *sbi,
+ struct free_nid *i, enum nid_state state)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+
+ f2fs_bug_on(sbi, state != i->state);
+ nm_i->nid_cnt[state]--;
+ if (state == FREE_NID)
+ list_del(&i->list);
+ radix_tree_delete(&nm_i->free_nid_root, i->nid);
+}
+
+static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i,
+ enum nid_state org_state, enum nid_state dst_state)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW :
- i->state != NID_ALLOC);
- nm_i->nid_cnt[list]--;
- list_del(&i->list);
- if (!reuse)
- radix_tree_delete(&nm_i->free_nid_root, i->nid);
+ f2fs_bug_on(sbi, org_state != i->state);
+ i->state = dst_state;
+ nm_i->nid_cnt[org_state]--;
+ nm_i->nid_cnt[dst_state]++;
+
+ switch (dst_state) {
+ case PREALLOC_NID:
+ list_del(&i->list);
+ break;
+ case FREE_NID:
+ list_add_tail(&i->list, &nm_i->free_nid_list);
+ break;
+ default:
+ BUG_ON(1);
+ }
}
/* return if the nid is recognized as free */
@@ -1810,7 +1849,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
i->nid = nid;
- i->state = NID_NEW;
+ i->state = FREE_NID;
if (radix_tree_preload(GFP_NOFS))
goto err;
@@ -1823,7 +1862,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
* - f2fs_create
* - f2fs_new_inode
* - alloc_nid
- * - __insert_nid_to_list(ALLOC_NID_LIST)
+ * - __insert_nid_to_list(PREALLOC_NID)
* - f2fs_balance_fs_bg
* - build_free_nids
* - __build_free_nids
@@ -1836,8 +1875,8 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
* - new_node_page
* - set_node_addr
* - alloc_nid_done
- * - __remove_nid_from_list(ALLOC_NID_LIST)
- * - __insert_nid_to_list(FREE_NID_LIST)
+ * - __remove_nid_from_list(PREALLOC_NID)
+ * - __insert_nid_to_list(FREE_NID)
*/
ne = __lookup_nat_cache(nm_i, nid);
if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
@@ -1846,13 +1885,13 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
e = __lookup_free_nid_list(nm_i, nid);
if (e) {
- if (e->state == NID_NEW)
+ if (e->state == FREE_NID)
ret = true;
goto err_out;
}
}
ret = true;
- err = __insert_nid_to_list(sbi, i, FREE_NID_LIST, true);
+ err = __insert_free_nid(sbi, i, FREE_NID);
err_out:
spin_unlock(&nm_i->nid_list_lock);
radix_tree_preload_end();
@@ -1870,8 +1909,8 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
spin_lock(&nm_i->nid_list_lock);
i = __lookup_free_nid_list(nm_i, nid);
- if (i && i->state == NID_NEW) {
- __remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
+ if (i && i->state == FREE_NID) {
+ __remove_free_nid(sbi, i, FREE_NID);
need_free = true;
}
spin_unlock(&nm_i->nid_list_lock);
@@ -1890,15 +1929,18 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
return;
- if (set)
+ if (set) {
+ if (test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]))
+ return;
__set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
- else
- __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
-
- if (set)
nm_i->free_nid_count[nat_ofs]++;
- else if (!build)
- nm_i->free_nid_count[nat_ofs]--;
+ } else {
+ if (!test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]))
+ return;
+ __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
+ if (!build)
+ nm_i->free_nid_count[nat_ofs]--;
+ }
}
static void scan_nat_page(struct f2fs_sb_info *sbi,
@@ -1933,12 +1975,32 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
}
}
-static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
+static void scan_curseg_cache(struct f2fs_sb_info *sbi)
{
- struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_journal *journal = curseg->journal;
+ int i;
+
+ down_read(&curseg->journal_rwsem);
+ for (i = 0; i < nats_in_cursum(journal); i++) {
+ block_t addr;
+ nid_t nid;
+
+ addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
+ nid = le32_to_cpu(nid_in_journal(journal, i));
+ if (addr == NULL_ADDR)
+ add_free_nid(sbi, nid, true);
+ else
+ remove_free_nid(sbi, nid);
+ }
+ up_read(&curseg->journal_rwsem);
+}
+
+static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
unsigned int i, idx;
+ nid_t nid;
down_read(&nm_i->nat_tree_lock);
@@ -1948,40 +2010,27 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
if (!nm_i->free_nid_count[i])
continue;
for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
- nid_t nid;
-
- if (!test_bit_le(idx, nm_i->free_nid_bitmap[i]))
- continue;
+ idx = find_next_bit_le(nm_i->free_nid_bitmap[i],
+ NAT_ENTRY_PER_BLOCK, idx);
+ if (idx >= NAT_ENTRY_PER_BLOCK)
+ break;
nid = i * NAT_ENTRY_PER_BLOCK + idx;
add_free_nid(sbi, nid, true);
- if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS)
+ if (nm_i->nid_cnt[FREE_NID] >= MAX_FREE_NIDS)
goto out;
}
}
out:
- down_read(&curseg->journal_rwsem);
- for (i = 0; i < nats_in_cursum(journal); i++) {
- block_t addr;
- nid_t nid;
+ scan_curseg_cache(sbi);
- addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
- nid = le32_to_cpu(nid_in_journal(journal, i));
- if (addr == NULL_ADDR)
- add_free_nid(sbi, nid, true);
- else
- remove_free_nid(sbi, nid);
- }
- up_read(&curseg->journal_rwsem);
up_read(&nm_i->nat_tree_lock);
}
static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
- struct f2fs_journal *journal = curseg->journal;
int i = 0;
nid_t nid = nm_i->next_scan_nid;
@@ -1989,7 +2038,7 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
nid = 0;
/* Enough entries */
- if (nm_i->nid_cnt[FREE_NID_LIST] >= NAT_ENTRY_PER_BLOCK)
+ if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
return;
if (!sync && !available_free_memory(sbi, FREE_NIDS))
@@ -1999,7 +2048,7 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
/* try to find free nids in free_nid_bitmap */
scan_free_nid_bits(sbi);
- if (nm_i->nid_cnt[FREE_NID_LIST])
+ if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
return;
}
@@ -2027,18 +2076,8 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
nm_i->next_scan_nid = nid;
/* find free nids from current sum_pages */
- down_read(&curseg->journal_rwsem);
- for (i = 0; i < nats_in_cursum(journal); i++) {
- block_t addr;
+ scan_curseg_cache(sbi);
- addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
- nid = le32_to_cpu(nid_in_journal(journal, i));
- if (addr == NULL_ADDR)
- add_free_nid(sbi, nid, true);
- else
- remove_free_nid(sbi, nid);
- }
- up_read(&curseg->journal_rwsem);
up_read(&nm_i->nat_tree_lock);
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
@@ -2076,15 +2115,13 @@ retry:
}
/* We should not use stale free nids created by build_free_nids */
- if (nm_i->nid_cnt[FREE_NID_LIST] && !on_build_free_nids(nm_i)) {
- f2fs_bug_on(sbi, list_empty(&nm_i->nid_list[FREE_NID_LIST]));
- i = list_first_entry(&nm_i->nid_list[FREE_NID_LIST],
+ if (nm_i->nid_cnt[FREE_NID] && !on_build_free_nids(nm_i)) {
+ f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
+ i = list_first_entry(&nm_i->free_nid_list,
struct free_nid, list);
*nid = i->nid;
- __remove_nid_from_list(sbi, i, FREE_NID_LIST, true);
- i->state = NID_ALLOC;
- __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
+ __move_free_nid(sbi, i, FREE_NID, PREALLOC_NID);
nm_i->available_nids--;
update_free_nid_bitmap(sbi, *nid, false, false);
@@ -2110,7 +2147,7 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
spin_lock(&nm_i->nid_list_lock);
i = __lookup_free_nid_list(nm_i, nid);
f2fs_bug_on(sbi, !i);
- __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false);
+ __remove_free_nid(sbi, i, PREALLOC_NID);
spin_unlock(&nm_i->nid_list_lock);
kmem_cache_free(free_nid_slab, i);
@@ -2133,12 +2170,10 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
f2fs_bug_on(sbi, !i);
if (!available_free_memory(sbi, FREE_NIDS)) {
- __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false);
+ __remove_free_nid(sbi, i, PREALLOC_NID);
need_free = true;
} else {
- __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, true);
- i->state = NID_NEW;
- __insert_nid_to_list(sbi, i, FREE_NID_LIST, false);
+ __move_free_nid(sbi, i, PREALLOC_NID, FREE_NID);
}
nm_i->available_nids++;
@@ -2157,20 +2192,19 @@ int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
struct free_nid *i, *next;
int nr = nr_shrink;
- if (nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS)
+ if (nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
return 0;
if (!mutex_trylock(&nm_i->build_lock))
return 0;
spin_lock(&nm_i->nid_list_lock);
- list_for_each_entry_safe(i, next, &nm_i->nid_list[FREE_NID_LIST],
- list) {
+ list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
if (nr_shrink <= 0 ||
- nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS)
+ nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
break;
- __remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
+ __remove_free_nid(sbi, i, FREE_NID);
kmem_cache_free(free_nid_slab, i);
nr_shrink--;
}
@@ -2196,8 +2230,8 @@ void recover_inline_xattr(struct inode *inode, struct page *page)
goto update_inode;
}
- dst_addr = inline_xattr_addr(ipage);
- src_addr = inline_xattr_addr(page);
+ dst_addr = inline_xattr_addr(inode, ipage);
+ src_addr = inline_xattr_addr(inode, page);
inline_size = inline_xattr_size(inode);
f2fs_wait_on_page_writeback(ipage, NODE, true);
@@ -2286,6 +2320,12 @@ retry:
dst->i_inline = src->i_inline & (F2FS_INLINE_XATTR | F2FS_EXTRA_ATTR);
if (dst->i_inline & F2FS_EXTRA_ATTR) {
dst->i_extra_isize = src->i_extra_isize;
+
+ if (f2fs_sb_has_flexible_inline_xattr(sbi->sb) &&
+ F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
+ i_inline_xattr_size))
+ dst->i_inline_xattr_size = src->i_inline_xattr_size;
+
if (f2fs_sb_has_project_quota(sbi->sb) &&
F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
i_projid))
@@ -2357,8 +2397,8 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
ne = __lookup_nat_cache(nm_i, nid);
if (!ne) {
- ne = grab_nat_entry(nm_i, nid, true);
- node_info_from_raw_nat(&ne->ni, &raw_ne);
+ ne = __alloc_nat_entry(nid, true);
+ __init_nat_entry(nm_i, ne, &raw_ne, true);
}
/*
@@ -2404,15 +2444,17 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
struct f2fs_nat_block *nat_blk = page_address(page);
int valid = 0;
- int i;
+ int i = 0;
if (!enabled_nat_bits(sbi, NULL))
return;
- for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++) {
- if (start_nid == 0 && i == 0)
- valid++;
- if (nat_blk->entries[i].block_addr)
+ if (nat_index == 0) {
+ valid = 1;
+ i = 1;
+ }
+ for (; i < NAT_ENTRY_PER_BLOCK; i++) {
+ if (nat_blk->entries[i].block_addr != NULL_ADDR)
valid++;
}
if (valid == 0) {
@@ -2607,7 +2649,7 @@ static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
__set_bit_le(i, nm_i->nat_block_bitmap);
nid = i * NAT_ENTRY_PER_BLOCK;
- last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK;
+ last_nid = nid + NAT_ENTRY_PER_BLOCK;
spin_lock(&NM_I(sbi)->nid_list_lock);
for (; nid < last_nid; nid++)
@@ -2642,16 +2684,15 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
/* not used nids: 0, node, meta, (and root counted as valid node) */
nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
F2FS_RESERVED_NODE_NUM;
- nm_i->nid_cnt[FREE_NID_LIST] = 0;
- nm_i->nid_cnt[ALLOC_NID_LIST] = 0;
+ nm_i->nid_cnt[FREE_NID] = 0;
+ nm_i->nid_cnt[PREALLOC_NID] = 0;
nm_i->nat_cnt = 0;
nm_i->ram_thresh = DEF_RAM_THRESHOLD;
nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
- INIT_LIST_HEAD(&nm_i->nid_list[FREE_NID_LIST]);
- INIT_LIST_HEAD(&nm_i->nid_list[ALLOC_NID_LIST]);
+ INIT_LIST_HEAD(&nm_i->free_nid_list);
INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
INIT_LIST_HEAD(&nm_i->nat_entries);
@@ -2743,16 +2784,15 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
/* destroy free nid list */
spin_lock(&nm_i->nid_list_lock);
- list_for_each_entry_safe(i, next_i, &nm_i->nid_list[FREE_NID_LIST],
- list) {
- __remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
+ list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
+ __remove_free_nid(sbi, i, FREE_NID);
spin_unlock(&nm_i->nid_list_lock);
kmem_cache_free(free_nid_slab, i);
spin_lock(&nm_i->nid_list_lock);
}
- f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID_LIST]);
- f2fs_bug_on(sbi, nm_i->nid_cnt[ALLOC_NID_LIST]);
- f2fs_bug_on(sbi, !list_empty(&nm_i->nid_list[ALLOC_NID_LIST]));
+ f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID]);
+ f2fs_bug_on(sbi, nm_i->nid_cnt[PREALLOC_NID]);
+ f2fs_bug_on(sbi, !list_empty(&nm_i->free_nid_list));
spin_unlock(&nm_i->nid_list_lock);
/* destroy nat cache */
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index bb53e9955ff2..0ee3e5ff49a3 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -140,6 +140,7 @@ enum mem_type {
DIRTY_DENTS, /* indicates dirty dentry pages */
INO_ENTRIES, /* indicates inode entries */
EXTENT_CACHE, /* indicates extent cache */
+ INMEM_PAGES, /* indicates inmemory pages */
BASE_CHECK, /* check kernel status */
};
@@ -150,18 +151,10 @@ struct nat_entry_set {
unsigned int entry_cnt; /* the # of nat entries in set */
};
-/*
- * For free nid mangement
- */
-enum nid_state {
- NID_NEW, /* newly added to free nid list */
- NID_ALLOC /* it is allocated */
-};
-
struct free_nid {
struct list_head list; /* for free node id list */
nid_t nid; /* node id */
- int state; /* in use or not: NID_NEW or NID_ALLOC */
+ int state; /* in use or not: FREE_NID or PREALLOC_NID */
};
static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
@@ -170,12 +163,11 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
struct free_nid *fnid;
spin_lock(&nm_i->nid_list_lock);
- if (nm_i->nid_cnt[FREE_NID_LIST] <= 0) {
+ if (nm_i->nid_cnt[FREE_NID] <= 0) {
spin_unlock(&nm_i->nid_list_lock);
return;
}
- fnid = list_first_entry(&nm_i->nid_list[FREE_NID_LIST],
- struct free_nid, list);
+ fnid = list_first_entry(&nm_i->free_nid_list, struct free_nid, list);
*nid = fnid->nid;
spin_unlock(&nm_i->nid_list_lock);
}
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 9626758bc762..92c57ace1939 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -594,6 +594,9 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
int ret = 0;
unsigned long s_flags = sbi->sb->s_flags;
bool need_writecp = false;
+#ifdef CONFIG_QUOTA
+ int quota_enabled;
+#endif
if (s_flags & MS_RDONLY) {
f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
@@ -604,7 +607,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
/* Needed for iput() to work correctly and not trash data */
sbi->sb->s_flags |= MS_ACTIVE;
/* Turn on quotas so that they are updated correctly */
- f2fs_enable_quota_files(sbi);
+ quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY);
#endif
fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
@@ -665,7 +668,8 @@ skip:
out:
#ifdef CONFIG_QUOTA
/* Turn quotas off */
- f2fs_quota_off_umount(sbi->sb);
+ if (quota_enabled)
+ f2fs_quota_off_umount(sbi->sb);
#endif
sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index f5c494389483..94939a5a96c8 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -181,11 +181,12 @@ bool need_SSR(struct f2fs_sb_info *sbi)
return true;
return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
- 2 * reserved_sections(sbi));
+ SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
}
void register_inmem_page(struct inode *inode, struct page *page)
{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
struct inmem_pages *new;
@@ -204,6 +205,10 @@ void register_inmem_page(struct inode *inode, struct page *page)
mutex_lock(&fi->inmem_lock);
get_page(page);
list_add_tail(&new->list, &fi->inmem_pages);
+ spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+ if (list_empty(&fi->inmem_ilist))
+ list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
+ spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
mutex_unlock(&fi->inmem_lock);
@@ -262,12 +267,41 @@ next:
return err;
}
+void drop_inmem_pages_all(struct f2fs_sb_info *sbi)
+{
+ struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
+ struct inode *inode;
+ struct f2fs_inode_info *fi;
+next:
+ spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+ if (list_empty(head)) {
+ spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+ return;
+ }
+ fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
+ inode = igrab(&fi->vfs_inode);
+ spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+
+ if (inode) {
+ drop_inmem_pages(inode);
+ iput(inode);
+ }
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ cond_resched();
+ goto next;
+}
+
void drop_inmem_pages(struct inode *inode)
{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
mutex_lock(&fi->inmem_lock);
__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
+ spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+ if (!list_empty(&fi->inmem_ilist))
+ list_del_init(&fi->inmem_ilist);
+ spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
mutex_unlock(&fi->inmem_lock);
clear_inode_flag(inode, FI_ATOMIC_FILE);
@@ -313,6 +347,7 @@ static int __commit_inmem_pages(struct inode *inode,
struct inmem_pages *cur, *tmp;
struct f2fs_io_info fio = {
.sbi = sbi,
+ .ino = inode->i_ino,
.type = DATA,
.op = REQ_OP_WRITE,
.op_flags = REQ_SYNC | REQ_PRIO,
@@ -398,6 +433,10 @@ int commit_inmem_pages(struct inode *inode)
/* drop all uncommitted pages */
__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
}
+ spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+ if (!list_empty(&fi->inmem_ilist))
+ list_del_init(&fi->inmem_ilist);
+ spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
mutex_unlock(&fi->inmem_lock);
clear_inode_flag(inode, FI_ATOMIC_COMMIT);
@@ -472,7 +511,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
static int __submit_flush_wait(struct f2fs_sb_info *sbi,
struct block_device *bdev)
{
- struct bio *bio = f2fs_bio_alloc(0);
+ struct bio *bio = f2fs_bio_alloc(sbi, 0, true);
int ret;
bio->bi_rw = REQ_OP_WRITE;
@@ -485,15 +524,17 @@ static int __submit_flush_wait(struct f2fs_sb_info *sbi,
return ret;
}
-static int submit_flush_wait(struct f2fs_sb_info *sbi)
+static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
{
- int ret = __submit_flush_wait(sbi, sbi->sb->s_bdev);
+ int ret = 0;
int i;
- if (!sbi->s_ndevs || ret)
- return ret;
+ if (!sbi->s_ndevs)
+ return __submit_flush_wait(sbi, sbi->sb->s_bdev);
- for (i = 1; i < sbi->s_ndevs; i++) {
+ for (i = 0; i < sbi->s_ndevs; i++) {
+ if (!is_dirty_device(sbi, ino, i, FLUSH_INO))
+ continue;
ret = __submit_flush_wait(sbi, FDEV(i).bdev);
if (ret)
break;
@@ -519,7 +560,9 @@ repeat:
fcc->dispatch_list = llist_del_all(&fcc->issue_list);
fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
- ret = submit_flush_wait(sbi);
+ cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);
+
+ ret = submit_flush_wait(sbi, cmd->ino);
atomic_inc(&fcc->issued_flush);
llist_for_each_entry_safe(cmd, next,
@@ -537,7 +580,7 @@ repeat:
goto repeat;
}
-int f2fs_issue_flush(struct f2fs_sb_info *sbi)
+int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
{
struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
struct flush_cmd cmd;
@@ -547,19 +590,20 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
return 0;
if (!test_opt(sbi, FLUSH_MERGE)) {
- ret = submit_flush_wait(sbi);
+ ret = submit_flush_wait(sbi, ino);
atomic_inc(&fcc->issued_flush);
return ret;
}
- if (atomic_inc_return(&fcc->issing_flush) == 1) {
- ret = submit_flush_wait(sbi);
+ if (atomic_inc_return(&fcc->issing_flush) == 1 || sbi->s_ndevs > 1) {
+ ret = submit_flush_wait(sbi, ino);
atomic_dec(&fcc->issing_flush);
atomic_inc(&fcc->issued_flush);
return ret;
}
+ cmd.ino = ino;
init_completion(&cmd.wait);
llist_add(&cmd.llnode, &fcc->issue_list);
@@ -583,7 +627,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
} else {
struct flush_cmd *tmp, *next;
- ret = submit_flush_wait(sbi);
+ ret = submit_flush_wait(sbi, ino);
llist_for_each_entry_safe(tmp, next, list, llnode) {
if (tmp == &cmd) {
@@ -653,6 +697,28 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
}
}
+int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
+{
+ int ret = 0, i;
+
+ if (!sbi->s_ndevs)
+ return 0;
+
+ for (i = 1; i < sbi->s_ndevs; i++) {
+ if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
+ continue;
+ ret = __submit_flush_wait(sbi, FDEV(i).bdev);
+ if (ret)
+ break;
+
+ spin_lock(&sbi->dev_lock);
+ f2fs_clear_bit(i, (char *)&sbi->dirty_device);
+ spin_unlock(&sbi->dev_lock);
+ }
+
+ return ret;
+}
+
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
enum dirty_type dirty_type)
{
@@ -794,6 +860,8 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
+
f2fs_bug_on(sbi, dc->ref);
if (dc->error == -EOPNOTSUPP)
@@ -875,7 +943,7 @@ static int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
if (ret)
return ret;
}
- bio = f2fs_bio_alloc(1);
+ bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, 1);
bio->bi_iter.bi_sector = sector;
bio->bi_bdev = bdev;
bio_set_op_attrs(bio, op, 0);
@@ -926,10 +994,14 @@ void __check_sit_bitmap(struct f2fs_sb_info *sbi,
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
- struct discard_cmd *dc)
+ struct discard_policy *dpolicy,
+ struct discard_cmd *dc)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
+ &(dcc->fstrim_list) : &(dcc->wait_list);
struct bio *bio = NULL;
+ int flag = dpolicy->sync ? REQ_SYNC : 0;
if (dc->state != D_PREP)
return;
@@ -948,8 +1020,8 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
if (bio) {
bio->bi_private = dc;
bio->bi_end_io = f2fs_submit_discard_endio;
- submit_bio(REQ_SYNC, bio);
- list_move_tail(&dc->list, &dcc->wait_list);
+ submit_bio(flag, bio);
+ list_move_tail(&dc->list, wait_list);
__check_sit_bitmap(sbi, dc->start, dc->start + dc->len);
f2fs_update_iostat(sbi, FS_DISCARD, 1);
@@ -966,7 +1038,7 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
struct rb_node *insert_parent)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
- struct rb_node **p = &dcc->root.rb_node;
+ struct rb_node **p;
struct rb_node *parent = NULL;
struct discard_cmd *dc = NULL;
@@ -1134,58 +1206,107 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
return 0;
}
-static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond)
+static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
+ struct discard_policy *dpolicy,
+ unsigned int start, unsigned int end)
+{
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
+ struct rb_node **insert_p = NULL, *insert_parent = NULL;
+ struct discard_cmd *dc;
+ struct blk_plug plug;
+ int issued;
+
+next:
+ issued = 0;
+
+ mutex_lock(&dcc->cmd_lock);
+ f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
+
+ dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root,
+ NULL, start,
+ (struct rb_entry **)&prev_dc,
+ (struct rb_entry **)&next_dc,
+ &insert_p, &insert_parent, true);
+ if (!dc)
+ dc = next_dc;
+
+ blk_start_plug(&plug);
+
+ while (dc && dc->lstart <= end) {
+ struct rb_node *node;
+
+ if (dc->len < dpolicy->granularity)
+ goto skip;
+
+ if (dc->state != D_PREP) {
+ list_move_tail(&dc->list, &dcc->fstrim_list);
+ goto skip;
+ }
+
+ __submit_discard_cmd(sbi, dpolicy, dc);
+
+ if (++issued >= dpolicy->max_requests) {
+ start = dc->lstart + dc->len;
+
+ blk_finish_plug(&plug);
+ mutex_unlock(&dcc->cmd_lock);
+
+ schedule();
+
+ goto next;
+ }
+skip:
+ node = rb_next(&dc->rb_node);
+ dc = rb_entry_safe(node, struct discard_cmd, rb_node);
+
+ if (fatal_signal_pending(current))
+ break;
+ }
+
+ blk_finish_plug(&plug);
+ mutex_unlock(&dcc->cmd_lock);
+}
+
+static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
+ struct discard_policy *dpolicy)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct list_head *pend_list;
struct discard_cmd *dc, *tmp;
struct blk_plug plug;
- int iter = 0, issued = 0;
- int i;
+ int i, iter = 0, issued = 0;
bool io_interrupted = false;
- mutex_lock(&dcc->cmd_lock);
- f2fs_bug_on(sbi,
- !__check_rb_tree_consistence(sbi, &dcc->root));
- blk_start_plug(&plug);
- for (i = MAX_PLIST_NUM - 1;
- i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) {
+ for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
+ if (i + 1 < dpolicy->granularity)
+ break;
pend_list = &dcc->pend_list[i];
+
+ mutex_lock(&dcc->cmd_lock);
+ f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root));
+ blk_start_plug(&plug);
list_for_each_entry_safe(dc, tmp, pend_list, list) {
f2fs_bug_on(sbi, dc->state != D_PREP);
- /* Hurry up to finish fstrim */
- if (dcc->pend_list_tag[i] & P_TRIM) {
- __submit_discard_cmd(sbi, dc);
- issued++;
-
- if (fatal_signal_pending(current))
- break;
- continue;
- }
-
- if (!issue_cond) {
- __submit_discard_cmd(sbi, dc);
- issued++;
- continue;
- }
-
- if (is_idle(sbi)) {
- __submit_discard_cmd(sbi, dc);
- issued++;
- } else {
+ if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
+ !is_idle(sbi)) {
io_interrupted = true;
+ goto skip;
}
- if (++iter >= DISCARD_ISSUE_RATE)
- goto out;
+ __submit_discard_cmd(sbi, dpolicy, dc);
+ issued++;
+skip:
+ if (++iter >= dpolicy->max_requests)
+ break;
}
- if (list_empty(pend_list) && dcc->pend_list_tag[i] & P_TRIM)
- dcc->pend_list_tag[i] &= (~P_TRIM);
+ blk_finish_plug(&plug);
+ mutex_unlock(&dcc->cmd_lock);
+
+ if (iter >= dpolicy->max_requests)
+ break;
}
-out:
- blk_finish_plug(&plug);
- mutex_unlock(&dcc->cmd_lock);
if (!issued && io_interrupted)
issued = -1;
@@ -1193,12 +1314,13 @@ out:
return issued;
}
-static void __drop_discard_cmd(struct f2fs_sb_info *sbi)
+static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct list_head *pend_list;
struct discard_cmd *dc, *tmp;
int i;
+ bool dropped = false;
mutex_lock(&dcc->cmd_lock);
for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
@@ -1206,39 +1328,58 @@ static void __drop_discard_cmd(struct f2fs_sb_info *sbi)
list_for_each_entry_safe(dc, tmp, pend_list, list) {
f2fs_bug_on(sbi, dc->state != D_PREP);
__remove_discard_cmd(sbi, dc);
+ dropped = true;
}
}
mutex_unlock(&dcc->cmd_lock);
+
+ return dropped;
}
-static void __wait_one_discard_bio(struct f2fs_sb_info *sbi,
+static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
struct discard_cmd *dc)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ unsigned int len = 0;
wait_for_completion_io(&dc->wait);
mutex_lock(&dcc->cmd_lock);
f2fs_bug_on(sbi, dc->state != D_DONE);
dc->ref--;
- if (!dc->ref)
+ if (!dc->ref) {
+ if (!dc->error)
+ len = dc->len;
__remove_discard_cmd(sbi, dc);
+ }
mutex_unlock(&dcc->cmd_lock);
+
+ return len;
}
-static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond)
+static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
+ struct discard_policy *dpolicy,
+ block_t start, block_t end)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
- struct list_head *wait_list = &(dcc->wait_list);
+ struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
+ &(dcc->fstrim_list) : &(dcc->wait_list);
struct discard_cmd *dc, *tmp;
bool need_wait;
+ unsigned int trimmed = 0;
next:
need_wait = false;
mutex_lock(&dcc->cmd_lock);
list_for_each_entry_safe(dc, tmp, wait_list, list) {
- if (!wait_cond || (dc->state == D_DONE && !dc->ref)) {
+ if (dc->lstart + dc->len <= start || end <= dc->lstart)
+ continue;
+ if (dc->len < dpolicy->granularity)
+ continue;
+ if (dc->state == D_DONE && !dc->ref) {
wait_for_completion_io(&dc->wait);
+ if (!dc->error)
+ trimmed += dc->len;
__remove_discard_cmd(sbi, dc);
} else {
dc->ref++;
@@ -1249,9 +1390,17 @@ next:
mutex_unlock(&dcc->cmd_lock);
if (need_wait) {
- __wait_one_discard_bio(sbi, dc);
+ trimmed += __wait_one_discard_bio(sbi, dc);
goto next;
}
+
+ return trimmed;
+}
+
+static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
+ struct discard_policy *dpolicy)
+{
+ __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
}
/* This should be covered by global mutex, &sit_i->sentry_lock */
@@ -1289,23 +1438,19 @@ void stop_discard_thread(struct f2fs_sb_info *sbi)
}
}
-/* This comes from f2fs_put_super and f2fs_trim_fs */
-void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount)
-{
- __issue_discard_cmd(sbi, false);
- __drop_discard_cmd(sbi);
- __wait_discard_cmd(sbi, !umount);
-}
-
-static void mark_discard_range_all(struct f2fs_sb_info *sbi)
+/* This comes from f2fs_put_super */
+bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
- int i;
+ struct discard_policy dpolicy;
+ bool dropped;
- mutex_lock(&dcc->cmd_lock);
- for (i = 0; i < MAX_PLIST_NUM; i++)
- dcc->pend_list_tag[i] |= P_TRIM;
- mutex_unlock(&dcc->cmd_lock);
+ init_discard_policy(&dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity);
+ __issue_discard_cmd(sbi, &dpolicy);
+ dropped = __drop_discard_cmd(sbi);
+ __wait_all_discard_cmd(sbi, &dpolicy);
+
+ return dropped;
}
static int issue_discard_thread(void *data)
@@ -1313,12 +1458,16 @@ static int issue_discard_thread(void *data)
struct f2fs_sb_info *sbi = data;
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
wait_queue_head_t *q = &dcc->discard_wait_queue;
+ struct discard_policy dpolicy;
unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
int issued;
set_freezable();
do {
+ init_discard_policy(&dpolicy, DPOLICY_BG,
+ dcc->discard_granularity);
+
wait_event_interruptible_timeout(*q,
kthread_should_stop() || freezing(current) ||
dcc->discard_wake,
@@ -1331,17 +1480,18 @@ static int issue_discard_thread(void *data)
if (dcc->discard_wake) {
dcc->discard_wake = 0;
if (sbi->gc_thread && sbi->gc_thread->gc_urgent)
- mark_discard_range_all(sbi);
+ init_discard_policy(&dpolicy,
+ DPOLICY_FORCE, 1);
}
sb_start_intwrite(sbi->sb);
- issued = __issue_discard_cmd(sbi, true);
+ issued = __issue_discard_cmd(sbi, &dpolicy);
if (issued) {
- __wait_discard_cmd(sbi, true);
- wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
+ __wait_all_discard_cmd(sbi, &dpolicy);
+ wait_ms = dpolicy.min_interval;
} else {
- wait_ms = DEF_MAX_DISCARD_ISSUE_TIME;
+ wait_ms = dpolicy.max_interval;
}
sb_end_intwrite(sbi->sb);
@@ -1605,7 +1755,6 @@ find_next:
f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
len);
- cpc->trimmed += len;
total_len += len;
} else {
next_pos = find_next_bit_le(entry->discard_map,
@@ -1626,6 +1775,37 @@ skip:
wake_up_discard_thread(sbi, false);
}
+void init_discard_policy(struct discard_policy *dpolicy,
+ int discard_type, unsigned int granularity)
+{
+ /* common policy */
+ dpolicy->type = discard_type;
+ dpolicy->sync = true;
+ dpolicy->granularity = granularity;
+
+ if (discard_type == DPOLICY_BG) {
+ dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+ dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
+ dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
+ dpolicy->io_aware_gran = MAX_PLIST_NUM;
+ dpolicy->io_aware = true;
+ } else if (discard_type == DPOLICY_FORCE) {
+ dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+ dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
+ dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
+ dpolicy->io_aware_gran = MAX_PLIST_NUM;
+ dpolicy->io_aware = true;
+ } else if (discard_type == DPOLICY_FSTRIM) {
+ dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
+ dpolicy->io_aware_gran = MAX_PLIST_NUM;
+ dpolicy->io_aware = false;
+ } else if (discard_type == DPOLICY_UMOUNT) {
+ dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
+ dpolicy->io_aware_gran = MAX_PLIST_NUM;
+ dpolicy->io_aware = false;
+ }
+}
+
static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
{
dev_t dev = sbi->sb->s_bdev->bd_dev;
@@ -1643,12 +1823,10 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
INIT_LIST_HEAD(&dcc->entry_list);
- for (i = 0; i < MAX_PLIST_NUM; i++) {
+ for (i = 0; i < MAX_PLIST_NUM; i++)
INIT_LIST_HEAD(&dcc->pend_list[i]);
- if (i >= dcc->discard_granularity - 1)
- dcc->pend_list_tag[i] |= P_ACTIVE;
- }
INIT_LIST_HEAD(&dcc->wait_list);
+ INIT_LIST_HEAD(&dcc->fstrim_list);
mutex_init(&dcc->cmd_lock);
atomic_set(&dcc->issued_discard, 0);
atomic_set(&dcc->issing_discard, 0);
@@ -1796,16 +1974,6 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
get_sec_entry(sbi, segno)->valid_blocks += del;
}
-void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
-{
- update_sit_entry(sbi, new, 1);
- if (GET_SEGNO(sbi, old) != NULL_SEGNO)
- update_sit_entry(sbi, old, -1);
-
- locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
- locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
-}
-
void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
{
unsigned int segno = GET_SEGNO(sbi, addr);
@@ -1816,14 +1984,14 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
return;
/* add it into sit main buffer */
- mutex_lock(&sit_i->sentry_lock);
+ down_write(&sit_i->sentry_lock);
update_sit_entry(sbi, addr, -1);
/* add it into dirty seglist */
locate_dirty_segment(sbi, segno);
- mutex_unlock(&sit_i->sentry_lock);
+ up_write(&sit_i->sentry_lock);
}
bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
@@ -1836,7 +2004,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
return true;
- mutex_lock(&sit_i->sentry_lock);
+ down_read(&sit_i->sentry_lock);
segno = GET_SEGNO(sbi, blkaddr);
se = get_seg_entry(sbi, segno);
@@ -1845,7 +2013,7 @@ bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
if (f2fs_test_bit(offset, se->ckpt_valid_map))
is_cp = true;
- mutex_unlock(&sit_i->sentry_lock);
+ up_read(&sit_i->sentry_lock);
return is_cp;
}
@@ -1903,12 +2071,8 @@ struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr)
{
struct page *page = grab_meta_page(sbi, blk_addr);
- void *dst = page_address(page);
- if (src)
- memcpy(dst, src, PAGE_SIZE);
- else
- memset(dst, 0, PAGE_SIZE);
+ memcpy(page_address(page), src, PAGE_SIZE);
set_page_dirty(page);
f2fs_put_page(page, 1);
}
@@ -2007,7 +2171,6 @@ find_other_zone:
}
secno = left_start;
skip_left:
- hint = secno;
segno = GET_SEG_FROM_SEC(sbi, secno);
zoneno = GET_ZONE_FROM_SEC(sbi, secno);
@@ -2242,12 +2405,16 @@ void allocate_new_segments(struct f2fs_sb_info *sbi)
unsigned int old_segno;
int i;
+ down_write(&SIT_I(sbi)->sentry_lock);
+
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
curseg = CURSEG_I(sbi, i);
old_segno = curseg->segno;
SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
locate_dirty_segment(sbi, old_segno);
}
+
+ up_write(&SIT_I(sbi)->sentry_lock);
}
static const struct segment_allocation default_salloc_ops = {
@@ -2259,14 +2426,14 @@ bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc)
__u64 trim_start = cpc->trim_start;
bool has_candidate = false;
- mutex_lock(&SIT_I(sbi)->sentry_lock);
+ down_write(&SIT_I(sbi)->sentry_lock);
for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
if (add_discard_addrs(sbi, cpc, true)) {
has_candidate = true;
break;
}
}
- mutex_unlock(&SIT_I(sbi)->sentry_lock);
+ up_write(&SIT_I(sbi)->sentry_lock);
cpc->trim_start = trim_start;
return has_candidate;
@@ -2276,14 +2443,16 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
{
__u64 start = F2FS_BYTES_TO_BLK(range->start);
__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
- unsigned int start_segno, end_segno;
+ unsigned int start_segno, end_segno, cur_segno;
+ block_t start_block, end_block;
struct cp_control cpc;
+ struct discard_policy dpolicy;
+ unsigned long long trimmed = 0;
int err = 0;
if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
return -EINVAL;
- cpc.trimmed = 0;
if (end <= MAIN_BLKADDR(sbi))
goto out;
@@ -2297,12 +2466,14 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
GET_SEGNO(sbi, end);
+
cpc.reason = CP_DISCARD;
cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
/* do checkpoint to issue discard commands safely */
- for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) {
- cpc.trim_start = start_segno;
+ for (cur_segno = start_segno; cur_segno <= end_segno;
+ cur_segno = cpc.trim_end + 1) {
+ cpc.trim_start = cur_segno;
if (sbi->discard_blks == 0)
break;
@@ -2310,7 +2481,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
cpc.trim_end = end_segno;
else
cpc.trim_end = min_t(unsigned int,
- rounddown(start_segno +
+ rounddown(cur_segno +
BATCHED_TRIM_SEGMENTS(sbi),
sbi->segs_per_sec) - 1, end_segno);
@@ -2322,11 +2493,16 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
schedule();
}
- /* It's time to issue all the filed discards */
- mark_discard_range_all(sbi);
- f2fs_wait_discard_bios(sbi, false);
+
+ start_block = START_BLOCK(sbi, start_segno);
+ end_block = START_BLOCK(sbi, min(cur_segno, end_segno) + 1);
+
+ init_discard_policy(&dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
+ __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block);
+ trimmed = __wait_discard_cmd_range(sbi, &dpolicy,
+ start_block, end_block);
out:
- range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
+ range->len = F2FS_BLK_TO_BYTES(trimmed);
return err;
}
@@ -2338,6 +2514,20 @@ static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
return false;
}
+#if 0
+int rw_hint_to_seg_type(enum rw_hint hint)
+{
+ switch (hint) {
+ case WRITE_LIFE_SHORT:
+ return CURSEG_HOT_DATA;
+ case WRITE_LIFE_EXTREME:
+ return CURSEG_COLD_DATA;
+ default:
+ return CURSEG_WARM_DATA;
+ }
+}
+#endif
+
static int __get_segment_type_2(struct f2fs_io_info *fio)
{
if (fio->type == DATA)
@@ -2372,6 +2562,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
return CURSEG_COLD_DATA;
if (is_inode_flag_set(inode, FI_HOT_DATA))
return CURSEG_HOT_DATA;
+ /* rw_hint_to_seg_type(inode->i_write_hint); */
return CURSEG_WARM_DATA;
} else {
if (IS_DNODE(fio->page))
@@ -2416,8 +2607,10 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, type);
+ down_read(&SM_I(sbi)->curseg_lock);
+
mutex_lock(&curseg->curseg_mutex);
- mutex_lock(&sit_i->sentry_lock);
+ down_write(&sit_i->sentry_lock);
*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
@@ -2434,15 +2627,26 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
stat_inc_block_count(sbi, curseg);
+ /*
+ * SIT information should be updated before segment allocation,
+ * since SSR needs latest valid block information.
+ */
+ update_sit_entry(sbi, *new_blkaddr, 1);
+ if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
+ update_sit_entry(sbi, old_blkaddr, -1);
+
if (!__has_curseg_space(sbi, type))
sit_i->s_ops->allocate_segment(sbi, type, false);
+
/*
- * SIT information should be updated after segment allocation,
- * since we need to keep dirty segments precisely under SSR.
+ * segment dirty status should be updated after segment allocation,
+ * so we just need to update status only one time after previous
+ * segment being closed.
*/
- refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);
+ locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
+ locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
- mutex_unlock(&sit_i->sentry_lock);
+ up_write(&sit_i->sentry_lock);
if (page && IS_NODESEG(type)) {
fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
@@ -2462,6 +2666,29 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
}
mutex_unlock(&curseg->curseg_mutex);
+
+ up_read(&SM_I(sbi)->curseg_lock);
+}
+
+static void update_device_state(struct f2fs_io_info *fio)
+{
+ struct f2fs_sb_info *sbi = fio->sbi;
+ unsigned int devidx;
+
+ if (!sbi->s_ndevs)
+ return;
+
+ devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
+
+ /* update device state for fsync */
+ set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
+
+ /* update device state for checkpoint */
+ if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
+ spin_lock(&sbi->dev_lock);
+ f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
+ spin_unlock(&sbi->dev_lock);
+ }
}
static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
@@ -2478,6 +2705,8 @@ reallocate:
if (err == -EAGAIN) {
fio->old_blkaddr = fio->new_blkaddr;
goto reallocate;
+ } else if (!err) {
+ update_device_state(fio);
}
}
@@ -2538,12 +2767,26 @@ int rewrite_data_page(struct f2fs_io_info *fio)
stat_inc_inplace_blocks(fio->sbi);
err = f2fs_submit_page_bio(fio);
+ if (!err)
+ update_device_state(fio);
f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
return err;
}
+static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ int i;
+
+ for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
+ if (CURSEG_I(sbi, i)->segno == segno)
+ break;
+ }
+ return i;
+}
+
void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
block_t old_blkaddr, block_t new_blkaddr,
bool recover_curseg, bool recover_newaddr)
@@ -2559,6 +2802,8 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
se = get_seg_entry(sbi, segno);
type = se->type;
+ down_write(&SM_I(sbi)->curseg_lock);
+
if (!recover_curseg) {
/* for recovery flow */
if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
@@ -2568,14 +2813,19 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
type = CURSEG_WARM_DATA;
}
} else {
- if (!IS_CURSEG(sbi, segno))
+ if (IS_CURSEG(sbi, segno)) {
+ /* se->type is volatile as SSR allocation */
+ type = __f2fs_get_curseg(sbi, segno);
+ f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
+ } else {
type = CURSEG_WARM_DATA;
+ }
}
curseg = CURSEG_I(sbi, type);
mutex_lock(&curseg->curseg_mutex);
- mutex_lock(&sit_i->sentry_lock);
+ down_write(&sit_i->sentry_lock);
old_cursegno = curseg->segno;
old_blkoff = curseg->next_blkoff;
@@ -2607,8 +2857,9 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
curseg->next_blkoff = old_blkoff;
}
- mutex_unlock(&sit_i->sentry_lock);
+ up_write(&sit_i->sentry_lock);
mutex_unlock(&curseg->curseg_mutex);
+ up_write(&SM_I(sbi)->curseg_lock);
}
void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
@@ -3062,7 +3313,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
bool to_journal = true;
struct seg_entry *se;
- mutex_lock(&sit_i->sentry_lock);
+ down_write(&sit_i->sentry_lock);
if (!sit_i->dirty_sentries)
goto out;
@@ -3156,7 +3407,7 @@ out:
cpc->trim_start = trim_start;
}
- mutex_unlock(&sit_i->sentry_lock);
+ up_write(&sit_i->sentry_lock);
set_prefree_as_free_segments(sbi);
}
@@ -3249,7 +3500,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
- mutex_init(&sit_i->sentry_lock);
+ init_rwsem(&sit_i->sentry_lock);
return 0;
}
@@ -3490,7 +3741,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi)
struct sit_info *sit_i = SIT_I(sbi);
unsigned int segno;
- mutex_lock(&sit_i->sentry_lock);
+ down_write(&sit_i->sentry_lock);
sit_i->min_mtime = LLONG_MAX;
@@ -3507,7 +3758,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi)
sit_i->min_mtime = mtime;
}
sit_i->max_mtime = get_mtime(sbi);
- mutex_unlock(&sit_i->sentry_lock);
+ up_write(&sit_i->sentry_lock);
}
int build_segment_manager(struct f2fs_sb_info *sbi)
@@ -3540,11 +3791,14 @@ int build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
+ sm_info->min_ssr_sections = reserved_sections(sbi);
sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS;
INIT_LIST_HEAD(&sm_info->sit_entry_set);
+ init_rwsem(&sm_info->curseg_lock);
+
if (!f2fs_readonly(sbi->sb)) {
err = create_flush_cmd_control(sbi);
if (err)
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index ffa11274b0ce..5264b6ed120c 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -231,7 +231,7 @@ struct sit_info {
unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */
unsigned int dirty_sentries; /* # of dirty sentries */
unsigned int sents_per_block; /* # of SIT entries per block */
- struct mutex sentry_lock; /* to protect SIT cache */
+ struct rw_semaphore sentry_lock; /* to protect SIT cache */
struct seg_entry *sentries; /* SIT segment-level cache */
struct sec_entry *sec_entries; /* SIT section-level cache */
@@ -497,6 +497,33 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi)
return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi));
}
+static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi)
+{
+ unsigned int node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) +
+ get_pages(sbi, F2FS_DIRTY_DENTS);
+ unsigned int dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS);
+ unsigned int segno, left_blocks;
+ int i;
+
+ /* check current node segment */
+ for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) {
+ segno = CURSEG_I(sbi, i)->segno;
+ left_blocks = sbi->blocks_per_seg -
+ get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+
+ if (node_blocks > left_blocks)
+ return false;
+ }
+
+ /* check current data segment */
+ segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno;
+ left_blocks = sbi->blocks_per_seg -
+ get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+ if (dent_blocks > left_blocks)
+ return false;
+ return true;
+}
+
static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
int freed, int needed)
{
@@ -507,6 +534,9 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
return false;
+ if (free_sections(sbi) + freed == reserved_sections(sbi) + needed &&
+ has_curseg_enough_space(sbi))
+ return false;
return (free_sections(sbi) + freed) <=
(node_secs + 2 * dent_secs + imeta_secs +
reserved_sections(sbi) + needed);
@@ -730,7 +760,7 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type)
static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi,
unsigned int secno)
{
- if (get_valid_blocks(sbi, GET_SEG_FROM_SEC(sbi, secno), true) >=
+ if (get_valid_blocks(sbi, GET_SEG_FROM_SEC(sbi, secno), true) >
sbi->fggc_threshold)
return true;
return false;
@@ -795,8 +825,9 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force)
goto wake_up;
mutex_lock(&dcc->cmd_lock);
- for (i = MAX_PLIST_NUM - 1;
- i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) {
+ for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
+ if (i + 1 < dcc->discard_granularity)
+ break;
if (!list_empty(&dcc->pend_list[i])) {
wakeup = true;
break;
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 5c60fc28ec75..0b5664a1a6cc 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -28,7 +28,7 @@ static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi)
static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
{
- long count = NM_I(sbi)->nid_cnt[FREE_NID_LIST] - MAX_FREE_NIDS;
+ long count = NM_I(sbi)->nid_cnt[FREE_NID] - MAX_FREE_NIDS;
return count > 0 ? count : 0;
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 482bb0333806..76e2f1518224 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -44,6 +44,8 @@ static struct kmem_cache *f2fs_inode_cachep;
char *fault_name[FAULT_MAX] = {
[FAULT_KMALLOC] = "kmalloc",
[FAULT_PAGE_ALLOC] = "page alloc",
+ [FAULT_PAGE_GET] = "page get",
+ [FAULT_ALLOC_BIO] = "alloc bio",
[FAULT_ALLOC_NID] = "alloc nid",
[FAULT_ORPHAN] = "orphan",
[FAULT_BLOCK] = "no more block",
@@ -92,6 +94,7 @@ enum {
Opt_disable_ext_identify,
Opt_inline_xattr,
Opt_noinline_xattr,
+ Opt_inline_xattr_size,
Opt_inline_data,
Opt_inline_dentry,
Opt_noinline_dentry,
@@ -141,6 +144,7 @@ static match_table_t f2fs_tokens = {
{Opt_disable_ext_identify, "disable_ext_identify"},
{Opt_inline_xattr, "inline_xattr"},
{Opt_noinline_xattr, "noinline_xattr"},
+ {Opt_inline_xattr_size, "inline_xattr_size=%u"},
{Opt_inline_data, "inline_data"},
{Opt_inline_dentry, "inline_dentry"},
{Opt_noinline_dentry, "noinline_dentry"},
@@ -209,6 +213,12 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype,
"quota options when quota turned on");
return -EINVAL;
}
+ if (f2fs_sb_has_quota_ino(sb)) {
+ f2fs_msg(sb, KERN_INFO,
+ "QUOTA feature is enabled, so ignore qf_name");
+ return 0;
+ }
+
qname = match_strdup(args);
if (!qname) {
f2fs_msg(sb, KERN_ERR,
@@ -287,6 +297,18 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
return -1;
}
}
+
+ if (f2fs_sb_has_quota_ino(sbi->sb) && sbi->s_jquota_fmt) {
+ f2fs_msg(sbi->sb, KERN_INFO,
+ "QUOTA feature is enabled, so ignore jquota_fmt");
+ sbi->s_jquota_fmt = 0;
+ }
+ if (f2fs_sb_has_quota_ino(sbi->sb) && sb_rdonly(sbi->sb)) {
+ f2fs_msg(sbi->sb, KERN_INFO,
+ "Filesystem with quota feature cannot be mounted RDWR "
+ "without CONFIG_QUOTA");
+ return -1;
+ }
return 0;
}
#endif
@@ -383,6 +405,12 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_noinline_xattr:
clear_opt(sbi, INLINE_XATTR);
break;
+ case Opt_inline_xattr_size:
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+ set_opt(sbi, INLINE_XATTR_SIZE);
+ sbi->inline_xattr_size = arg;
+ break;
#else
case Opt_user_xattr:
f2fs_msg(sb, KERN_INFO,
@@ -604,6 +632,24 @@ static int parse_options(struct super_block *sb, char *options)
F2FS_IO_SIZE_KB(sbi));
return -EINVAL;
}
+
+ if (test_opt(sbi, INLINE_XATTR_SIZE)) {
+ if (!test_opt(sbi, INLINE_XATTR)) {
+ f2fs_msg(sb, KERN_ERR,
+ "inline_xattr_size option should be "
+ "set with inline_xattr option");
+ return -EINVAL;
+ }
+ if (!sbi->inline_xattr_size ||
+ sbi->inline_xattr_size >= DEF_ADDRS_PER_INODE -
+ F2FS_TOTAL_EXTRA_ATTR_SIZE -
+ DEF_INLINE_RESERVED_SIZE -
+ DEF_MIN_INLINE_SIZE) {
+ f2fs_msg(sb, KERN_ERR,
+ "inline xattr size is out of range");
+ return -EINVAL;
+ }
+ }
return 0;
}
@@ -618,13 +664,13 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
init_once((void *) fi);
/* Initialize f2fs-specific inode info */
- fi->vfs_inode.i_version = 1;
atomic_set(&fi->dirty_pages, 0);
fi->i_current_depth = 1;
fi->i_advise = 0;
init_rwsem(&fi->i_sem);
INIT_LIST_HEAD(&fi->dirty_list);
INIT_LIST_HEAD(&fi->gdirty_list);
+ INIT_LIST_HEAD(&fi->inmem_ilist);
INIT_LIST_HEAD(&fi->inmem_pages);
mutex_init(&fi->inmem_lock);
init_rwsem(&fi->dio_rwsem[READ]);
@@ -673,7 +719,6 @@ static int f2fs_drop_inode(struct inode *inode)
sb_end_intwrite(inode->i_sb);
- fscrypt_put_encryption_info(inode, NULL);
spin_lock(&inode->i_lock);
atomic_dec(&inode->i_count);
}
@@ -781,6 +826,7 @@ static void f2fs_put_super(struct super_block *sb)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
int i;
+ bool dropped;
f2fs_quota_off_umount(sb);
@@ -801,9 +847,9 @@ static void f2fs_put_super(struct super_block *sb)
}
/* be sure to wait for any on-going discard commands */
- f2fs_wait_discard_bios(sbi, true);
+ dropped = f2fs_wait_discard_bios(sbi);
- if (f2fs_discard_en(sbi) && !sbi->discard_blks) {
+ if (f2fs_discard_en(sbi) && !sbi->discard_blks && !dropped) {
struct cp_control cpc = {
.reason = CP_UMOUNT | CP_TRIMMED,
};
@@ -859,6 +905,9 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
struct f2fs_sb_info *sbi = F2FS_SB(sb);
int err = 0;
+ if (unlikely(f2fs_cp_error(sbi)))
+ return 0;
+
trace_f2fs_sync_fs(sb, sync);
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
@@ -958,7 +1007,7 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_blocks = total_count - start_count;
buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count;
buf->f_bavail = user_block_count - valid_user_blocks(sbi) -
- sbi->reserved_blocks;
+ sbi->current_reserved_blocks;
avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
@@ -1047,6 +1096,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",inline_xattr");
else
seq_puts(seq, ",noinline_xattr");
+ if (test_opt(sbi, INLINE_XATTR_SIZE))
+ seq_printf(seq, ",inline_xattr_size=%u",
+ sbi->inline_xattr_size);
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
if (test_opt(sbi, POSIX_ACL))
@@ -1109,6 +1161,7 @@ static void default_options(struct f2fs_sb_info *sbi)
{
/* init some FS parameters */
sbi->active_logs = NR_CURSEG_TYPE;
+ sbi->inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
set_opt(sbi, BG_GC);
set_opt(sbi, INLINE_XATTR);
@@ -1137,6 +1190,9 @@ static void default_options(struct f2fs_sb_info *sbi)
#endif
}
+#ifdef CONFIG_QUOTA
+static int f2fs_enable_quotas(struct super_block *sb);
+#endif
static int f2fs_remount(struct super_block *sb, int *flags, char *data)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -1203,6 +1259,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
goto skip;
+#ifdef CONFIG_QUOTA
if (!f2fs_readonly(sb) && (*flags & MS_RDONLY)) {
err = dquot_suspend(sb, -1);
if (err < 0)
@@ -1210,9 +1267,15 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
} else {
/* dquot_resume needs RW */
sb->s_flags &= ~MS_RDONLY;
- dquot_resume(sb, -1);
+ if (sb_any_quota_suspended(sb)) {
+ dquot_resume(sb, -1);
+ } else if (f2fs_sb_has_quota_ino(sb)) {
+ err = f2fs_enable_quotas(sb);
+ if (err)
+ goto restore_opts;
+ }
}
-
+#endif
/* disallow enable/disable extent_cache dynamically */
if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
err = -EINVAL;
@@ -1321,8 +1384,13 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data,
tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread);
repeat:
page = read_mapping_page(mapping, blkidx, NULL);
- if (IS_ERR(page))
+ if (IS_ERR(page)) {
+ if (PTR_ERR(page) == -ENOMEM) {
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ goto repeat;
+ }
return PTR_ERR(page);
+ }
lock_page(page);
@@ -1365,11 +1433,16 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type,
while (towrite > 0) {
tocopy = min_t(unsigned long, sb->s_blocksize - offset,
towrite);
-
+retry:
err = a_ops->write_begin(NULL, mapping, off, tocopy, 0,
&page, NULL);
- if (unlikely(err))
+ if (unlikely(err)) {
+ if (err == -ENOMEM) {
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ goto retry;
+ }
break;
+ }
kaddr = kmap_atomic(page);
memcpy(kaddr + offset, data, tocopy);
@@ -1386,8 +1459,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type,
}
if (len == towrite)
- return 0;
- inode->i_version++;
+ return err;
inode->i_mtime = inode->i_ctime = current_time(inode);
f2fs_mark_inode_dirty_sync(inode, false);
return len - towrite;
@@ -1409,19 +1481,91 @@ static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
sbi->s_jquota_fmt, type);
}
-void f2fs_enable_quota_files(struct f2fs_sb_info *sbi)
+int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly)
{
- int i, ret;
+ int enabled = 0;
+ int i, err;
+
+ if (f2fs_sb_has_quota_ino(sbi->sb) && rdonly) {
+ err = f2fs_enable_quotas(sbi->sb);
+ if (err) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Cannot turn on quota_ino: %d", err);
+ return 0;
+ }
+ return 1;
+ }
for (i = 0; i < MAXQUOTAS; i++) {
if (sbi->s_qf_names[i]) {
- ret = f2fs_quota_on_mount(sbi, i);
- if (ret < 0)
- f2fs_msg(sbi->sb, KERN_ERR,
- "Cannot turn on journaled "
- "quota: error %d", ret);
+ err = f2fs_quota_on_mount(sbi, i);
+ if (!err) {
+ enabled = 1;
+ continue;
+ }
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Cannot turn on quotas: %d on %d", err, i);
+ }
+ }
+ return enabled;
+}
+
+static int f2fs_quota_enable(struct super_block *sb, int type, int format_id,
+ unsigned int flags)
+{
+ struct inode *qf_inode;
+ unsigned long qf_inum;
+ int err;
+
+ BUG_ON(!f2fs_sb_has_quota_ino(sb));
+
+ qf_inum = f2fs_qf_ino(sb, type);
+ if (!qf_inum)
+ return -EPERM;
+
+ qf_inode = f2fs_iget(sb, qf_inum);
+ if (IS_ERR(qf_inode)) {
+ f2fs_msg(sb, KERN_ERR,
+ "Bad quota inode %u:%lu", type, qf_inum);
+ return PTR_ERR(qf_inode);
+ }
+
+ /* Don't account quota for quota files to avoid recursion */
+ qf_inode->i_flags |= S_NOQUOTA;
+ err = dquot_enable(qf_inode, type, format_id, flags);
+ iput(qf_inode);
+ return err;
+}
+
+static int f2fs_enable_quotas(struct super_block *sb)
+{
+ int type, err = 0;
+ unsigned long qf_inum;
+ bool quota_mopt[MAXQUOTAS] = {
+ test_opt(F2FS_SB(sb), USRQUOTA),
+ test_opt(F2FS_SB(sb), GRPQUOTA),
+ test_opt(F2FS_SB(sb), PRJQUOTA),
+ };
+
+ sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
+ for (type = 0; type < MAXQUOTAS; type++) {
+ qf_inum = f2fs_qf_ino(sb, type);
+ if (qf_inum) {
+ err = f2fs_quota_enable(sb, type, QFMT_VFS_V1,
+ DQUOT_USAGE_ENABLED |
+ (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
+ if (err) {
+ f2fs_msg(sb, KERN_ERR,
+ "Failed to enable quota tracking "
+ "(type=%d, err=%d). Please run "
+ "fsck to fix.", type, err);
+ for (type--; type >= 0; type--)
+ dquot_quota_off(sb, type);
+ return err;
+ }
}
}
+ return 0;
}
static int f2fs_quota_sync(struct super_block *sb, int type)
@@ -1492,7 +1636,7 @@ static int f2fs_quota_off(struct super_block *sb, int type)
f2fs_quota_sync(sb, type);
err = dquot_quota_off(sb, type);
- if (err)
+ if (err || f2fs_sb_has_quota_ino(sb))
goto out_put;
inode_lock(inode);
@@ -1660,7 +1804,7 @@ static loff_t max_file_blocks(void)
/*
* note: previously, result is equal to (DEF_ADDRS_PER_INODE -
- * F2FS_INLINE_XATTR_ADDRS), but now f2fs try to reserve more
+ * DEFAULT_INLINE_XATTR_ADDRS), but now f2fs try to reserve more
* space in inode.i_addr, it will be more safe to reassign
* result as zero.
*/
@@ -1969,6 +2113,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
for (j = HOT; j < NR_TEMP_TYPE; j++)
mutex_init(&sbi->wio_mutex[i][j]);
spin_lock_init(&sbi->cp_lock);
+
+ sbi->dirty_device = 0;
+ spin_lock_init(&sbi->dev_lock);
}
static int init_percpu_info(struct f2fs_sb_info *sbi)
@@ -2323,7 +2470,10 @@ try_onemore:
#ifdef CONFIG_QUOTA
sb->dq_op = &f2fs_quota_operations;
- sb->s_qcop = &f2fs_quotactl_ops;
+ if (f2fs_sb_has_quota_ino(sb))
+ sb->s_qcop = &dquot_quotactl_sysfile_ops;
+ else
+ sb->s_qcop = &f2fs_quotactl_ops;
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
@@ -2419,6 +2569,7 @@ try_onemore:
le64_to_cpu(sbi->ckpt->valid_block_count);
sbi->last_valid_block_count = sbi->total_valid_block_count;
sbi->reserved_blocks = 0;
+ sbi->current_reserved_blocks = 0;
for (i = 0; i < NR_INODE_TYPE; i++) {
INIT_LIST_HEAD(&sbi->inode_list[i]);
@@ -2493,10 +2644,24 @@ try_onemore:
if (err)
goto free_root_inode;
+#ifdef CONFIG_QUOTA
+ /*
+ * Turn on quotas which were not enabled for read-only mounts if
+ * filesystem has quota feature, so that they are updated correctly.
+ */
+ if (f2fs_sb_has_quota_ino(sb) && !sb_rdonly(sb)) {
+ err = f2fs_enable_quotas(sb);
+ if (err) {
+ f2fs_msg(sb, KERN_ERR,
+ "Cannot turn on quotas: error %d", err);
+ goto free_sysfs;
+ }
+ }
+#endif
/* if there are nt orphan nodes free them */
err = recover_orphan_inodes(sbi);
if (err)
- goto free_sysfs;
+ goto free_meta;
/* recover fsynced data */
if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
@@ -2530,7 +2695,7 @@ try_onemore:
err = -EINVAL;
f2fs_msg(sb, KERN_ERR,
"Need to recover fsync data");
- goto free_sysfs;
+ goto free_meta;
}
}
skip_recovery:
@@ -2564,6 +2729,10 @@ skip_recovery:
return 0;
free_meta:
+#ifdef CONFIG_QUOTA
+ if (f2fs_sb_has_quota_ino(sb) && !sb_rdonly(sb))
+ f2fs_quota_off_umount(sbi->sb);
+#endif
f2fs_sync_inode_meta(sbi);
/*
* Some dirty meta pages can be produced by recover_orphan_inodes()
@@ -2572,7 +2741,9 @@ free_meta:
* falls into an infinite loop in sync_meta_pages().
*/
truncate_inode_pages_final(META_MAPPING(sbi));
+#ifdef CONFIG_QUOTA
free_sysfs:
+#endif
f2fs_unregister_sysfs(sbi);
free_root_inode:
dput(sb->s_root);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index e2c258f717cd..9835348b6e5d 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -30,7 +30,7 @@ enum {
FAULT_INFO_RATE, /* struct f2fs_fault_info */
FAULT_INFO_TYPE, /* struct f2fs_fault_info */
#endif
- RESERVED_BLOCKS,
+ RESERVED_BLOCKS, /* struct f2fs_sb_info */
};
struct f2fs_attr {
@@ -63,6 +63,13 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
return NULL;
}
+static ssize_t dirty_segments_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)(dirty_segments(sbi)));
+}
+
static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
@@ -100,10 +107,22 @@ static ssize_t features_show(struct f2fs_attr *a,
if (f2fs_sb_has_inode_chksum(sb))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "inode_checksum");
+ if (f2fs_sb_has_flexible_inline_xattr(sb))
+ len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len ? ", " : "", "flexible_inline_xattr");
+ if (f2fs_sb_has_quota_ino(sb))
+ len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len ? ", " : "", "quota_ino");
len += snprintf(buf + len, PAGE_SIZE - len, "\n");
return len;
}
+static ssize_t current_reserved_blocks_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%u\n", sbi->current_reserved_blocks);
+}
+
static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
@@ -143,34 +162,22 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
#endif
if (a->struct_type == RESERVED_BLOCKS) {
spin_lock(&sbi->stat_lock);
- if ((unsigned long)sbi->total_valid_block_count + t >
- (unsigned long)sbi->user_block_count) {
+ if (t > (unsigned long)sbi->user_block_count) {
spin_unlock(&sbi->stat_lock);
return -EINVAL;
}
*ui = t;
+ sbi->current_reserved_blocks = min(sbi->reserved_blocks,
+ sbi->user_block_count - valid_user_blocks(sbi));
spin_unlock(&sbi->stat_lock);
return count;
}
if (!strcmp(a->attr.name, "discard_granularity")) {
- struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
- int i;
-
if (t == 0 || t > MAX_PLIST_NUM)
return -EINVAL;
if (t == *ui)
return count;
-
- mutex_lock(&dcc->cmd_lock);
- for (i = 0; i < MAX_PLIST_NUM; i++) {
- if (i >= t - 1)
- dcc->pend_list_tag[i] |= P_ACTIVE;
- else
- dcc->pend_list_tag[i] &= (~P_ACTIVE);
- }
- mutex_unlock(&dcc->cmd_lock);
-
*ui = t;
return count;
}
@@ -222,6 +229,8 @@ enum feat_id {
FEAT_EXTRA_ATTR,
FEAT_PROJECT_QUOTA,
FEAT_INODE_CHECKSUM,
+ FEAT_FLEXIBLE_INLINE_XATTR,
+ FEAT_QUOTA_INO,
};
static ssize_t f2fs_feature_show(struct f2fs_attr *a,
@@ -234,6 +243,8 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a,
case FEAT_EXTRA_ATTR:
case FEAT_PROJECT_QUOTA:
case FEAT_INODE_CHECKSUM:
+ case FEAT_FLEXIBLE_INLINE_XATTR:
+ case FEAT_QUOTA_INO:
return snprintf(buf, PAGE_SIZE, "supported\n");
}
return 0;
@@ -279,6 +290,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
@@ -291,8 +303,10 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate);
F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
#endif
+F2FS_GENERAL_RO_ATTR(dirty_segments);
F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
F2FS_GENERAL_RO_ATTR(features);
+F2FS_GENERAL_RO_ATTR(current_reserved_blocks);
#ifdef CONFIG_F2FS_FS_ENCRYPTION
F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO);
@@ -304,6 +318,8 @@ F2FS_FEATURE_RO_ATTR(atomic_write, FEAT_ATOMIC_WRITE);
F2FS_FEATURE_RO_ATTR(extra_attr, FEAT_EXTRA_ATTR);
F2FS_FEATURE_RO_ATTR(project_quota, FEAT_PROJECT_QUOTA);
F2FS_FEATURE_RO_ATTR(inode_checksum, FEAT_INODE_CHECKSUM);
+F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR);
+F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
@@ -321,6 +337,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(min_ipu_util),
ATTR_LIST(min_fsync_blocks),
ATTR_LIST(min_hot_blocks),
+ ATTR_LIST(min_ssr_sections),
ATTR_LIST(max_victim_search),
ATTR_LIST(dir_level),
ATTR_LIST(ram_thresh),
@@ -333,9 +350,11 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(inject_rate),
ATTR_LIST(inject_type),
#endif
+ ATTR_LIST(dirty_segments),
ATTR_LIST(lifetime_write_kbytes),
ATTR_LIST(features),
ATTR_LIST(reserved_blocks),
+ ATTR_LIST(current_reserved_blocks),
NULL,
};
@@ -350,6 +369,8 @@ static struct attribute *f2fs_feat_attrs[] = {
ATTR_LIST(extra_attr),
ATTR_LIST(project_quota),
ATTR_LIST(inode_checksum),
+ ATTR_LIST(flexible_inline_xattr),
+ ATTR_LIST(quota_ino),
NULL,
};
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index ab658419552b..7acf56ebda65 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -264,12 +264,12 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
return entry;
}
-static struct f2fs_xattr_entry *__find_inline_xattr(void *base_addr,
- void **last_addr, int index,
- size_t len, const char *name)
+static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode,
+ void *base_addr, void **last_addr, int index,
+ size_t len, const char *name)
{
struct f2fs_xattr_entry *entry;
- unsigned int inline_size = F2FS_INLINE_XATTR_ADDRS << 2;
+ unsigned int inline_size = inline_xattr_size(inode);
list_for_each_xattr(entry, base_addr) {
if ((void *)entry + sizeof(__u32) > base_addr + inline_size ||
@@ -288,12 +288,54 @@ static struct f2fs_xattr_entry *__find_inline_xattr(void *base_addr,
return entry;
}
+static int read_inline_xattr(struct inode *inode, struct page *ipage,
+ void *txattr_addr)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ unsigned int inline_size = inline_xattr_size(inode);
+ struct page *page = NULL;
+ void *inline_addr;
+
+ if (ipage) {
+ inline_addr = inline_xattr_addr(inode, ipage);
+ } else {
+ page = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
+ inline_addr = inline_xattr_addr(inode, page);
+ }
+ memcpy(txattr_addr, inline_addr, inline_size);
+ f2fs_put_page(page, 1);
+
+ return 0;
+}
+
+static int read_xattr_block(struct inode *inode, void *txattr_addr)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ nid_t xnid = F2FS_I(inode)->i_xattr_nid;
+ unsigned int inline_size = inline_xattr_size(inode);
+ struct page *xpage;
+ void *xattr_addr;
+
+ /* The inode already has an extended attribute block. */
+ xpage = get_node_page(sbi, xnid);
+ if (IS_ERR(xpage))
+ return PTR_ERR(xpage);
+
+ xattr_addr = page_address(xpage);
+ memcpy(txattr_addr + inline_size, xattr_addr, VALID_XATTR_BLOCK_SIZE);
+ f2fs_put_page(xpage, 1);
+
+ return 0;
+}
+
static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
unsigned int index, unsigned int len,
const char *name, struct f2fs_xattr_entry **xe,
void **base_addr)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
void *cur_addr, *txattr_addr, *last_addr = NULL;
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0;
@@ -310,23 +352,11 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
/* read from inline xattr */
if (inline_size) {
- struct page *page = NULL;
- void *inline_addr;
-
- if (ipage) {
- inline_addr = inline_xattr_addr(ipage);
- } else {
- page = get_node_page(sbi, inode->i_ino);
- if (IS_ERR(page)) {
- err = PTR_ERR(page);
- goto out;
- }
- inline_addr = inline_xattr_addr(page);
- }
- memcpy(txattr_addr, inline_addr, inline_size);
- f2fs_put_page(page, 1);
+ err = read_inline_xattr(inode, ipage, txattr_addr);
+ if (err)
+ goto out;
- *xe = __find_inline_xattr(txattr_addr, &last_addr,
+ *xe = __find_inline_xattr(inode, txattr_addr, &last_addr,
index, len, name);
if (*xe)
goto check;
@@ -334,19 +364,9 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
/* read from xattr node block */
if (xnid) {
- struct page *xpage;
- void *xattr_addr;
-
- /* The inode already has an extended attribute block. */
- xpage = get_node_page(sbi, xnid);
- if (IS_ERR(xpage)) {
- err = PTR_ERR(xpage);
+ err = read_xattr_block(inode, txattr_addr);
+ if (err)
goto out;
- }
-
- xattr_addr = page_address(xpage);
- memcpy(txattr_addr + inline_size, xattr_addr, size);
- f2fs_put_page(xpage, 1);
}
if (last_addr)
@@ -371,7 +391,6 @@ out:
static int read_all_xattrs(struct inode *inode, struct page *ipage,
void **base_addr)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_xattr_header *header;
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
unsigned int size = VALID_XATTR_BLOCK_SIZE;
@@ -386,38 +405,16 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage,
/* read from inline xattr */
if (inline_size) {
- struct page *page = NULL;
- void *inline_addr;
-
- if (ipage) {
- inline_addr = inline_xattr_addr(ipage);
- } else {
- page = get_node_page(sbi, inode->i_ino);
- if (IS_ERR(page)) {
- err = PTR_ERR(page);
- goto fail;
- }
- inline_addr = inline_xattr_addr(page);
- }
- memcpy(txattr_addr, inline_addr, inline_size);
- f2fs_put_page(page, 1);
+ err = read_inline_xattr(inode, ipage, txattr_addr);
+ if (err)
+ goto fail;
}
/* read from xattr node block */
if (xnid) {
- struct page *xpage;
- void *xattr_addr;
-
- /* The inode already has an extended attribute block. */
- xpage = get_node_page(sbi, xnid);
- if (IS_ERR(xpage)) {
- err = PTR_ERR(xpage);
+ err = read_xattr_block(inode, txattr_addr);
+ if (err)
goto fail;
- }
-
- xattr_addr = page_address(xpage);
- memcpy(txattr_addr + inline_size, xattr_addr, size);
- f2fs_put_page(xpage, 1);
}
header = XATTR_HDR(txattr_addr);
@@ -439,10 +436,12 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
size_t inline_size = inline_xattr_size(inode);
+ struct page *in_page = NULL;
void *xattr_addr;
+ void *inline_addr = NULL;
struct page *xpage;
nid_t new_nid = 0;
- int err;
+ int err = 0;
if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid)
if (!alloc_nid(sbi, &new_nid))
@@ -450,30 +449,30 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
/* write to inline xattr */
if (inline_size) {
- struct page *page = NULL;
- void *inline_addr;
-
if (ipage) {
- inline_addr = inline_xattr_addr(ipage);
- f2fs_wait_on_page_writeback(ipage, NODE, true);
- set_page_dirty(ipage);
+ inline_addr = inline_xattr_addr(inode, ipage);
} else {
- page = get_node_page(sbi, inode->i_ino);
- if (IS_ERR(page)) {
+ in_page = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(in_page)) {
alloc_nid_failed(sbi, new_nid);
- return PTR_ERR(page);
+ return PTR_ERR(in_page);
}
- inline_addr = inline_xattr_addr(page);
- f2fs_wait_on_page_writeback(page, NODE, true);
+ inline_addr = inline_xattr_addr(inode, in_page);
}
- memcpy(inline_addr, txattr_addr, inline_size);
- f2fs_put_page(page, 1);
+ f2fs_wait_on_page_writeback(ipage ? ipage : in_page,
+ NODE, true);
/* no need to use xattr node block */
if (hsize <= inline_size) {
- err = truncate_xattr_node(inode, ipage);
+ err = truncate_xattr_node(inode);
alloc_nid_failed(sbi, new_nid);
- return err;
+ if (err) {
+ f2fs_put_page(in_page, 1);
+ return err;
+ }
+ memcpy(inline_addr, txattr_addr, inline_size);
+ set_page_dirty(ipage ? ipage : in_page);
+ goto in_page_out;
}
}
@@ -482,7 +481,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
if (IS_ERR(xpage)) {
alloc_nid_failed(sbi, new_nid);
- return PTR_ERR(xpage);
+ goto in_page_out;
}
f2fs_bug_on(sbi, new_nid);
f2fs_wait_on_page_writeback(xpage, NODE, true);
@@ -492,17 +491,24 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
xpage = new_node_page(&dn, XATTR_NODE_OFFSET);
if (IS_ERR(xpage)) {
alloc_nid_failed(sbi, new_nid);
- return PTR_ERR(xpage);
+ goto in_page_out;
}
alloc_nid_done(sbi, new_nid);
}
-
xattr_addr = page_address(xpage);
+
+ if (inline_size)
+ memcpy(inline_addr, txattr_addr, inline_size);
memcpy(xattr_addr, txattr_addr + inline_size, VALID_XATTR_BLOCK_SIZE);
+
+ if (inline_size)
+ set_page_dirty(ipage ? ipage : in_page);
set_page_dirty(xpage);
- f2fs_put_page(xpage, 1);
- return 0;
+ f2fs_put_page(xpage, 1);
+in_page_out:
+ f2fs_put_page(in_page, 1);
+ return err;
}
int f2fs_getxattr(struct inode *inode, int index, const char *name,
@@ -721,6 +727,10 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int err;
+ err = dquot_initialize(inode);
+ if (err)
+ return err;
+
/* this case is only from init_inode_metadata */
if (ipage)
return __f2fs_setxattr(inode, index, name, value,
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 0ac4c1f73fbd..25177e6bd603 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -103,7 +103,7 @@ static inline unsigned int isonum_733(char *p)
/* Ignore bigendian datum due to broken mastering programs */
return get_unaligned_le32(p);
}
-extern int iso_date(char *, int);
+extern int iso_date(u8 *, int);
struct inode; /* To make gcc happy */
diff --git a/fs/isofs/rock.h b/fs/isofs/rock.h
index ed09e2b08637..f835976ce033 100644
--- a/fs/isofs/rock.h
+++ b/fs/isofs/rock.h
@@ -65,7 +65,7 @@ struct RR_PL_s {
};
struct stamp {
- char time[7];
+ __u8 time[7]; /* actually 6 unsigned, 1 signed */
} __attribute__ ((packed));
struct RR_TF_s {
diff --git a/fs/isofs/util.c b/fs/isofs/util.c
index 005a15cfd30a..37860fea364d 100644
--- a/fs/isofs/util.c
+++ b/fs/isofs/util.c
@@ -15,7 +15,7 @@
* to GMT. Thus we should always be correct.
*/
-int iso_date(char * p, int flag)
+int iso_date(u8 *p, int flag)
{
int year, month, day, hour, minute, second, tz;
int crtime;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 8e425f2c5ddd..6fef53f18dcf 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -242,15 +242,12 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE
};
const u32 nfs4_fs_locations_bitmap[3] = {
- FATTR4_WORD0_TYPE
- | FATTR4_WORD0_CHANGE
+ FATTR4_WORD0_CHANGE
| FATTR4_WORD0_SIZE
| FATTR4_WORD0_FSID
| FATTR4_WORD0_FILEID
| FATTR4_WORD0_FS_LOCATIONS,
- FATTR4_WORD1_MODE
- | FATTR4_WORD1_NUMLINKS
- | FATTR4_WORD1_OWNER
+ FATTR4_WORD1_OWNER
| FATTR4_WORD1_OWNER_GROUP
| FATTR4_WORD1_RAWDEV
| FATTR4_WORD1_SPACE_USED
@@ -6351,9 +6348,7 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
struct page *page)
{
struct nfs_server *server = NFS_SERVER(dir);
- u32 bitmask[3] = {
- [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
- };
+ u32 bitmask[3];
struct nfs4_fs_locations_arg args = {
.dir_fh = NFS_FH(dir),
.name = name,
@@ -6372,12 +6367,15 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
dprintk("%s: start\n", __func__);
+ bitmask[0] = nfs4_fattr_bitmap[0] | FATTR4_WORD0_FS_LOCATIONS;
+ bitmask[1] = nfs4_fattr_bitmap[1];
+
/* Ask for the fileid of the absent filesystem if mounted_on_fileid
* is not supported */
if (NFS_SERVER(dir)->attr_bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
- bitmask[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID;
+ bitmask[0] &= ~FATTR4_WORD0_FILEID;
else
- bitmask[0] |= FATTR4_WORD0_FILEID;
+ bitmask[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
nfs_fattr_init(&fs_locations->fattr);
fs_locations->server = server;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f1268280244e..3149f7e58d6f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1322,7 +1322,7 @@ static int nfs_parse_mount_options(char *raw,
mnt->options |= NFS_OPTION_MIGRATION;
break;
case Opt_nomigration:
- mnt->options &= NFS_OPTION_MIGRATION;
+ mnt->options &= ~NFS_OPTION_MIGRATION;
break;
/*
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index ca9ebc3242d3..421935f3d909 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3829,7 +3829,8 @@ static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, statei
{
struct nfs4_stid *ret;
- ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID);
+ ret = find_stateid_by_type(cl, s,
+ NFS4_DELEG_STID|NFS4_REVOKED_DELEG_STID);
if (!ret)
return NULL;
return delegstateid(ret);
@@ -3852,6 +3853,12 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
if (deleg == NULL)
goto out;
+ if (deleg->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) {
+ nfs4_put_stid(&deleg->dl_stid);
+ if (cl->cl_minorversion)
+ status = nfserr_deleg_revoked;
+ goto out;
+ }
flags = share_access_to_flags(open->op_share_access);
status = nfs4_check_delegmode(deleg, flags);
if (status) {
@@ -4696,6 +4703,16 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
struct nfs4_stid **s, struct nfsd_net *nn)
{
__be32 status;
+ bool return_revoked = false;
+
+ /*
+ * only return revoked delegations if explicitly asked.
+ * otherwise we report revoked or bad_stateid status.
+ */
+ if (typemask & NFS4_REVOKED_DELEG_STID)
+ return_revoked = true;
+ else if (typemask & NFS4_DELEG_STID)
+ typemask |= NFS4_REVOKED_DELEG_STID;
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
return nfserr_bad_stateid;
@@ -4710,6 +4727,12 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
*s = find_stateid_by_type(cstate->clp, stateid, typemask);
if (!*s)
return nfserr_bad_stateid;
+ if (((*s)->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) {
+ nfs4_put_stid(*s);
+ if (cstate->minorversion)
+ return nfserr_deleg_revoked;
+ return nfserr_bad_stateid;
+ }
return nfs_ok;
}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 2f27c935bd57..34c22fe4eca0 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1945,8 +1945,6 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
"failed to get inode block.\n");
return err;
}
- mark_buffer_dirty(ibh);
- nilfs_mdt_mark_dirty(ifile);
spin_lock(&nilfs->ns_inode_lock);
if (likely(!ii->i_bh))
ii->i_bh = ibh;
@@ -1955,6 +1953,10 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
goto retry;
}
+ // Always redirty the buffer to avoid race condition
+ mark_buffer_dirty(ii->i_bh);
+ nilfs_mdt_mark_dirty(ifile);
+
clear_bit(NILFS_I_QUEUED, &ii->i_state);
set_bit(NILFS_I_BUSY, &ii->i_state);
list_move_tail(&ii->i_dirty, &sci->sc_dirty_files);
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index c2a975e4a711..fef1caeddf54 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -36,6 +36,8 @@
#define F2FS_NODE_INO(sbi) (sbi->node_ino_num)
#define F2FS_META_INO(sbi) (sbi->meta_ino_num)
+#define F2FS_MAX_QUOTAS 3
+
#define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */
#define F2FS_IO_SIZE_KB(sbi) (1 << ((sbi)->write_io_size_bits + 2)) /* KB */
#define F2FS_IO_SIZE_BYTES(sbi) (1 << ((sbi)->write_io_size_bits + 12)) /* B */
@@ -108,7 +110,8 @@ struct f2fs_super_block {
__u8 encryption_level; /* versioning level for encryption */
__u8 encrypt_pw_salt[16]; /* Salt used for string2key algorithm */
struct f2fs_device devs[MAX_DEVICES]; /* device list */
- __u8 reserved[327]; /* valid reserved region */
+ __le32 qf_ino[F2FS_MAX_QUOTAS]; /* quota inode numbers */
+ __u8 reserved[315]; /* valid reserved region */
} __packed;
/*
@@ -184,7 +187,8 @@ struct f2fs_extent {
} __packed;
#define F2FS_NAME_LEN 255
-#define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */
+/* 200 bytes for inline xattrs by default */
+#define DEFAULT_INLINE_XATTR_ADDRS 50
#define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */
#define CUR_ADDRS_PER_INODE(inode) (DEF_ADDRS_PER_INODE - \
get_extra_isize(inode))
@@ -238,7 +242,7 @@ struct f2fs_inode {
union {
struct {
__le16 i_extra_isize; /* extra inode attribute size */
- __le16 i_padding; /* padding */
+ __le16 i_inline_xattr_size; /* inline xattr size, unit: 4 bytes */
__le32 i_projid; /* project id */
__le32 i_inode_checksum;/* inode meta checksum */
__le32 i_extra_end[0]; /* for attribute size calculation */
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index f0f1793cfa49..3a5af09af18b 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -50,13 +50,13 @@ struct tk_read_base {
* @tai_offset: The current UTC to TAI offset in seconds
* @clock_was_set_seq: The sequence number of clock was set events
* @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second
- * @raw_time: Monotonic raw base time in timespec64 format
+ * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds
* @cycle_interval: Number of clock cycles in one NTP interval
* @xtime_interval: Number of clock shifted nano seconds in one NTP
* interval.
* @xtime_remainder: Shifted nano seconds left over when rounding
* @cycle_interval
- * @raw_interval: Raw nano seconds accumulated per NTP interval.
+ * @raw_interval: Shifted raw nano seconds accumulated per NTP interval.
* @ntp_error: Difference between accumulated time and NTP time in ntp
* shifted nano seconds.
* @ntp_error_shift: Shift conversion between clock shifted nano seconds and
@@ -91,13 +91,13 @@ struct timekeeper {
s32 tai_offset;
unsigned int clock_was_set_seq;
ktime_t next_leap_ktime;
- struct timespec64 raw_time;
+ u64 raw_sec;
/* The following members are for timekeeping internal use */
cycle_t cycle_interval;
u64 xtime_interval;
s64 xtime_remainder;
- u32 raw_interval;
+ u64 raw_interval;
/* The ntp_tick_length() value currently being used.
* This cached copy ensures we consistently apply the tick
* length for an entire tick, as ntp_tick_length may change
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 7063bbcca03b..589df6f73789 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -128,6 +128,18 @@ TRACE_DEFINE_ENUM(CP_TRIMMED);
{ CP_DISCARD, "Discard" }, \
{ CP_UMOUNT | CP_TRIMMED, "Umount,Trimmed" })
+#define show_fsync_cpreason(type) \
+ __print_symbolic(type, \
+ { CP_NO_NEEDED, "no needed" }, \
+ { CP_NON_REGULAR, "non regular" }, \
+ { CP_HARDLINK, "hardlink" }, \
+ { CP_SB_NEED_CP, "sb needs cp" }, \
+ { CP_WRONG_PINO, "wrong pino" }, \
+ { CP_NO_SPC_ROLL, "no space roll forward" }, \
+ { CP_NODE_NEED_CP, "node needs cp" }, \
+ { CP_FASTBOOT_MODE, "fastboot mode" }, \
+ { CP_SPEC_LOG_NUM, "log type is 2" })
+
struct victim_sel_policy;
struct f2fs_map_blocks;
@@ -202,14 +214,14 @@ DEFINE_EVENT(f2fs__inode, f2fs_sync_file_enter,
TRACE_EVENT(f2fs_sync_file_exit,
- TP_PROTO(struct inode *inode, int need_cp, int datasync, int ret),
+ TP_PROTO(struct inode *inode, int cp_reason, int datasync, int ret),
- TP_ARGS(inode, need_cp, datasync, ret),
+ TP_ARGS(inode, cp_reason, datasync, ret),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(ino_t, ino)
- __field(int, need_cp)
+ __field(int, cp_reason)
__field(int, datasync)
__field(int, ret)
),
@@ -217,15 +229,15 @@ TRACE_EVENT(f2fs_sync_file_exit,
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
- __entry->need_cp = need_cp;
+ __entry->cp_reason = cp_reason;
__entry->datasync = datasync;
__entry->ret = ret;
),
- TP_printk("dev = (%d,%d), ino = %lu, checkpoint is %s, "
+ TP_printk("dev = (%d,%d), ino = %lu, cp_reason: %s, "
"datasync = %d, ret = %d",
show_dev_ino(__entry),
- __entry->need_cp ? "needed" : "not needed",
+ show_fsync_cpreason(__entry->cp_reason),
__entry->datasync,
__entry->ret)
);
@@ -716,6 +728,91 @@ TRACE_EVENT(f2fs_get_victim,
__entry->free)
);
+TRACE_EVENT(f2fs_lookup_start,
+
+ TP_PROTO(struct inode *dir, struct dentry *dentry, unsigned int flags),
+
+ TP_ARGS(dir, dentry, flags),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(const char *, name)
+ __field(unsigned int, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->ino = dir->i_ino;
+ __entry->name = dentry->d_name.name;
+ __entry->flags = flags;
+ ),
+
+ TP_printk("dev = (%d,%d), pino = %lu, name:%s, flags:%u",
+ show_dev_ino(__entry),
+ __entry->name,
+ __entry->flags)
+);
+
+TRACE_EVENT(f2fs_lookup_end,
+
+ TP_PROTO(struct inode *dir, struct dentry *dentry, nid_t ino,
+ int err),
+
+ TP_ARGS(dir, dentry, ino, err),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(const char *, name)
+ __field(nid_t, cino)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->ino = dir->i_ino;
+ __entry->name = dentry->d_name.name;
+ __entry->cino = ino;
+ __entry->err = err;
+ ),
+
+ TP_printk("dev = (%d,%d), pino = %lu, name:%s, ino:%u, err:%d",
+ show_dev_ino(__entry),
+ __entry->name,
+ __entry->cino,
+ __entry->err)
+);
+
+TRACE_EVENT(f2fs_readdir,
+
+ TP_PROTO(struct inode *dir, loff_t start_pos, loff_t end_pos, int err),
+
+ TP_ARGS(dir, start_pos, end_pos, err),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ino_t, ino)
+ __field(loff_t, start)
+ __field(loff_t, end)
+ __field(int, err)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dir->i_sb->s_dev;
+ __entry->ino = dir->i_ino;
+ __entry->start = start_pos;
+ __entry->end = end_pos;
+ __entry->err = err;
+ ),
+
+ TP_printk("dev = (%d,%d), ino = %lu, start_pos:%llu, end_pos:%llu, err:%d",
+ show_dev_ino(__entry),
+ __entry->start,
+ __entry->end,
+ __entry->err)
+);
+
TRACE_EVENT(f2fs_fallocate,
TP_PROTO(struct inode *inode, int mode,
@@ -1274,6 +1371,13 @@ DEFINE_EVENT(f2fs_discard, f2fs_issue_discard,
TP_ARGS(dev, blkstart, blklen)
);
+DEFINE_EVENT(f2fs_discard, f2fs_remove_discard,
+
+ TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen),
+
+ TP_ARGS(dev, blkstart, blklen)
+);
+
TRACE_EVENT(f2fs_issue_reset_zone,
TP_PROTO(struct block_device *dev, block_t blkstart),
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 5664ca07c9c7..a01a076ea060 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -455,20 +455,22 @@ TRACE_EVENT(svc_recv,
TP_ARGS(rqst, status),
TP_STRUCT__entry(
- __field(struct sockaddr *, addr)
__field(__be32, xid)
__field(int, status)
__field(unsigned long, flags)
+ __dynamic_array(unsigned char, addr, rqst->rq_addrlen)
),
TP_fast_assign(
- __entry->addr = (struct sockaddr *)&rqst->rq_addr;
__entry->xid = status > 0 ? rqst->rq_xid : 0;
__entry->status = status;
__entry->flags = rqst->rq_flags;
+ memcpy(__get_dynamic_array(addr),
+ &rqst->rq_addr, rqst->rq_addrlen);
),
- TP_printk("addr=%pIScp xid=0x%x status=%d flags=%s", __entry->addr,
+ TP_printk("addr=%pIScp xid=0x%x status=%d flags=%s",
+ (struct sockaddr *)__get_dynamic_array(addr),
be32_to_cpu(__entry->xid), __entry->status,
show_rqstp_flags(__entry->flags))
);
@@ -480,22 +482,23 @@ DECLARE_EVENT_CLASS(svc_rqst_status,
TP_ARGS(rqst, status),
TP_STRUCT__entry(
- __field(struct sockaddr *, addr)
__field(__be32, xid)
- __field(int, dropme)
__field(int, status)
__field(unsigned long, flags)
+ __dynamic_array(unsigned char, addr, rqst->rq_addrlen)
),
TP_fast_assign(
- __entry->addr = (struct sockaddr *)&rqst->rq_addr;
__entry->xid = rqst->rq_xid;
__entry->status = status;
__entry->flags = rqst->rq_flags;
+ memcpy(__get_dynamic_array(addr),
+ &rqst->rq_addr, rqst->rq_addrlen);
),
TP_printk("addr=%pIScp rq_xid=0x%x status=%d flags=%s",
- __entry->addr, be32_to_cpu(__entry->xid),
+ (struct sockaddr *)__get_dynamic_array(addr),
+ be32_to_cpu(__entry->xid),
__entry->status, show_rqstp_flags(__entry->flags))
);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1d91c012b5d8..c6c1a2a20474 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6632,6 +6632,12 @@ static int init_rootdomain(struct root_domain *rd)
if (!zalloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
goto free_dlo_mask;
+#ifdef HAVE_RT_PUSH_IPI
+ rd->rto_cpu = -1;
+ raw_spin_lock_init(&rd->rto_lock);
+ init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
+#endif
+
init_dl_bw(&rd->dl_bw);
if (cpudl_init(&rd->cpudl) != 0)
goto free_dlo_mask;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index af6a7f424d94..c290db7f289a 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -68,10 +68,6 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
raw_spin_unlock(&rt_b->rt_runtime_lock);
}
-#if defined(CONFIG_SMP) && defined(HAVE_RT_PUSH_IPI)
-static void push_irq_work_func(struct irq_work *work);
-#endif
-
void init_rt_rq(struct rt_rq *rt_rq)
{
struct rt_prio_array *array;
@@ -91,13 +87,6 @@ void init_rt_rq(struct rt_rq *rt_rq)
rt_rq->rt_nr_migratory = 0;
rt_rq->overloaded = 0;
plist_head_init(&rt_rq->pushable_tasks);
-
-#ifdef HAVE_RT_PUSH_IPI
- rt_rq->push_flags = 0;
- rt_rq->push_cpu = nr_cpu_ids;
- raw_spin_lock_init(&rt_rq->push_lock);
- init_irq_work(&rt_rq->push_work, push_irq_work_func);
-#endif
#endif /* CONFIG_SMP */
/* We start is dequeued state, because no RT tasks are queued */
rt_rq->rt_queued = 0;
@@ -2104,160 +2093,166 @@ static void push_rt_tasks(struct rq *rq)
}
#ifdef HAVE_RT_PUSH_IPI
+
/*
- * The search for the next cpu always starts at rq->cpu and ends
- * when we reach rq->cpu again. It will never return rq->cpu.
- * This returns the next cpu to check, or nr_cpu_ids if the loop
- * is complete.
+ * When a high priority task schedules out from a CPU and a lower priority
+ * task is scheduled in, a check is made to see if there's any RT tasks
+ * on other CPUs that are waiting to run because a higher priority RT task
+ * is currently running on its CPU. In this case, the CPU with multiple RT
+ * tasks queued on it (overloaded) needs to be notified that a CPU has opened
+ * up that may be able to run one of its non-running queued RT tasks.
+ *
+ * All CPUs with overloaded RT tasks need to be notified as there is currently
+ * no way to know which of these CPUs have the highest priority task waiting
+ * to run. Instead of trying to take a spinlock on each of these CPUs,
+ * which has shown to cause large latency when done on machines with many
+ * CPUs, sending an IPI to the CPUs to have them push off the overloaded
+ * RT tasks waiting to run.
+ *
+ * Just sending an IPI to each of the CPUs is also an issue, as on large
+ * count CPU machines, this can cause an IPI storm on a CPU, especially
+ * if its the only CPU with multiple RT tasks queued, and a large number
+ * of CPUs scheduling a lower priority task at the same time.
+ *
+ * Each root domain has its own irq work function that can iterate over
+ * all CPUs with RT overloaded tasks. Since all CPUs with overloaded RT
+ * tassk must be checked if there's one or many CPUs that are lowering
+ * their priority, there's a single irq work iterator that will try to
+ * push off RT tasks that are waiting to run.
+ *
+ * When a CPU schedules a lower priority task, it will kick off the
+ * irq work iterator that will jump to each CPU with overloaded RT tasks.
+ * As it only takes the first CPU that schedules a lower priority task
+ * to start the process, the rto_start variable is incremented and if
+ * the atomic result is one, then that CPU will try to take the rto_lock.
+ * This prevents high contention on the lock as the process handles all
+ * CPUs scheduling lower priority tasks.
+ *
+ * All CPUs that are scheduling a lower priority task will increment the
+ * rt_loop_next variable. This will make sure that the irq work iterator
+ * checks all RT overloaded CPUs whenever a CPU schedules a new lower
+ * priority task, even if the iterator is in the middle of a scan. Incrementing
+ * the rt_loop_next will cause the iterator to perform another scan.
*
- * rq->rt.push_cpu holds the last cpu returned by this function,
- * or if this is the first instance, it must hold rq->cpu.
*/
static int rto_next_cpu(struct rq *rq)
{
- int prev_cpu = rq->rt.push_cpu;
+ struct root_domain *rd = rq->rd;
+ int next;
int cpu;
- cpu = cpumask_next(prev_cpu, rq->rd->rto_mask);
-
/*
- * If the previous cpu is less than the rq's CPU, then it already
- * passed the end of the mask, and has started from the beginning.
- * We end if the next CPU is greater or equal to rq's CPU.
+ * When starting the IPI RT pushing, the rto_cpu is set to -1,
+ * rt_next_cpu() will simply return the first CPU found in
+ * the rto_mask.
+ *
+ * If rto_next_cpu() is called with rto_cpu is a valid cpu, it
+ * will return the next CPU found in the rto_mask.
+ *
+ * If there are no more CPUs left in the rto_mask, then a check is made
+ * against rto_loop and rto_loop_next. rto_loop is only updated with
+ * the rto_lock held, but any CPU may increment the rto_loop_next
+ * without any locking.
*/
- if (prev_cpu < rq->cpu) {
- if (cpu >= rq->cpu)
- return nr_cpu_ids;
+ for (;;) {
- } else if (cpu >= nr_cpu_ids) {
- /*
- * We passed the end of the mask, start at the beginning.
- * If the result is greater or equal to the rq's CPU, then
- * the loop is finished.
- */
- cpu = cpumask_first(rq->rd->rto_mask);
- if (cpu >= rq->cpu)
- return nr_cpu_ids;
- }
- rq->rt.push_cpu = cpu;
+ /* When rto_cpu is -1 this acts like cpumask_first() */
+ cpu = cpumask_next(rd->rto_cpu, rd->rto_mask);
- /* Return cpu to let the caller know if the loop is finished or not */
- return cpu;
-}
+ rd->rto_cpu = cpu;
-static int find_next_push_cpu(struct rq *rq)
-{
- struct rq *next_rq;
- int cpu;
+ if (cpu < nr_cpu_ids)
+ return cpu;
- while (1) {
- cpu = rto_next_cpu(rq);
- if (cpu >= nr_cpu_ids)
- break;
- next_rq = cpu_rq(cpu);
+ rd->rto_cpu = -1;
- /* Make sure the next rq can push to this rq */
- if (next_rq->rt.highest_prio.next < rq->rt.highest_prio.curr)
+ /*
+ * ACQUIRE ensures we see the @rto_mask changes
+ * made prior to the @next value observed.
+ *
+ * Matches WMB in rt_set_overload().
+ */
+ next = atomic_read_acquire(&rd->rto_loop_next);
+
+ if (rd->rto_loop == next)
break;
+
+ rd->rto_loop = next;
}
- return cpu;
+ return -1;
+}
+
+static inline bool rto_start_trylock(atomic_t *v)
+{
+ return !atomic_cmpxchg_acquire(v, 0, 1);
}
-#define RT_PUSH_IPI_EXECUTING 1
-#define RT_PUSH_IPI_RESTART 2
+static inline void rto_start_unlock(atomic_t *v)
+{
+ atomic_set_release(v, 0);
+}
static void tell_cpu_to_push(struct rq *rq)
{
- int cpu;
+ int cpu = -1;
- if (rq->rt.push_flags & RT_PUSH_IPI_EXECUTING) {
- raw_spin_lock(&rq->rt.push_lock);
- /* Make sure it's still executing */
- if (rq->rt.push_flags & RT_PUSH_IPI_EXECUTING) {
- /*
- * Tell the IPI to restart the loop as things have
- * changed since it started.
- */
- rq->rt.push_flags |= RT_PUSH_IPI_RESTART;
- raw_spin_unlock(&rq->rt.push_lock);
- return;
- }
- raw_spin_unlock(&rq->rt.push_lock);
- }
+ /* Keep the loop going if the IPI is currently active */
+ atomic_inc(&rq->rd->rto_loop_next);
- /* When here, there's no IPI going around */
-
- rq->rt.push_cpu = rq->cpu;
- cpu = find_next_push_cpu(rq);
- if (cpu >= nr_cpu_ids)
+ /* Only one CPU can initiate a loop at a time */
+ if (!rto_start_trylock(&rq->rd->rto_loop_start))
return;
- rq->rt.push_flags = RT_PUSH_IPI_EXECUTING;
+ raw_spin_lock(&rq->rd->rto_lock);
+
+ /*
+ * The rto_cpu is updated under the lock, if it has a valid cpu
+ * then the IPI is still running and will continue due to the
+ * update to loop_next, and nothing needs to be done here.
+ * Otherwise it is finishing up and an ipi needs to be sent.
+ */
+ if (rq->rd->rto_cpu < 0)
+ cpu = rto_next_cpu(rq);
+
+ raw_spin_unlock(&rq->rd->rto_lock);
- irq_work_queue_on(&rq->rt.push_work, cpu);
+ rto_start_unlock(&rq->rd->rto_loop_start);
+
+ if (cpu >= 0)
+ irq_work_queue_on(&rq->rd->rto_push_work, cpu);
}
/* Called from hardirq context */
-static void try_to_push_tasks(void *arg)
+void rto_push_irq_work_func(struct irq_work *work)
{
- struct rt_rq *rt_rq = arg;
- struct rq *rq, *src_rq;
- int this_cpu;
+ struct rq *rq;
int cpu;
- this_cpu = rt_rq->push_cpu;
+ rq = this_rq();
- /* Paranoid check */
- BUG_ON(this_cpu != smp_processor_id());
-
- rq = cpu_rq(this_cpu);
- src_rq = rq_of_rt_rq(rt_rq);
-
-again:
+ /*
+ * We do not need to grab the lock to check for has_pushable_tasks.
+ * When it gets updated, a check is made if a push is possible.
+ */
if (has_pushable_tasks(rq)) {
raw_spin_lock(&rq->lock);
- push_rt_task(rq);
+ push_rt_tasks(rq);
raw_spin_unlock(&rq->lock);
}
- /* Pass the IPI to the next rt overloaded queue */
- raw_spin_lock(&rt_rq->push_lock);
- /*
- * If the source queue changed since the IPI went out,
- * we need to restart the search from that CPU again.
- */
- if (rt_rq->push_flags & RT_PUSH_IPI_RESTART) {
- rt_rq->push_flags &= ~RT_PUSH_IPI_RESTART;
- rt_rq->push_cpu = src_rq->cpu;
- }
+ raw_spin_lock(&rq->rd->rto_lock);
- cpu = find_next_push_cpu(src_rq);
+ /* Pass the IPI to the next rt overloaded queue */
+ cpu = rto_next_cpu(rq);
- if (cpu >= nr_cpu_ids)
- rt_rq->push_flags &= ~RT_PUSH_IPI_EXECUTING;
- raw_spin_unlock(&rt_rq->push_lock);
+ raw_spin_unlock(&rq->rd->rto_lock);
- if (cpu >= nr_cpu_ids)
+ if (cpu < 0)
return;
- /*
- * It is possible that a restart caused this CPU to be
- * chosen again. Don't bother with an IPI, just see if we
- * have more to push.
- */
- if (unlikely(cpu == rq->cpu))
- goto again;
-
/* Try the next RT overloaded CPU */
- irq_work_queue_on(&rt_rq->push_work, cpu);
-}
-
-static void push_irq_work_func(struct irq_work *work)
-{
- struct rt_rq *rt_rq = container_of(work, struct rt_rq, push_work);
-
- try_to_push_tasks(rt_rq);
+ irq_work_queue_on(&rq->rd->rto_push_work, cpu);
}
#endif /* HAVE_RT_PUSH_IPI */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 7426ae4dced3..10c978fab1a3 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -534,7 +534,7 @@ static inline int rt_bandwidth_enabled(void)
}
/* RT IPI pull logic requires IRQ_WORK */
-#ifdef CONFIG_IRQ_WORK
+#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_SMP)
# define HAVE_RT_PUSH_IPI
#endif
@@ -555,12 +555,6 @@ struct rt_rq {
unsigned long rt_nr_total;
int overloaded;
struct plist_head pushable_tasks;
-#ifdef HAVE_RT_PUSH_IPI
- int push_flags;
- int push_cpu;
- struct irq_work push_work;
- raw_spinlock_t push_lock;
-#endif
#endif /* CONFIG_SMP */
int rt_queued;
@@ -653,6 +647,19 @@ struct root_domain {
struct dl_bw dl_bw;
struct cpudl cpudl;
+#ifdef HAVE_RT_PUSH_IPI
+ /*
+ * For IPI pull requests, loop across the rto_mask.
+ */
+ struct irq_work rto_push_work;
+ raw_spinlock_t rto_lock;
+ /* These are only updated and read within rto_lock */
+ int rto_loop;
+ int rto_cpu;
+ /* These atomics are updated outside of a lock */
+ atomic_t rto_loop_next;
+ atomic_t rto_loop_start;
+#endif
/*
* The "RT overload" flag: it gets set if a CPU has more than
* one runnable RT task.
@@ -669,6 +676,9 @@ struct root_domain {
extern struct root_domain def_root_domain;
+#ifdef HAVE_RT_PUSH_IPI
+extern void rto_push_irq_work_func(struct irq_work *work);
+#endif
#endif /* CONFIG_SMP */
/*
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 738f3467d169..fc86fdcce932 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -70,6 +70,10 @@ static inline void tk_normalize_xtime(struct timekeeper *tk)
tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
tk->xtime_sec++;
}
+ while (tk->tkr_raw.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_raw.shift)) {
+ tk->tkr_raw.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
+ tk->raw_sec++;
+ }
}
static inline struct timespec64 tk_xtime(struct timekeeper *tk)
@@ -277,18 +281,19 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
/* Go back from cycles -> shifted ns */
tk->xtime_interval = (u64) interval * clock->mult;
tk->xtime_remainder = ntpinterval - tk->xtime_interval;
- tk->raw_interval =
- ((u64) interval * clock->mult) >> clock->shift;
+ tk->raw_interval = interval * clock->mult;
/* if changing clocks, convert xtime_nsec shift units */
if (old_clock) {
int shift_change = clock->shift - old_clock->shift;
- if (shift_change < 0)
+ if (shift_change < 0) {
tk->tkr_mono.xtime_nsec >>= -shift_change;
- else
+ tk->tkr_raw.xtime_nsec >>= -shift_change;
+ } else {
tk->tkr_mono.xtime_nsec <<= shift_change;
+ tk->tkr_raw.xtime_nsec <<= shift_change;
+ }
}
- tk->tkr_raw.xtime_nsec = 0;
tk->tkr_mono.shift = clock->shift;
tk->tkr_raw.shift = clock->shift;
@@ -617,9 +622,6 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
nsec = (u32) tk->wall_to_monotonic.tv_nsec;
tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
- /* Update the monotonic raw base */
- tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time);
-
/*
* The sum of the nanoseconds portions of xtime and
* wall_to_monotonic can be greater/equal one second. Take
@@ -629,6 +631,11 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
if (nsec >= NSEC_PER_SEC)
seconds++;
tk->ktime_sec = seconds;
+
+ /* Update the monotonic raw base */
+ seconds = tk->raw_sec;
+ nsec = (u32)(tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift);
+ tk->tkr_raw.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
}
/* must hold timekeeper_lock */
@@ -670,7 +677,6 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
static void timekeeping_forward_now(struct timekeeper *tk)
{
cycle_t cycle_now, delta;
- s64 nsec;
cycle_now = tk_clock_read(&tk->tkr_mono);
delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
@@ -682,10 +688,13 @@ static void timekeeping_forward_now(struct timekeeper *tk)
/* If arch requires, add in get_arch_timeoffset() */
tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift;
- tk_normalize_xtime(tk);
- nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift);
- timespec64_add_ns(&tk->raw_time, nsec);
+ tk->tkr_raw.xtime_nsec += delta * tk->tkr_raw.mult;
+
+ /* If arch requires, add in get_arch_timeoffset() */
+ tk->tkr_raw.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_raw.shift;
+
+ tk_normalize_xtime(tk);
}
/**
@@ -1179,19 +1188,18 @@ int timekeeping_notify(struct clocksource *clock)
void getrawmonotonic64(struct timespec64 *ts)
{
struct timekeeper *tk = &tk_core.timekeeper;
- struct timespec64 ts64;
unsigned long seq;
s64 nsecs;
do {
seq = read_seqcount_begin(&tk_core.seq);
+ ts->tv_sec = tk->raw_sec;
nsecs = timekeeping_get_ns(&tk->tkr_raw);
- ts64 = tk->raw_time;
} while (read_seqcount_retry(&tk_core.seq, seq));
- timespec64_add_ns(&ts64, nsecs);
- *ts = ts64;
+ ts->tv_nsec = 0;
+ timespec64_add_ns(ts, nsecs);
}
EXPORT_SYMBOL(getrawmonotonic64);
@@ -1315,8 +1323,7 @@ void __init timekeeping_init(void)
tk_setup_internals(tk, clock);
tk_set_xtime(tk, &now);
- tk->raw_time.tv_sec = 0;
- tk->raw_time.tv_nsec = 0;
+ tk->raw_sec = 0;
if (boot.tv_sec == 0 && boot.tv_nsec == 0)
boot = tk_xtime(tk);
@@ -1796,7 +1803,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
unsigned int *clock_set)
{
cycle_t interval = tk->cycle_interval << shift;
- u64 raw_nsecs;
+ u64 snsec_per_sec;
/* If the offset is smaller than a shifted interval, do nothing */
if (offset < interval)
@@ -1811,14 +1818,12 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
*clock_set |= accumulate_nsecs_to_secs(tk);
/* Accumulate raw time */
- raw_nsecs = (u64)tk->raw_interval << shift;
- raw_nsecs += tk->raw_time.tv_nsec;
- if (raw_nsecs >= NSEC_PER_SEC) {
- u64 raw_secs = raw_nsecs;
- raw_nsecs = do_div(raw_secs, NSEC_PER_SEC);
- tk->raw_time.tv_sec += raw_secs;
+ tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
+ snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
+ while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
+ tk->tkr_raw.xtime_nsec -= snsec_per_sec;
+ tk->raw_sec++;
}
- tk->raw_time.tv_nsec = raw_nsecs;
/* Accumulate error between NTP and clock interval */
tk->ntp_error += tk->ntp_tick << shift;
diff --git a/lib/mpi/mpi-pow.c b/lib/mpi/mpi-pow.c
index e24388a863a7..468fb7cd1221 100644
--- a/lib/mpi/mpi-pow.c
+++ b/lib/mpi/mpi-pow.c
@@ -26,6 +26,7 @@
* however I decided to publish this code under the plain GPL.
*/
+#include <linux/sched.h>
#include <linux/string.h>
#include "mpi-internal.h"
#include "longlong.h"
@@ -256,6 +257,7 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod)
}
e <<= 1;
c--;
+ cond_resched();
}
i--;
diff --git a/net/9p/client.c b/net/9p/client.c
index f5feac4ff4ec..3ff26eb1ea20 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -749,8 +749,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
}
again:
/* Wait for the response */
- err = wait_event_interruptible(*req->wq,
- req->status >= REQ_STATUS_RCVD);
+ err = wait_event_killable(*req->wq, req->status >= REQ_STATUS_RCVD);
/*
* Make sure our req is coherent with regard to updates in other
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 6e70ddb158b4..2ddeecca5b12 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -290,8 +290,8 @@ req_retry:
if (err == -ENOSPC) {
chan->ring_bufs_avail = 0;
spin_unlock_irqrestore(&chan->lock, flags);
- err = wait_event_interruptible(*chan->vc_wq,
- chan->ring_bufs_avail);
+ err = wait_event_killable(*chan->vc_wq,
+ chan->ring_bufs_avail);
if (err == -ERESTARTSYS)
return err;
@@ -331,7 +331,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
* Other zc request to finish here
*/
if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
- err = wait_event_interruptible(vp_wq,
+ err = wait_event_killable(vp_wq,
(atomic_read(&vp_pinned) < chan->p9_max_pages));
if (err == -ERESTARTSYS)
return err;
@@ -475,8 +475,8 @@ req_retry_pinned:
if (err == -ENOSPC) {
chan->ring_bufs_avail = 0;
spin_unlock_irqrestore(&chan->lock, flags);
- err = wait_event_interruptible(*chan->vc_wq,
- chan->ring_bufs_avail);
+ err = wait_event_killable(*chan->vc_wq,
+ chan->ring_bufs_avail);
if (err == -ERESTARTSYS)
goto err_out;
@@ -493,8 +493,7 @@ req_retry_pinned:
virtqueue_kick(chan->vq);
spin_unlock_irqrestore(&chan->lock, flags);
p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n");
- err = wait_event_interruptible(*req->wq,
- req->status >= REQ_STATUS_RCVD);
+ err = wait_event_killable(*req->wq, req->status >= REQ_STATUS_RCVD);
/*
* Non kernel buffers are pinned, unpin them
*/
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index f300d1cbfa91..097a1243c16c 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -808,6 +808,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
{
struct ip_mreqn mreq;
struct net_device *dev = NULL;
+ int midx;
if (sk->sk_type == SOCK_STREAM)
goto e_inval;
@@ -852,11 +853,15 @@ static int do_ip_setsockopt(struct sock *sk, int level,
err = -EADDRNOTAVAIL;
if (!dev)
break;
+
+ midx = l3mdev_master_ifindex(dev);
+
dev_put(dev);
err = -EINVAL;
if (sk->sk_bound_dev_if &&
- mreq.imr_ifindex != sk->sk_bound_dev_if)
+ mreq.imr_ifindex != sk->sk_bound_dev_if &&
+ (!midx || midx != sk->sk_bound_dev_if))
break;
inet->mc_index = mreq.imr_ifindex;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 4449ad1f8114..a4a30d2ca66f 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -583,16 +583,24 @@ done:
if (val) {
struct net_device *dev;
+ int midx;
- if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val)
- goto e_inval;
+ rcu_read_lock();
- dev = dev_get_by_index(net, val);
+ dev = dev_get_by_index_rcu(net, val);
if (!dev) {
+ rcu_read_unlock();
retv = -ENODEV;
break;
}
- dev_put(dev);
+ midx = l3mdev_master_ifindex_rcu(dev);
+
+ rcu_read_unlock();
+
+ if (sk->sk_bound_dev_if &&
+ sk->sk_bound_dev_if != val &&
+ (!midx || midx != sk->sk_bound_dev_if))
+ goto e_inval;
}
np->mcast_oif = val;
retv = 0;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 5c710f78163e..e367ce026db3 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3380,7 +3380,11 @@ static int ip6_route_dev_notify(struct notifier_block *this,
net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
- } else if (event == NETDEV_UNREGISTER) {
+ } else if (event == NETDEV_UNREGISTER &&
+ dev->reg_state != NETREG_UNREGISTERED) {
+ /* NETDEV_UNREGISTER could be fired for multiple times by
+ * netdev_wait_allrefs(). Make sure we only call this once.
+ */
in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 67fede656ea5..424aca76a192 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -682,7 +682,6 @@ struct ieee80211_if_mesh {
const struct ieee80211_mesh_sync_ops *sync_ops;
s64 sync_offset_clockdrift_max;
spinlock_t sync_offset_lock;
- bool adjusting_tbtt;
/* mesh power save */
enum nl80211_mesh_power_mode nonpeer_pm;
int ps_peers_light_sleep;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 9063e8e736ad..9e1ded80a992 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -295,8 +295,6 @@ int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata,
/* Mesh PS mode. See IEEE802.11-2012 8.4.2.100.8 */
*pos |= ifmsh->ps_peers_deep_sleep ?
IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL : 0x00;
- *pos++ |= ifmsh->adjusting_tbtt ?
- IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING : 0x00;
*pos++ = 0x00;
return 0;
@@ -866,7 +864,6 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
ifmsh->mesh_cc_id = 0; /* Disabled */
/* register sync ops from extensible synchronization framework */
ifmsh->sync_ops = ieee80211_mesh_sync_ops_get(ifmsh->mesh_sp_id);
- ifmsh->adjusting_tbtt = false;
ifmsh->sync_offset_clockdrift_max = 0;
set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags);
ieee80211_mesh_root_setup(ifmsh);
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index bd3d55eb21d4..9f02e54ad2a5 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -495,12 +495,14 @@ mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *addr,
/* Userspace handles station allocation */
if (sdata->u.mesh.user_mpm ||
- sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED)
- cfg80211_notify_new_peer_candidate(sdata->dev, addr,
- elems->ie_start,
- elems->total_len,
- GFP_KERNEL);
- else
+ sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED) {
+ if (mesh_peer_accepts_plinks(elems) &&
+ mesh_plink_availables(sdata))
+ cfg80211_notify_new_peer_candidate(sdata->dev, addr,
+ elems->ie_start,
+ elems->total_len,
+ GFP_KERNEL);
+ } else
sta = __mesh_sta_info_alloc(sdata, addr);
return sta;
diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c
index 64bc22ad9496..16ed43fe4841 100644
--- a/net/mac80211/mesh_sync.c
+++ b/net/mac80211/mesh_sync.c
@@ -119,7 +119,6 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
*/
if (elems->mesh_config && mesh_peer_tbtt_adjusting(elems)) {
- clear_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN);
msync_dbg(sdata, "STA %pM : is adjusting TBTT\n",
sta->sta.addr);
goto no_sync;
@@ -168,11 +167,9 @@ static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata,
struct beacon_data *beacon)
{
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
- u8 cap;
WARN_ON(ifmsh->mesh_sp_id != IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET);
WARN_ON(!rcu_read_lock_held());
- cap = beacon->meshconf->meshconf_cap;
spin_lock_bh(&ifmsh->sync_offset_lock);
@@ -186,21 +183,13 @@ static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata,
"TBTT : kicking off TBTT adjustment with clockdrift_max=%lld\n",
ifmsh->sync_offset_clockdrift_max);
set_bit(MESH_WORK_DRIFT_ADJUST, &ifmsh->wrkq_flags);
-
- ifmsh->adjusting_tbtt = true;
} else {
msync_dbg(sdata,
"TBTT : max clockdrift=%lld; too small to adjust\n",
(long long)ifmsh->sync_offset_clockdrift_max);
ifmsh->sync_offset_clockdrift_max = 0;
-
- ifmsh->adjusting_tbtt = false;
}
spin_unlock_bh(&ifmsh->sync_offset_lock);
-
- beacon->meshconf->meshconf_cap = ifmsh->adjusting_tbtt ?
- IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING | cap :
- ~IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING & cap;
}
static const struct sync_method sync_methods[] = {
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 2cb429d34c03..120e9ae04db3 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1996,7 +1996,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
* is called on error from nf_tables_newrule().
*/
expr = nft_expr_first(rule);
- while (expr->ops && expr != nft_expr_last(rule)) {
+ while (expr != nft_expr_last(rule) && expr->ops) {
nf_tables_expr_destroy(ctx, expr);
expr = nft_expr_next(expr);
}
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index 61d216eb7917..5d189c11d208 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -37,7 +37,7 @@ static void nft_queue_eval(const struct nft_expr *expr,
if (priv->queues_total > 1) {
if (priv->flags & NFT_QUEUE_FLAG_CPU_FANOUT) {
- int cpu = smp_processor_id();
+ int cpu = raw_smp_processor_id();
queue = priv->queuenum + cpu % priv->queues_total;
} else {
diff --git a/net/nfc/core.c b/net/nfc/core.c
index c5a2c7e733b3..1471e4b0aa2c 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -1093,7 +1093,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
err_free_dev:
kfree(dev);
- return ERR_PTR(rc);
+ return NULL;
}
EXPORT_SYMBOL(nfc_allocate_device);
diff --git a/net/rds/send.c b/net/rds/send.c
index 6815f03324d7..1a3c6acdd3f8 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -959,6 +959,11 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
ret = rds_cmsg_rdma_map(rs, rm, cmsg);
if (!ret)
*allocated_mr = 1;
+ else if (ret == -ENODEV)
+ /* Accommodate the get_mr() case which can fail
+ * if connection isn't established yet.
+ */
+ ret = -EAGAIN;
break;
case RDS_CMSG_ATOMIC_CSWP:
case RDS_CMSG_ATOMIC_FADD:
@@ -1072,8 +1077,12 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
/* Parse any control messages the user may have included. */
ret = rds_cmsg_send(rs, rm, msg, &allocated_mr);
- if (ret)
+ if (ret) {
+ /* Trigger connection so that its ready for the next retry */
+ if (ret == -EAGAIN)
+ rds_conn_connect_if_down(conn);
goto out;
+ }
if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) {
printk_ratelimited(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n",
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 272edd7748a0..7f0f689b8d2b 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4457,10 +4457,6 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
if (!net_eq(current->nsproxy->net_ns, sock_net(sk)))
return -EINVAL;
- /* Do not peel off from one netns to another one. */
- if (!net_eq(current->nsproxy->net_ns, sock_net(sk)))
- return -EINVAL;
-
if (!asoc)
return -EINVAL;
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 9b5bd6d142dc..60324f7c72bd 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1209,10 +1209,14 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
if (signal_pending(current)) {
err = sock_intr_errno(timeout);
- goto out_wait_error;
+ sk->sk_state = SS_UNCONNECTED;
+ sock->state = SS_UNCONNECTED;
+ goto out_wait;
} else if (timeout == 0) {
err = -ETIMEDOUT;
- goto out_wait_error;
+ sk->sk_state = SS_UNCONNECTED;
+ sock->state = SS_UNCONNECTED;
+ goto out_wait;
}
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
@@ -1220,20 +1224,17 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
if (sk->sk_err) {
err = -sk->sk_err;
- goto out_wait_error;
- } else
+ sk->sk_state = SS_UNCONNECTED;
+ sock->state = SS_UNCONNECTED;
+ } else {
err = 0;
+ }
out_wait:
finish_wait(sk_sleep(sk), &wait);
out:
release_sock(sk);
return err;
-
-out_wait_error:
- sk->sk_state = SS_UNCONNECTED;
- sock->state = SS_UNCONNECTED;
- goto out_wait;
}
static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
@@ -1270,18 +1271,20 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
listener->sk_err == 0) {
release_sock(listener);
timeout = schedule_timeout(timeout);
+ finish_wait(sk_sleep(listener), &wait);
lock_sock(listener);
if (signal_pending(current)) {
err = sock_intr_errno(timeout);
- goto out_wait;
+ goto out;
} else if (timeout == 0) {
err = -EAGAIN;
- goto out_wait;
+ goto out;
}
prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
}
+ finish_wait(sk_sleep(listener), &wait);
if (listener->sk_err)
err = -listener->sk_err;
@@ -1301,19 +1304,15 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags)
*/
if (err) {
vconnected->rejected = true;
- release_sock(connected);
- sock_put(connected);
- goto out_wait;
+ } else {
+ newsock->state = SS_CONNECTED;
+ sock_graft(connected, newsock);
}
- newsock->state = SS_CONNECTED;
- sock_graft(connected, newsock);
release_sock(connected);
sock_put(connected);
}
-out_wait:
- finish_wait(sk_sleep(listener), &wait);
out:
release_sock(listener);
return err;
@@ -1513,8 +1512,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
long timeout;
int err;
struct vsock_transport_send_notify_data send_data;
-
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
sk = sock->sk;
vsk = vsock_sk(sk);
@@ -1557,11 +1555,10 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
if (err < 0)
goto out;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-
while (total_written < len) {
ssize_t written;
+ add_wait_queue(sk_sleep(sk), &wait);
while (vsock_stream_has_space(vsk) == 0 &&
sk->sk_err == 0 &&
!(sk->sk_shutdown & SEND_SHUTDOWN) &&
@@ -1570,27 +1567,30 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
/* Don't wait for non-blocking sockets. */
if (timeout == 0) {
err = -EAGAIN;
- goto out_wait;
+ remove_wait_queue(sk_sleep(sk), &wait);
+ goto out_err;
}
err = transport->notify_send_pre_block(vsk, &send_data);
- if (err < 0)
- goto out_wait;
+ if (err < 0) {
+ remove_wait_queue(sk_sleep(sk), &wait);
+ goto out_err;
+ }
release_sock(sk);
- timeout = schedule_timeout(timeout);
+ timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
lock_sock(sk);
if (signal_pending(current)) {
err = sock_intr_errno(timeout);
- goto out_wait;
+ remove_wait_queue(sk_sleep(sk), &wait);
+ goto out_err;
} else if (timeout == 0) {
err = -EAGAIN;
- goto out_wait;
+ remove_wait_queue(sk_sleep(sk), &wait);
+ goto out_err;
}
-
- prepare_to_wait(sk_sleep(sk), &wait,
- TASK_INTERRUPTIBLE);
}
+ remove_wait_queue(sk_sleep(sk), &wait);
/* These checks occur both as part of and after the loop
* conditional since we need to check before and after
@@ -1598,16 +1598,16 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
*/
if (sk->sk_err) {
err = -sk->sk_err;
- goto out_wait;
+ goto out_err;
} else if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
(vsk->peer_shutdown & RCV_SHUTDOWN)) {
err = -EPIPE;
- goto out_wait;
+ goto out_err;
}
err = transport->notify_send_pre_enqueue(vsk, &send_data);
if (err < 0)
- goto out_wait;
+ goto out_err;
/* Note that enqueue will only write as many bytes as are free
* in the produce queue, so we don't need to ensure len is
@@ -1620,7 +1620,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
len - total_written);
if (written < 0) {
err = -ENOMEM;
- goto out_wait;
+ goto out_err;
}
total_written += written;
@@ -1628,14 +1628,13 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
err = transport->notify_send_post_enqueue(
vsk, written, &send_data);
if (err < 0)
- goto out_wait;
+ goto out_err;
}
-out_wait:
+out_err:
if (total_written > 0)
err = total_written;
- finish_wait(sk_sleep(sk), &wait);
out:
release_sock(sk);
return err;
@@ -1716,21 +1715,61 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (err < 0)
goto out;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
while (1) {
- s64 ready = vsock_stream_has_data(vsk);
+ s64 ready;
- if (ready < 0) {
- /* Invalid queue pair content. XXX This should be
- * changed to a connection reset in a later change.
- */
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ ready = vsock_stream_has_data(vsk);
- err = -ENOMEM;
- goto out_wait;
- } else if (ready > 0) {
+ if (ready == 0) {
+ if (sk->sk_err != 0 ||
+ (sk->sk_shutdown & RCV_SHUTDOWN) ||
+ (vsk->peer_shutdown & SEND_SHUTDOWN)) {
+ finish_wait(sk_sleep(sk), &wait);
+ break;
+ }
+ /* Don't wait for non-blocking sockets. */
+ if (timeout == 0) {
+ err = -EAGAIN;
+ finish_wait(sk_sleep(sk), &wait);
+ break;
+ }
+
+ err = transport->notify_recv_pre_block(
+ vsk, target, &recv_data);
+ if (err < 0) {
+ finish_wait(sk_sleep(sk), &wait);
+ break;
+ }
+ release_sock(sk);
+ timeout = schedule_timeout(timeout);
+ lock_sock(sk);
+
+ if (signal_pending(current)) {
+ err = sock_intr_errno(timeout);
+ finish_wait(sk_sleep(sk), &wait);
+ break;
+ } else if (timeout == 0) {
+ err = -EAGAIN;
+ finish_wait(sk_sleep(sk), &wait);
+ break;
+ }
+ } else {
ssize_t read;
+ finish_wait(sk_sleep(sk), &wait);
+
+ if (ready < 0) {
+ /* Invalid queue pair content. XXX This should
+ * be changed to a connection reset in a later
+ * change.
+ */
+
+ err = -ENOMEM;
+ goto out;
+ }
+
err = transport->notify_recv_pre_dequeue(
vsk, target, &recv_data);
if (err < 0)
@@ -1750,42 +1789,12 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
vsk, target, read,
!(flags & MSG_PEEK), &recv_data);
if (err < 0)
- goto out_wait;
+ goto out;
if (read >= target || flags & MSG_PEEK)
break;
target -= read;
- } else {
- if (sk->sk_err != 0 || (sk->sk_shutdown & RCV_SHUTDOWN)
- || (vsk->peer_shutdown & SEND_SHUTDOWN)) {
- break;
- }
- /* Don't wait for non-blocking sockets. */
- if (timeout == 0) {
- err = -EAGAIN;
- break;
- }
-
- err = transport->notify_recv_pre_block(
- vsk, target, &recv_data);
- if (err < 0)
- break;
-
- release_sock(sk);
- timeout = schedule_timeout(timeout);
- lock_sock(sk);
-
- if (signal_pending(current)) {
- err = sock_intr_errno(timeout);
- break;
- } else if (timeout == 0) {
- err = -EAGAIN;
- break;
- }
-
- prepare_to_wait(sk_sleep(sk), &wait,
- TASK_INTERRUPTIBLE);
}
}
@@ -1797,8 +1806,6 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (copied > 0)
err = copied;
-out_wait:
- finish_wait(sk_sleep(sk), &wait);
out:
release_sock(sk);
return err;
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index 2530669e2f94..cdbe90b37ae7 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -267,8 +267,10 @@ static void update_audio_tstamp(struct snd_pcm_substream *substream,
runtime->rate);
*audio_tstamp = ns_to_timespec(audio_nsecs);
}
- runtime->status->audio_tstamp = *audio_tstamp;
- runtime->status->tstamp = *curr_tstamp;
+ if (!timespec_equal(&runtime->status->audio_tstamp, audio_tstamp)) {
+ runtime->status->audio_tstamp = *audio_tstamp;
+ runtime->status->tstamp = *curr_tstamp;
+ }
/*
* re-take a driver timestamp to let apps detect if the reference tstamp
diff --git a/sound/core/timer_compat.c b/sound/core/timer_compat.c
index 0b4b028e8e98..de9155eed727 100644
--- a/sound/core/timer_compat.c
+++ b/sound/core/timer_compat.c
@@ -40,11 +40,11 @@ static int snd_timer_user_info_compat(struct file *file,
struct snd_timer *t;
tu = file->private_data;
- if (snd_BUG_ON(!tu->timeri))
- return -ENXIO;
+ if (!tu->timeri)
+ return -EBADFD;
t = tu->timeri->timer;
- if (snd_BUG_ON(!t))
- return -ENXIO;
+ if (!t)
+ return -EBADFD;
memset(&info, 0, sizeof(info));
info.card = t->card ? t->card->number : -1;
if (t->hw.flags & SNDRV_TIMER_HW_SLAVE)
@@ -73,8 +73,8 @@ static int snd_timer_user_status_compat(struct file *file,
struct snd_timer_status32 status;
tu = file->private_data;
- if (snd_BUG_ON(!tu->timeri))
- return -ENXIO;
+ if (!tu->timeri)
+ return -EBADFD;
memset(&status, 0, sizeof(status));
status.tstamp.tv_sec = tu->tstamp.tv_sec;
status.tstamp.tv_nsec = tu->tstamp.tv_nsec;
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index e6de496bffbe..e2e08fc73b50 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2316,6 +2316,9 @@ static const struct pci_device_id azx_ids[] = {
/* AMD Hudson */
{ PCI_DEVICE(0x1022, 0x780d),
.driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB },
+ /* AMD Raven */
+ { PCI_DEVICE(0x1022, 0x15e3),
+ .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB },
/* ATI HDMI */
{ PCI_DEVICE(0x1002, 0x0002),
.driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index af0962307b7f..e5730a7d0480 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -4404,7 +4404,7 @@ static void alc_no_shutup(struct hda_codec *codec)
static void alc_fixup_no_shutup(struct hda_codec *codec,
const struct hda_fixup *fix, int action)
{
- if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+ if (action == HDA_FIXUP_ACT_PROBE) {
struct alc_spec *spec = codec->spec;
spec->shutup = alc_no_shutup;
}
@@ -6254,7 +6254,7 @@ static int patch_alc269(struct hda_codec *codec)
case 0x10ec0703:
spec->codec_variant = ALC269_TYPE_ALC700;
spec->gen.mixer_nid = 0; /* ALC700 does not have any loopback mixer path */
- alc_update_coef_idx(codec, 0x4a, 0, 1 << 15); /* Combo jack auto trigger control */
+ alc_update_coef_idx(codec, 0x4a, 1 << 15, 0); /* Combo jack auto trigger control */
break;
}
diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index 0bb415a28723..f1f990b325ad 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -1060,7 +1060,7 @@ static int wm_adsp_load(struct wm_adsp *dsp)
const struct wmfw_region *region;
const struct wm_adsp_region *mem;
const char *region_name;
- char *file, *text;
+ char *file, *text = NULL;
struct wm_adsp_buf *buf;
unsigned int reg;
int regions = 0;
@@ -1221,10 +1221,21 @@ static int wm_adsp_load(struct wm_adsp *dsp)
regions, le32_to_cpu(region->len), offset,
region_name);
+ if ((pos + le32_to_cpu(region->len) + sizeof(*region)) >
+ firmware->size) {
+ adsp_err(dsp,
+ "%s.%d: %s region len %d bytes exceeds file length %zu\n",
+ file, regions, region_name,
+ le32_to_cpu(region->len), firmware->size);
+ ret = -EINVAL;
+ goto out_fw;
+ }
+
if (text) {
memcpy(text, region->data, le32_to_cpu(region->len));
adsp_info(dsp, "%s: %s\n", file, text);
kfree(text);
+ text = NULL;
}
if (reg) {
@@ -1269,6 +1280,7 @@ out_fw:
regmap_async_complete(regmap);
wm_adsp_buf_free(&buf_list);
release_firmware(firmware);
+ kfree(text);
out:
kfree(file);
@@ -1730,6 +1742,17 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp)
}
if (reg) {
+ if ((pos + le32_to_cpu(blk->len) + sizeof(*blk)) >
+ firmware->size) {
+ adsp_err(dsp,
+ "%s.%d: %s region len %d bytes exceeds file length %zu\n",
+ file, blocks, region_name,
+ le32_to_cpu(blk->len),
+ firmware->size);
+ ret = -EINVAL;
+ goto out_fw;
+ }
+
buf = wm_adsp_buf_alloc(blk->data,
le32_to_cpu(blk->len),
&buf_list);
diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c
index 362446c36c9e..e00dfbec22c5 100644
--- a/sound/soc/sh/rcar/core.c
+++ b/sound/soc/sh/rcar/core.c
@@ -1049,10 +1049,8 @@ static int __rsnd_kctrl_new(struct rsnd_mod *mod,
return -ENOMEM;
ret = snd_ctl_add(card, kctrl);
- if (ret < 0) {
- snd_ctl_free_one(kctrl);
+ if (ret < 0)
return ret;
- }
cfg->update = update;
cfg->card = card;
diff --git a/sound/usb/clock.c b/sound/usb/clock.c
index 2cd09ceba5e9..2899797610e8 100644
--- a/sound/usb/clock.c
+++ b/sound/usb/clock.c
@@ -43,7 +43,7 @@ static struct uac_clock_source_descriptor *
while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra,
ctrl_iface->extralen,
cs, UAC2_CLOCK_SOURCE))) {
- if (cs->bClockID == clock_id)
+ if (cs->bLength >= sizeof(*cs) && cs->bClockID == clock_id)
return cs;
}
@@ -59,8 +59,11 @@ static struct uac_clock_selector_descriptor *
while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra,
ctrl_iface->extralen,
cs, UAC2_CLOCK_SELECTOR))) {
- if (cs->bClockID == clock_id)
+ if (cs->bLength >= sizeof(*cs) && cs->bClockID == clock_id) {
+ if (cs->bLength < 5 + cs->bNrInPins)
+ return NULL;
return cs;
+ }
}
return NULL;
@@ -75,7 +78,7 @@ static struct uac_clock_multiplier_descriptor *
while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra,
ctrl_iface->extralen,
cs, UAC2_CLOCK_MULTIPLIER))) {
- if (cs->bClockID == clock_id)
+ if (cs->bLength >= sizeof(*cs) && cs->bClockID == clock_id)
return cs;
}
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index 413824566102..9d864648c901 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -1541,6 +1541,12 @@ static int parse_audio_feature_unit(struct mixer_build *state, int unitid,
__u8 *bmaControls;
if (state->mixer->protocol == UAC_VERSION_1) {
+ if (hdr->bLength < 7) {
+ usb_audio_err(state->chip,
+ "unit %u: invalid UAC_FEATURE_UNIT descriptor\n",
+ unitid);
+ return -EINVAL;
+ }
csize = hdr->bControlSize;
if (!csize) {
usb_audio_dbg(state->chip,
@@ -1558,6 +1564,12 @@ static int parse_audio_feature_unit(struct mixer_build *state, int unitid,
}
} else if (state->mixer->protocol == UAC_VERSION_2) {
struct uac2_feature_unit_descriptor *ftr = _ftr;
+ if (hdr->bLength < 6) {
+ usb_audio_err(state->chip,
+ "unit %u: invalid UAC_FEATURE_UNIT descriptor\n",
+ unitid);
+ return -EINVAL;
+ }
csize = 4;
channels = (hdr->bLength - 6) / 4 - 1;
bmaControls = ftr->bmaControls;
@@ -2277,7 +2289,8 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid,
const struct usbmix_name_map *map;
char **namelist;
- if (!desc->bNrInPins || desc->bLength < 5 + desc->bNrInPins) {
+ if (desc->bLength < 5 || !desc->bNrInPins ||
+ desc->bLength < 5 + desc->bNrInPins) {
usb_audio_err(state->chip,
"invalid SELECTOR UNIT descriptor %d\n", unitid);
return -EINVAL;