author    Blagovest Kolenichev <bkolenichev@codeaurora.org>    2017-04-25 07:51:26 -0700
committer Blagovest Kolenichev <bkolenichev@codeaurora.org>    2017-04-25 12:51:55 -0700
commit    d877e94313437ef265d49be0b810bb36f6a7b05e (patch)
tree      8559b8fa9d28f6c9dc5f3c4ea27a1a7f6299d874 /fs
parent    b47135257c42a70a26247b05ff0ab42b6ad8fc8a (diff)
parent    b834e929774513db929303125486c2c34bedba73 (diff)
Merge branch 'android-4.4@b834e92' into branch 'msm-4.4'
* refs/heads/tmp-b834e92:
  Revert "USB: gadget: u_ether: Fix data stall issue in RNDIS tethering mode"
  Linux 4.4.63
  MIPS: fix Select HAVE_IRQ_EXIT_ON_IRQ_STACK patch.
  sctp: deny peeloff operation on asocs with threads sleeping on it
  net: ipv6: check route protocol when deleting routes
  tty/serial: atmel: RS485 half duplex w/DMA: enable RX after TX is done
  SUNRPC: fix refcounting problems with auth_gss messages.
  ibmveth: calculate gso_segs for large packets
  catc: Use heap buffer for memory size test
  catc: Combine failure cleanup code in catc_probe()
  rtl8150: Use heap buffers for all register access
  pegasus: Use heap buffers for all register access
  virtio-console: avoid DMA from stack
  dvb-usb-firmware: don't do DMA on stack
  dvb-usb: don't use stack for firmware load
  mm: Tighten x86 /dev/mem with zeroing reads
  rtc: tegra: Implement clock handling
  platform/x86: acer-wmi: setup accelerometer when machine has appropriate notify event
  ext4: fix inode checksum calculation problem if i_extra_size is small
  dvb-usb-v2: avoid use-after-free
  ath9k: fix NULL pointer dereference
  crypto: ahash - Fix EINPROGRESS notification callback
  powerpc: Disable HFSCR[TM] if TM is not supported
  zram: do not use copy_page with non-page aligned address
  kvm: fix page struct leak in handle_vmon
  Revert "MIPS: Lantiq: Fix cascaded IRQ setup"
  char: lack of bool string made CONFIG_DEVPORT always on
  char: Drop bogus dependency of DEVPORT on !M68K
  ftrace: Fix removing of second function probe
  irqchip/irq-imx-gpcv2: Fix spinlock initialization
  libnvdimm: fix reconfig_mutex, mmap_sem, and jbd2_handle lockdep splat
  xen, fbfront: fix connecting to backend
  scsi: sd: Fix capacity calculation with 32-bit sector_t
  scsi: sd: Consider max_xfer_blocks if opt_xfer_blocks is unusable
  scsi: sr: Sanity check returned mode data
  iscsi-target: Drop work-around for legacy GlobalSAN initiator
  iscsi-target: Fix TMR reference leak during session shutdown
  acpi, nfit, libnvdimm: fix interleave set cookie calculation (64-bit comparison)
  x86/vdso: Plug race between mapping and ELF header setup
  x86/vdso: Ensure vdso32_enabled gets set to valid values only
  perf/x86: Avoid exposing wrong/stale data in intel_pmu_lbr_read_32()
  Input: xpad - add support for Razer Wildcat gamepad
  CIFS: store results of cifs_reopen_file to avoid infinite wait
  drm/nouveau/mmu/nv4a: use nv04 mmu rather than the nv44 one
  drm/nouveau/mpeg: mthd returns true on success now
  thp: fix MADV_DONTNEED vs clear soft dirty race
  cgroup, kthread: close race window where new kthreads can be migrated to non-root cgroups
  ANDROID: uid_sys_stats: reduce update_io_stats overhead
  UPSTREAM: char: lack of bool string made CONFIG_DEVPORT always on
  UPSTREAM: char: Drop bogus dependency of DEVPORT on !M68K
  Revert "Android: sdcardfs: Don't do d_add for lower fs"
  ANDROID: usb: gadget: fix MTP enumeration issue under super speed mode
  Android: sdcardfs: Don't complain in fixup_lower_ownership
  Android: sdcardfs: Don't do d_add for lower fs
  ANDROID: sdcardfs: ->iget fixes
  Android: sdcardfs: Change cache GID value
  BACKPORT: [UPSTREAM] ext2: convert to mbcache2
  BACKPORT: [UPSTREAM] ext4: convert to mbcache2
  BACKPORT: [UPSTREAM] mbcache2: reimplement mbcache
  Linux 4.4.62
  ibmveth: set correct gso_size and gso_type
  net/mlx4_core: Fix when to save some qp context flags for dynamic VST to VGT transitions
  net/mlx4_core: Fix racy CQ (Completion Queue) free
  net/mlx4_en: Fix bad WQE issue
  usb: hub: Wait for connection to be reestablished after port reset
  blk-mq: Avoid memory reclaim when remapping queues
  net/packet: fix overflow in check for priv area size
  crypto: caam - fix RNG deinstantiation error checking
  MIPS: IRQ Stack: Fix erroneous jal to plat_irq_dispatch
  MIPS: Select HAVE_IRQ_EXIT_ON_IRQ_STACK
  MIPS: Switch to the irq_stack in interrupts
  MIPS: Only change $28 to thread_info if coming from user mode
  MIPS: Stack unwinding while on IRQ stack
  MIPS: Introduce irq_stack
  mtd: bcm47xxpart: fix parsing first block after aligned TRX
  usb: dwc3: gadget: delay unmap of bounced requests
  drm/i915: Stop using RP_DOWN_EI on Baytrail
  drm/i915: Avoid tweaking evaluation thresholds on Baytrail v3
  UPSTREAM: net: socket: Make unnecessarily global sockfs_setattr() static
  UPSTREAM: net: ipv4: Don't crash if passing a null sk to ip_do_redirect.
  UPSTREAM: net/packet: fix overflow in check for priv area size
  Linux 4.4.61
  mm/mempolicy.c: fix error handling in set_mempolicy and mbind.
  MIPS: Flush wrong invalid FTLB entry for huge page
  MIPS: Lantiq: fix missing xbar kernel panic
  MIPS: End spinlocks with .insn
  MIPS: ralink: Fix typos in rt3883 pinctrl
  MIPS: Force o32 fp64 support on 32bit MIPS64r6 kernels
  s390/uaccess: get_user() should zero on failure (again)
  s390/decompressor: fix initrd corruption caused by bss clear
  nios2: reserve boot memory for device tree
  powerpc: Don't try to fix up misaligned load-with-reservation instructions
  powerpc/mm: Add missing global TLB invalidate if cxl is active
  metag/usercopy: Add missing fixups
  metag/usercopy: Fix src fixup in from user rapf loops
  metag/usercopy: Set flags before ADDZ
  metag/usercopy: Zero rest of buffer from copy_from_user
  metag/usercopy: Add early abort to copy_to_user
  metag/usercopy: Fix alignment error checking
  metag/usercopy: Drop unused macros
  ring-buffer: Fix return value check in test_ringbuffer()
  ptrace: fix PTRACE_LISTEN race corrupting task->state
  Reset TreeId to zero on SMB2 TREE_CONNECT
  iio: bmg160: reset chip when probing
  arm/arm64: KVM: Take mmap_sem in kvm_arch_prepare_memory_region
  arm/arm64: KVM: Take mmap_sem in stage2_unmap_vm
  staging: android: ashmem: lseek failed due to no FMODE_LSEEK.
  sysfs: be careful of error returns from ops->show()
  drm/vmwgfx: fix integer overflow in vmw_surface_define_ioctl()
  drm/vmwgfx: Remove getparam error message
  drm/ttm, drm/vmwgfx: Relax permission checking when opening surfaces
  drm/vmwgfx: avoid calling vzalloc with a 0 size in vmw_get_cap_3d_ioctl()
  drm/vmwgfx: NULL pointer dereference in vmw_surface_define_ioctl()
  drm/vmwgfx: Type-check lookups of fence objects
  Revert "Revert "Revert "CHROMIUM: android: binder: Fix potential scheduling-while-atomic"""
  ANDROID: sdcardfs: Directly pass lower file for mmap
  UPSTREAM: checkpatch: special audit for revert commit line
  UPSTREAM: PM / sleep: make PM notifiers called symmetrically
  Revert "Revert "CHROMIUM: android: binder: Fix potential scheduling-while-atomic""
  Linux 4.4.60
  padata: avoid race in reordering
  blk: Ensure users for current->bio_list can see the full list.
  blk: improve order of bio handling in generic_make_request()
  power: reset: at91-poweroff: timely shutdown LPDDR memories
  KVM: kvm_io_bus_unregister_dev() should never fail
  rtc: s35390a: improve irq handling
  rtc: s35390a: implement reset routine as suggested by the reference
  rtc: s35390a: make sure all members in the output are set
  rtc: s35390a: fix reading out alarm
  MIPS: Lantiq: Fix cascaded IRQ setup
  mm, hugetlb: use pte_present() instead of pmd_present() in follow_huge_pmd()
  drm/radeon: Override fpfn for all VRAM placements in radeon_evict_flags
  KVM: x86: clear bus pointer when destroyed
  USB: fix linked-list corruption in rh_call_control()
  tty/serial: atmel: fix TX path in atmel_console_write()
  tty/serial: atmel: fix race condition (TX+DMA)
  ACPI: Do not create a platform_device for IOAPIC/IOxAPIC
  ACPI: Fix incompatibility with mcount-based function graph tracing
  ASoC: atmel-classd: fix audio clock rate
  ALSA: hda - fix a problem for lineout on a Dell AIO machine
  ALSA: seq: Fix race during FIFO resize
  scsi: libsas: fix ata xfer length
  scsi: sg: check length passed to SG_NEXT_CMD_LEN
  scsi: mpt3sas: fix hang on ata passthrough commands
  xen/setup: Don't relocate p2m over existing one
  libceph: force GFP_NOIO for socket allocations
  Linux 4.4.59
  sched/rt: Add a missing rescheduling point
  fscrypt: remove broken support for detecting keyring key revocation
  metag/ptrace: Reject partial NT_METAG_RPIPE writes
  metag/ptrace: Provide default TXSTATUS for short NT_PRSTATUS
  metag/ptrace: Preserve previous registers for short regset write
  sparc/ptrace: Preserve previous registers for short regset write
  mips/ptrace: Preserve previous registers for short regset write
  h8300/ptrace: Fix incorrect register transfer count
  c6x/ptrace: Remove useless PTRACE_SETREGSET implementation
  pinctrl: qcom: Don't clear status bit on irq_unmask
  virtio_balloon: init 1st buffer in stats vq
  xfrm_user: validate XFRM_MSG_NEWAE incoming ESN size harder
  xfrm_user: validate XFRM_MSG_NEWAE XFRMA_REPLAY_ESN_VAL replay_window
  xfrm: policy: init locks early

Conflicts:
	drivers/scsi/sd.c
	drivers/usb/gadget/function/f_mtp.c
	drivers/usb/gadget/function/u_ether.c

Change-Id: I80501cf02d04204f8c0f3a7f5a036eaa4d54546e
Signed-off-by: Blagovest Kolenichev <bkolenichev@codeaurora.org>
Diffstat (limited to 'fs')
-rw-r--r--  fs/Makefile                |   2
-rw-r--r--  fs/cifs/file.c             |   6
-rw-r--r--  fs/cifs/smb2pdu.c          |   4
-rw-r--r--  fs/ext2/ext2.h             |   3
-rw-r--r--  fs/ext2/super.c            |  25
-rw-r--r--  fs/ext2/xattr.c            | 143
-rw-r--r--  fs/ext2/xattr.h            |  21
-rw-r--r--  fs/ext4/ext4.h             |   2
-rw-r--r--  fs/ext4/inode.c            |   5
-rw-r--r--  fs/ext4/super.c            |   7
-rw-r--r--  fs/ext4/xattr.c            | 136
-rw-r--r--  fs/ext4/xattr.h            |   5
-rw-r--r--  fs/mbcache2.c              | 359
-rw-r--r--  fs/proc/task_mmu.c         |   9
-rw-r--r--  fs/sdcardfs/derived_perm.c |   4
-rw-r--r--  fs/sdcardfs/file.c         |   3
-rw-r--r--  fs/sdcardfs/lookup.c       |  17
-rw-r--r--  fs/sdcardfs/mmap.c         |  57
-rw-r--r--  fs/sdcardfs/multiuser.h    |   6
-rw-r--r--  fs/sysfs/file.c            |   6
20 files changed, 587 insertions, 233 deletions
diff --git a/fs/Makefile b/fs/Makefile
index 3b54070cd629..dee237540bc0 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -41,7 +41,7 @@ obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o
obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o
obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat.o
-obj-$(CONFIG_FS_MBCACHE) += mbcache.o
+obj-$(CONFIG_FS_MBCACHE) += mbcache.o mbcache2.o
obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
obj-$(CONFIG_NFS_COMMON) += nfs_common/
obj-$(CONFIG_COREDUMP) += coredump.o
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 72f270d4bd17..a0c0a49b6620 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2545,7 +2545,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
wdata->credits = credits;
if (!wdata->cfile->invalidHandle ||
- !cifs_reopen_file(wdata->cfile, false))
+ !(rc = cifs_reopen_file(wdata->cfile, false)))
rc = server->ops->async_writev(wdata,
cifs_uncached_writedata_release);
if (rc) {
@@ -2958,7 +2958,7 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
rdata->credits = credits;
if (!rdata->cfile->invalidHandle ||
- !cifs_reopen_file(rdata->cfile, true))
+ !(rc = cifs_reopen_file(rdata->cfile, true)))
rc = server->ops->async_readv(rdata);
error:
if (rc) {
@@ -3544,7 +3544,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
}
if (!rdata->cfile->invalidHandle ||
- !cifs_reopen_file(rdata->cfile, true))
+ !(rc = cifs_reopen_file(rdata->cfile, true)))
rc = server->ops->async_readv(rdata);
if (rc) {
add_credits_and_wake_if(server, rdata->credits, 0);
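The three hunks above all make the same fix: the old code tested cifs_reopen_file() but discarded its return value, so when the reopen failed, rc stayed 0 and the caller waited forever for an async read/write that was never submitted. A minimal userspace sketch of the pattern (reopen_file() and submit_async() are hypothetical stand-ins, not CIFS code):

#include <stdio.h>

static int reopen_file(int fail) { return fail ? -11 : 0; } /* -EAGAIN */
static int submit_async(void) { return 0; }

static int old_path(int invalid_handle, int reopen_fails)
{
	int rc = 0;

	/* Old code: the result of reopen_file() is tested, then thrown
	 * away. On failure rc stays 0 and the caller waits forever for
	 * I/O that was never submitted. */
	if (!invalid_handle || !reopen_file(reopen_fails))
		rc = submit_async();
	return rc;
}

static int new_path(int invalid_handle, int reopen_fails)
{
	int rc = 0;

	/* New code: the reopen result is captured in rc, so a failed
	 * reopen reaches the error path instead of being dropped. */
	if (!invalid_handle || !(rc = reopen_file(reopen_fails)))
		rc = submit_async();
	return rc;
}

int main(void)
{
	/* prints "old: 0 new: -11" -- the old code loses the error */
	printf("old: %d new: %d\n", old_path(1, 1), new_path(1, 1));
	return 0;
}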
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 2fa754c5fd62..6cb5c4b30e78 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -952,6 +952,10 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
return -EINVAL;
}
+ /* SMB2 TREE_CONNECT request must be called with TreeId == 0 */
+ if (tcon)
+ tcon->tid = 0;
+
rc = small_smb2_init(SMB2_TREE_CONNECT, tcon, (void **) &req);
if (rc) {
kfree(unc_path);
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 4c69c94cafd8..f98ce7e60a0f 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -61,6 +61,8 @@ struct ext2_block_alloc_info {
#define rsv_start rsv_window._rsv_start
#define rsv_end rsv_window._rsv_end
+struct mb2_cache;
+
/*
* second extended-fs super-block data in memory
*/
@@ -111,6 +113,7 @@ struct ext2_sb_info {
* of the mount options.
*/
spinlock_t s_lock;
+ struct mb2_cache *s_mb_cache;
};
static inline spinlock_t *
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 748d35afc902..111a31761ffa 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -131,7 +131,10 @@ static void ext2_put_super (struct super_block * sb)
dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
- ext2_xattr_put_super(sb);
+ if (sbi->s_mb_cache) {
+ ext2_xattr_destroy_cache(sbi->s_mb_cache);
+ sbi->s_mb_cache = NULL;
+ }
if (!(sb->s_flags & MS_RDONLY)) {
struct ext2_super_block *es = sbi->s_es;
@@ -1104,6 +1107,14 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
ext2_msg(sb, KERN_ERR, "error: insufficient memory");
goto failed_mount3;
}
+
+#ifdef CONFIG_EXT2_FS_XATTR
+ sbi->s_mb_cache = ext2_xattr_create_cache();
+ if (!sbi->s_mb_cache) {
+ ext2_msg(sb, KERN_ERR, "Failed to create an mb_cache");
+ goto failed_mount3;
+ }
+#endif
/*
* set up enough so that it can read an inode
*/
@@ -1149,6 +1160,8 @@ cantfind_ext2:
sb->s_id);
goto failed_mount;
failed_mount3:
+ if (sbi->s_mb_cache)
+ ext2_xattr_destroy_cache(sbi->s_mb_cache);
percpu_counter_destroy(&sbi->s_freeblocks_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -1555,20 +1568,17 @@ MODULE_ALIAS_FS("ext2");
static int __init init_ext2_fs(void)
{
- int err = init_ext2_xattr();
- if (err)
- return err;
+ int err;
+
err = init_inodecache();
if (err)
- goto out1;
+ return err;
err = register_filesystem(&ext2_fs_type);
if (err)
goto out;
return 0;
out:
destroy_inodecache();
-out1:
- exit_ext2_xattr();
return err;
}
@@ -1576,7 +1586,6 @@ static void __exit exit_ext2_fs(void)
{
unregister_filesystem(&ext2_fs_type);
destroy_inodecache();
- exit_ext2_xattr();
}
MODULE_AUTHOR("Remy Card and others");
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index fa70848afa8f..24736c8b3d51 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -56,7 +56,7 @@
#include <linux/buffer_head.h>
#include <linux/init.h>
#include <linux/slab.h>
-#include <linux/mbcache.h>
+#include <linux/mbcache2.h>
#include <linux/quotaops.h>
#include <linux/rwsem.h>
#include <linux/security.h>
@@ -92,14 +92,12 @@
static int ext2_xattr_set2(struct inode *, struct buffer_head *,
struct ext2_xattr_header *);
-static int ext2_xattr_cache_insert(struct buffer_head *);
+static int ext2_xattr_cache_insert(struct mb2_cache *, struct buffer_head *);
static struct buffer_head *ext2_xattr_cache_find(struct inode *,
struct ext2_xattr_header *);
static void ext2_xattr_rehash(struct ext2_xattr_header *,
struct ext2_xattr_entry *);
-static struct mb_cache *ext2_xattr_cache;
-
static const struct xattr_handler *ext2_xattr_handler_map[] = {
[EXT2_XATTR_INDEX_USER] = &ext2_xattr_user_handler,
#ifdef CONFIG_EXT2_FS_POSIX_ACL
@@ -154,6 +152,7 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
size_t name_len, size;
char *end;
int error;
+ struct mb2_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
name_index, name, buffer, (long)buffer_size);
@@ -198,7 +197,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
goto found;
entry = next;
}
- if (ext2_xattr_cache_insert(bh))
+ if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
ea_idebug(inode, "cache insert failed");
error = -ENODATA;
goto cleanup;
@@ -211,7 +210,7 @@ found:
le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
goto bad_block;
- if (ext2_xattr_cache_insert(bh))
+ if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
ea_idebug(inode, "cache insert failed");
if (buffer) {
error = -ERANGE;
@@ -249,6 +248,7 @@ ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
char *end;
size_t rest = buffer_size;
int error;
+ struct mb2_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
ea_idebug(inode, "buffer=%p, buffer_size=%ld",
buffer, (long)buffer_size);
@@ -283,7 +283,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
goto bad_block;
entry = next;
}
- if (ext2_xattr_cache_insert(bh))
+ if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
ea_idebug(inode, "cache insert failed");
/* list the attribute names */
@@ -480,22 +480,23 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
/* Here we know that we can set the new attribute. */
if (header) {
- struct mb_cache_entry *ce;
-
/* assert(header == HDR(bh)); */
- ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev,
- bh->b_blocknr);
lock_buffer(bh);
if (header->h_refcount == cpu_to_le32(1)) {
+ __u32 hash = le32_to_cpu(header->h_hash);
+
ea_bdebug(bh, "modifying in-place");
- if (ce)
- mb_cache_entry_free(ce);
+ /*
+ * This must happen under buffer lock for
+ * ext2_xattr_set2() to reliably detect modified block
+ */
+ mb2_cache_entry_delete_block(EXT2_SB(sb)->s_mb_cache,
+ hash, bh->b_blocknr);
+
/* keep the buffer locked while modifying it. */
} else {
int offset;
- if (ce)
- mb_cache_entry_release(ce);
unlock_buffer(bh);
ea_bdebug(bh, "cloning");
header = kmalloc(bh->b_size, GFP_KERNEL);
@@ -623,6 +624,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
struct super_block *sb = inode->i_sb;
struct buffer_head *new_bh = NULL;
int error;
+ struct mb2_cache *ext2_mb_cache = EXT2_SB(sb)->s_mb_cache;
if (header) {
new_bh = ext2_xattr_cache_find(inode, header);
@@ -650,7 +652,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
don't need to change the reference count. */
new_bh = old_bh;
get_bh(new_bh);
- ext2_xattr_cache_insert(new_bh);
+ ext2_xattr_cache_insert(ext2_mb_cache, new_bh);
} else {
/* We need to allocate a new block */
ext2_fsblk_t goal = ext2_group_first_block_no(sb,
@@ -671,7 +673,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
memcpy(new_bh->b_data, header, new_bh->b_size);
set_buffer_uptodate(new_bh);
unlock_buffer(new_bh);
- ext2_xattr_cache_insert(new_bh);
+ ext2_xattr_cache_insert(ext2_mb_cache, new_bh);
ext2_xattr_update_super_block(sb);
}
@@ -704,19 +706,21 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
error = 0;
if (old_bh && old_bh != new_bh) {
- struct mb_cache_entry *ce;
-
/*
* If there was an old block and we are no longer using it,
* release the old block.
*/
- ce = mb_cache_entry_get(ext2_xattr_cache, old_bh->b_bdev,
- old_bh->b_blocknr);
lock_buffer(old_bh);
if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
+ __u32 hash = le32_to_cpu(HDR(old_bh)->h_hash);
+
+ /*
+ * This must happen under buffer lock for
+ * ext2_xattr_set2() to reliably detect freed block
+ */
+ mb2_cache_entry_delete_block(ext2_mb_cache,
+ hash, old_bh->b_blocknr);
/* Free the old block. */
- if (ce)
- mb_cache_entry_free(ce);
ea_bdebug(old_bh, "freeing");
ext2_free_blocks(inode, old_bh->b_blocknr, 1);
mark_inode_dirty(inode);
@@ -727,8 +731,6 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
} else {
/* Decrement the refcount only. */
le32_add_cpu(&HDR(old_bh)->h_refcount, -1);
- if (ce)
- mb_cache_entry_release(ce);
dquot_free_block_nodirty(inode, 1);
mark_inode_dirty(inode);
mark_buffer_dirty(old_bh);
@@ -754,7 +756,6 @@ void
ext2_xattr_delete_inode(struct inode *inode)
{
struct buffer_head *bh = NULL;
- struct mb_cache_entry *ce;
down_write(&EXT2_I(inode)->xattr_sem);
if (!EXT2_I(inode)->i_file_acl)
@@ -774,19 +775,22 @@ ext2_xattr_delete_inode(struct inode *inode)
EXT2_I(inode)->i_file_acl);
goto cleanup;
}
- ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev, bh->b_blocknr);
lock_buffer(bh);
if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
- if (ce)
- mb_cache_entry_free(ce);
+ __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
+
+ /*
+ * This must happen under buffer lock for ext2_xattr_set2() to
+ * reliably detect freed block
+ */
+ mb2_cache_entry_delete_block(EXT2_SB(inode->i_sb)->s_mb_cache,
+ hash, bh->b_blocknr);
ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
get_bh(bh);
bforget(bh);
unlock_buffer(bh);
} else {
le32_add_cpu(&HDR(bh)->h_refcount, -1);
- if (ce)
- mb_cache_entry_release(ce);
ea_bdebug(bh, "refcount now=%d",
le32_to_cpu(HDR(bh)->h_refcount));
unlock_buffer(bh);
@@ -803,18 +807,6 @@ cleanup:
}
/*
- * ext2_xattr_put_super()
- *
- * This is called when a file system is unmounted.
- */
-void
-ext2_xattr_put_super(struct super_block *sb)
-{
- mb_cache_shrink(sb->s_bdev);
-}
-
-
-/*
* ext2_xattr_cache_insert()
*
* Create a new entry in the extended attribute cache, and insert
@@ -823,28 +815,20 @@ ext2_xattr_put_super(struct super_block *sb)
* Returns 0, or a negative error number on failure.
*/
static int
-ext2_xattr_cache_insert(struct buffer_head *bh)
+ext2_xattr_cache_insert(struct mb2_cache *cache, struct buffer_head *bh)
{
__u32 hash = le32_to_cpu(HDR(bh)->h_hash);
- struct mb_cache_entry *ce;
int error;
- ce = mb_cache_entry_alloc(ext2_xattr_cache, GFP_NOFS);
- if (!ce)
- return -ENOMEM;
- error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
+ error = mb2_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr);
if (error) {
- mb_cache_entry_free(ce);
if (error == -EBUSY) {
ea_bdebug(bh, "already in cache (%d cache entries)",
atomic_read(&ext2_xattr_cache->c_entry_count));
error = 0;
}
- } else {
- ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash,
- atomic_read(&ext2_xattr_cache->c_entry_count));
- mb_cache_entry_release(ce);
- }
+ } else
+ ea_bdebug(bh, "inserting [%x]", (int)hash);
return error;
}
@@ -900,23 +884,17 @@ static struct buffer_head *
ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
{
__u32 hash = le32_to_cpu(header->h_hash);
- struct mb_cache_entry *ce;
+ struct mb2_cache_entry *ce;
+ struct mb2_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
if (!header->h_hash)
return NULL; /* never share */
ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
again:
- ce = mb_cache_entry_find_first(ext2_xattr_cache, inode->i_sb->s_bdev,
- hash);
+ ce = mb2_cache_entry_find_first(ext2_mb_cache, hash);
while (ce) {
struct buffer_head *bh;
- if (IS_ERR(ce)) {
- if (PTR_ERR(ce) == -EAGAIN)
- goto again;
- break;
- }
-
bh = sb_bread(inode->i_sb, ce->e_block);
if (!bh) {
ext2_error(inode->i_sb, "ext2_xattr_cache_find",
@@ -924,7 +902,21 @@ again:
inode->i_ino, (unsigned long) ce->e_block);
} else {
lock_buffer(bh);
- if (le32_to_cpu(HDR(bh)->h_refcount) >
+ /*
+ * We have to be careful about races with freeing or
+ * rehashing of xattr block. Once we hold buffer lock
+ * xattr block's state is stable so we can check
+ * whether the block got freed / rehashed or not.
+ * Since we unhash mbcache entry under buffer lock when
+ * freeing / rehashing xattr block, checking whether
+ * entry is still hashed is reliable.
+ */
+ if (hlist_bl_unhashed(&ce->e_hash_list)) {
+ mb2_cache_entry_put(ext2_mb_cache, ce);
+ unlock_buffer(bh);
+ brelse(bh);
+ goto again;
+ } else if (le32_to_cpu(HDR(bh)->h_refcount) >
EXT2_XATTR_REFCOUNT_MAX) {
ea_idebug(inode, "block %ld refcount %d>%d",
(unsigned long) ce->e_block,
@@ -933,13 +925,14 @@ again:
} else if (!ext2_xattr_cmp(header, HDR(bh))) {
ea_bdebug(bh, "b_count=%d",
atomic_read(&(bh->b_count)));
- mb_cache_entry_release(ce);
+ mb2_cache_entry_touch(ext2_mb_cache, ce);
+ mb2_cache_entry_put(ext2_mb_cache, ce);
return bh;
}
unlock_buffer(bh);
brelse(bh);
}
- ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
+ ce = mb2_cache_entry_find_next(ext2_mb_cache, ce);
}
return NULL;
}
@@ -1012,17 +1005,15 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *header,
#undef BLOCK_HASH_SHIFT
-int __init
-init_ext2_xattr(void)
+#define HASH_BUCKET_BITS 10
+
+struct mb2_cache *ext2_xattr_create_cache(void)
{
- ext2_xattr_cache = mb_cache_create("ext2_xattr", 6);
- if (!ext2_xattr_cache)
- return -ENOMEM;
- return 0;
+ return mb2_cache_create(HASH_BUCKET_BITS);
}
-void
-exit_ext2_xattr(void)
+void ext2_xattr_destroy_cache(struct mb2_cache *cache)
{
- mb_cache_destroy(ext2_xattr_cache);
+ if (cache)
+ mb2_cache_destroy(cache);
}
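Several comments in this conversion state that unhashing "must happen under buffer lock" so that ext2_xattr_cache_find() can trust hlist_bl_unhashed() once it holds the lock. The protocol: writers unhash the mbcache entry while holding the xattr block's buffer lock; readers lock the buffer and re-check whether the entry is still hashed before reusing the block. A runnable userspace analog of that lock-then-recheck pattern (illustrative only, not kernel code; the mutex plays the buffer lock, the flag plays the hash linkage):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct block {
	pthread_mutex_t lock;	/* "buffer lock" */
	bool hashed;		/* "entry still in mbcache hash" */
	int refcount;
};

static void free_block(struct block *b)	/* writer side */
{
	pthread_mutex_lock(&b->lock);
	b->hashed = false;	/* unhash under the lock */
	pthread_mutex_unlock(&b->lock);
}

static bool try_reuse(struct block *b)	/* reader side */
{
	pthread_mutex_lock(&b->lock);
	if (!b->hashed) {	/* lost the race: block went away */
		pthread_mutex_unlock(&b->lock);
		return false;	/* caller retries the cache lookup */
	}
	b->refcount++;		/* state is stable: safe to share */
	pthread_mutex_unlock(&b->lock);
	return true;
}

int main(void)
{
	struct block b = { PTHREAD_MUTEX_INITIALIZER, true, 1 };

	printf("reuse: %d\n", try_reuse(&b));	/* 1: shared */
	free_block(&b);
	printf("reuse: %d\n", try_reuse(&b));	/* 0: retry lookup */
	return 0;
}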
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h
index 60edf298644e..6ea38aa9563a 100644
--- a/fs/ext2/xattr.h
+++ b/fs/ext2/xattr.h
@@ -53,6 +53,8 @@ struct ext2_xattr_entry {
#define EXT2_XATTR_SIZE(size) \
(((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND)
+struct mb2_cache;
+
# ifdef CONFIG_EXT2_FS_XATTR
extern const struct xattr_handler ext2_xattr_user_handler;
@@ -65,10 +67,9 @@ extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t);
extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
extern void ext2_xattr_delete_inode(struct inode *);
-extern void ext2_xattr_put_super(struct super_block *);
-extern int init_ext2_xattr(void);
-extern void exit_ext2_xattr(void);
+extern struct mb2_cache *ext2_xattr_create_cache(void);
+extern void ext2_xattr_destroy_cache(struct mb2_cache *cache);
extern const struct xattr_handler *ext2_xattr_handlers[];
@@ -93,19 +94,7 @@ ext2_xattr_delete_inode(struct inode *inode)
{
}
-static inline void
-ext2_xattr_put_super(struct super_block *sb)
-{
-}
-
-static inline int
-init_ext2_xattr(void)
-{
- return 0;
-}
-
-static inline void
-exit_ext2_xattr(void)
+static inline void ext2_xattr_destroy_cache(struct mb2_cache *cache)
{
}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 482d1caee849..6c910f127f1f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1443,7 +1443,7 @@ struct ext4_sb_info {
struct list_head s_es_list; /* List of inodes with reclaimable extents */
long s_es_nr_inode;
struct ext4_es_stats s_es_stats;
- struct mb_cache *s_mb_cache;
+ struct mb2_cache *s_mb_cache;
spinlock_t s_es_lock ____cacheline_aligned_in_smp;
/* Ratelimit ext4 messages. */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 72b384931f66..4caa0c1f77d8 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -73,10 +73,9 @@ static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum,
csum_size);
offset += csum_size;
- csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
- EXT4_INODE_SIZE(inode->i_sb) -
- offset);
}
+ csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
+ EXT4_INODE_SIZE(inode->i_sb) - offset);
}
return csum;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 68345a9e59b8..bd8831bfbafe 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -816,7 +816,6 @@ static void ext4_put_super(struct super_block *sb)
ext4_release_system_zone(sb);
ext4_mb_release(sb);
ext4_ext_release(sb);
- ext4_xattr_put_super(sb);
if (!(sb->s_flags & MS_RDONLY) && !aborted) {
ext4_clear_feature_journal_needs_recovery(sb);
@@ -3833,7 +3832,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
no_journal:
if (ext4_mballoc_ready) {
- sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id);
+ sbi->s_mb_cache = ext4_xattr_create_cache();
if (!sbi->s_mb_cache) {
ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
goto failed_mount_wq;
@@ -4065,6 +4064,10 @@ failed_mount4:
if (EXT4_SB(sb)->rsv_conversion_wq)
destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
failed_mount_wq:
+ if (sbi->s_mb_cache) {
+ ext4_xattr_destroy_cache(sbi->s_mb_cache);
+ sbi->s_mb_cache = NULL;
+ }
if (sbi->s_journal) {
jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 7c23363ecf19..b310ed81c10e 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -53,7 +53,7 @@
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/slab.h>
-#include <linux/mbcache.h>
+#include <linux/mbcache2.h>
#include <linux/quotaops.h>
#include "ext4_jbd2.h"
#include "ext4.h"
@@ -80,10 +80,10 @@
# define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif
-static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *);
+static void ext4_xattr_cache_insert(struct mb2_cache *, struct buffer_head *);
static struct buffer_head *ext4_xattr_cache_find(struct inode *,
struct ext4_xattr_header *,
- struct mb_cache_entry **);
+ struct mb2_cache_entry **);
static void ext4_xattr_rehash(struct ext4_xattr_header *,
struct ext4_xattr_entry *);
static int ext4_xattr_list(struct dentry *dentry, char *buffer,
@@ -300,7 +300,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
struct ext4_xattr_entry *entry;
size_t size;
int error;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
name_index, name, buffer, (long)buffer_size);
@@ -447,7 +447,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
struct inode *inode = d_inode(dentry);
struct buffer_head *bh = NULL;
int error;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
ea_idebug(inode, "buffer=%p, buffer_size=%ld",
buffer, (long)buffer_size);
@@ -564,11 +564,8 @@ static void
ext4_xattr_release_block(handle_t *handle, struct inode *inode,
struct buffer_head *bh)
{
- struct mb_cache_entry *ce = NULL;
int error = 0;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
- ce = mb_cache_entry_get(ext4_mb_cache, bh->b_bdev, bh->b_blocknr);
BUFFER_TRACE(bh, "get_write_access");
error = ext4_journal_get_write_access(handle, bh);
if (error)
@@ -576,9 +573,15 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
lock_buffer(bh);
if (BHDR(bh)->h_refcount == cpu_to_le32(1)) {
+ __u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
+
ea_bdebug(bh, "refcount now=0; freeing");
- if (ce)
- mb_cache_entry_free(ce);
+ /*
+ * This must happen under buffer lock for
+ * ext4_xattr_block_set() to reliably detect freed block
+ */
+ mb2_cache_entry_delete_block(EXT4_GET_MB_CACHE(inode), hash,
+ bh->b_blocknr);
get_bh(bh);
unlock_buffer(bh);
ext4_free_blocks(handle, inode, bh, 0, 1,
@@ -586,8 +589,6 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
EXT4_FREE_BLOCKS_FORGET);
} else {
le32_add_cpu(&BHDR(bh)->h_refcount, -1);
- if (ce)
- mb_cache_entry_release(ce);
/*
* Beware of this ugliness: Releasing of xattr block references
* from different inodes can race and so we have to protect
@@ -800,17 +801,15 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
struct super_block *sb = inode->i_sb;
struct buffer_head *new_bh = NULL;
struct ext4_xattr_search *s = &bs->s;
- struct mb_cache_entry *ce = NULL;
+ struct mb2_cache_entry *ce = NULL;
int error = 0;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
#define header(x) ((struct ext4_xattr_header *)(x))
if (i->value && i->value_len > sb->s_blocksize)
return -ENOSPC;
if (s->base) {
- ce = mb_cache_entry_get(ext4_mb_cache, bs->bh->b_bdev,
- bs->bh->b_blocknr);
BUFFER_TRACE(bs->bh, "get_write_access");
error = ext4_journal_get_write_access(handle, bs->bh);
if (error)
@@ -818,10 +817,15 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
lock_buffer(bs->bh);
if (header(s->base)->h_refcount == cpu_to_le32(1)) {
- if (ce) {
- mb_cache_entry_free(ce);
- ce = NULL;
- }
+ __u32 hash = le32_to_cpu(BHDR(bs->bh)->h_hash);
+
+ /*
+ * This must happen under buffer lock for
+ * ext4_xattr_block_set() to reliably detect modified
+ * block
+ */
+ mb2_cache_entry_delete_block(ext4_mb_cache, hash,
+ bs->bh->b_blocknr);
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s);
if (!error) {
@@ -845,10 +849,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
int offset = (char *)s->here - bs->bh->b_data;
unlock_buffer(bs->bh);
- if (ce) {
- mb_cache_entry_release(ce);
- ce = NULL;
- }
ea_bdebug(bs->bh, "cloning");
s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
error = -ENOMEM;
@@ -903,6 +903,31 @@ inserted:
if (error)
goto cleanup_dquot;
lock_buffer(new_bh);
+ /*
+ * We have to be careful about races with
+ * freeing or rehashing of xattr block. Once we
+ * hold buffer lock xattr block's state is
+ * stable so we can check whether the block got
+ * freed / rehashed or not. Since we unhash
+ * mbcache entry under buffer lock when freeing
+ * / rehashing xattr block, checking whether
+ * entry is still hashed is reliable.
+ */
+ if (hlist_bl_unhashed(&ce->e_hash_list)) {
+ /*
+ * Undo everything and check mbcache
+ * again.
+ */
+ unlock_buffer(new_bh);
+ dquot_free_block(inode,
+ EXT4_C2B(EXT4_SB(sb),
+ 1));
+ brelse(new_bh);
+ mb2_cache_entry_put(ext4_mb_cache, ce);
+ ce = NULL;
+ new_bh = NULL;
+ goto inserted;
+ }
le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
ea_bdebug(new_bh, "reusing; refcount now=%d",
le32_to_cpu(BHDR(new_bh)->h_refcount));
@@ -913,7 +938,8 @@ inserted:
if (error)
goto cleanup_dquot;
}
- mb_cache_entry_release(ce);
+ mb2_cache_entry_touch(ext4_mb_cache, ce);
+ mb2_cache_entry_put(ext4_mb_cache, ce);
ce = NULL;
} else if (bs->bh && s->base == bs->bh->b_data) {
/* We were modifying this block in-place. */
@@ -978,7 +1004,7 @@ getblk_failed:
cleanup:
if (ce)
- mb_cache_entry_release(ce);
+ mb2_cache_entry_put(ext4_mb_cache, ce);
brelse(new_bh);
if (!(bs->bh && s->base == bs->bh->b_data))
kfree(s->base);
@@ -1543,17 +1569,6 @@ cleanup:
}
/*
- * ext4_xattr_put_super()
- *
- * This is called when a file system is unmounted.
- */
-void
-ext4_xattr_put_super(struct super_block *sb)
-{
- mb_cache_shrink(sb->s_bdev);
-}
-
-/*
* ext4_xattr_cache_insert()
*
* Create a new entry in the extended attribute cache, and insert
@@ -1562,28 +1577,18 @@ ext4_xattr_put_super(struct super_block *sb)
* Returns 0, or a negative error number on failure.
*/
static void
-ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
+ext4_xattr_cache_insert(struct mb2_cache *ext4_mb_cache, struct buffer_head *bh)
{
__u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
- struct mb_cache_entry *ce;
int error;
- ce = mb_cache_entry_alloc(ext4_mb_cache, GFP_NOFS);
- if (!ce) {
- ea_bdebug(bh, "out of memory");
- return;
- }
- error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
+ error = mb2_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash,
+ bh->b_blocknr);
if (error) {
- mb_cache_entry_free(ce);
- if (error == -EBUSY) {
+ if (error == -EBUSY)
ea_bdebug(bh, "already in cache");
- error = 0;
- }
- } else {
+ } else
ea_bdebug(bh, "inserting [%x]", (int)hash);
- mb_cache_entry_release(ce);
- }
}
/*
@@ -1636,26 +1641,19 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1,
*/
static struct buffer_head *
ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
- struct mb_cache_entry **pce)
+ struct mb2_cache_entry **pce)
{
__u32 hash = le32_to_cpu(header->h_hash);
- struct mb_cache_entry *ce;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb2_cache_entry *ce;
+ struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
if (!header->h_hash)
return NULL; /* never share */
ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
-again:
- ce = mb_cache_entry_find_first(ext4_mb_cache, inode->i_sb->s_bdev,
- hash);
+ ce = mb2_cache_entry_find_first(ext4_mb_cache, hash);
while (ce) {
struct buffer_head *bh;
- if (IS_ERR(ce)) {
- if (PTR_ERR(ce) == -EAGAIN)
- goto again;
- break;
- }
bh = sb_bread(inode->i_sb, ce->e_block);
if (!bh) {
EXT4_ERROR_INODE(inode, "block %lu read error",
@@ -1671,7 +1669,7 @@ again:
return bh;
}
brelse(bh);
- ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
+ ce = mb2_cache_entry_find_next(ext4_mb_cache, ce);
}
return NULL;
}
@@ -1746,15 +1744,15 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header,
#define HASH_BUCKET_BITS 10
-struct mb_cache *
-ext4_xattr_create_cache(char *name)
+struct mb2_cache *
+ext4_xattr_create_cache(void)
{
- return mb_cache_create(name, HASH_BUCKET_BITS);
+ return mb2_cache_create(HASH_BUCKET_BITS);
}
-void ext4_xattr_destroy_cache(struct mb_cache *cache)
+void ext4_xattr_destroy_cache(struct mb2_cache *cache)
{
if (cache)
- mb_cache_destroy(cache);
+ mb2_cache_destroy(cache);
}
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index ddc0957760ba..10b0f7323ed6 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -108,7 +108,6 @@ extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_
extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
extern void ext4_xattr_delete_inode(handle_t *, struct inode *);
-extern void ext4_xattr_put_super(struct super_block *);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle);
@@ -124,8 +123,8 @@ extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
struct ext4_xattr_info *i,
struct ext4_xattr_ibody_find *is);
-extern struct mb_cache *ext4_xattr_create_cache(char *name);
-extern void ext4_xattr_destroy_cache(struct mb_cache *);
+extern struct mb2_cache *ext4_xattr_create_cache(void);
+extern void ext4_xattr_destroy_cache(struct mb2_cache *);
#ifdef CONFIG_EXT4_FS_SECURITY
extern int ext4_init_security(handle_t *handle, struct inode *inode,
diff --git a/fs/mbcache2.c b/fs/mbcache2.c
new file mode 100644
index 000000000000..5c3e1a8c38f6
--- /dev/null
+++ b/fs/mbcache2.c
@@ -0,0 +1,359 @@
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/list_bl.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/mbcache2.h>
+
+/*
+ * Mbcache is a simple key-value store. Keys need not be unique, however
+ * key-value pairs are expected to be unique (we use this fact in
+ * mb2_cache_entry_delete_block()).
+ *
+ * Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
+ * They use hash of a block contents as a key and block number as a value.
+ * That's why keys need not be unique (different xattr blocks may end up having
+ * the same hash). However block number always uniquely identifies a cache
+ * entry.
+ *
+ * We provide functions for creation and removal of entries, search by key,
+ * and a special "delete entry with given key-value pair" operation. Fixed
+ * size hash table is used for fast key lookups.
+ */
+
+struct mb2_cache {
+ /* Hash table of entries */
+ struct hlist_bl_head *c_hash;
+ /* log2 of hash table size */
+ int c_bucket_bits;
+ /* Protects c_lru_list, c_entry_count */
+ spinlock_t c_lru_list_lock;
+ struct list_head c_lru_list;
+ /* Number of entries in cache */
+ unsigned long c_entry_count;
+ struct shrinker c_shrink;
+};
+
+static struct kmem_cache *mb2_entry_cache;
+
+/*
+ * mb2_cache_entry_create - create entry in cache
+ * @cache - cache where the entry should be created
+ * @mask - gfp mask with which the entry should be allocated
+ * @key - key of the entry
+ * @block - block that contains data
+ *
+ * Creates entry in @cache with key @key and records that data is stored in
+ * block @block. The function returns -EBUSY if entry with the same key
+ * and for the same block already exists in cache. Otherwise 0 is returned.
+ */
+int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key,
+ sector_t block)
+{
+ struct mb2_cache_entry *entry, *dup;
+ struct hlist_bl_node *dup_node;
+ struct hlist_bl_head *head;
+
+ entry = kmem_cache_alloc(mb2_entry_cache, mask);
+ if (!entry)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&entry->e_lru_list);
+ /* One ref for hash, one ref returned */
+ atomic_set(&entry->e_refcnt, 1);
+ entry->e_key = key;
+ entry->e_block = block;
+ head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
+ entry->e_hash_list_head = head;
+ hlist_bl_lock(head);
+ hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
+ if (dup->e_key == key && dup->e_block == block) {
+ hlist_bl_unlock(head);
+ kmem_cache_free(mb2_entry_cache, entry);
+ return -EBUSY;
+ }
+ }
+ hlist_bl_add_head(&entry->e_hash_list, head);
+ hlist_bl_unlock(head);
+
+ spin_lock(&cache->c_lru_list_lock);
+ list_add_tail(&entry->e_lru_list, &cache->c_lru_list);
+ /* Grab ref for LRU list */
+ atomic_inc(&entry->e_refcnt);
+ cache->c_entry_count++;
+ spin_unlock(&cache->c_lru_list_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(mb2_cache_entry_create);
+
+void __mb2_cache_entry_free(struct mb2_cache_entry *entry)
+{
+ kmem_cache_free(mb2_entry_cache, entry);
+}
+EXPORT_SYMBOL(__mb2_cache_entry_free);
+
+static struct mb2_cache_entry *__entry_find(struct mb2_cache *cache,
+ struct mb2_cache_entry *entry,
+ u32 key)
+{
+ struct mb2_cache_entry *old_entry = entry;
+ struct hlist_bl_node *node;
+ struct hlist_bl_head *head;
+
+ if (entry)
+ head = entry->e_hash_list_head;
+ else
+ head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
+ hlist_bl_lock(head);
+ if (entry && !hlist_bl_unhashed(&entry->e_hash_list))
+ node = entry->e_hash_list.next;
+ else
+ node = hlist_bl_first(head);
+ while (node) {
+ entry = hlist_bl_entry(node, struct mb2_cache_entry,
+ e_hash_list);
+ if (entry->e_key == key) {
+ atomic_inc(&entry->e_refcnt);
+ goto out;
+ }
+ node = node->next;
+ }
+ entry = NULL;
+out:
+ hlist_bl_unlock(head);
+ if (old_entry)
+ mb2_cache_entry_put(cache, old_entry);
+
+ return entry;
+}
+
+/*
+ * mb2_cache_entry_find_first - find the first entry in cache with given key
+ * @cache: cache where we should search
+ * @key: key to look for
+ *
+ * Search in @cache for entry with key @key. Grabs reference to the first
+ * entry found and returns the entry.
+ */
+struct mb2_cache_entry *mb2_cache_entry_find_first(struct mb2_cache *cache,
+ u32 key)
+{
+ return __entry_find(cache, NULL, key);
+}
+EXPORT_SYMBOL(mb2_cache_entry_find_first);
+
+/*
+ * mb2_cache_entry_find_next - find next entry in cache with the same key
+ * @cache: cache where we should search
+ * @entry: entry to start search from
+ *
+ * Finds next entry in the hash chain which has the same key as @entry.
+ * If @entry is unhashed (which can happen when deletion of entry races
+ * with the search), finds the first entry in the hash chain. The function
+ * drops reference to @entry and returns with a reference to the found entry.
+ */
+struct mb2_cache_entry *mb2_cache_entry_find_next(struct mb2_cache *cache,
+ struct mb2_cache_entry *entry)
+{
+ return __entry_find(cache, entry, entry->e_key);
+}
+EXPORT_SYMBOL(mb2_cache_entry_find_next);
+
+/* mb2_cache_entry_delete_block - remove information about block from cache
+ * @cache - cache we work with
+ * @key - key of the entry to remove
+ * @block - block containing data for @key
+ *
+ * Remove entry from cache @cache with key @key with data stored in @block.
+ */
+void mb2_cache_entry_delete_block(struct mb2_cache *cache, u32 key,
+ sector_t block)
+{
+ struct hlist_bl_node *node;
+ struct hlist_bl_head *head;
+ struct mb2_cache_entry *entry;
+
+ head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
+ hlist_bl_lock(head);
+ hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
+ if (entry->e_key == key && entry->e_block == block) {
+ /* We keep hash list reference to keep entry alive */
+ hlist_bl_del_init(&entry->e_hash_list);
+ hlist_bl_unlock(head);
+ spin_lock(&cache->c_lru_list_lock);
+ if (!list_empty(&entry->e_lru_list)) {
+ list_del_init(&entry->e_lru_list);
+ cache->c_entry_count--;
+ atomic_dec(&entry->e_refcnt);
+ }
+ spin_unlock(&cache->c_lru_list_lock);
+ mb2_cache_entry_put(cache, entry);
+ return;
+ }
+ }
+ hlist_bl_unlock(head);
+}
+EXPORT_SYMBOL(mb2_cache_entry_delete_block);
+
+/* mb2_cache_entry_touch - cache entry got used
+ * @cache - cache the entry belongs to
+ * @entry - entry that got used
+ *
+ * Move entry in lru list to reflect the fact that it was used.
+ */
+void mb2_cache_entry_touch(struct mb2_cache *cache,
+ struct mb2_cache_entry *entry)
+{
+ spin_lock(&cache->c_lru_list_lock);
+ if (!list_empty(&entry->e_lru_list))
+ list_move_tail(&entry->e_lru_list, &cache->c_lru_list);
+ spin_unlock(&cache->c_lru_list_lock);
+}
+EXPORT_SYMBOL(mb2_cache_entry_touch);
+
+static unsigned long mb2_cache_count(struct shrinker *shrink,
+ struct shrink_control *sc)
+{
+ struct mb2_cache *cache = container_of(shrink, struct mb2_cache,
+ c_shrink);
+
+ return cache->c_entry_count;
+}
+
+/* Shrink number of entries in cache */
+static unsigned long mb2_cache_scan(struct shrinker *shrink,
+ struct shrink_control *sc)
+{
+ int nr_to_scan = sc->nr_to_scan;
+ struct mb2_cache *cache = container_of(shrink, struct mb2_cache,
+ c_shrink);
+ struct mb2_cache_entry *entry;
+ struct hlist_bl_head *head;
+ unsigned int shrunk = 0;
+
+ spin_lock(&cache->c_lru_list_lock);
+ while (nr_to_scan-- && !list_empty(&cache->c_lru_list)) {
+ entry = list_first_entry(&cache->c_lru_list,
+ struct mb2_cache_entry, e_lru_list);
+ list_del_init(&entry->e_lru_list);
+ cache->c_entry_count--;
+ /*
+ * We keep LRU list reference so that entry doesn't go away
+ * from under us.
+ */
+ spin_unlock(&cache->c_lru_list_lock);
+ head = entry->e_hash_list_head;
+ hlist_bl_lock(head);
+ if (!hlist_bl_unhashed(&entry->e_hash_list)) {
+ hlist_bl_del_init(&entry->e_hash_list);
+ atomic_dec(&entry->e_refcnt);
+ }
+ hlist_bl_unlock(head);
+ if (mb2_cache_entry_put(cache, entry))
+ shrunk++;
+ cond_resched();
+ spin_lock(&cache->c_lru_list_lock);
+ }
+ spin_unlock(&cache->c_lru_list_lock);
+
+ return shrunk;
+}
+
+/*
+ * mb2_cache_create - create cache
+ * @bucket_bits: log2 of the hash table size
+ *
+ * Create cache for keys with 2^bucket_bits hash entries.
+ */
+struct mb2_cache *mb2_cache_create(int bucket_bits)
+{
+ struct mb2_cache *cache;
+ int bucket_count = 1 << bucket_bits;
+ int i;
+
+ if (!try_module_get(THIS_MODULE))
+ return NULL;
+
+ cache = kzalloc(sizeof(struct mb2_cache), GFP_KERNEL);
+ if (!cache)
+ goto err_out;
+ cache->c_bucket_bits = bucket_bits;
+ INIT_LIST_HEAD(&cache->c_lru_list);
+ spin_lock_init(&cache->c_lru_list_lock);
+ cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head),
+ GFP_KERNEL);
+ if (!cache->c_hash) {
+ kfree(cache);
+ goto err_out;
+ }
+ for (i = 0; i < bucket_count; i++)
+ INIT_HLIST_BL_HEAD(&cache->c_hash[i]);
+
+ cache->c_shrink.count_objects = mb2_cache_count;
+ cache->c_shrink.scan_objects = mb2_cache_scan;
+ cache->c_shrink.seeks = DEFAULT_SEEKS;
+ register_shrinker(&cache->c_shrink);
+
+ return cache;
+
+err_out:
+ module_put(THIS_MODULE);
+ return NULL;
+}
+EXPORT_SYMBOL(mb2_cache_create);
+
+/*
+ * mb2_cache_destroy - destroy cache
+ * @cache: the cache to destroy
+ *
+ * Free all entries in cache and cache itself. Caller must make sure nobody
+ * (except shrinker) can reach @cache when calling this.
+ */
+void mb2_cache_destroy(struct mb2_cache *cache)
+{
+ struct mb2_cache_entry *entry, *next;
+
+ unregister_shrinker(&cache->c_shrink);
+
+ /*
+ * We don't bother with any locking. Cache must not be used at this
+ * point.
+ */
+ list_for_each_entry_safe(entry, next, &cache->c_lru_list, e_lru_list) {
+ if (!hlist_bl_unhashed(&entry->e_hash_list)) {
+ hlist_bl_del_init(&entry->e_hash_list);
+ atomic_dec(&entry->e_refcnt);
+ } else
+ WARN_ON(1);
+ list_del(&entry->e_lru_list);
+ WARN_ON(atomic_read(&entry->e_refcnt) != 1);
+ mb2_cache_entry_put(cache, entry);
+ }
+ kfree(cache->c_hash);
+ kfree(cache);
+ module_put(THIS_MODULE);
+}
+EXPORT_SYMBOL(mb2_cache_destroy);
+
+static int __init mb2cache_init(void)
+{
+ mb2_entry_cache = kmem_cache_create("mbcache",
+ sizeof(struct mb2_cache_entry), 0,
+ SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
+ BUG_ON(!mb2_entry_cache);
+ return 0;
+}
+
+static void __exit mb2cache_exit(void)
+{
+ kmem_cache_destroy(mb2_entry_cache);
+}
+
+module_init(mb2cache_init)
+module_exit(mb2cache_exit)
+
+MODULE_AUTHOR("Jan Kara <jack@suse.cz>");
+MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
+MODULE_LICENSE("GPL");
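For orientation, a hedged usage sketch of the mb2_cache API added above (kernel context assumed; mirrors the ext2/ext4 callers in this patch and uses only functions defined in this file). Keys are content hashes; values are block numbers:

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mbcache2.h>

static struct mb2_cache *cache;

static int example_init(void)
{
	cache = mb2_cache_create(10);		/* 2^10 hash buckets */
	return cache ? 0 : -ENOMEM;
}

static void example_use(u32 hash, sector_t block)
{
	struct mb2_cache_entry *ce;

	/* Record that @block holds data hashing to @hash. -EBUSY means
	 * this exact (key, block) pair is already cached. */
	if (mb2_cache_entry_create(cache, GFP_NOFS, hash, block) == -EBUSY)
		return;

	/* Walk every block with the same hash, e.g. when looking for a
	 * duplicate xattr block to share. */
	for (ce = mb2_cache_entry_find_first(cache, hash); ce;
	     ce = mb2_cache_entry_find_next(cache, ce)) {
		if (ce->e_block == block) {
			mb2_cache_entry_touch(cache, ce); /* refresh LRU */
			mb2_cache_entry_put(cache, ce);   /* drop search ref */
			break;
		}
	}
	/* (find_next drops the reference on the entry it was passed, so
	 * nothing leaks when the loop falls off the end.) */

	/* When the block is freed, remove the stale (key, block) pair. */
	mb2_cache_entry_delete_block(cache, hash, block);
}

static void example_exit(void)
{
	mb2_cache_destroy(cache);
}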
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 6a44fb94228c..fc5243c331c4 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -867,7 +867,14 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
- pmd_t pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);
+ pmd_t pmd = *pmdp;
+
+ /* See comment in change_huge_pmd() */
+ pmdp_invalidate(vma, addr, pmdp);
+ if (pmd_dirty(*pmdp))
+ pmd = pmd_mkdirty(pmd);
+ if (pmd_young(*pmdp))
+ pmd = pmd_mkyoung(pmd);
pmd = pmd_wrprotect(pmd);
pmd = pmd_clear_soft_dirty(pmd);
diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c
index f82fedd29007..cddfc79ea365 100644
--- a/fs/sdcardfs/derived_perm.c
+++ b/fs/sdcardfs/derived_perm.c
@@ -222,7 +222,7 @@ void fixup_lower_ownership(struct dentry *dentry, const char *name)
break;
case PERM_ANDROID_PACKAGE_CACHE:
if (info->d_uid != 0)
- gid = multiuser_get_cache_gid(info->d_uid);
+ gid = multiuser_get_ext_cache_gid(info->d_uid);
else
gid = multiuser_get_uid(info->userid, uid);
break;
@@ -252,7 +252,7 @@ retry_deleg:
goto retry_deleg;
}
if (error)
- pr_err("sdcardfs: Failed to touch up lower fs gid/uid.\n");
+ pr_debug("sdcardfs: Failed to touch up lower fs gid/uid for %s\n", name);
}
sdcardfs_put_lower_path(dentry, &path);
}
diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c
index c0146e03fa2e..1f6921e2ffbf 100644
--- a/fs/sdcardfs/file.c
+++ b/fs/sdcardfs/file.c
@@ -192,6 +192,9 @@ static int sdcardfs_mmap(struct file *file, struct vm_area_struct *vma)
file->f_mapping->a_ops = &sdcardfs_aops; /* set our aops */
if (!SDCARDFS_F(file)->lower_vm_ops) /* save for our ->fault */
SDCARDFS_F(file)->lower_vm_ops = saved_vm_ops;
+ vma->vm_private_data = file;
+ get_file(lower_file);
+ vma->vm_file = lower_file;
out:
return err;
diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c
index f10f7752f514..a0f221501b4c 100644
--- a/fs/sdcardfs/lookup.c
+++ b/fs/sdcardfs/lookup.c
@@ -91,7 +91,9 @@ struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode, u
struct sdcardfs_inode_info *info;
struct inode_data data;
struct inode *inode; /* the new inode to return */
- int err;
+
+ if (!igrab(lower_inode))
+ return ERR_PTR(-ESTALE);
data.id = id;
data.lower_inode = lower_inode;
@@ -106,22 +108,19 @@ struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode, u
sdcardfs_inode_set, /* inode init function */
&data); /* data passed to test+set fxns */
if (!inode) {
- err = -EACCES;
iput(lower_inode);
- return ERR_PTR(err);
+ return ERR_PTR(-ENOMEM);
}
- /* if found a cached inode, then just return it */
- if (!(inode->i_state & I_NEW))
+ /* if found a cached inode, then just return it (after iput) */
+ if (!(inode->i_state & I_NEW)) {
+ iput(lower_inode);
return inode;
+ }
/* initialize new inode */
info = SDCARDFS_I(inode);
inode->i_ino = lower_inode->i_ino;
- if (!igrab(lower_inode)) {
- err = -ESTALE;
- return ERR_PTR(err);
- }
sdcardfs_set_lower_inode(inode, lower_inode);
inode->i_version++;
diff --git a/fs/sdcardfs/mmap.c b/fs/sdcardfs/mmap.c
index 0d4089c62c3a..b61f82275e7d 100644
--- a/fs/sdcardfs/mmap.c
+++ b/fs/sdcardfs/mmap.c
@@ -23,60 +23,45 @@
static int sdcardfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
int err;
- struct file *file, *lower_file;
+ struct file *file;
const struct vm_operations_struct *lower_vm_ops;
- struct vm_area_struct lower_vma;
- memcpy(&lower_vma, vma, sizeof(struct vm_area_struct));
- file = lower_vma.vm_file;
+ file = (struct file *)vma->vm_private_data;
lower_vm_ops = SDCARDFS_F(file)->lower_vm_ops;
BUG_ON(!lower_vm_ops);
- lower_file = sdcardfs_lower_file(file);
- /*
- * XXX: vm_ops->fault may be called in parallel. Because we have to
- * resort to temporarily changing the vma->vm_file to point to the
- * lower file, a concurrent invocation of sdcardfs_fault could see a
- * different value. In this workaround, we keep a different copy of
- * the vma structure in our stack, so we never expose a different
- * value of the vma->vm_file called to us, even temporarily. A
- * better fix would be to change the calling semantics of ->fault to
- * take an explicit file pointer.
- */
- lower_vma.vm_file = lower_file;
- err = lower_vm_ops->fault(&lower_vma, vmf);
+ err = lower_vm_ops->fault(vma, vmf);
return err;
}
+static void sdcardfs_vm_open(struct vm_area_struct *vma)
+{
+ struct file *file = (struct file *)vma->vm_private_data;
+
+ get_file(file);
+}
+
+static void sdcardfs_vm_close(struct vm_area_struct *vma)
+{
+ struct file *file = (struct file *)vma->vm_private_data;
+
+ fput(file);
+}
+
static int sdcardfs_page_mkwrite(struct vm_area_struct *vma,
struct vm_fault *vmf)
{
int err = 0;
- struct file *file, *lower_file;
+ struct file *file;
const struct vm_operations_struct *lower_vm_ops;
- struct vm_area_struct lower_vma;
- memcpy(&lower_vma, vma, sizeof(struct vm_area_struct));
- file = lower_vma.vm_file;
+ file = (struct file *)vma->vm_private_data;
lower_vm_ops = SDCARDFS_F(file)->lower_vm_ops;
BUG_ON(!lower_vm_ops);
if (!lower_vm_ops->page_mkwrite)
goto out;
- lower_file = sdcardfs_lower_file(file);
- /*
- * XXX: vm_ops->page_mkwrite may be called in parallel.
- * Because we have to resort to temporarily changing the
- * vma->vm_file to point to the lower file, a concurrent
- * invocation of sdcardfs_page_mkwrite could see a different
- * value. In this workaround, we keep a different copy of the
- * vma structure in our stack, so we never expose a different
- * value of the vma->vm_file called to us, even temporarily.
- * A better fix would be to change the calling semantics of
- * ->page_mkwrite to take an explicit file pointer.
- */
- lower_vma.vm_file = lower_file;
- err = lower_vm_ops->page_mkwrite(&lower_vma, vmf);
+ err = lower_vm_ops->page_mkwrite(vma, vmf);
out:
return err;
}
@@ -99,4 +84,6 @@ const struct address_space_operations sdcardfs_aops = {
const struct vm_operations_struct sdcardfs_vm_ops = {
.fault = sdcardfs_fault,
.page_mkwrite = sdcardfs_page_mkwrite,
+ .open = sdcardfs_vm_open,
+ .close = sdcardfs_vm_close,
};
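The net effect of the file.c and mmap.c changes is a new ownership scheme: vma->vm_file now points at the lower file (so the lower filesystem's fault handlers operate on their own file, and the old stack-copied-vma workaround goes away), while the sdcardfs file is stashed in vm_private_data and kept alive by the new ->open()/->close() pair. A condensed sketch of the pattern for a generic stacking filesystem (kernel context assumed; stackfs_lower_file() and stackfs_vm_ops are hypothetical names, error handling elided):

static int stackfs_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct file *lower_file = stackfs_lower_file(file);

	vma->vm_ops = &stackfs_vm_ops;
	/* The reference vma->vm_file held on the upper file is
	 * repurposed: it now lives in vm_private_data and is dropped
	 * by ->close() when the last vma goes away. */
	vma->vm_private_data = file;
	get_file(lower_file);		/* new owner: vma->vm_file */
	vma->vm_file = lower_file;	/* lower fs faults see its own file */
	return 0;
}

static void stackfs_vm_open(struct vm_area_struct *vma)
{
	/* vma duplicated (fork/split): take another upper-file ref */
	get_file((struct file *)vma->vm_private_data);
}

static void stackfs_vm_close(struct vm_area_struct *vma)
{
	/* vma torn down: release the matching upper-file ref */
	fput((struct file *)vma->vm_private_data);
}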
diff --git a/fs/sdcardfs/multiuser.h b/fs/sdcardfs/multiuser.h
index 2e89b5872314..d0c925cda299 100644
--- a/fs/sdcardfs/multiuser.h
+++ b/fs/sdcardfs/multiuser.h
@@ -23,6 +23,8 @@
#define AID_APP_END 19999 /* last app user */
#define AID_CACHE_GID_START 20000 /* start of gids for apps to mark cached data */
#define AID_EXT_GID_START 30000 /* start of gids for apps to mark external data */
+#define AID_EXT_CACHE_GID_START 40000 /* start of gids for apps to mark external cached data */
+#define AID_EXT_CACHE_GID_END 49999 /* end of gids for apps to mark external cached data */
#define AID_SHARED_GID_START 50000 /* start of gids for apps in each user to share */
typedef uid_t userid_t;
@@ -33,9 +35,9 @@ static inline uid_t multiuser_get_uid(userid_t user_id, appid_t app_id)
return (user_id * AID_USER_OFFSET) + (app_id % AID_USER_OFFSET);
}
-static inline gid_t multiuser_get_cache_gid(uid_t uid)
+static inline gid_t multiuser_get_ext_cache_gid(uid_t uid)
{
- return uid - AID_APP_START + AID_CACHE_GID_START;
+ return uid - AID_APP_START + AID_EXT_CACHE_GID_START;
}
static inline gid_t multiuser_get_ext_gid(uid_t uid)
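The renamed helper moves per-app cache GIDs out of the 20000 range into the new 40000-49999 external-cache range, parallel to the existing ext range at 30000. A worked example (AID_APP_START = 10000 is assumed from AOSP convention; only AID_APP_END appears in this hunk):

#include <stdio.h>

#define AID_APP_START           10000
#define AID_CACHE_GID_START     20000	/* old target range */
#define AID_EXT_CACHE_GID_START 40000	/* new target range */

static unsigned int get_ext_cache_gid(unsigned int uid)
{
	return uid - AID_APP_START + AID_EXT_CACHE_GID_START;
}

int main(void)
{
	/* app uid 10123: the old helper yielded gid 20123; the renamed
	 * one yields 40123, inside the external-cache range */
	printf("%u\n", get_ext_cache_gid(10123));
	return 0;
}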
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index b803213d1307..39c75a86c67f 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -108,7 +108,7 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf,
{
const struct sysfs_ops *ops = sysfs_file_ops(of->kn);
struct kobject *kobj = of->kn->parent->priv;
- size_t len;
+ ssize_t len;
/*
* If buf != of->prealloc_buf, we don't know how
@@ -117,13 +117,15 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf,
if (WARN_ON_ONCE(buf != of->prealloc_buf))
return 0;
len = ops->show(kobj, of->kn->priv, buf);
+ if (len < 0)
+ return len;
if (pos) {
if (len <= pos)
return 0;
len -= pos;
memmove(buf, buf + pos, len);
}
- return min(count, len);
+ return min_t(ssize_t, count, len);
}
/* kernfs write callback for regular sysfs files */
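The sysfs fix above addresses sign truncation: ops->show() returns ssize_t, and storing a negative error in a size_t made the later length checks treat it as a huge positive count, silently converting an error into a bogus full-buffer read. A small userspace demonstration of the failure mode (illustrative only):

#include <stdio.h>
#include <sys/types.h>

int main(void)
{
	ssize_t show_ret = -5;		/* ops->show() failed, e.g. -EIO */
	size_t count = 4096;

	size_t len_old = show_ret;	/* old: size_t swallows the sign */
	ssize_t len_new = show_ret;	/* new: ssize_t keeps it */

	/* Old behavior: len_old wraps to a huge positive value, so
	 * "min(count, len)" returns a full-count "read" instead of the
	 * error. */
	printf("old result: %zu\n", count < len_old ? count : len_old);

	/* New behavior: the negative length is detected up front and
	 * returned to the caller. */
	if (len_new < 0)
		printf("new result: %zd (error propagated)\n", len_new);
	return 0;
}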