diff options
Diffstat (limited to 'fs')
93 files changed, 873 insertions, 425 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c index e5733bb537c9..26bbaaefdff4 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -88,12 +88,11 @@ void invalidate_bdev(struct block_device *bdev) { struct address_space *mapping = bdev->bd_inode->i_mapping; - if (mapping->nrpages == 0) - return; - - invalidate_bh_lrus(); - lru_add_drain_all(); /* make sure all lru add caches are flushed */ - invalidate_mapping_pages(mapping, 0, -1); + if (mapping->nrpages) { + invalidate_bh_lrus(); + lru_add_drain_all(); /* make sure all lru add caches are flushed */ + invalidate_mapping_pages(mapping, 0, -1); + } /* 99% of the time, we don't need to flush the cleancache on the bdev. * But, for the strange corners, lets be cautious */ diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2a2e370399ba..c36a03fa7678 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3854,6 +3854,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, info->space_info_kobj, "%s", alloc_name(found->flags)); if (ret) { + percpu_counter_destroy(&found->total_bytes_pinned); kfree(found); return ret; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 353f4bae658c..d4a6eef31854 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2771,7 +2771,7 @@ static long btrfs_fallocate(struct file *file, int mode, if (!ret) ret = btrfs_prealloc_file_range(inode, mode, range->start, - range->len, 1 << inode->i_blkbits, + range->len, i_blocksize(inode), offset + len, &alloc_hint); list_del(&range->list); kfree(range); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3cff6523f27d..863fa0f1972b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7318,8 +7318,8 @@ bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end) int found = false; void **pagep = NULL; struct page *page = NULL; - int start_idx; - int end_idx; + unsigned long start_idx; + unsigned long end_idx; start_idx = start >> PAGE_CACHE_SHIFT; diff --git a/fs/buffer.c b/fs/buffer.c index fc35dd27cc0f..14ce7b24f32a 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2347,7 +2347,7 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping, loff_t pos, loff_t *bytes) { struct inode *inode = mapping->host; - unsigned blocksize = 1 << inode->i_blkbits; + unsigned int blocksize = i_blocksize(inode); struct page *page; void *fsdata; pgoff_t index, curidx; @@ -2427,8 +2427,8 @@ int cont_write_begin(struct file *file, struct address_space *mapping, get_block_t *get_block, loff_t *bytes) { struct inode *inode = mapping->host; - unsigned blocksize = 1 << inode->i_blkbits; - unsigned zerofrom; + unsigned int blocksize = i_blocksize(inode); + unsigned int zerofrom; int err; err = cont_expand_zero(file, mapping, pos, bytes); @@ -2790,7 +2790,7 @@ int nobh_truncate_page(struct address_space *mapping, struct buffer_head map_bh; int err; - blocksize = 1 << inode->i_blkbits; + blocksize = i_blocksize(inode); length = offset & (blocksize - 1); /* Block boundary? Nothing to do */ @@ -2868,7 +2868,7 @@ int block_truncate_page(struct address_space *mapping, struct buffer_head *bh; int err; - blocksize = 1 << inode->i_blkbits; + blocksize = i_blocksize(inode); length = offset & (blocksize - 1); /* Block boundary? Nothing to do */ @@ -2980,7 +2980,7 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block, struct inode *inode = mapping->host; tmp.b_state = 0; tmp.b_blocknr = 0; - tmp.b_size = 1 << inode->i_blkbits; + tmp.b_size = i_blocksize(inode); get_block(inode, block, &tmp, 0); return tmp.b_blocknr; } diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 4d8caeb94a11..bdb9c94335f1 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -128,7 +128,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type) if (new_mode != old_mode) { newattrs.ia_mode = new_mode; newattrs.ia_valid = ATTR_MODE; - ret = ceph_setattr(dentry, &newattrs); + ret = __ceph_setattr(dentry, &newattrs); if (ret) goto out_dput; } @@ -138,7 +138,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type) if (new_mode != old_mode) { newattrs.ia_mode = old_mode; newattrs.ia_valid = ATTR_MODE; - ceph_setattr(dentry, &newattrs); + __ceph_setattr(dentry, &newattrs); } goto out_dput; } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index b7d218a168fb..c6a1ec110c01 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -697,7 +697,7 @@ static int ceph_writepages_start(struct address_space *mapping, struct pagevec pvec; int done = 0; int rc = 0; - unsigned wsize = 1 << inode->i_blkbits; + unsigned int wsize = i_blocksize(inode); struct ceph_osd_request *req = NULL; int do_sync = 0; loff_t snap_size, i_size; diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index d98536c8abfc..9f0d99094cc1 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1773,7 +1773,7 @@ static const struct inode_operations ceph_symlink_iops = { /* * setattr */ -int ceph_setattr(struct dentry *dentry, struct iattr *attr) +int __ceph_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); struct ceph_inode_info *ci = ceph_inode(inode); @@ -1975,11 +1975,6 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) if (inode_dirty_flags) __mark_inode_dirty(inode, inode_dirty_flags); - if (ia_valid & ATTR_MODE) { - err = posix_acl_chmod(inode, attr->ia_mode); - if (err) - goto out_put; - } if (mask) { req->r_inode = inode; @@ -1993,13 +1988,23 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) ceph_cap_string(dirtied), mask); ceph_mdsc_put_request(req); - if (mask & CEPH_SETATTR_SIZE) - __ceph_do_pending_vmtruncate(inode); ceph_free_cap_flush(prealloc_cf); + + if (err >= 0 && (mask & CEPH_SETATTR_SIZE)) + __ceph_do_pending_vmtruncate(inode); + return err; -out_put: - ceph_mdsc_put_request(req); - ceph_free_cap_flush(prealloc_cf); +} + +int ceph_setattr(struct dentry *dentry, struct iattr *attr) +{ + int err; + + err = __ceph_setattr(dentry, attr); + + if (err >= 0 && (attr->ia_valid & ATTR_MODE)) + err = posix_acl_chmod(d_inode(dentry), attr->ia_mode); + return err; } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 75b7d125ce66..8c8cb8fe3d32 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -788,6 +788,7 @@ static inline int ceph_do_getattr(struct inode *inode, int mask, bool force) return __ceph_do_getattr(inode, NULL, mask, force); } extern int ceph_permission(struct inode *inode, int mask); +extern int __ceph_setattr(struct dentry *dentry, struct iattr *attr); extern int ceph_setattr(struct dentry *dentry, struct iattr *attr); extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 819163d8313b..b24275ef97f7 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -369,6 +369,7 @@ static int __set_xattr(struct ceph_inode_info *ci, if (update_xattr) { int err = 0; + if (xattr && (flags & XATTR_CREATE)) err = -EEXIST; else if (!xattr && (flags & XATTR_REPLACE)) @@ -376,12 +377,14 @@ static int __set_xattr(struct ceph_inode_info *ci, if (err) { kfree(name); kfree(val); + kfree(*newxattr); return err; } if (update_xattr < 0) { if (xattr) __remove_xattr(ci, xattr); kfree(name); + kfree(*newxattr); return 0; } } diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 02b071bf3732..a0b3e7d1be48 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -83,6 +83,9 @@ convert_sfm_char(const __u16 src_char, char *target) case SFM_COLON: *target = ':'; break; + case SFM_DOUBLEQUOTE: + *target = '"'; + break; case SFM_ASTERISK: *target = '*'; break; @@ -418,6 +421,9 @@ static __le16 convert_to_sfm_char(char src_char, bool end_of_string) case ':': dest_char = cpu_to_le16(SFM_COLON); break; + case '"': + dest_char = cpu_to_le16(SFM_DOUBLEQUOTE); + break; case '*': dest_char = cpu_to_le16(SFM_ASTERISK); break; diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index 479bc0a941f3..07ade707fa60 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -57,6 +57,7 @@ * not conflict (although almost does) with the mapping above. */ +#define SFM_DOUBLEQUOTE ((__u16) 0xF020) #define SFM_ASTERISK ((__u16) 0xF021) #define SFM_QUESTION ((__u16) 0xF025) #define SFM_COLON ((__u16) 0xF022) @@ -64,8 +65,8 @@ #define SFM_LESSTHAN ((__u16) 0xF023) #define SFM_PIPE ((__u16) 0xF027) #define SFM_SLASH ((__u16) 0xF026) -#define SFM_PERIOD ((__u16) 0xF028) -#define SFM_SPACE ((__u16) 0xF029) +#define SFM_SPACE ((__u16) 0xF028) +#define SFM_PERIOD ((__u16) 0xF029) /* * Mapping mechanism to use when one of the seven reserved characters is diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 5e2f8b8ca08a..b60150e5b5ce 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -717,6 +717,9 @@ CIFSSMBEcho(struct TCP_Server_Info *server) if (rc) return rc; + if (server->capabilities & CAP_UNICODE) + smb->hdr.Flags2 |= SMBFLG2_UNICODE; + /* set up echo request */ smb->hdr.Tid = 0xffff; smb->hdr.WordCount = 1; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 156bc18eac69..53a827c6d8b1 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -412,6 +412,9 @@ cifs_reconnect(struct TCP_Server_Info *server) } } while (server->tcpStatus == CifsNeedReconnect); + if (server->tcpStatus == CifsNeedNegotiate) + mod_delayed_work(cifsiod_wq, &server->echo, 0); + return rc; } @@ -421,18 +424,27 @@ cifs_echo_request(struct work_struct *work) int rc; struct TCP_Server_Info *server = container_of(work, struct TCP_Server_Info, echo.work); + unsigned long echo_interval; + + /* + * If we need to renegotiate, set echo interval to zero to + * immediately call echo service where we can renegotiate. + */ + if (server->tcpStatus == CifsNeedNegotiate) + echo_interval = 0; + else + echo_interval = SMB_ECHO_INTERVAL; /* - * We cannot send an echo if it is disabled or until the - * NEGOTIATE_PROTOCOL request is done, which is indicated by - * server->ops->need_neg() == true. Also, no need to ping if - * we got a response recently. + * We cannot send an echo if it is disabled. + * Also, no need to ping if we got a response recently. */ if (server->tcpStatus == CifsNeedReconnect || - server->tcpStatus == CifsExiting || server->tcpStatus == CifsNew || + server->tcpStatus == CifsExiting || + server->tcpStatus == CifsNew || (server->ops->can_echo && !server->ops->can_echo(server)) || - time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ)) + time_before(jiffies, server->lstrp + echo_interval - HZ)) goto requeue_echo; rc = server->ops->echo ? server->ops->echo(server) : -ENOSYS; diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 35cf990f87d3..a8f5b31636dc 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -272,6 +272,8 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) rc = -EOPNOTSUPP; break; case CIFS_IOC_GET_MNT_INFO: + if (pSMBFile == NULL) + break; tcon = tlink_tcon(pSMBFile->tlink); rc = smb_mnt_get_fsinfo(xid, tcon, (void __user *)arg); break; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 6cb2603f8a5c..f4afa3b1cc56 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -564,8 +564,12 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) } if (rsplen != sizeof(struct validate_negotiate_info_rsp)) { - cifs_dbg(VFS, "invalid size of protocol negotiate response\n"); - return -EIO; + cifs_dbg(VFS, "invalid protocol negotiate response size: %d\n", + rsplen); + + /* relax check since Mac returns max bufsize allowed on ioctl */ + if (rsplen > CIFSMaxBufSize) + return -EIO; } /* check validate negotiate info response matches what we got earlier */ @@ -1518,8 +1522,12 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, * than one credit. Windows typically sets this smaller, but for some * ioctls it may be useful to allow server to send more. No point * limiting what the server can send as long as fits in one credit + * Unfortunately - we can not handle more than CIFS_MAX_MSG_SIZE + * (by default, note that it can be overridden to make max larger) + * in responses (except for read responses which can be bigger. + * We may want to bump this limit up */ - req->MaxOutputResponse = cpu_to_le32(0xFF00); /* < 64K uses 1 credit */ + req->MaxOutputResponse = cpu_to_le32(CIFSMaxBufSize); if (is_fsctl) req->Flags = cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL); diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index ec5c8325b503..0525ebc3aea2 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -83,14 +83,13 @@ static int create_link(struct config_item *parent_item, ret = -ENOMEM; sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL); if (sl) { - sl->sl_target = config_item_get(item); spin_lock(&configfs_dirent_lock); if (target_sd->s_type & CONFIGFS_USET_DROPPING) { spin_unlock(&configfs_dirent_lock); - config_item_put(item); kfree(sl); return -ENOENT; } + sl->sl_target = config_item_get(item); list_add(&sl->sl_list, &target_sd->s_links); spin_unlock(&configfs_dirent_lock); ret = configfs_create_link(sl, parent_item->ci_dentry, diff --git a/fs/direct-io.c b/fs/direct-io.c index 0f1517d0b969..7cdf8ad1bb5b 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -589,7 +589,7 @@ static int dio_set_defer_completion(struct dio *dio) /* * Call into the fs to map some more disk blocks. We record the current number * of available blocks at sdio->blocks_available. These are in units of the - * fs blocksize, (1 << inode->i_blkbits). + * fs blocksize, i_blocksize(inode). * * The fs is allowed to map lots of blocks at once. If it wants to do that, * it uses the passed inode-relative block number as the file offset, as usual. diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index e14b1b8fceb0..b9f838af5a72 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -271,7 +271,7 @@ static int ext4_page_crypto(struct inode *inode, struct crypto_ablkcipher *tfm = ci->ci_ctfm; int res = 0; - req = ablkcipher_request_alloc(tfm, GFP_NOFS); + req = ablkcipher_request_alloc(tfm, gfp_flags); if (!req) { printk_ratelimited(KERN_ERR "%s: crypto_request_alloc() failed\n", diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c index e2645ca9b95e..026716bdbbfc 100644 --- a/fs/ext4/crypto_fname.c +++ b/fs/ext4/crypto_fname.c @@ -344,7 +344,7 @@ int _ext4_fname_disk_to_usr(struct inode *inode, memcpy(buf+4, &hinfo->minor_hash, 4); } else memset(buf, 0, 8); - memcpy(buf + 8, iname->name + iname->len - 16, 16); + memcpy(buf + 8, iname->name + ((iname->len - 17) & ~15), 16); oname->name[0] = '_'; ret = digest_encode(buf, 24, oname->name+1); oname->len = ret + 1; diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c index dd561f916f0b..e4f4fc4e56ab 100644 --- a/fs/ext4/crypto_policy.c +++ b/fs/ext4/crypto_policy.c @@ -148,26 +148,38 @@ int ext4_get_policy(struct inode *inode, struct ext4_encryption_policy *policy) int ext4_is_child_context_consistent_with_parent(struct inode *parent, struct inode *child) { - struct ext4_crypt_info *parent_ci, *child_ci; + const struct ext4_crypt_info *parent_ci, *child_ci; + struct ext4_encryption_context parent_ctx, child_ctx; int res; - if ((parent == NULL) || (child == NULL)) { - pr_err("parent %p child %p\n", parent, child); - WARN_ON(1); /* Should never happen */ - return 0; - } - /* No restrictions on file types which are never encrypted */ if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) && !S_ISLNK(child->i_mode)) return 1; - /* no restrictions if the parent directory is not encrypted */ + /* No restrictions if the parent directory is unencrypted */ if (!ext4_encrypted_inode(parent)) return 1; - /* if the child directory is not encrypted, this is always a problem */ + + /* Encrypted directories must not contain unencrypted files */ if (!ext4_encrypted_inode(child)) return 0; + + /* + * Both parent and child are encrypted, so verify they use the same + * encryption policy. Compare the fscrypt_info structs if the keys are + * available, otherwise retrieve and compare the fscrypt_contexts. + * + * Note that the fscrypt_context retrieval will be required frequently + * when accessing an encrypted directory tree without the key. + * Performance-wise this is not a big deal because we already don't + * really optimize for file access without the key (to the extent that + * such access is even possible), given that any attempted access + * already causes a fscrypt_context retrieval and keyring search. + * + * In any case, if an unexpected error occurs, fall back to "forbidden". + */ + res = ext4_get_encryption_info(parent); if (res) return 0; @@ -176,17 +188,35 @@ int ext4_is_child_context_consistent_with_parent(struct inode *parent, return 0; parent_ci = EXT4_I(parent)->i_crypt_info; child_ci = EXT4_I(child)->i_crypt_info; - if (!parent_ci && !child_ci) - return 1; - if (!parent_ci || !child_ci) + if (parent_ci && child_ci) { + return memcmp(parent_ci->ci_master_key, child_ci->ci_master_key, + EXT4_KEY_DESCRIPTOR_SIZE) == 0 && + (parent_ci->ci_data_mode == child_ci->ci_data_mode) && + (parent_ci->ci_filename_mode == + child_ci->ci_filename_mode) && + (parent_ci->ci_flags == child_ci->ci_flags); + } + + res = ext4_xattr_get(parent, EXT4_XATTR_INDEX_ENCRYPTION, + EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, + &parent_ctx, sizeof(parent_ctx)); + if (res != sizeof(parent_ctx)) + return 0; + + res = ext4_xattr_get(child, EXT4_XATTR_INDEX_ENCRYPTION, + EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, + &child_ctx, sizeof(child_ctx)); + if (res != sizeof(child_ctx)) return 0; - return (memcmp(parent_ci->ci_master_key, - child_ci->ci_master_key, - EXT4_KEY_DESCRIPTOR_SIZE) == 0 && - (parent_ci->ci_data_mode == child_ci->ci_data_mode) && - (parent_ci->ci_filename_mode == child_ci->ci_filename_mode) && - (parent_ci->ci_flags == child_ci->ci_flags)); + return memcmp(parent_ctx.master_key_descriptor, + child_ctx.master_key_descriptor, + EXT4_KEY_DESCRIPTOR_SIZE) == 0 && + (parent_ctx.contents_encryption_mode == + child_ctx.contents_encryption_mode) && + (parent_ctx.filenames_encryption_mode == + child_ctx.filenames_encryption_mode) && + (parent_ctx.flags == child_ctx.flags); } /** diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 8a456f9b8a44..61d5bfc7318c 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4902,6 +4902,8 @@ static long ext4_zero_range(struct file *file, loff_t offset, /* Zero out partial block at the edges of the range */ ret = ext4_zero_partial_blocks(handle, inode, offset, len); + if (ret >= 0) + ext4_update_inode_fsync_trans(handle, inode, 1); if (file->f_flags & O_SYNC) ext4_handle_sync(handle); @@ -5597,6 +5599,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ext4_handle_sync(handle); inode->i_mtime = inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); + ext4_update_inode_fsync_trans(handle, inode, 1); out_stop: ext4_journal_stop(handle); @@ -5770,6 +5773,8 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) up_write(&EXT4_I(inode)->i_data_sem); if (IS_SYNC(inode)) ext4_handle_sync(handle); + if (ret >= 0) + ext4_update_inode_fsync_trans(handle, inode, 1); out_stop: ext4_journal_stop(handle); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 0d24ebcd7c9e..8772bfc3415b 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -463,47 +463,27 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, num = min_t(pgoff_t, end - index, PAGEVEC_SIZE); nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, (pgoff_t)num); - if (nr_pages == 0) { - if (whence == SEEK_DATA) - break; - - BUG_ON(whence != SEEK_HOLE); - /* - * If this is the first time to go into the loop and - * offset is not beyond the end offset, it will be a - * hole at this offset - */ - if (lastoff == startoff || lastoff < endoff) - found = 1; + if (nr_pages == 0) break; - } - - /* - * If this is the first time to go into the loop and - * offset is smaller than the first page offset, it will be a - * hole at this offset. - */ - if (lastoff == startoff && whence == SEEK_HOLE && - lastoff < page_offset(pvec.pages[0])) { - found = 1; - break; - } for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; struct buffer_head *bh, *head; /* - * If the current offset is not beyond the end of given - * range, it will be a hole. + * If current offset is smaller than the page offset, + * there is a hole at this offset. */ - if (lastoff < endoff && whence == SEEK_HOLE && - page->index > end) { + if (whence == SEEK_HOLE && lastoff < endoff && + lastoff < page_offset(pvec.pages[i])) { found = 1; *offset = lastoff; goto out; } + if (page->index > end) + goto out; + lock_page(page); if (unlikely(page->mapping != inode->i_mapping)) { @@ -543,20 +523,18 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, unlock_page(page); } - /* - * The no. of pages is less than our desired, that would be a - * hole in there. - */ - if (nr_pages < num && whence == SEEK_HOLE) { - found = 1; - *offset = lastoff; + /* The no. of pages is less than our desired, we are done. */ + if (nr_pages < num) break; - } index = pvec.pages[i - 1]->index + 1; pagevec_release(&pvec); } while (index <= end); + if (whence == SEEK_HOLE && lastoff < endoff) { + found = 1; + *offset = lastoff; + } out: pagevec_release(&pvec); return found; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4caa0c1f77d8..46d30bc18c0d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -660,6 +660,20 @@ has_zeroout: ret = check_block_validity(inode, map); if (ret != 0) return ret; + + /* + * Inodes with freshly allocated blocks where contents will be + * visible after transaction commit must be on transaction's + * ordered data list. + */ + if (map->m_flags & EXT4_MAP_NEW && + !(map->m_flags & EXT4_MAP_UNWRITTEN) && + !IS_NOQUOTA(inode) && + ext4_should_order_data(inode)) { + ret = ext4_jbd2_file_inode(handle, inode); + if (ret) + return ret; + } } return retval; } @@ -1166,15 +1180,6 @@ static int ext4_write_end(struct file *file, trace_android_fs_datawrite_end(inode, pos, len); trace_ext4_write_end(inode, pos, len, copied); - if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) { - ret = ext4_jbd2_file_inode(handle, inode); - if (ret) { - unlock_page(page); - page_cache_release(page); - goto errout; - } - } - if (ext4_has_inline_data(inode)) { ret = ext4_write_inline_data_end(inode, pos, len, copied, page); @@ -2059,7 +2064,7 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd, { struct inode *inode = mpd->inode; int err; - ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) + ext4_lblk_t blocks = (i_size_read(inode) + i_blocksize(inode) - 1) >> inode->i_blkbits; do { @@ -3855,6 +3860,8 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) inode->i_mtime = inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); + if (ret >= 0) + ext4_update_inode_fsync_trans(handle, inode, 1); out_stop: ext4_journal_stop(handle); out_dio: @@ -5230,8 +5237,9 @@ static int ext4_expand_extra_isize(struct inode *inode, /* No extended attributes present */ if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { - memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, - new_extra_isize); + memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + + EXT4_I(inode)->i_extra_isize, 0, + new_extra_isize - EXT4_I(inode)->i_extra_isize); EXT4_I(inode)->i_extra_isize = new_extra_isize; return 0; } @@ -5461,6 +5469,11 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) file_update_time(vma->vm_file); down_read(&EXT4_I(inode)->i_mmap_sem); + + ret = ext4_convert_inline_data(inode); + if (ret) + goto out_ret; + /* Delalloc case is easy... */ if (test_opt(inode->i_sb, DELALLOC) && !ext4_should_journal_data(inode) && diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index c21826be1cb3..3a2594665b44 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -626,9 +626,6 @@ resizefs_out: struct ext4_encryption_policy policy; int err = 0; - if (!ext4_has_feature_encrypt(sb)) - return -EOPNOTSUPP; - if (copy_from_user(&policy, (struct ext4_encryption_policy __user *)arg, sizeof(policy))) { diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 7861d801b048..05048fcfd602 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -187,7 +187,7 @@ mext_page_mkuptodate(struct page *page, unsigned from, unsigned to) if (PageUptodate(page)) return 0; - blocksize = 1 << inode->i_blkbits; + blocksize = i_blocksize(inode); if (!page_has_buffers(page)) create_empty_buffers(page, blocksize, 0); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index b936da9d3d0c..8a196e9b0bf3 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1243,9 +1243,9 @@ static inline int ext4_match(struct ext4_filename *fname, if (unlikely(!name)) { if (fname->usr_fname->name[0] == '_') { int ret; - if (de->name_len < 16) + if (de->name_len <= 32) return 0; - ret = memcmp(de->name + de->name_len - 16, + ret = memcmp(de->name + ((de->name_len - 17) & ~15), fname->crypto_buf.name + 8, 16); return (ret == 0) ? 1 : 0; } diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 1a6a8ca4de3a..978141e8b800 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -497,7 +497,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, if (IS_ERR(data_page)) { ret = PTR_ERR(data_page); - if (ret == ENOMEM && wbc->sync_mode == WB_SYNC_ALL) { + if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) { if (io->io_bio) { ext4_io_submit(io); congestion_wait(BLK_RW_ASYNC, HZ/50); diff --git a/fs/f2fs/crypto_fname.c b/fs/f2fs/crypto_fname.c index ab377d496a39..38349ed5ea51 100644 --- a/fs/f2fs/crypto_fname.c +++ b/fs/f2fs/crypto_fname.c @@ -333,7 +333,7 @@ int f2fs_fname_disk_to_usr(struct inode *inode, memset(buf + 4, 0, 4); } else memset(buf, 0, 8); - memcpy(buf + 8, iname->name + iname->len - 16, 16); + memcpy(buf + 8, iname->name + ((iname->len - 17) & ~15), 16); oname->name[0] = '_'; ret = digest_encode(buf, 24, oname->name + 1); oname->len = ret + 1; diff --git a/fs/f2fs/crypto_policy.c b/fs/f2fs/crypto_policy.c index 5bbd1989d5e6..884f3f0fe29d 100644 --- a/fs/f2fs/crypto_policy.c +++ b/fs/f2fs/crypto_policy.c @@ -141,25 +141,38 @@ int f2fs_get_policy(struct inode *inode, struct f2fs_encryption_policy *policy) int f2fs_is_child_context_consistent_with_parent(struct inode *parent, struct inode *child) { - struct f2fs_crypt_info *parent_ci, *child_ci; + const struct f2fs_crypt_info *parent_ci, *child_ci; + struct f2fs_encryption_context parent_ctx, child_ctx; int res; - if ((parent == NULL) || (child == NULL)) { - pr_err("parent %p child %p\n", parent, child); - BUG_ON(1); - } - /* No restrictions on file types which are never encrypted */ if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) && !S_ISLNK(child->i_mode)) return 1; - /* no restrictions if the parent directory is not encrypted */ + /* No restrictions if the parent directory is unencrypted */ if (!f2fs_encrypted_inode(parent)) return 1; - /* if the child directory is not encrypted, this is always a problem */ + + /* Encrypted directories must not contain unencrypted files */ if (!f2fs_encrypted_inode(child)) return 0; + + /* + * Both parent and child are encrypted, so verify they use the same + * encryption policy. Compare the fscrypt_info structs if the keys are + * available, otherwise retrieve and compare the fscrypt_contexts. + * + * Note that the fscrypt_context retrieval will be required frequently + * when accessing an encrypted directory tree without the key. + * Performance-wise this is not a big deal because we already don't + * really optimize for file access without the key (to the extent that + * such access is even possible), given that any attempted access + * already causes a fscrypt_context retrieval and keyring search. + * + * In any case, if an unexpected error occurs, fall back to "forbidden". + */ + res = f2fs_get_encryption_info(parent); if (res) return 0; @@ -168,17 +181,35 @@ int f2fs_is_child_context_consistent_with_parent(struct inode *parent, return 0; parent_ci = F2FS_I(parent)->i_crypt_info; child_ci = F2FS_I(child)->i_crypt_info; - if (!parent_ci && !child_ci) - return 1; - if (!parent_ci || !child_ci) + if (parent_ci && child_ci) { + return memcmp(parent_ci->ci_master_key, child_ci->ci_master_key, + F2FS_KEY_DESCRIPTOR_SIZE) == 0 && + (parent_ci->ci_data_mode == child_ci->ci_data_mode) && + (parent_ci->ci_filename_mode == + child_ci->ci_filename_mode) && + (parent_ci->ci_flags == child_ci->ci_flags); + } + + res = f2fs_getxattr(parent, F2FS_XATTR_INDEX_ENCRYPTION, + F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, + &parent_ctx, sizeof(parent_ctx), NULL); + if (res != sizeof(parent_ctx)) + return 0; + + res = f2fs_getxattr(child, F2FS_XATTR_INDEX_ENCRYPTION, + F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, + &child_ctx, sizeof(child_ctx), NULL); + if (res != sizeof(child_ctx)) return 0; - return (memcmp(parent_ci->ci_master_key, - child_ci->ci_master_key, - F2FS_KEY_DESCRIPTOR_SIZE) == 0 && - (parent_ci->ci_data_mode == child_ci->ci_data_mode) && - (parent_ci->ci_filename_mode == child_ci->ci_filename_mode) && - (parent_ci->ci_flags == child_ci->ci_flags)); + return memcmp(parent_ctx.master_key_descriptor, + child_ctx.master_key_descriptor, + F2FS_KEY_DESCRIPTOR_SIZE) == 0 && + (parent_ctx.contents_encryption_mode == + child_ctx.contents_encryption_mode) && + (parent_ctx.filenames_encryption_mode == + child_ctx.filenames_encryption_mode) && + (parent_ctx.flags == child_ctx.flags); } /** diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 7c1678ba8f92..60972a559685 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -124,19 +124,29 @@ struct f2fs_dir_entry *find_target_dentry(struct f2fs_filename *fname, de = &d->dentry[bit_pos]; - /* encrypted case */ + if (de->hash_code != namehash) + goto not_match; + de_name.name = d->filename[bit_pos]; de_name.len = le16_to_cpu(de->name_len); - /* show encrypted name */ - if (fname->hash) { - if (de->hash_code == fname->hash) - goto found; - } else if (de_name.len == name->len && - de->hash_code == namehash && - !memcmp(de_name.name, name->name, name->len)) +#ifdef CONFIG_F2FS_FS_ENCRYPTION + if (unlikely(!name->name)) { + if (fname->usr_fname->name[0] == '_') { + if (de_name.len > 32 && + !memcmp(de_name.name + ((de_name.len - 17) & ~15), + fname->crypto_buf.name + 8, 16)) + goto found; + goto not_match; + } + name->name = fname->crypto_buf.name; + name->len = fname->crypto_buf.len; + } +#endif + if (de_name.len == name->len && + !memcmp(de_name.name, name->name, name->len)) goto found; - +not_match: if (max_slots && max_len > *max_slots) *max_slots = max_len; max_len = 0; @@ -170,7 +180,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, int max_slots; f2fs_hash_t namehash; - namehash = f2fs_dentry_hash(&name); + namehash = f2fs_dentry_hash(&name, fname); f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH); @@ -547,7 +557,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, level = 0; slots = GET_DENTRY_SLOTS(new_name.len); - dentry_hash = f2fs_dentry_hash(&new_name); + dentry_hash = f2fs_dentry_hash(&new_name, NULL); current_depth = F2FS_I(dir)->i_current_depth; if (F2FS_I(dir)->chash == dentry_hash) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3c7594b9d109..9dfbfe6dc775 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1723,7 +1723,8 @@ void f2fs_msg(struct super_block *, const char *, const char *, ...); /* * hash.c */ -f2fs_hash_t f2fs_dentry_hash(const struct qstr *); +f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info, + struct f2fs_filename *fname); /* * node.c diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index 71b7206c431e..b238d2fec3e5 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -70,7 +70,8 @@ static void str2hashbuf(const unsigned char *msg, size_t len, *buf++ = pad; } -f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info) +f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info, + struct f2fs_filename *fname) { __u32 hash; f2fs_hash_t f2fs_hash; @@ -79,6 +80,10 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info) const unsigned char *name = name_info->name; size_t len = name_info->len; + /* encrypted bigname case */ + if (fname && !fname->disk_name.name) + return cpu_to_le32(fname->hash); + if (is_dot_dotdot(name_info)) return 0; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index dbb2cc4df603..f35f3eb3541f 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -321,7 +321,7 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, if (IS_ERR(ipage)) return NULL; - namehash = f2fs_dentry_hash(&name); + namehash = f2fs_dentry_hash(&name, fname); inline_dentry = inline_data_addr(ipage); @@ -486,7 +486,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, f2fs_wait_on_page_writeback(ipage, NODE); - name_hash = f2fs_dentry_hash(name); + name_hash = f2fs_dentry_hash(name, NULL); make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2); f2fs_update_dentry(ino, mode, &d, name, name_hash, bit_pos); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f77b3258454a..7965957dd0e6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1623,6 +1623,10 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) static int restore_curseg_summaries(struct f2fs_sb_info *sbi) { + struct f2fs_summary_block *s_sits = + CURSEG_I(sbi, CURSEG_COLD_DATA)->sum_blk; + struct f2fs_summary_block *s_nats = + CURSEG_I(sbi, CURSEG_HOT_DATA)->sum_blk; int type = CURSEG_HOT_DATA; int err; @@ -1649,6 +1653,11 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) return err; } + /* sanity check for summary blocks */ + if (nats_in_cursum(s_nats) > NAT_JOURNAL_ENTRIES || + sits_in_cursum(s_sits) > SIT_JOURNAL_ENTRIES) + return -EINVAL; + return 0; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ba5e702b19b4..5d3e745d33ae 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1078,6 +1078,8 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi) unsigned int total, fsmeta; struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + unsigned int main_segs, blocks_per_seg; + int i; total = le32_to_cpu(raw_super->segment_count); fsmeta = le32_to_cpu(raw_super->segment_count_ckpt); @@ -1089,6 +1091,22 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi) if (unlikely(fsmeta >= total)) return 1; + main_segs = le32_to_cpu(sbi->raw_super->segment_count_main); + blocks_per_seg = sbi->blocks_per_seg; + + for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { + if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs || + le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg) { + return 1; + } + } + for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) { + if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs || + le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg) { + return 1; + } + } + if (unlikely(f2fs_cp_error(sbi))) { f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); return 1; diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 4304072161aa..40d61077bead 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -542,6 +542,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate) hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) { if (invalidate) set_bit(FSCACHE_OBJECT_RETIRED, &object->flags); + clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); fscache_raise_event(object, FSCACHE_OBJECT_EV_KILL); } } else { @@ -560,6 +561,10 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate) wait_on_atomic_t(&cookie->n_active, fscache_wait_atomic_t, TASK_UNINTERRUPTIBLE); + /* Make sure any pending writes are cancelled. */ + if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) + fscache_invalidate_writes(cookie); + /* Reset the cookie state if it wasn't relinquished */ if (!test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags)) { atomic_inc(&cookie->n_active); diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c index 9b28649df3a1..a8aa00be4444 100644 --- a/fs/fscache/netfs.c +++ b/fs/fscache/netfs.c @@ -48,6 +48,7 @@ int __fscache_register_netfs(struct fscache_netfs *netfs) cookie->flags = 1 << FSCACHE_COOKIE_ENABLED; spin_lock_init(&cookie->lock); + spin_lock_init(&cookie->stores_lock); INIT_HLIST_HEAD(&cookie->backing_objects); /* check the netfs type is not already present */ diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 9e792e30f4db..7a182c87f378 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -30,6 +30,7 @@ static const struct fscache_state *fscache_look_up_object(struct fscache_object static const struct fscache_state *fscache_object_available(struct fscache_object *, int); static const struct fscache_state *fscache_parent_ready(struct fscache_object *, int); static const struct fscache_state *fscache_update_object(struct fscache_object *, int); +static const struct fscache_state *fscache_object_dead(struct fscache_object *, int); #define __STATE_NAME(n) fscache_osm_##n #define STATE(n) (&__STATE_NAME(n)) @@ -91,7 +92,7 @@ static WORK_STATE(LOOKUP_FAILURE, "LCFL", fscache_lookup_failure); static WORK_STATE(KILL_OBJECT, "KILL", fscache_kill_object); static WORK_STATE(KILL_DEPENDENTS, "KDEP", fscache_kill_dependents); static WORK_STATE(DROP_OBJECT, "DROP", fscache_drop_object); -static WORK_STATE(OBJECT_DEAD, "DEAD", (void*)2UL); +static WORK_STATE(OBJECT_DEAD, "DEAD", fscache_object_dead); static WAIT_STATE(WAIT_FOR_INIT, "?INI", TRANSIT_TO(INIT_OBJECT, 1 << FSCACHE_OBJECT_EV_NEW_CHILD)); @@ -229,6 +230,10 @@ execute_work_state: event = -1; if (new_state == NO_TRANSIT) { _debug("{OBJ%x} %s notrans", object->debug_id, state->name); + if (unlikely(state == STATE(OBJECT_DEAD))) { + _leave(" [dead]"); + return; + } fscache_enqueue_object(object); event_mask = object->oob_event_mask; goto unmask_events; @@ -239,7 +244,7 @@ execute_work_state: object->state = state = new_state; if (state->work) { - if (unlikely(state->work == ((void *)2UL))) { + if (unlikely(state == STATE(OBJECT_DEAD))) { _leave(" [dead]"); return; } @@ -645,6 +650,12 @@ static const struct fscache_state *fscache_kill_object(struct fscache_object *ob fscache_mark_object_dead(object); object->oob_event_mask = 0; + if (test_bit(FSCACHE_OBJECT_RETIRED, &object->flags)) { + /* Reject any new read/write ops and abort any that are pending. */ + clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); + fscache_cancel_all_ops(object); + } + if (list_empty(&object->dependents) && object->n_ops == 0 && object->n_children == 0) @@ -1077,3 +1088,20 @@ void fscache_object_mark_killed(struct fscache_object *object, } } EXPORT_SYMBOL(fscache_object_mark_killed); + +/* + * The object is dead. We can get here if an object gets queued by an event + * that would lead to its death (such as EV_KILL) when the dispatcher is + * already running (and so can be requeued) but hasn't yet cleared the event + * mask. + */ +static const struct fscache_state *fscache_object_dead(struct fscache_object *object, + int event) +{ + if (!test_and_set_bit(FSCACHE_OBJECT_RUN_AFTER_DEAD, + &object->flags)) + return NO_TRANSIT; + + WARN(true, "FS-Cache object redispatched after death"); + return NO_TRANSIT; +} diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 595ebdb41846..a17da8b57fc6 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -191,7 +191,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, addr = ALIGN(addr, huge_page_size(h)); vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 8f9176caf098..c8d58c5ac8ae 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -758,7 +758,7 @@ static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data, sb->s_blocksize - offset : toread; tmp_bh.b_state = 0; - tmp_bh.b_size = 1 << inode->i_blkbits; + tmp_bh.b_size = i_blocksize(inode); err = jfs_get_block(inode, blk, &tmp_bh, 0); if (err) return err; @@ -798,7 +798,7 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type, sb->s_blocksize - offset : towrite; tmp_bh.b_state = 0; - tmp_bh.b_size = 1 << inode->i_blkbits; + tmp_bh.b_size = i_blocksize(inode); err = jfs_get_block(inode, blk, &tmp_bh, 1); if (err) goto out; diff --git a/fs/mbcache.c b/fs/mbcache.c index ab1da987d1ae..de509271d031 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -222,8 +222,19 @@ __mb_cache_entry_release(struct mb_cache_entry *ce) * then reacquire the lock in the proper order. */ spin_lock(&mb_cache_spinlock); - if (list_empty(&ce->e_lru_list)) - list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); + /* + * Evaluate the conditions under global lock mb_cache_spinlock, + * to check if mb_cache_entry_get() is running now + * and has already deleted the entry from mb_cache_lru_list + * and incremented ce->e_refcnt to prevent further additions + * to mb_cache_lru_list. + */ + if (!(ce->e_used || ce->e_queued || + atomic_read(&ce->e_refcnt))) { + if (list_empty(&ce->e_lru_list)) + list_add_tail(&ce->e_lru_list, + &mb_cache_lru_list); + } spin_unlock(&mb_cache_spinlock); } __spin_unlock_mb_cache_entry(ce); diff --git a/fs/mpage.c b/fs/mpage.c index 0fd48fdcc1b1..f37bb01a333b 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -147,7 +147,7 @@ map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block) SetPageUptodate(page); return; } - create_empty_buffers(page, 1 << inode->i_blkbits, 0); + create_empty_buffers(page, i_blocksize(inode), 0); } head = page_buffers(page); page_bh = head; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 52ee0b73ab4a..5b21b1ca2341 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2421,6 +2421,20 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags) } EXPORT_SYMBOL_GPL(nfs_may_open); +static int nfs_execute_ok(struct inode *inode, int mask) +{ + struct nfs_server *server = NFS_SERVER(inode); + int ret; + + if (mask & MAY_NOT_BLOCK) + ret = nfs_revalidate_inode_rcu(server, inode); + else + ret = nfs_revalidate_inode(server, inode); + if (ret == 0 && !execute_ok(inode)) + ret = -EACCES; + return ret; +} + int nfs_permission(struct inode *inode, int mask) { struct rpc_cred *cred; @@ -2438,6 +2452,9 @@ int nfs_permission(struct inode *inode, int mask) case S_IFLNK: goto out; case S_IFREG: + if ((mask & MAY_OPEN) && + nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN)) + return 0; break; case S_IFDIR: /* @@ -2470,8 +2487,8 @@ force_lookup: res = PTR_ERR(cred); } out: - if (!res && (mask & MAY_EXEC) && !execute_ok(inode)) - res = -EACCES; + if (!res && (mask & MAY_EXEC)) + res = nfs_execute_ok(inode, mask); dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n", inode->i_sb->s_id, inode->i_ino, mask, res); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 82dc3035ea45..e8d1d6c5000c 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1072,6 +1072,7 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) case -NFS4ERR_BADXDR: case -NFS4ERR_RESOURCE: case -NFS4ERR_NOFILEHANDLE: + case -NFS4ERR_MOVED: /* Non-seqid mutating errors */ return; }; diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index c29d9421bd5e..0976f8dad4ce 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -50,7 +50,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, { struct nfsd4_layout_seg *seg = &args->lg_seg; struct super_block *sb = inode->i_sb; - u32 block_size = (1 << inode->i_blkbits); + u32 block_size = i_blocksize(inode); struct pnfs_block_extent *bex; struct iomap iomap; u32 device_generation = 0; @@ -151,7 +151,7 @@ nfsd4_block_proc_layoutcommit(struct inode *inode, int error; nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout, - lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits); + lcp->lc_up_len, &iomaps, i_blocksize(inode)); if (nr_iomaps < 0) return nfserrno(nr_iomaps); diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 7d5351cd67fb..209dbfc50cd4 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1690,6 +1690,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, opdesc->op_get_currentstateid(cstate, &op->u); op->status = opdesc->op_func(rqstp, cstate, &op->u); + /* Only from SEQUENCE */ + if (cstate->status == nfserr_replay_cache) { + dprintk("%s NFS4.1 replay from cache\n", __func__); + status = op->status; + goto out; + } if (!op->status) { if (opdesc->op_set_currentstateid) opdesc->op_set_currentstateid(cstate, &op->u); @@ -1700,14 +1706,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, if (need_wrongsec_check(rqstp)) op->status = check_nfsd_access(current_fh->fh_export, rqstp); } - encode_op: - /* Only from SEQUENCE */ - if (cstate->status == nfserr_replay_cache) { - dprintk("%s NFS4.1 replay from cache\n", __func__); - status = op->status; - goto out; - } if (op->status == nfserr_replay_me) { op->replay = &cstate->replay_owner->so_replay; nfsd4_encode_replay(&resp->xdr, op); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 12935209deca..3f68a25f2169 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2753,9 +2753,16 @@ out_acl: } #endif /* CONFIG_NFSD_PNFS */ if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { - status = nfsd4_encode_bitmap(xdr, NFSD_SUPPATTR_EXCLCREAT_WORD0, - NFSD_SUPPATTR_EXCLCREAT_WORD1, - NFSD_SUPPATTR_EXCLCREAT_WORD2); + u32 supp[3]; + + supp[0] = nfsd_suppattrs0(minorversion); + supp[1] = nfsd_suppattrs1(minorversion); + supp[2] = nfsd_suppattrs2(minorversion); + supp[0] &= NFSD_SUPPATTR_EXCLCREAT_WORD0; + supp[1] &= NFSD_SUPPATTR_EXCLCREAT_WORD1; + supp[2] &= NFSD_SUPPATTR_EXCLCREAT_WORD2; + + status = nfsd4_encode_bitmap(xdr, supp[0], supp[1], supp[2]); if (status) goto out; } @@ -4041,8 +4048,7 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getdeviceinfo *gdev) { struct xdr_stream *xdr = &resp->xdr; - const struct nfsd4_layout_ops *ops = - nfsd4_layout_ops[gdev->gd_layout_type]; + const struct nfsd4_layout_ops *ops; u32 starting_len = xdr->buf->len, needed_len; __be32 *p; @@ -4059,6 +4065,7 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, /* If maxcount is 0 then just update notifications */ if (gdev->gd_maxcount != 0) { + ops = nfsd4_layout_ops[gdev->gd_layout_type]; nfserr = ops->encode_getdeviceinfo(xdr, gdev); if (nfserr) { /* @@ -4111,8 +4118,7 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_layoutget *lgp) { struct xdr_stream *xdr = &resp->xdr; - const struct nfsd4_layout_ops *ops = - nfsd4_layout_ops[lgp->lg_layout_type]; + const struct nfsd4_layout_ops *ops; __be32 *p; dprintk("%s: err %d\n", __func__, nfserr); @@ -4135,6 +4141,7 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, *p++ = cpu_to_be32(lgp->lg_seg.iomode); *p++ = cpu_to_be32(lgp->lg_layout_type); + ops = nfsd4_layout_ops[lgp->lg_layout_type]; nfserr = ops->encode_layoutget(xdr, lgp); out: kfree(lgp->lg_content); diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index a35ae35e6932..cd39b57288c2 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -55,7 +55,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) brelse(bh); BUG(); } - memset(bh->b_data, 0, 1 << inode->i_blkbits); + memset(bh->b_data, 0, i_blocksize(inode)); bh->b_bdev = inode->i_sb->s_bdev; bh->b_blocknr = blocknr; set_buffer_mapped(bh); diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index ac2f64943ff4..00877ef0b120 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -55,7 +55,7 @@ void nilfs_inode_add_blocks(struct inode *inode, int n) { struct nilfs_root *root = NILFS_I(inode)->i_root; - inode_add_bytes(inode, (1 << inode->i_blkbits) * n); + inode_add_bytes(inode, i_blocksize(inode) * n); if (root) atomic64_add(n, &root->blocks_count); } @@ -64,7 +64,7 @@ void nilfs_inode_sub_blocks(struct inode *inode, int n) { struct nilfs_root *root = NILFS_I(inode)->i_root; - inode_sub_bytes(inode, (1 << inode->i_blkbits) * n); + inode_sub_bytes(inode, i_blocksize(inode) * n); if (root) atomic64_sub(n, &root->blocks_count); } diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 1125f40233ff..612a2457243d 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -60,7 +60,7 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, set_buffer_mapped(bh); kaddr = kmap_atomic(bh->b_page); - memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits); + memset(kaddr + bh_offset(bh), 0, i_blocksize(inode)); if (init_block) init_block(inode, bh, kaddr); flush_dcache_page(bh->b_page); @@ -503,7 +503,7 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size, struct nilfs_mdt_info *mi = NILFS_MDT(inode); mi->mi_entry_size = entry_size; - mi->mi_entries_per_block = (1 << inode->i_blkbits) / entry_size; + mi->mi_entries_per_block = i_blocksize(inode) / entry_size; mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size); } diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 3b65adaae7e4..2f27c935bd57 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -719,7 +719,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, lock_page(page); if (!page_has_buffers(page)) - create_empty_buffers(page, 1 << inode->i_blkbits, 0); + create_empty_buffers(page, i_blocksize(inode), 0); unlock_page(page); bh = head = page_buffers(page); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index e6795c7c76a8..e4184bd2a954 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1103,7 +1103,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, int ret = 0; struct buffer_head *head, *bh, *wait[2], **wait_bh = wait; unsigned int block_end, block_start; - unsigned int bsize = 1 << inode->i_blkbits; + unsigned int bsize = i_blocksize(inode); if (!page_has_buffers(page)) create_empty_buffers(page, bsize, 0); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 56dd3957cc91..1d738723a41a 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -808,7 +808,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, /* We know that zero_from is block aligned */ for (block_start = zero_from; block_start < zero_to; block_start = block_end) { - block_end = block_start + (1 << inode->i_blkbits); + block_end = block_start + i_blocksize(inode); /* * block_start is block-aligned. Bump it by one to force diff --git a/fs/pnode.c b/fs/pnode.c index b5f97c605d98..e4e428d621e9 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -504,9 +504,14 @@ static struct mount *next_descendent(struct mount *root, struct mount *cur) if (!IS_MNT_NEW(cur) && !list_empty(&cur->mnt_slave_list)) return first_slave(cur); do { - if (cur->mnt_slave.next != &cur->mnt_master->mnt_slave_list) - return next_slave(cur); - cur = cur->mnt_master; + struct mount *master = cur->mnt_master; + + if (!master || cur->mnt_slave.next != &master->mnt_slave_list) { + struct mount *next = next_slave(cur); + + return (next == root) ? NULL : next; + } + cur = master; } while (cur != root); return NULL; } diff --git a/fs/proc/base.c b/fs/proc/base.c index 1f5f654983ec..cdd820771425 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3358,6 +3358,8 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx) iter.tgid += 1, iter = next_tgid(ns, iter)) { char name[PROC_NUMBUF]; int len; + + cond_resched(); if (!has_pid_permissions(ns, iter.task, 2)) continue; diff --git a/fs/proc/generic.c b/fs/proc/generic.c index ff3ffc76a937..3773335791da 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -469,6 +469,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name) ent->data = NULL; ent->proc_fops = NULL; ent->proc_iops = NULL; + parent->nlink++; if (proc_register(parent, ent) < 0) { kfree(ent); parent->nlink--; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 420f623e3441..f6c512a550e5 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -347,11 +347,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) /* We don't show the stack guard page in /proc/maps */ start = vma->vm_start; - if (stack_guard_page_start(vma, start)) - start += PAGE_SIZE; end = vma->vm_end; - if (stack_guard_page_end(vma, end)) - end -= PAGE_SIZE; seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 8f5ccdf81c25..38187300a2b4 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -189,7 +189,7 @@ int reiserfs_commit_page(struct inode *inode, struct page *page, int ret = 0; th.t_trans_id = 0; - blocksize = 1 << inode->i_blkbits; + blocksize = i_blocksize(inode); if (logit) { reiserfs_write_lock(s); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 3d8e7e671d5b..60ba35087d12 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -524,7 +524,7 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode, * referenced in convert_tail_for_hole() that may be called from * reiserfs_get_block() */ - bh_result->b_size = (1 << inode->i_blkbits); + bh_result->b_size = i_blocksize(inode); ret = reiserfs_get_block(inode, iblock, bh_result, create | GET_BLOCK_NO_DANGLE); diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 268733cda397..5f4f1882dc7d 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -74,6 +74,7 @@ #include <linux/highmem.h> #include <linux/pagemap.h> #include <linux/uaccess.h> +#include <linux/major.h> #include "internal.h" static struct kmem_cache *romfs_inode_cachep; @@ -415,7 +416,22 @@ static void romfs_destroy_inode(struct inode *inode) static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; - u64 id = huge_encode_dev(sb->s_bdev->bd_dev); + u64 id = 0; + + /* When calling huge_encode_dev(), + * use sb->s_bdev->bd_dev when, + * - CONFIG_ROMFS_ON_BLOCK defined + * use sb->s_dev when, + * - CONFIG_ROMFS_ON_BLOCK undefined and + * - CONFIG_ROMFS_ON_MTD defined + * leave id as 0 when, + * - CONFIG_ROMFS_ON_BLOCK undefined and + * - CONFIG_ROMFS_ON_MTD undefined + */ + if (sb->s_bdev) + id = huge_encode_dev(sb->s_bdev->bd_dev); + else if (sb->s_dev) + id = huge_encode_dev(sb->s_dev); buf->f_type = ROMFS_MAGIC; buf->f_namelen = ROMFS_MAXFN; @@ -488,6 +504,11 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_flags |= MS_RDONLY | MS_NOATIME; sb->s_op = &romfs_super_ops; +#ifdef CONFIG_ROMFS_ON_MTD + /* Use same dev ID from the underlying mtdblock device */ + if (sb->s_mtd) + sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, sb->s_mtd->index); +#endif /* read the image superblock and check it */ rsb = kmalloc(512, GFP_KERNEL); if (!rsb) diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c index 7a19e77fce99..13da7e5245bd 100644 --- a/fs/sdcardfs/dentry.c +++ b/fs/sdcardfs/dentry.c @@ -34,6 +34,8 @@ static int sdcardfs_d_revalidate(struct dentry *dentry, unsigned int flags) struct dentry *parent_lower_dentry = NULL; struct dentry *lower_cur_parent_dentry = NULL; struct dentry *lower_dentry = NULL; + struct inode *inode; + struct sdcardfs_inode_data *data; if (flags & LOOKUP_RCU) return -ECHILD; @@ -103,6 +105,21 @@ static int sdcardfs_d_revalidate(struct dentry *dentry, unsigned int flags) spin_unlock(&dentry->d_lock); spin_unlock(&lower_dentry->d_lock); } + if (!err) + goto out; + + /* If our top's inode is gone, we may be out of date */ + inode = igrab(d_inode(dentry)); + if (inode) { + data = top_data_get(SDCARDFS_I(inode)); + if (!data || data->abandoned) { + d_drop(dentry); + err = 0; + } + if (data) + data_put(data); + iput(inode); + } out: dput(parent_dentry); diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c index b4595aab5713..85a60fb5ff39 100644 --- a/fs/sdcardfs/derived_perm.c +++ b/fs/sdcardfs/derived_perm.c @@ -26,28 +26,28 @@ static void inherit_derived_state(struct inode *parent, struct inode *child) struct sdcardfs_inode_info *pi = SDCARDFS_I(parent); struct sdcardfs_inode_info *ci = SDCARDFS_I(child); - ci->perm = PERM_INHERIT; - ci->userid = pi->userid; - ci->d_uid = pi->d_uid; - ci->under_android = pi->under_android; - ci->under_cache = pi->under_cache; - ci->under_obb = pi->under_obb; - set_top(ci, pi->top); + ci->data->perm = PERM_INHERIT; + ci->data->userid = pi->data->userid; + ci->data->d_uid = pi->data->d_uid; + ci->data->under_android = pi->data->under_android; + ci->data->under_cache = pi->data->under_cache; + ci->data->under_obb = pi->data->under_obb; + set_top(ci, pi->top_data); } /* helper function for derived state */ void setup_derived_state(struct inode *inode, perm_t perm, userid_t userid, - uid_t uid, bool under_android, - struct inode *top) + uid_t uid, bool under_android, + struct sdcardfs_inode_data *top) { struct sdcardfs_inode_info *info = SDCARDFS_I(inode); - info->perm = perm; - info->userid = userid; - info->d_uid = uid; - info->under_android = under_android; - info->under_cache = false; - info->under_obb = false; + info->data->perm = perm; + info->data->userid = userid; + info->data->d_uid = uid; + info->data->under_android = under_android; + info->data->under_cache = false; + info->data->under_obb = false; set_top(info, top); } @@ -58,7 +58,8 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, const struct qstr *name) { struct sdcardfs_inode_info *info = SDCARDFS_I(d_inode(dentry)); - struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent)); + struct sdcardfs_inode_data *parent_data = + SDCARDFS_I(d_inode(parent))->data; appid_t appid; unsigned long user_num; int err; @@ -82,60 +83,61 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, if (!S_ISDIR(d_inode(dentry)->i_mode)) return; /* Derive custom permissions based on parent and current node */ - switch (parent_info->perm) { + switch (parent_data->perm) { case PERM_INHERIT: case PERM_ANDROID_PACKAGE_CACHE: /* Already inherited above */ break; case PERM_PRE_ROOT: /* Legacy internal layout places users at top level */ - info->perm = PERM_ROOT; + info->data->perm = PERM_ROOT; err = kstrtoul(name->name, 10, &user_num); if (err) - info->userid = 0; + info->data->userid = 0; else - info->userid = user_num; - set_top(info, &info->vfs_inode); + info->data->userid = user_num; + set_top(info, info->data); break; case PERM_ROOT: /* Assume masked off by default. */ if (qstr_case_eq(name, &q_Android)) { /* App-specific directories inside; let anyone traverse */ - info->perm = PERM_ANDROID; - info->under_android = true; - set_top(info, &info->vfs_inode); + info->data->perm = PERM_ANDROID; + info->data->under_android = true; + set_top(info, info->data); } break; case PERM_ANDROID: if (qstr_case_eq(name, &q_data)) { /* App-specific directories inside; let anyone traverse */ - info->perm = PERM_ANDROID_DATA; - set_top(info, &info->vfs_inode); + info->data->perm = PERM_ANDROID_DATA; + set_top(info, info->data); } else if (qstr_case_eq(name, &q_obb)) { /* App-specific directories inside; let anyone traverse */ - info->perm = PERM_ANDROID_OBB; - info->under_obb = true; - set_top(info, &info->vfs_inode); + info->data->perm = PERM_ANDROID_OBB; + info->data->under_obb = true; + set_top(info, info->data); /* Single OBB directory is always shared */ } else if (qstr_case_eq(name, &q_media)) { /* App-specific directories inside; let anyone traverse */ - info->perm = PERM_ANDROID_MEDIA; - set_top(info, &info->vfs_inode); + info->data->perm = PERM_ANDROID_MEDIA; + set_top(info, info->data); } break; case PERM_ANDROID_OBB: case PERM_ANDROID_DATA: case PERM_ANDROID_MEDIA: - info->perm = PERM_ANDROID_PACKAGE; + info->data->perm = PERM_ANDROID_PACKAGE; appid = get_appid(name->name); - if (appid != 0 && !is_excluded(name->name, parent_info->userid)) - info->d_uid = multiuser_get_uid(parent_info->userid, appid); - set_top(info, &info->vfs_inode); + if (appid != 0 && !is_excluded(name->name, parent_data->userid)) + info->data->d_uid = + multiuser_get_uid(parent_data->userid, appid); + set_top(info, info->data); break; case PERM_ANDROID_PACKAGE: if (qstr_case_eq(name, &q_cache)) { - info->perm = PERM_ANDROID_PACKAGE_CACHE; - info->under_cache = true; + info->data->perm = PERM_ANDROID_PACKAGE_CACHE; + info->data->under_cache = true; } break; } @@ -166,7 +168,8 @@ void fixup_lower_ownership(struct dentry *dentry, const char *name) struct inode *delegated_inode = NULL; int error; struct sdcardfs_inode_info *info; - struct sdcardfs_inode_info *info_top; + struct sdcardfs_inode_data *info_d; + struct sdcardfs_inode_data *info_top; perm_t perm; struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); uid_t uid = sbi->options.fs_low_uid; @@ -174,15 +177,16 @@ void fixup_lower_ownership(struct dentry *dentry, const char *name) struct iattr newattrs; info = SDCARDFS_I(d_inode(dentry)); - perm = info->perm; - if (info->under_obb) { + info_d = info->data; + perm = info_d->perm; + if (info_d->under_obb) { perm = PERM_ANDROID_OBB; - } else if (info->under_cache) { + } else if (info_d->under_cache) { perm = PERM_ANDROID_PACKAGE_CACHE; } else if (perm == PERM_INHERIT) { - info_top = SDCARDFS_I(grab_top(info)); + info_top = top_data_get(info); perm = info_top->perm; - release_top(info); + data_put(info_top); } switch (perm) { @@ -192,7 +196,7 @@ void fixup_lower_ownership(struct dentry *dentry, const char *name) case PERM_ANDROID_MEDIA: case PERM_ANDROID_PACKAGE: case PERM_ANDROID_PACKAGE_CACHE: - uid = multiuser_get_uid(info->userid, uid); + uid = multiuser_get_uid(info_d->userid, uid); break; case PERM_ANDROID_OBB: uid = AID_MEDIA_OBB; @@ -207,24 +211,24 @@ void fixup_lower_ownership(struct dentry *dentry, const char *name) case PERM_ANDROID_DATA: case PERM_ANDROID_MEDIA: if (S_ISDIR(d_inode(dentry)->i_mode)) - gid = multiuser_get_uid(info->userid, AID_MEDIA_RW); + gid = multiuser_get_uid(info_d->userid, AID_MEDIA_RW); else - gid = multiuser_get_uid(info->userid, get_type(name)); + gid = multiuser_get_uid(info_d->userid, get_type(name)); break; case PERM_ANDROID_OBB: gid = AID_MEDIA_OBB; break; case PERM_ANDROID_PACKAGE: - if (uid_is_app(info->d_uid)) - gid = multiuser_get_ext_gid(info->d_uid); + if (uid_is_app(info_d->d_uid)) + gid = multiuser_get_ext_gid(info_d->d_uid); else - gid = multiuser_get_uid(info->userid, AID_MEDIA_RW); + gid = multiuser_get_uid(info_d->userid, AID_MEDIA_RW); break; case PERM_ANDROID_PACKAGE_CACHE: - if (uid_is_app(info->d_uid)) - gid = multiuser_get_ext_cache_gid(info->d_uid); + if (uid_is_app(info_d->d_uid)) + gid = multiuser_get_ext_cache_gid(info_d->d_uid); else - gid = multiuser_get_uid(info->userid, AID_MEDIA_RW); + gid = multiuser_get_uid(info_d->userid, AID_MEDIA_RW); break; case PERM_PRE_ROOT: default: @@ -257,11 +261,13 @@ retry_deleg: sdcardfs_put_lower_path(dentry, &path); } -static int descendant_may_need_fixup(struct sdcardfs_inode_info *info, struct limit_search *limit) +static int descendant_may_need_fixup(struct sdcardfs_inode_data *data, + struct limit_search *limit) { - if (info->perm == PERM_ROOT) - return (limit->flags & BY_USERID)?info->userid == limit->userid:1; - if (info->perm == PERM_PRE_ROOT || info->perm == PERM_ANDROID) + if (data->perm == PERM_ROOT) + return (limit->flags & BY_USERID) ? + data->userid == limit->userid : 1; + if (data->perm == PERM_PRE_ROOT || data->perm == PERM_ANDROID) return 1; return 0; } @@ -292,7 +298,7 @@ static void __fixup_perms_recursive(struct dentry *dentry, struct limit_search * } info = SDCARDFS_I(d_inode(dentry)); - if (needs_fixup(info->perm)) { + if (needs_fixup(info->data->perm)) { list_for_each_entry(child, &dentry->d_subdirs, d_child) { spin_lock_nested(&child->d_lock, depth + 1); if (!(limit->flags & BY_NAME) || qstr_case_eq(&child->d_name, &limit->name)) { @@ -305,7 +311,7 @@ static void __fixup_perms_recursive(struct dentry *dentry, struct limit_search * } spin_unlock(&child->d_lock); } - } else if (descendant_may_need_fixup(info, limit)) { + } else if (descendant_may_need_fixup(info->data, limit)) { list_for_each_entry(child, &dentry->d_subdirs, d_child) { __fixup_perms_recursive(child, limit, depth + 1); } @@ -349,12 +355,12 @@ int need_graft_path(struct dentry *dentry) struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); struct qstr obb = QSTR_LITERAL("obb"); - if (parent_info->perm == PERM_ANDROID && + if (parent_info->data->perm == PERM_ANDROID && qstr_case_eq(&dentry->d_name, &obb)) { /* /Android/obb is the base obbpath of DERIVED_UNIFIED */ if (!(sbi->options.multiuser == false - && parent_info->userid == 0)) { + && parent_info->data->userid == 0)) { ret = 1; } } @@ -415,11 +421,11 @@ int is_base_obbpath(struct dentry *dentry) spin_lock(&SDCARDFS_D(dentry)->lock); if (sbi->options.multiuser) { - if (parent_info->perm == PERM_PRE_ROOT && + if (parent_info->data->perm == PERM_PRE_ROOT && qstr_case_eq(&dentry->d_name, &q_obb)) { ret = 1; } - } else if (parent_info->perm == PERM_ANDROID && + } else if (parent_info->data->perm == PERM_ANDROID && qstr_case_eq(&dentry->d_name, &q_obb)) { ret = 1; } diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 4f09eebd7d95..60fea424835f 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -23,7 +23,8 @@ #include <linux/ratelimit.h> /* Do not directly use this function. Use OVERRIDE_CRED() instead. */ -const struct cred *override_fsids(struct sdcardfs_sb_info *sbi, struct sdcardfs_inode_info *info) +const struct cred *override_fsids(struct sdcardfs_sb_info *sbi, + struct sdcardfs_inode_data *data) { struct cred *cred; const struct cred *old_cred; @@ -33,10 +34,10 @@ const struct cred *override_fsids(struct sdcardfs_sb_info *sbi, struct sdcardfs_ if (!cred) return NULL; - if (info->under_obb) + if (data->under_obb) uid = AID_MEDIA_OBB; else - uid = multiuser_get_uid(info->userid, sbi->options.fs_low_uid); + uid = multiuser_get_uid(data->userid, sbi->options.fs_low_uid); cred->fsuid = make_kuid(&init_user_ns, uid); cred->fsgid = make_kgid(&init_user_ns, sbi->options.fs_low_gid); @@ -96,7 +97,8 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, if (err) goto out; - err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path, SDCARDFS_I(dir)->userid); + err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path, + SDCARDFS_I(dir)->data->userid); if (err) goto out; fsstack_copy_attr_times(dir, sdcardfs_lower_inode(dir)); @@ -267,7 +269,7 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode struct path lower_path; struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); const struct cred *saved_cred = NULL; - struct sdcardfs_inode_info *pi = SDCARDFS_I(dir); + struct sdcardfs_inode_data *pd = SDCARDFS_I(dir)->data; int touch_err = 0; struct fs_struct *saved_fs; struct fs_struct *copied_fs; @@ -336,7 +338,7 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode make_nomedia_in_obb = 1; } - err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path, pi->userid); + err = sdcardfs_interpose(dentry, dir->i_sb, &lower_path, pd->userid); if (err) { unlock_dir(lower_parent_dentry); goto out; @@ -349,12 +351,13 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode fixup_lower_ownership(dentry, dentry->d_name.name); unlock_dir(lower_parent_dentry); if ((!sbi->options.multiuser) && (qstr_case_eq(&dentry->d_name, &q_obb)) - && (pi->perm == PERM_ANDROID) && (pi->userid == 0)) + && (pd->perm == PERM_ANDROID) && (pd->userid == 0)) make_nomedia_in_obb = 1; /* When creating /Android/data and /Android/obb, mark them as .nomedia */ if (make_nomedia_in_obb || - ((pi->perm == PERM_ANDROID) && (qstr_case_eq(&dentry->d_name, &q_data)))) { + ((pd->perm == PERM_ANDROID) + && (qstr_case_eq(&dentry->d_name, &q_data)))) { REVERT_CRED(saved_cred); OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(d_inode(dentry))); set_fs_pwd(current->fs, &lower_path); @@ -616,7 +619,7 @@ static int sdcardfs_permission(struct vfsmount *mnt, struct inode *inode, int ma { int err; struct inode tmp; - struct inode *top = grab_top(SDCARDFS_I(inode)); + struct sdcardfs_inode_data *top = top_data_get(SDCARDFS_I(inode)); if (!top) return -EINVAL; @@ -633,10 +636,11 @@ static int sdcardfs_permission(struct vfsmount *mnt, struct inode *inode, int ma * locks must be dealt with to avoid undefined behavior. */ copy_attrs(&tmp, inode); - tmp.i_uid = make_kuid(&init_user_ns, SDCARDFS_I(top)->d_uid); - tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, SDCARDFS_I(top))); - tmp.i_mode = (inode->i_mode & S_IFMT) | get_mode(mnt, SDCARDFS_I(top)); - release_top(SDCARDFS_I(inode)); + tmp.i_uid = make_kuid(&init_user_ns, top->d_uid); + tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, top)); + tmp.i_mode = (inode->i_mode & S_IFMT) + | get_mode(mnt, SDCARDFS_I(inode), top); + data_put(top); tmp.i_sb = inode->i_sb; if (IS_POSIXACL(inode)) pr_warn("%s: This may be undefined behavior...\n", __func__); @@ -687,11 +691,11 @@ static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct struct iattr lower_ia; struct dentry *parent; struct inode tmp; - struct inode *top; + struct sdcardfs_inode_data *top; const struct cred *saved_cred = NULL; inode = d_inode(dentry); - top = grab_top(SDCARDFS_I(inode)); + top = top_data_get(SDCARDFS_I(inode)); if (!top) return -EINVAL; @@ -709,11 +713,12 @@ static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct * */ copy_attrs(&tmp, inode); - tmp.i_uid = make_kuid(&init_user_ns, SDCARDFS_I(top)->d_uid); - tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, SDCARDFS_I(top))); - tmp.i_mode = (inode->i_mode & S_IFMT) | get_mode(mnt, SDCARDFS_I(top)); + tmp.i_uid = make_kuid(&init_user_ns, top->d_uid); + tmp.i_gid = make_kgid(&init_user_ns, get_gid(mnt, top)); + tmp.i_mode = (inode->i_mode & S_IFMT) + | get_mode(mnt, SDCARDFS_I(inode), top); tmp.i_size = i_size_read(inode); - release_top(SDCARDFS_I(inode)); + data_put(top); tmp.i_sb = inode->i_sb; /* @@ -815,17 +820,17 @@ static int sdcardfs_fillattr(struct vfsmount *mnt, struct inode *inode, struct kstat *stat) { struct sdcardfs_inode_info *info = SDCARDFS_I(inode); - struct inode *top = grab_top(info); + struct sdcardfs_inode_data *top = top_data_get(info); if (!top) return -EINVAL; stat->dev = inode->i_sb->s_dev; stat->ino = inode->i_ino; - stat->mode = (inode->i_mode & S_IFMT) | get_mode(mnt, SDCARDFS_I(top)); + stat->mode = (inode->i_mode & S_IFMT) | get_mode(mnt, info, top); stat->nlink = inode->i_nlink; - stat->uid = make_kuid(&init_user_ns, SDCARDFS_I(top)->d_uid); - stat->gid = make_kgid(&init_user_ns, get_gid(mnt, SDCARDFS_I(top))); + stat->uid = make_kuid(&init_user_ns, top->d_uid); + stat->gid = make_kgid(&init_user_ns, get_gid(mnt, top)); stat->rdev = inode->i_rdev; stat->size = i_size_read(inode); stat->atime = inode->i_atime; @@ -833,7 +838,7 @@ static int sdcardfs_fillattr(struct vfsmount *mnt, stat->ctime = inode->i_ctime; stat->blksize = (1 << inode->i_blkbits); stat->blocks = inode->i_blocks; - release_top(info); + data_put(top); return 0; } diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index 509d5fbcb472..676e394e07be 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -71,7 +71,7 @@ struct inode_data { static int sdcardfs_inode_test(struct inode *inode, void *candidate_data/*void *candidate_lower_inode*/) { struct inode *current_lower_inode = sdcardfs_lower_inode(inode); - userid_t current_userid = SDCARDFS_I(inode)->userid; + userid_t current_userid = SDCARDFS_I(inode)->data->userid; if (current_lower_inode == ((struct inode_data *)candidate_data)->lower_inode && current_userid == ((struct inode_data *)candidate_data)->id) @@ -199,7 +199,8 @@ static struct dentry *__sdcardfs_interpose(struct dentry *dentry, ret_dentry = d_splice_alias(inode, dentry); dentry = ret_dentry ?: dentry; - update_derived_permission_lock(dentry); + if (!IS_ERR(dentry)) + update_derived_permission_lock(dentry); out: return ret_dentry; } @@ -438,7 +439,8 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, goto out; } - ret = __sdcardfs_lookup(dentry, flags, &lower_parent_path, SDCARDFS_I(dir)->userid); + ret = __sdcardfs_lookup(dentry, flags, &lower_parent_path, + SDCARDFS_I(dir)->data->userid); if (IS_ERR(ret)) goto out; if (ret) diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index 953d2156d2e9..3c5b51d49d21 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -327,13 +327,13 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb, mutex_lock(&sdcardfs_super_list_lock); if (sb_info->options.multiuser) { setup_derived_state(d_inode(sb->s_root), PERM_PRE_ROOT, - sb_info->options.fs_user_id, AID_ROOT, - false, d_inode(sb->s_root)); + sb_info->options.fs_user_id, AID_ROOT, + false, SDCARDFS_I(d_inode(sb->s_root))->data); snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name); } else { setup_derived_state(d_inode(sb->s_root), PERM_ROOT, - sb_info->options.fs_user_id, AID_ROOT, - false, d_inode(sb->s_root)); + sb_info->options.fs_user_id, AID_ROOT, + false, SDCARDFS_I(d_inode(sb->s_root))->data); snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name); } fixup_tmp_permissions(d_inode(sb->s_root)); diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c index 89196e31073e..66c6f8e72a02 100644 --- a/fs/sdcardfs/packagelist.c +++ b/fs/sdcardfs/packagelist.c @@ -156,7 +156,7 @@ int check_caller_access_to_name(struct inode *parent_node, const struct qstr *na struct qstr q_android_secure = QSTR_LITERAL("android_secure"); /* Always block security-sensitive files at root */ - if (parent_node && SDCARDFS_I(parent_node)->perm == PERM_ROOT) { + if (parent_node && SDCARDFS_I(parent_node)->data->perm == PERM_ROOT) { if (qstr_case_eq(name, &q_autorun) || qstr_case_eq(name, &q__android_secure) || qstr_case_eq(name, &q_android_secure)) { @@ -174,19 +174,6 @@ int check_caller_access_to_name(struct inode *parent_node, const struct qstr *na return 1; } -/* This function is used when file opening. The open flags must be - * checked before calling check_caller_access_to_name() - */ -int open_flags_to_access_mode(int open_flags) -{ - if ((open_flags & O_ACCMODE) == O_RDONLY) - return 0; /* R_OK */ - if ((open_flags & O_ACCMODE) == O_WRONLY) - return 1; /* W_OK */ - /* Probably O_RDRW, but treat as default to be safe */ - return 1; /* R_OK | W_OK */ -} - static struct hashtable_entry *alloc_hashtable_entry(const struct qstr *key, appid_t value) { diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index 2b67b9a8ef9f..b8624fd8b817 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -30,6 +30,7 @@ #include <linux/file.h> #include <linux/fs.h> #include <linux/aio.h> +#include <linux/kref.h> #include <linux/mm.h> #include <linux/mount.h> #include <linux/namei.h> @@ -81,7 +82,8 @@ */ #define fixup_tmp_permissions(x) \ do { \ - (x)->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(x)->d_uid); \ + (x)->i_uid = make_kuid(&init_user_ns, \ + SDCARDFS_I(x)->data->d_uid); \ (x)->i_gid = make_kgid(&init_user_ns, AID_SDCARD_RW); \ (x)->i_mode = ((x)->i_mode & S_IFMT) | 0775;\ } while (0) @@ -97,16 +99,16 @@ */ #define OVERRIDE_CRED(sdcardfs_sbi, saved_cred, info) \ do { \ - saved_cred = override_fsids(sdcardfs_sbi, info); \ - if (!saved_cred) \ - return -ENOMEM; \ + saved_cred = override_fsids(sdcardfs_sbi, info->data); \ + if (!saved_cred) \ + return -ENOMEM; \ } while (0) #define OVERRIDE_CRED_PTR(sdcardfs_sbi, saved_cred, info) \ do { \ - saved_cred = override_fsids(sdcardfs_sbi, info); \ - if (!saved_cred) \ - return ERR_PTR(-ENOMEM); \ + saved_cred = override_fsids(sdcardfs_sbi, info->data); \ + if (!saved_cred) \ + return ERR_PTR(-ENOMEM); \ } while (0) #define REVERT_CRED(saved_cred) revert_fsids(saved_cred) @@ -142,9 +144,11 @@ typedef enum { struct sdcardfs_sb_info; struct sdcardfs_mount_options; struct sdcardfs_inode_info; +struct sdcardfs_inode_data; /* Do not directly use this function. Use OVERRIDE_CRED() instead. */ -const struct cred *override_fsids(struct sdcardfs_sb_info *sbi, struct sdcardfs_inode_info *info); +const struct cred *override_fsids(struct sdcardfs_sb_info *sbi, + struct sdcardfs_inode_data *data); /* Do not directly use this function, use REVERT_CRED() instead. */ void revert_fsids(const struct cred *old_cred); @@ -178,18 +182,26 @@ struct sdcardfs_file_info { const struct vm_operations_struct *lower_vm_ops; }; -/* sdcardfs inode data in memory */ -struct sdcardfs_inode_info { - struct inode *lower_inode; - /* state derived based on current position in hierachy */ +struct sdcardfs_inode_data { + struct kref refcount; + bool abandoned; + perm_t perm; userid_t userid; uid_t d_uid; bool under_android; bool under_cache; bool under_obb; +}; + +/* sdcardfs inode data in memory */ +struct sdcardfs_inode_info { + struct inode *lower_inode; + /* state derived based on current position in hierarchy */ + struct sdcardfs_inode_data *data; + /* top folder for ownership */ - struct inode *top; + struct sdcardfs_inode_data *top_data; struct inode vfs_inode; }; @@ -351,39 +363,56 @@ SDCARDFS_DENT_FUNC(orig_path) static inline bool sbinfo_has_sdcard_magic(struct sdcardfs_sb_info *sbinfo) { - return sbinfo && sbinfo->sb && sbinfo->sb->s_magic == SDCARDFS_SUPER_MAGIC; + return sbinfo && sbinfo->sb + && sbinfo->sb->s_magic == SDCARDFS_SUPER_MAGIC; } -/* grab a refererence if we aren't linking to ourself */ -static inline void set_top(struct sdcardfs_inode_info *info, struct inode *top) +static inline struct sdcardfs_inode_data *data_get( + struct sdcardfs_inode_data *data) { - struct inode *old_top = NULL; - - BUG_ON(IS_ERR_OR_NULL(top)); - if (info->top && info->top != &info->vfs_inode) - old_top = info->top; - if (top != &info->vfs_inode) - igrab(top); - info->top = top; - iput(old_top); + if (data) + kref_get(&data->refcount); + return data; } -static inline struct inode *grab_top(struct sdcardfs_inode_info *info) +static inline struct sdcardfs_inode_data *top_data_get( + struct sdcardfs_inode_info *info) { - struct inode *top = info->top; + return data_get(info->top_data); +} - if (top) - return igrab(top); - else - return NULL; +extern void data_release(struct kref *ref); + +static inline void data_put(struct sdcardfs_inode_data *data) +{ + kref_put(&data->refcount, data_release); +} + +static inline void release_own_data(struct sdcardfs_inode_info *info) +{ + /* + * This happens exactly once per inode. At this point, the inode that + * originally held this data is about to be freed, and all references + * to it are held as a top value, and will likely be released soon. + */ + info->data->abandoned = true; + data_put(info->data); } -static inline void release_top(struct sdcardfs_inode_info *info) +static inline void set_top(struct sdcardfs_inode_info *info, + struct sdcardfs_inode_data *top) { - iput(info->top); + struct sdcardfs_inode_data *old_top = info->top_data; + + if (top) + data_get(top); + info->top_data = top; + if (old_top) + data_put(old_top); } -static inline int get_gid(struct vfsmount *mnt, struct sdcardfs_inode_info *info) +static inline int get_gid(struct vfsmount *mnt, + struct sdcardfs_inode_data *data) { struct sdcardfs_vfsmount_options *opts = mnt->data; @@ -396,10 +425,12 @@ static inline int get_gid(struct vfsmount *mnt, struct sdcardfs_inode_info *info */ return AID_SDCARD_RW; else - return multiuser_get_uid(info->userid, opts->gid); + return multiuser_get_uid(data->userid, opts->gid); } -static inline int get_mode(struct vfsmount *mnt, struct sdcardfs_inode_info *info) +static inline int get_mode(struct vfsmount *mnt, + struct sdcardfs_inode_info *info, + struct sdcardfs_inode_data *data) { int owner_mode; int filtered_mode; @@ -407,12 +438,12 @@ static inline int get_mode(struct vfsmount *mnt, struct sdcardfs_inode_info *inf int visible_mode = 0775 & ~opts->mask; - if (info->perm == PERM_PRE_ROOT) { + if (data->perm == PERM_PRE_ROOT) { /* Top of multi-user view should always be visible to ensure * secondary users can traverse inside. */ visible_mode = 0711; - } else if (info->under_android) { + } else if (data->under_android) { /* Block "other" access to Android directories, since only apps * belonging to a specific user should be in there; we still * leave +x open for the default view. @@ -468,7 +499,6 @@ extern appid_t get_appid(const char *app_name); extern appid_t get_ext_gid(const char *app_name); extern appid_t is_excluded(const char *app_name, userid_t userid); extern int check_caller_access_to_name(struct inode *parent_node, const struct qstr *name); -extern int open_flags_to_access_mode(int open_flags); extern int packagelist_init(void); extern void packagelist_exit(void); @@ -481,8 +511,9 @@ struct limit_search { userid_t userid; }; -extern void setup_derived_state(struct inode *inode, perm_t perm, userid_t userid, - uid_t uid, bool under_android, struct inode *top); +extern void setup_derived_state(struct inode *inode, perm_t perm, + userid_t userid, uid_t uid, bool under_android, + struct sdcardfs_inode_data *top); extern void get_derived_permission(struct dentry *parent, struct dentry *dentry); extern void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, const struct qstr *name); extern void fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit); @@ -601,7 +632,7 @@ static inline void sdcardfs_copy_and_fix_attrs(struct inode *dest, const struct { dest->i_mode = (src->i_mode & S_IFMT) | S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH; /* 0775 */ - dest->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(dest)->d_uid); + dest->i_uid = make_kuid(&init_user_ns, SDCARDFS_I(dest)->data->d_uid); dest->i_gid = make_kgid(&init_user_ns, AID_SDCARD_RW); dest->i_rdev = src->i_rdev; dest->i_atime = src->i_atime; diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c index 8a9c9c7adca2..7f4539b4b249 100644 --- a/fs/sdcardfs/super.c +++ b/fs/sdcardfs/super.c @@ -26,6 +26,23 @@ */ static struct kmem_cache *sdcardfs_inode_cachep; +/* + * To support the top references, we must track some data separately. + * An sdcardfs_inode_info always has a reference to its data, and once set up, + * also has a reference to its top. The top may be itself, in which case it + * holds two references to its data. When top is changed, it takes a ref to the + * new data and then drops the ref to the old data. + */ +static struct kmem_cache *sdcardfs_inode_data_cachep; + +void data_release(struct kref *ref) +{ + struct sdcardfs_inode_data *data = + container_of(ref, struct sdcardfs_inode_data, refcount); + + kmem_cache_free(sdcardfs_inode_data_cachep, data); +} + /* final actions when unmounting a file system */ static void sdcardfs_put_super(struct super_block *sb) { @@ -166,6 +183,7 @@ static void sdcardfs_evict_inode(struct inode *inode) struct inode *lower_inode; truncate_inode_pages(&inode->i_data, 0); + set_top(SDCARDFS_I(inode), NULL); clear_inode(inode); /* * Decrement a reference to a lower_inode, which was incremented @@ -173,13 +191,13 @@ static void sdcardfs_evict_inode(struct inode *inode) */ lower_inode = sdcardfs_lower_inode(inode); sdcardfs_set_lower_inode(inode, NULL); - set_top(SDCARDFS_I(inode), inode); iput(lower_inode); } static struct inode *sdcardfs_alloc_inode(struct super_block *sb) { struct sdcardfs_inode_info *i; + struct sdcardfs_inode_data *d; i = kmem_cache_alloc(sdcardfs_inode_cachep, GFP_KERNEL); if (!i) @@ -188,6 +206,16 @@ static struct inode *sdcardfs_alloc_inode(struct super_block *sb) /* memset everything up to the inode to 0 */ memset(i, 0, offsetof(struct sdcardfs_inode_info, vfs_inode)); + d = kmem_cache_alloc(sdcardfs_inode_data_cachep, + GFP_KERNEL | __GFP_ZERO); + if (!d) { + kmem_cache_free(sdcardfs_inode_cachep, i); + return NULL; + } + + i->data = d; + kref_init(&d->refcount); + i->vfs_inode.i_version = 1; return &i->vfs_inode; } @@ -196,6 +224,7 @@ static void i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); + release_own_data(SDCARDFS_I(inode)); kmem_cache_free(sdcardfs_inode_cachep, SDCARDFS_I(inode)); } @@ -214,20 +243,30 @@ static void init_once(void *obj) int sdcardfs_init_inode_cache(void) { - int err = 0; - sdcardfs_inode_cachep = kmem_cache_create("sdcardfs_inode_cache", sizeof(struct sdcardfs_inode_info), 0, SLAB_RECLAIM_ACCOUNT, init_once); + if (!sdcardfs_inode_cachep) - err = -ENOMEM; - return err; + return -ENOMEM; + + sdcardfs_inode_data_cachep = + kmem_cache_create("sdcardfs_inode_data_cache", + sizeof(struct sdcardfs_inode_data), 0, + SLAB_RECLAIM_ACCOUNT, NULL); + if (!sdcardfs_inode_data_cachep) { + kmem_cache_destroy(sdcardfs_inode_cachep); + return -ENOMEM; + } + + return 0; } /* sdcardfs inode cache destructor */ void sdcardfs_destroy_inode_cache(void) { + kmem_cache_destroy(sdcardfs_inode_data_cachep); kmem_cache_destroy(sdcardfs_inode_cachep); } diff --git a/fs/stat.c b/fs/stat.c index d4a61d8dc021..004dd77c3b93 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -31,7 +31,7 @@ void generic_fillattr(struct inode *inode, struct kstat *stat) stat->atime = inode->i_atime; stat->mtime = inode->i_mtime; stat->ctime = inode->i_ctime; - stat->blksize = (1 << inode->i_blkbits); + stat->blksize = i_blocksize(inode); stat->blocks = inode->i_blocks; } @@ -454,6 +454,7 @@ void __inode_add_bytes(struct inode *inode, loff_t bytes) inode->i_bytes -= 512; } } +EXPORT_SYMBOL(__inode_add_bytes); void inode_add_bytes(struct inode *inode, loff_t bytes) { diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 566df9b5a6cb..7be3166ba553 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1206,7 +1206,7 @@ int udf_setsize(struct inode *inode, loff_t newsize) { int err; struct udf_inode_info *iinfo; - int bsize = 1 << inode->i_blkbits; + int bsize = i_blocksize(inode); if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index dc5fae601c24..637e17cb0edd 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -81,7 +81,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) ufs_error (sb, "ufs_free_fragments", "bit already cleared for fragment %u", i); } - + + inode_sub_bytes(inode, count << uspi->s_fshift); fs32_add(sb, &ucg->cg_cs.cs_nffree, count); uspi->cs_total.cs_nffree += count; fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count); @@ -183,6 +184,7 @@ do_more: ufs_error(sb, "ufs_free_blocks", "freeing free fragment"); } ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno); + inode_sub_bytes(inode, uspi->s_fpb << uspi->s_fshift); if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) ufs_clusteracct (sb, ucpi, blkno, 1); @@ -494,6 +496,20 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, return 0; } +static bool try_add_frags(struct inode *inode, unsigned frags) +{ + unsigned size = frags * i_blocksize(inode); + spin_lock(&inode->i_lock); + __inode_add_bytes(inode, size); + if (unlikely((u32)inode->i_blocks != inode->i_blocks)) { + __inode_sub_bytes(inode, size); + spin_unlock(&inode->i_lock); + return false; + } + spin_unlock(&inode->i_lock); + return true; +} + static u64 ufs_add_fragments(struct inode *inode, u64 fragment, unsigned oldcount, unsigned newcount) { @@ -530,6 +546,9 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, for (i = oldcount; i < newcount; i++) if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i)) return 0; + + if (!try_add_frags(inode, count)) + return 0; /* * Block can be extended */ @@ -647,6 +666,7 @@ cg_found: ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i); i = uspi->s_fpb - count; + inode_sub_bytes(inode, i << uspi->s_fshift); fs32_add(sb, &ucg->cg_cs.cs_nffree, i); uspi->cs_total.cs_nffree += i; fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, i); @@ -657,6 +677,8 @@ cg_found: result = ufs_bitmap_search (sb, ucpi, goal, allocsize); if (result == INVBLOCK) return 0; + if (!try_add_frags(inode, count)) + return 0; for (i = 0; i < count; i++) ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, result + i); @@ -716,6 +738,8 @@ norot: return INVBLOCK; ucpi->c_rotor = result; gotit: + if (!try_add_frags(inode, uspi->s_fpb)) + return 0; blkno = ufs_fragstoblks(result); ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno); if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD) diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index a064cf44b143..1f69bb9b1e9d 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -235,7 +235,8 @@ ufs_extend_tail(struct inode *inode, u64 writes_to, p = ufs_get_direct_data_ptr(uspi, ufsi, block); tmp = ufs_new_fragments(inode, p, lastfrag, ufs_data_ptr_to_cpu(sb, p), - new_size, err, locked_page); + new_size - (lastfrag & uspi->s_fpbmask), err, + locked_page); return tmp != 0; } @@ -284,7 +285,7 @@ ufs_inode_getfrag(struct inode *inode, unsigned index, goal += uspi->s_fpb; } tmp = ufs_new_fragments(inode, p, ufs_blknum(new_fragment), - goal, uspi->s_fpb, err, locked_page); + goal, nfrags, err, locked_page); if (!tmp) { *err = -ENOSPC; @@ -402,7 +403,9 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff if (!create) { phys64 = ufs_frag_map(inode, offsets, depth); - goto out; + if (phys64) + map_bh(bh_result, sb, phys64 + frag); + return 0; } /* This code entered only while writing ....? */ diff --git a/fs/ufs/super.c b/fs/ufs/super.c index f6390eec02ca..10f364490833 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -746,6 +746,23 @@ static void ufs_put_super(struct super_block *sb) return; } +static u64 ufs_max_bytes(struct super_block *sb) +{ + struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; + int bits = uspi->s_apbshift; + u64 res; + + if (bits > 21) + res = ~0ULL; + else + res = UFS_NDADDR + (1LL << bits) + (1LL << (2*bits)) + + (1LL << (3*bits)); + + if (res >= (MAX_LFS_FILESIZE >> uspi->s_bshift)) + return MAX_LFS_FILESIZE; + return res << uspi->s_bshift; +} + static int ufs_fill_super(struct super_block *sb, void *data, int silent) { struct ufs_sb_info * sbi; @@ -1212,6 +1229,7 @@ magic_found: "fast symlink size (%u)\n", uspi->s_maxsymlinklen); uspi->s_maxsymlinklen = maxsymlen; } + sb->s_maxbytes = ufs_max_bytes(sb); sb->s_max_links = UFS_LINK_MAX; inode = ufs_iget(sb, UFS_ROOTINO); diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 954175928240..3f9463f8cf2f 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ -473,15 +473,19 @@ static inline unsigned _ubh_find_last_zero_bit_( static inline int _ubh_isblockset_(struct ufs_sb_private_info * uspi, struct ufs_buffer_head * ubh, unsigned begin, unsigned block) { + u8 mask; switch (uspi->s_fpb) { case 8: return (*ubh_get_addr (ubh, begin + block) == 0xff); case 4: - return (*ubh_get_addr (ubh, begin + (block >> 1)) == (0x0f << ((block & 0x01) << 2))); + mask = 0x0f << ((block & 0x01) << 2); + return (*ubh_get_addr (ubh, begin + (block >> 1)) & mask) == mask; case 2: - return (*ubh_get_addr (ubh, begin + (block >> 2)) == (0x03 << ((block & 0x03) << 1))); + mask = 0x03 << ((block & 0x03) << 1); + return (*ubh_get_addr (ubh, begin + (block >> 2)) & mask) == mask; case 1: - return (*ubh_get_addr (ubh, begin + (block >> 3)) == (0x01 << (block & 0x07))); + mask = 0x01 << (block & 0x07); + return (*ubh_get_addr (ubh, begin + (block >> 3)) & mask) == mask; } return 0; } diff --git a/fs/xattr.c b/fs/xattr.c index 9b932b95d74e..f0da9d24e9ca 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -442,7 +442,7 @@ getxattr(struct dentry *d, const char __user *name, void __user *value, size = XATTR_SIZE_MAX; kvalue = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); if (!kvalue) { - vvalue = vmalloc(size); + vvalue = vzalloc(size); if (!vvalue) return -ENOMEM; kvalue = vvalue; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 119c2422aac7..75884aecf920 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -2179,8 +2179,10 @@ xfs_bmap_add_extent_delay_real( } temp = xfs_bmap_worst_indlen(bma->ip, temp); temp2 = xfs_bmap_worst_indlen(bma->ip, temp2); - diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - - (bma->cur ? bma->cur->bc_private.b.allocated : 0)); + diff = (int)(temp + temp2 - + (startblockval(PREV.br_startblock) - + (bma->cur ? + bma->cur->bc_private.b.allocated : 0))); if (diff > 0) { error = xfs_mod_fdblocks(bma->ip->i_mount, -((int64_t)diff), false); @@ -2232,7 +2234,6 @@ xfs_bmap_add_extent_delay_real( temp = da_new; if (bma->cur) temp += bma->cur->bc_private.b.allocated; - ASSERT(temp <= da_old); if (temp < da_old) xfs_mod_fdblocks(bma->ip->i_mount, (int64_t)(da_old - temp), false); diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index af1bbee5586e..28bc5e78b110 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -4064,7 +4064,7 @@ xfs_btree_change_owner( xfs_btree_readahead_ptr(cur, ptr, 1); /* save for the next iteration of the loop */ - lptr = *ptr; + xfs_btree_copy_ptrs(cur, &lptr, ptr, 1); } /* for each buffer in the level */ diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 29e7e5dd5178..187b80267ff9 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -288,7 +288,7 @@ xfs_map_blocks( { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - ssize_t count = 1 << inode->i_blkbits; + ssize_t count = i_blocksize(inode); xfs_fileoff_t offset_fsb, end_fsb; int error = 0; int bmapi_flags = XFS_BMAPI_ENTIRE; @@ -921,7 +921,7 @@ xfs_aops_discard_page( break; } next_buffer: - offset += 1 << inode->i_blkbits; + offset += i_blocksize(inode); } while ((bh = bh->b_this_page) != head); @@ -1363,7 +1363,7 @@ xfs_map_trim_size( offset + mapping_size >= i_size_read(inode)) { /* limit mapping to block that spans EOF */ mapping_size = roundup_64(i_size_read(inode) - offset, - 1 << inode->i_blkbits); + i_blocksize(inode)); } if (mapping_size > LONG_MAX) mapping_size = LONG_MAX; @@ -1395,7 +1395,7 @@ __xfs_get_blocks( return -EIO; offset = (xfs_off_t)iblock << inode->i_blkbits; - ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); + ASSERT(bh_result->b_size >= i_blocksize(inode)); size = bh_result->b_size; if (!create && direct && offset >= i_size_read(inode)) @@ -1968,7 +1968,7 @@ xfs_vm_set_page_dirty( if (offset < end_offset) set_buffer_dirty(bh); bh = bh->b_this_page; - offset += 1 << inode->i_blkbits; + offset += i_blocksize(inode); } while (bh != head); } /* diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h index dd4824589470..234331227c0c 100644 --- a/fs/xfs/xfs_attr.h +++ b/fs/xfs/xfs_attr.h @@ -112,6 +112,7 @@ typedef struct attrlist_cursor_kern { *========================================================================*/ +/* Return 0 on success, or -errno; other state communicated via *context */ typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int, unsigned char *, int, int, unsigned char *); diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 4fa14820e2e2..c8be331a3196 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -108,16 +108,14 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) (int)sfe->namelen, (int)sfe->valuelen, &sfe->nameval[sfe->namelen]); - + if (error) + return error; /* * Either search callback finished early or * didn't fit it all in the buffer after all. */ if (context->seen_enough) break; - - if (error) - return error; sfe = XFS_ATTR_SF_NEXTENTRY(sfe); } trace_xfs_attr_list_sf_all(context); @@ -581,7 +579,7 @@ xfs_attr_put_listent( trace_xfs_attr_list_full(context); alist->al_more = 1; context->seen_enough = 1; - return 1; + return 0; } aep = (attrlist_ent_t *)&context->alist[context->firstu]; diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 832764ee035a..863e1bff403b 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -682,7 +682,7 @@ xfs_getbmap( * extents. */ if (map[i].br_startblock == DELAYSTARTBLOCK && - map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) + map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip))) ASSERT((iflags & BMV_IF_DELALLOC) != 0); if (map[i].br_startblock == HOLESTARTBLOCK && diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 8146b0cf20ce..dcb70969ff1c 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -979,6 +979,8 @@ void xfs_buf_unlock( struct xfs_buf *bp) { + ASSERT(xfs_buf_islocked(bp)); + XB_CLEAR_OWNER(bp); up(&bp->b_sema); @@ -1713,6 +1715,28 @@ error: } /* + * Cancel a delayed write list. + * + * Remove each buffer from the list, clear the delwri queue flag and drop the + * associated buffer reference. + */ +void +xfs_buf_delwri_cancel( + struct list_head *list) +{ + struct xfs_buf *bp; + + while (!list_empty(list)) { + bp = list_first_entry(list, struct xfs_buf, b_list); + + xfs_buf_lock(bp); + bp->b_flags &= ~_XBF_DELWRI_Q; + list_del_init(&bp->b_list); + xfs_buf_relse(bp); + } +} + +/* * Add a buffer to the delayed write list. * * This queues a buffer for writeout if it hasn't already been. Note that diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index c75721acd867..149bbd451731 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -304,6 +304,7 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, extern void *xfs_buf_offset(struct xfs_buf *, size_t); /* Delayed Write Buffer Routines */ +extern void xfs_buf_delwri_cancel(struct list_head *); extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *); extern int xfs_buf_delwri_submit(struct list_head *); extern int xfs_buf_delwri_submit_nowait(struct list_head *); diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 642d55d10075..2fbf643fa10a 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -406,6 +406,7 @@ xfs_dir2_leaf_readbuf( /* * Do we need more readahead? + * Each loop tries to process 1 full dir blk; last may be partial. */ blk_start_plug(&plug); for (mip->ra_index = mip->ra_offset = i = 0; @@ -416,7 +417,8 @@ xfs_dir2_leaf_readbuf( * Read-ahead a contiguous directory block. */ if (i > mip->ra_current && - map[mip->ra_index].br_blockcount >= geo->fsbcount) { + (map[mip->ra_index].br_blockcount - mip->ra_offset) >= + geo->fsbcount) { xfs_dir3_data_readahead(dp, map[mip->ra_index].br_startoff + mip->ra_offset, XFS_FSB_TO_DADDR(dp->i_mount, @@ -437,14 +439,19 @@ xfs_dir2_leaf_readbuf( } /* - * Advance offset through the mapping table. + * Advance offset through the mapping table, processing a full + * dir block even if it is fragmented into several extents. + * But stop if we have consumed all valid mappings, even if + * it's not yet a full directory block. */ - for (j = 0; j < geo->fsbcount; j += length ) { + for (j = 0; + j < geo->fsbcount && mip->ra_index < mip->map_valid; + j += length ) { /* * The rest of this extent but not more than a dir * block. */ - length = min_t(int, geo->fsbcount, + length = min_t(int, geo->fsbcount - j, map[mip->ra_index].br_blockcount - mip->ra_offset); mip->ra_offset += length; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index f5392ab2def1..3dd47307363f 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -947,7 +947,7 @@ xfs_file_fallocate( if (error) goto out_unlock; } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { - unsigned blksize_mask = (1 << inode->i_blkbits) - 1; + unsigned int blksize_mask = i_blocksize(inode) - 1; if (offset & blksize_mask || len & blksize_mask) { error = -EINVAL; @@ -969,7 +969,7 @@ xfs_file_fallocate( if (error) goto out_unlock; } else if (mode & FALLOC_FL_INSERT_RANGE) { - unsigned blksize_mask = (1 << inode->i_blkbits) - 1; + unsigned int blksize_mask = i_blocksize(inode) - 1; new_size = i_size_read(inode) + len; if (offset & blksize_mask || len & blksize_mask) { @@ -1208,7 +1208,7 @@ xfs_find_get_desired_pgoff( unsigned nr_pages; unsigned int i; - want = min_t(pgoff_t, end - index, PAGEVEC_SIZE); + want = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1; nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, want); /* @@ -1235,17 +1235,6 @@ xfs_find_get_desired_pgoff( break; } - /* - * At lease we found one page. If this is the first time we - * step into the loop, and if the first page index offset is - * greater than the given search offset, a hole was found. - */ - if (type == HOLE_OFF && lastoff == startoff && - lastoff < page_offset(pvec.pages[0])) { - found = true; - break; - } - for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; loff_t b_offset; @@ -1257,18 +1246,18 @@ xfs_find_get_desired_pgoff( * file mapping. However, page->index will not change * because we have a reference on the page. * - * Searching done if the page index is out of range. - * If the current offset is not reaches the end of - * the specified search range, there should be a hole - * between them. + * If current page offset is beyond where we've ended, + * we've found a hole. */ - if (page->index > end) { - if (type == HOLE_OFF && lastoff < endoff) { - *offset = lastoff; - found = true; - } + if (type == HOLE_OFF && lastoff < endoff && + lastoff < page_offset(pvec.pages[i])) { + found = true; + *offset = lastoff; goto out; } + /* Searching done if the page index is out of range. */ + if (page->index > end) + goto out; lock_page(page); /* diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index d7a490f24ead..adbc1f59969a 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -210,14 +210,17 @@ xfs_iget_cache_hit( error = inode_init_always(mp->m_super, inode); if (error) { + bool wake; /* * Re-initializing the inode failed, and we are in deep * trouble. Try to re-add it to the reclaim list. */ rcu_read_lock(); spin_lock(&ip->i_flags_lock); - + wake = !!__xfs_iflags_test(ip, XFS_INEW); ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM); + if (wake) + wake_up_bit(&ip->i_flags, __XFS_INEW_BIT); ASSERT(ip->i_flags & XFS_IRECLAIMABLE); trace_xfs_iget_reclaim_fail(ip); goto out_error; @@ -363,6 +366,22 @@ out_destroy: return error; } +static void +xfs_inew_wait( + struct xfs_inode *ip) +{ + wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_INEW_BIT); + DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_INEW_BIT); + + do { + prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); + if (!xfs_iflags_test(ip, XFS_INEW)) + break; + schedule(); + } while (true); + finish_wait(wq, &wait.wait); +} + /* * Look up an inode by number in the given file system. * The inode is looked up in the cache held in each AG. @@ -467,9 +486,11 @@ out_error_or_again: STATIC int xfs_inode_ag_walk_grab( - struct xfs_inode *ip) + struct xfs_inode *ip, + int flags) { struct inode *inode = VFS_I(ip); + bool newinos = !!(flags & XFS_AGITER_INEW_WAIT); ASSERT(rcu_read_lock_held()); @@ -487,7 +508,8 @@ xfs_inode_ag_walk_grab( goto out_unlock_noent; /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ - if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) + if ((!newinos && __xfs_iflags_test(ip, XFS_INEW)) || + __xfs_iflags_test(ip, XFS_IRECLAIMABLE | XFS_IRECLAIM)) goto out_unlock_noent; spin_unlock(&ip->i_flags_lock); @@ -515,7 +537,8 @@ xfs_inode_ag_walk( void *args), int flags, void *args, - int tag) + int tag, + int iter_flags) { uint32_t first_index; int last_error = 0; @@ -557,7 +580,7 @@ restart: for (i = 0; i < nr_found; i++) { struct xfs_inode *ip = batch[i]; - if (done || xfs_inode_ag_walk_grab(ip)) + if (done || xfs_inode_ag_walk_grab(ip, iter_flags)) batch[i] = NULL; /* @@ -585,6 +608,9 @@ restart: for (i = 0; i < nr_found; i++) { if (!batch[i]) continue; + if ((iter_flags & XFS_AGITER_INEW_WAIT) && + xfs_iflags_test(batch[i], XFS_INEW)) + xfs_inew_wait(batch[i]); error = execute(batch[i], flags, args); IRELE(batch[i]); if (error == -EAGAIN) { @@ -637,12 +663,13 @@ xfs_eofblocks_worker( } int -xfs_inode_ag_iterator( +xfs_inode_ag_iterator_flags( struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, - void *args) + void *args, + int iter_flags) { struct xfs_perag *pag; int error = 0; @@ -652,7 +679,8 @@ xfs_inode_ag_iterator( ag = 0; while ((pag = xfs_perag_get(mp, ag))) { ag = pag->pag_agno + 1; - error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1); + error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1, + iter_flags); xfs_perag_put(pag); if (error) { last_error = error; @@ -664,6 +692,17 @@ xfs_inode_ag_iterator( } int +xfs_inode_ag_iterator( + struct xfs_mount *mp, + int (*execute)(struct xfs_inode *ip, int flags, + void *args), + int flags, + void *args) +{ + return xfs_inode_ag_iterator_flags(mp, execute, flags, args, 0); +} + +int xfs_inode_ag_iterator_tag( struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, @@ -680,7 +719,8 @@ xfs_inode_ag_iterator_tag( ag = 0; while ((pag = xfs_perag_get_tag(mp, ag, tag))) { ag = pag->pag_agno + 1; - error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag); + error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag, + 0); xfs_perag_put(pag); if (error) { last_error = error; diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 62f1f91c32cb..147a79212e63 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -48,6 +48,11 @@ struct xfs_eofblocks { #define XFS_IGET_UNTRUSTED 0x2 #define XFS_IGET_DONTCACHE 0x4 +/* + * flags for AG inode iterator + */ +#define XFS_AGITER_INEW_WAIT 0x1 /* wait on new inodes */ + int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino, uint flags, uint lock_flags, xfs_inode_t **ipp); @@ -72,6 +77,9 @@ void xfs_eofblocks_worker(struct work_struct *); int xfs_inode_ag_iterator(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, void *args); +int xfs_inode_ag_iterator_flags(struct xfs_mount *mp, + int (*execute)(struct xfs_inode *ip, int flags, void *args), + int flags, void *args, int iter_flags); int xfs_inode_ag_iterator_tag(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, int flags, void *args), int flags, void *args, int tag); diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index ca9e11989cbd..ae1a49845744 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -208,7 +208,8 @@ xfs_get_initial_prid(struct xfs_inode *dp) #define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */ #define XFS_ISTALE (1 << 1) /* inode has been staled */ #define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */ -#define XFS_INEW (1 << 3) /* inode has just been allocated */ +#define __XFS_INEW_BIT 3 /* inode has just been allocated */ +#define XFS_INEW (1 << __XFS_INEW_BIT) #define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */ #define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */ #define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */ @@ -453,6 +454,7 @@ static inline void xfs_finish_inode_setup(struct xfs_inode *ip) xfs_iflags_clear(ip, XFS_INEW); barrier(); unlock_new_inode(VFS_I(ip)); + wake_up_bit(&ip->i_flags, __XFS_INEW_BIT); } static inline void xfs_setup_existing_inode(struct xfs_inode *ip) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index d42738deec6d..e4a4f82ea13f 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -403,6 +403,7 @@ xfs_attrlist_by_handle( { int error = -ENOMEM; attrlist_cursor_kern_t *cursor; + struct xfs_fsop_attrlist_handlereq __user *p = arg; xfs_fsop_attrlist_handlereq_t al_hreq; struct dentry *dentry; char *kbuf; @@ -435,6 +436,11 @@ xfs_attrlist_by_handle( if (error) goto out_kfree; + if (copy_to_user(&p->pos, cursor, sizeof(attrlist_cursor_kern_t))) { + error = -EFAULT; + goto out_kfree; + } + if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen)) error = -EFAULT; @@ -1379,10 +1385,11 @@ xfs_ioc_getbmap( unsigned int cmd, void __user *arg) { - struct getbmapx bmx; + struct getbmapx bmx = { 0 }; int error; - if (copy_from_user(&bmx, arg, sizeof(struct getbmapx))) + /* struct getbmap is a strict subset of struct getbmapx. */ + if (copy_from_user(&bmx, arg, offsetof(struct getbmapx, bmv_iflags))) return -EFAULT; if (bmx.bmv_count < 2) diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 532ab79d38fe..572b64a135b3 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -1355,12 +1355,7 @@ xfs_qm_quotacheck( mp->m_qflags |= flags; error_return: - while (!list_empty(&buffer_list)) { - struct xfs_buf *bp = - list_first_entry(&buffer_list, struct xfs_buf, b_list); - list_del_init(&bp->b_list); - xfs_buf_relse(bp); - } + xfs_buf_delwri_cancel(&buffer_list); if (error) { xfs_warn(mp, diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 3640c6e896af..4d334440bd94 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -764,5 +764,6 @@ xfs_qm_dqrele_all_inodes( uint flags) { ASSERT(mp->m_quotainfo); - xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, NULL); + xfs_inode_ag_iterator_flags(mp, xfs_dqrele_inode, flags, NULL, + XFS_AGITER_INEW_WAIT); } diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 839b35ca21c6..9beaf192b4bb 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -180,7 +180,8 @@ xfs_xattr_put_listent( arraytop = context->count + prefix_len + namelen + 1; if (arraytop > context->firstu) { context->count = -1; /* insufficient space */ - return 1; + context->seen_enough = 1; + return 0; } offset = (char *)context->alist + context->count; strncpy(offset, xfs_xattr_prefix(flags), prefix_len); @@ -222,12 +223,15 @@ list_one_attr(const char *name, const size_t len, void *data, } ssize_t -xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size) +xfs_vn_listxattr( + struct dentry *dentry, + char *data, + size_t size) { struct xfs_attr_list_context context; struct attrlist_cursor_kern cursor = { 0 }; - struct inode *inode = d_inode(dentry); - int error; + struct inode *inode = d_inode(dentry); + int error; /* * First read the regular on-disk attributes. @@ -245,7 +249,9 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size) else context.put_listent = xfs_xattr_put_listent_sizes; - xfs_attr_list_int(&context); + error = xfs_attr_list_int(&context); + if (error) + return error; if (context.count < 0) return -ERANGE; |