diff options
Diffstat (limited to 'fs')
55 files changed, 1919 insertions, 1187 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c index 929b618da43b..c30c6ceac2c4 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -283,6 +283,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler, case ACL_TYPE_ACCESS: if (acl) { struct iattr iattr; + struct posix_acl *old_acl = acl; retval = posix_acl_update_mode(inode, &iattr.ia_mode, &acl); if (retval) @@ -293,6 +294,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler, * by the mode bits. So don't * update ACL. */ + posix_acl_release(old_acl); value = NULL; size = 0; } diff --git a/fs/attr.c b/fs/attr.c index 11be2265a2d5..c86b37c38fb7 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -211,7 +211,7 @@ int notify_change2(struct vfsmount *mnt, struct dentry * dentry, struct iattr * return -EPERM; if (!inode_owner_or_capable(inode)) { - error = inode_permission(inode, MAY_WRITE); + error = inode_permission2(mnt, inode, MAY_WRITE); if (error) return error; } diff --git a/fs/dcache.c b/fs/dcache.c index 7b8feb6d60c8..3a3b0f370304 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1364,7 +1364,7 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) goto out; if (dentry->d_flags & DCACHE_SHRINK_LIST) { - data->found++; + goto out; } else { if (dentry->d_flags & DCACHE_LRU_LIST) d_lru_del(dentry); diff --git a/fs/exec.c b/fs/exec.c index 8c58183eccb7..b1f5ddbf7d56 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1132,8 +1132,10 @@ EXPORT_SYMBOL(flush_old_exec); void would_dump(struct linux_binprm *bprm, struct file *file) { struct inode *inode = file_inode(file); + if (inode_permission2(file->f_path.mnt, inode, MAY_READ) < 0) { struct user_namespace *old, *user_ns; + bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; /* Ensure mm->user_ns contains the executable */ diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 95a49ef2781a..ebaff5ab93da 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -106,6 +106,7 @@ config EXT4_ENCRYPTION select CRYPTO_ECB select CRYPTO_XTS select CRYPTO_CTS + select CRYPTO_HEH select CRYPTO_CTR select CRYPTO_SHA256 select KEYS diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c index 2fbef8a14760..e2645ca9b95e 100644 --- a/fs/ext4/crypto_fname.c +++ b/fs/ext4/crypto_fname.c @@ -44,7 +44,8 @@ static void ext4_dir_crypt_complete(struct crypto_async_request *req, int res) bool ext4_valid_filenames_enc_mode(uint32_t mode) { - return (mode == EXT4_ENCRYPTION_MODE_AES_256_CTS); + return (mode == EXT4_ENCRYPTION_MODE_AES_256_CTS || + mode == EXT4_ENCRYPTION_MODE_AES_256_HEH); } static unsigned max_name_len(struct inode *inode) diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c index 15342bfff70d..363e4c6bf37f 100644 --- a/fs/ext4/crypto_key.c +++ b/fs/ext4/crypto_key.c @@ -30,16 +30,16 @@ static void derive_crypt_complete(struct crypto_async_request *req, int rc) } /** - * ext4_derive_key_aes() - Derive a key using AES-128-ECB + * ext4_derive_key_v1() - Derive a key using AES-128-ECB * @deriving_key: Encryption key used for derivation. * @source_key: Source key to which to apply derivation. * @derived_key: Derived key. * - * Return: Zero on success; non-zero otherwise. + * Return: 0 on success, -errno on failure */ -static int ext4_derive_key_aes(char deriving_key[EXT4_AES_128_ECB_KEY_SIZE], - char source_key[EXT4_AES_256_XTS_KEY_SIZE], - char derived_key[EXT4_AES_256_XTS_KEY_SIZE]) +static int ext4_derive_key_v1(const char deriving_key[EXT4_AES_128_ECB_KEY_SIZE], + const char source_key[EXT4_AES_256_XTS_KEY_SIZE], + char derived_key[EXT4_AES_256_XTS_KEY_SIZE]) { int res = 0; struct ablkcipher_request *req = NULL; @@ -84,6 +84,91 @@ out: return res; } +/** + * ext4_derive_key_v2() - Derive a key non-reversibly + * @nonce: the nonce associated with the file + * @master_key: the master key referenced by the file + * @derived_key: (output) the resulting derived key + * + * This function computes the following: + * derived_key[0:127] = AES-256-ENCRYPT(master_key[0:255], nonce) + * derived_key[128:255] = AES-256-ENCRYPT(master_key[0:255], nonce ^ 0x01) + * derived_key[256:383] = AES-256-ENCRYPT(master_key[256:511], nonce) + * derived_key[384:511] = AES-256-ENCRYPT(master_key[256:511], nonce ^ 0x01) + * + * 'nonce ^ 0x01' denotes flipping the low order bit of the last byte. + * + * Unlike the v1 algorithm, the v2 algorithm is "non-reversible", meaning that + * compromising a derived key does not also compromise the master key. + * + * Return: 0 on success, -errno on failure + */ +static int ext4_derive_key_v2(const char nonce[EXT4_KEY_DERIVATION_NONCE_SIZE], + const char master_key[EXT4_MAX_KEY_SIZE], + char derived_key[EXT4_MAX_KEY_SIZE]) +{ + const int noncelen = EXT4_KEY_DERIVATION_NONCE_SIZE; + struct crypto_cipher *tfm; + int err; + int i; + + /* + * Since we only use each transform for a small number of encryptions, + * requesting just "aes" turns out to be significantly faster than + * "ecb(aes)", by about a factor of two. + */ + tfm = crypto_alloc_cipher("aes", 0, 0); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + BUILD_BUG_ON(4 * EXT4_KEY_DERIVATION_NONCE_SIZE != EXT4_MAX_KEY_SIZE); + BUILD_BUG_ON(2 * EXT4_AES_256_ECB_KEY_SIZE != EXT4_MAX_KEY_SIZE); + for (i = 0; i < 2; i++) { + memcpy(derived_key, nonce, noncelen); + memcpy(derived_key + noncelen, nonce, noncelen); + derived_key[2 * noncelen - 1] ^= 0x01; + err = crypto_cipher_setkey(tfm, master_key, + EXT4_AES_256_ECB_KEY_SIZE); + if (err) + break; + crypto_cipher_encrypt_one(tfm, derived_key, derived_key); + crypto_cipher_encrypt_one(tfm, derived_key + noncelen, + derived_key + noncelen); + master_key += EXT4_AES_256_ECB_KEY_SIZE; + derived_key += 2 * noncelen; + } + crypto_free_cipher(tfm); + return err; +} + +/** + * ext4_derive_key() - Derive a per-file key from a nonce and master key + * @ctx: the encryption context associated with the file + * @master_key: the master key referenced by the file + * @derived_key: (output) the resulting derived key + * + * Return: 0 on success, -errno on failure + */ +static int ext4_derive_key(const struct ext4_encryption_context *ctx, + const char master_key[EXT4_MAX_KEY_SIZE], + char derived_key[EXT4_MAX_KEY_SIZE]) +{ + BUILD_BUG_ON(EXT4_AES_128_ECB_KEY_SIZE != EXT4_KEY_DERIVATION_NONCE_SIZE); + BUILD_BUG_ON(EXT4_AES_256_XTS_KEY_SIZE != EXT4_MAX_KEY_SIZE); + + /* + * Although the key derivation algorithm is logically independent of the + * choice of encryption modes, in this kernel it is bundled with HEH + * encryption of filenames, which is another crypto improvement that + * requires an on-disk format change and requires userspace to specify + * different encryption policies. + */ + if (ctx->filenames_encryption_mode == EXT4_ENCRYPTION_MODE_AES_256_HEH) + return ext4_derive_key_v2(ctx->nonce, master_key, derived_key); + else + return ext4_derive_key_v1(ctx->nonce, master_key, derived_key); +} + void ext4_free_crypt_info(struct ext4_crypt_info *ci) { if (!ci) @@ -192,6 +277,8 @@ retry: break; case EXT4_ENCRYPTION_MODE_PRIVATE: cipher_str = "bugon"; + case EXT4_ENCRYPTION_MODE_AES_256_HEH: + cipher_str = "heh(aes)"; break; default: printk_once(KERN_WARNING @@ -242,7 +329,7 @@ retry: up_read(&keyring_key->sem); goto out; } - res = ext4_derive_key_aes(ctx.nonce, master_key->raw, + res = ext4_derive_key(&ctx, master_key->raw, crypt_info->ci_raw_key); up_read(&keyring_key->sem); if (res) diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c index 8a9feb341f31..dd561f916f0b 100644 --- a/fs/ext4/crypto_policy.c +++ b/fs/ext4/crypto_policy.c @@ -156,6 +156,12 @@ int ext4_is_child_context_consistent_with_parent(struct inode *parent, WARN_ON(1); /* Should never happen */ return 0; } + + /* No restrictions on file types which are never encrypted */ + if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) && + !S_ISLNK(child->i_mode)) + return 1; + /* no restrictions if the parent directory is not encrypted */ if (!ext4_encrypted_inode(parent)) return 1; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 14d7f72f8092..482d1caee849 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -590,6 +590,7 @@ enum { #define EXT4_ENCRYPTION_MODE_AES_256_CBC 3 #define EXT4_ENCRYPTION_MODE_AES_256_CTS 4 #define EXT4_ENCRYPTION_MODE_PRIVATE 127 +#define EXT4_ENCRYPTION_MODE_AES_256_HEH 126 #include "ext4_crypto.h" @@ -2339,19 +2340,6 @@ int _ext4_get_encryption_info(struct inode *inode); #ifdef CONFIG_EXT4_FS_ENCRYPTION int ext4_has_encryption_key(struct inode *inode); -static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode) -{ - return EXT4_I(inode)->i_crypt_info; -} - -static inline int ext4_using_hardware_encryption(struct inode *inode) -{ - struct ext4_crypt_info *ci = ext4_encryption_info(inode); - - return S_ISREG(inode->i_mode) && ci && - ci->ci_data_mode == EXT4_ENCRYPTION_MODE_PRIVATE; -} - static inline int ext4_get_encryption_info(struct inode *inode) { struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; @@ -2365,6 +2353,19 @@ static inline int ext4_get_encryption_info(struct inode *inode) return 0; } +static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode) +{ + return EXT4_I(inode)->i_crypt_info; +} + +static inline int ext4_using_hardware_encryption(struct inode *inode) +{ + struct ext4_crypt_info *ci = ext4_encryption_info(inode); + + return S_ISREG(inode->i_mode) && ci && + ci->ci_data_mode == EXT4_ENCRYPTION_MODE_PRIVATE; +} + #else static inline int ext4_has_encryption_key(struct inode *inode) { diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h index 95cbc9bc1995..e28cc5aab04a 100644 --- a/fs/ext4/ext4_crypto.h +++ b/fs/ext4/ext4_crypto.h @@ -59,8 +59,10 @@ struct ext4_encryption_context { #define EXT4_XTS_TWEAK_SIZE 16 #define EXT4_AES_128_ECB_KEY_SIZE 16 #define EXT4_AES_256_GCM_KEY_SIZE 32 +#define EXT4_AES_256_ECB_KEY_SIZE 32 #define EXT4_AES_256_CBC_KEY_SIZE 32 #define EXT4_AES_256_CTS_KEY_SIZE 32 +#define EXT4_AES_256_HEH_KEY_SIZE 32 #define EXT4_AES_256_XTS_KEY_SIZE 64 #define EXT4_PRIVATE_KEY_SIZE 64 #define EXT4_MAX_KEY_SIZE 64 @@ -127,6 +129,8 @@ static inline int ext4_encryption_key_size(int mode) return EXT4_AES_256_CBC_KEY_SIZE; case EXT4_ENCRYPTION_MODE_AES_256_CTS: return EXT4_AES_256_CTS_KEY_SIZE; + case EXT4_ENCRYPTION_MODE_AES_256_HEH: + return EXT4_AES_256_HEH_KEY_SIZE; default: BUG(); } diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index cb3d6f6419cd..280d67fe33a7 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -18,6 +18,7 @@ #include "ext4.h" #include "xattr.h" #include "truncate.h" +#include <trace/events/android_fs.h> #define EXT4_XATTR_SYSTEM_DATA "data" #define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS)) @@ -502,6 +503,17 @@ int ext4_readpage_inline(struct inode *inode, struct page *page) return -EAGAIN; } + if (trace_android_fs_dataread_start_enabled()) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + inode); + trace_android_fs_dataread_start(inode, page_offset(page), + PAGE_SIZE, current->pid, + path, current->comm); + } + /* * Current inline data can only exist in the 1st page, * So for all the other pages, just set them uptodate. @@ -513,6 +525,8 @@ int ext4_readpage_inline(struct inode *inode, struct page *page) SetPageUptodate(page); } + trace_android_fs_dataread_end(inode, page_offset(page), PAGE_SIZE); + up_read(&EXT4_I(inode)->xattr_sem); unlock_page(page); @@ -1157,10 +1171,9 @@ static int ext4_finish_convert_inline_dir(handle_t *handle, set_buffer_uptodate(dir_block); err = ext4_handle_dirty_dirent_node(handle, inode, dir_block); if (err) - goto out; + return err; set_buffer_verified(dir_block); -out: - return err; + return ext4_mark_inode_dirty(handle, inode); } static int ext4_convert_inline_data_nolock(handle_t *handle, diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 99fa2fca52b1..72b384931f66 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -45,6 +45,7 @@ #include "ext4_ice.h" #include <trace/events/ext4.h> +#include <trace/events/android_fs.h> #define MPAGE_DA_EXTENT_TAIL 0x01 @@ -1018,6 +1019,16 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, pgoff_t index; unsigned from, to; + if (trace_android_fs_datawrite_start_enabled()) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + inode); + trace_android_fs_datawrite_start(inode, pos, len, + current->pid, path, + current->comm); + } trace_ext4_write_begin(inode, pos, len, flags); /* * Reserve one block more for addition to orphan list in case @@ -1154,6 +1165,7 @@ static int ext4_write_end(struct file *file, int ret = 0, ret2; int i_size_changed = 0; + trace_android_fs_datawrite_end(inode, pos, len); trace_ext4_write_end(inode, pos, len, copied); if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) { ret = ext4_jbd2_file_inode(handle, inode); @@ -1267,6 +1279,7 @@ static int ext4_journalled_write_end(struct file *file, unsigned from, to; int size_changed = 0; + trace_android_fs_datawrite_end(inode, pos, len); trace_ext4_journalled_write_end(inode, pos, len, copied); from = pos & (PAGE_CACHE_SIZE - 1); to = from + len; @@ -2742,6 +2755,16 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, len, flags, pagep, fsdata); } *fsdata = (void *)0; + if (trace_android_fs_datawrite_start_enabled()) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + inode); + trace_android_fs_datawrite_start(inode, pos, len, + current->pid, + path, current->comm); + } trace_ext4_da_write_begin(inode, pos, len, flags); if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { @@ -2860,6 +2883,7 @@ static int ext4_da_write_end(struct file *file, return ext4_write_end(file, mapping, pos, len, copied, page, fsdata); + trace_android_fs_datawrite_end(inode, pos, len); trace_ext4_da_write_end(inode, pos, len, copied); start = pos & (PAGE_CACHE_SIZE - 1); end = start + copied - 1; @@ -3352,12 +3376,42 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter, if (ext4_has_inline_data(inode)) return 0; + if (trace_android_fs_dataread_start_enabled() && + (iov_iter_rw(iter) == READ)) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + inode); + trace_android_fs_dataread_start(inode, offset, count, + current->pid, path, + current->comm); + } + if (trace_android_fs_datawrite_start_enabled() && + (iov_iter_rw(iter) == WRITE)) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + inode); + trace_android_fs_datawrite_start(inode, offset, count, + current->pid, path, + current->comm); + } trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) ret = ext4_ext_direct_IO(iocb, iter, offset); else ret = ext4_ind_direct_IO(iocb, iter, offset); trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret); + + if (trace_android_fs_dataread_start_enabled() && + (iov_iter_rw(iter) == READ)) + trace_android_fs_dataread_end(inode, offset, count); + if (trace_android_fs_datawrite_start_enabled() && + (iov_iter_rw(iter) == WRITE)) + trace_android_fs_datawrite_end(inode, offset, count); + return ret; } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7e974878d9a9..3a2594665b44 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -637,8 +637,12 @@ resizefs_out: if (err) goto encryption_policy_out; + mutex_lock(&inode->i_mutex); + err = ext4_process_policy(&policy, inode); + mutex_unlock(&inode->i_mutex); + mnt_drop_write_file(filp); encryption_policy_out: return err; diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 71a08a294529..99f1bd8c7f05 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -46,6 +46,7 @@ #include "ext4.h" #include "ext4_ice.h" +#include <trace/events/android_fs.h> /* * Call ext4_decrypt on every single page, reusing the encryption @@ -92,6 +93,17 @@ static inline bool ext4_bio_encrypted(struct bio *bio) #endif } +static void +ext4_trace_read_completion(struct bio *bio) +{ + struct page *first_page = bio->bi_io_vec[0].bv_page; + + if (first_page != NULL) + trace_android_fs_dataread_end(first_page->mapping->host, + page_offset(first_page), + bio->bi_iter.bi_size); +} + /* * I/O completion handler for multipage BIOs. * @@ -109,6 +121,9 @@ static void mpage_end_io(struct bio *bio) struct bio_vec *bv; int i; + if (trace_android_fs_dataread_start_enabled()) + ext4_trace_read_completion(bio); + if (ext4_bio_encrypted(bio)) { struct ext4_crypto_ctx *ctx = bio->bi_private; @@ -136,6 +151,30 @@ static void mpage_end_io(struct bio *bio) bio_put(bio); } +static void +ext4_submit_bio_read(struct bio *bio) +{ + if (trace_android_fs_dataread_start_enabled()) { + struct page *first_page = bio->bi_io_vec[0].bv_page; + + if (first_page != NULL) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + first_page->mapping->host); + trace_android_fs_dataread_start( + first_page->mapping->host, + page_offset(first_page), + bio->bi_iter.bi_size, + current->pid, + path, + current->comm); + } + } + submit_bio(READ, bio); +} + int ext4_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, unsigned nr_pages) @@ -277,7 +316,7 @@ int ext4_mpage_readpages(struct address_space *mapping, */ if (bio && (last_block_in_bio != blocks[0] - 1)) { submit_and_realloc: - submit_bio(READ, bio); + ext4_submit_bio_read(bio); bio = NULL; } if (bio == NULL) { @@ -309,14 +348,14 @@ int ext4_mpage_readpages(struct address_space *mapping, if (((map.m_flags & EXT4_MAP_BOUNDARY) && (relative_block == map.m_len)) || (first_hole != blocks_per_page)) { - submit_bio(READ, bio); + ext4_submit_bio_read(bio); bio = NULL; } else last_block_in_bio = blocks[blocks_per_page - 1]; goto next_page; confused: if (bio) { - submit_bio(READ, bio); + ext4_submit_bio_read(bio); bio = NULL; } if (!PageUptodate(page)) @@ -329,6 +368,6 @@ int ext4_mpage_readpages(struct address_space *mapping, } BUG_ON(pages && !list_empty(pages)); if (bio) - submit_bio(READ, bio); + ext4_submit_bio_read(bio); return 0; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6fe8e30eeb99..68345a9e59b8 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3666,7 +3666,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); if (ext4_has_feature_meta_bg(sb)) { - if (le32_to_cpu(es->s_first_meta_bg) >= db_count) { + if (le32_to_cpu(es->s_first_meta_bg) > db_count) { ext4_msg(sb, KERN_WARNING, "first meta block group too large: %u " "(group descriptor block count %u)", diff --git a/fs/f2fs/crypto_policy.c b/fs/f2fs/crypto_policy.c index e504f548b64e..5bbd1989d5e6 100644 --- a/fs/f2fs/crypto_policy.c +++ b/fs/f2fs/crypto_policy.c @@ -149,6 +149,11 @@ int f2fs_is_child_context_consistent_with_parent(struct inode *parent, BUG_ON(1); } + /* No restrictions on file types which are never encrypted */ + if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) && + !S_ISLNK(child->i_mode)) + return 1; + /* no restrictions if the parent directory is not encrypted */ if (!f2fs_encrypted_inode(parent)) return 1; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f53826ec30f3..4fb5709256fd 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -26,6 +26,7 @@ #include "segment.h" #include "trace.h" #include <trace/events/f2fs.h> +#include <trace/events/android_fs.h> static void f2fs_read_end_io(struct bio *bio) { @@ -1405,6 +1406,16 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, struct dnode_of_data dn; int err = 0; + if (trace_android_fs_datawrite_start_enabled()) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + inode); + trace_android_fs_datawrite_start(inode, pos, len, + current->pid, path, + current->comm); + } trace_f2fs_write_begin(inode, pos, len, flags); f2fs_balance_fs(sbi); @@ -1533,6 +1544,7 @@ static int f2fs_write_end(struct file *file, { struct inode *inode = page->mapping->host; + trace_android_fs_datawrite_end(inode, pos, len); trace_f2fs_write_end(inode, pos, len, copied); set_page_dirty(page); @@ -1586,6 +1598,28 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); + if (trace_android_fs_dataread_start_enabled() && + (iov_iter_rw(iter) == READ)) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + inode); + trace_android_fs_dataread_start(inode, offset, + count, current->pid, path, + current->comm); + } + if (trace_android_fs_datawrite_start_enabled() && + (iov_iter_rw(iter) == WRITE)) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + inode); + trace_android_fs_datawrite_start(inode, offset, count, + current->pid, path, + current->comm); + } if (iov_iter_rw(iter) == WRITE) { __allocate_data_blocks(inode, offset, count); if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { @@ -1599,6 +1633,13 @@ out: if (err < 0 && iov_iter_rw(iter) == WRITE) f2fs_write_failed(mapping, offset + count); + if (trace_android_fs_dataread_start_enabled() && + (iov_iter_rw(iter) == READ)) + trace_android_fs_dataread_end(inode, offset, count); + if (trace_android_fs_datawrite_start_enabled() && + (iov_iter_rw(iter) == WRITE)) + trace_android_fs_datawrite_end(inode, offset, count); + trace_f2fs_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), err); return err; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index a197215ad52b..4b449d263333 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1535,12 +1535,19 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg) #ifdef CONFIG_F2FS_FS_ENCRYPTION struct f2fs_encryption_policy policy; struct inode *inode = file_inode(filp); + int err; if (copy_from_user(&policy, (struct f2fs_encryption_policy __user *)arg, sizeof(policy))) return -EFAULT; - return f2fs_process_policy(&policy, inode); + mutex_lock(&inode->i_mutex); + + err = f2fs_process_policy(&policy, inode); + + mutex_unlock(&inode->i_mutex); + + return err; #else return -EOPNOTSUPP; #endif diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index bda7126466c0..dbb2cc4df603 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -13,6 +13,7 @@ #include "f2fs.h" #include "node.h" +#include <trace/events/android_fs.h> bool f2fs_may_inline_data(struct inode *inode) { @@ -84,14 +85,29 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) { struct page *ipage; + if (trace_android_fs_dataread_start_enabled()) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + inode); + trace_android_fs_dataread_start(inode, page_offset(page), + PAGE_SIZE, current->pid, + path, current->comm); + } + ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(ipage)) { + trace_android_fs_dataread_end(inode, page_offset(page), + PAGE_SIZE); unlock_page(page); return PTR_ERR(ipage); } if (!f2fs_has_inline_data(inode)) { f2fs_put_page(ipage, 1); + trace_android_fs_dataread_end(inode, page_offset(page), + PAGE_SIZE); return -EAGAIN; } @@ -102,6 +118,8 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) SetPageUptodate(page); f2fs_put_page(ipage, 1); + trace_android_fs_dataread_end(inode, page_offset(page), + PAGE_SIZE); unlock_page(page); return 0; } diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index ff0ac96a8e7b..db4c867369b5 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -78,8 +78,11 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) if (type == ACL_TYPE_ACCESS) { umode_t mode = inode->i_mode; - + struct posix_acl *old_acl = acl; error = posix_acl_update_mode(inode, &inode->i_mode, &acl); + + if (!acl) + posix_acl_release(old_acl); if (error) return error; if (mode != inode->i_mode) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index de7b4f97ac75..be519416c112 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -207,7 +207,7 @@ struct lm_lockname { struct gfs2_sbd *ln_sbd; u64 ln_number; unsigned int ln_type; -}; +} __packed __aligned(sizeof(int)); #define lm_name_equal(name1, name2) \ (((name1)->ln_number == (name2)->ln_number) && \ diff --git a/fs/mpage.c b/fs/mpage.c index 1480d3a18037..0fd48fdcc1b1 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -30,6 +30,14 @@ #include <linux/cleancache.h> #include "internal.h" +#define CREATE_TRACE_POINTS +#include <trace/events/android_fs.h> + +EXPORT_TRACEPOINT_SYMBOL(android_fs_datawrite_start); +EXPORT_TRACEPOINT_SYMBOL(android_fs_datawrite_end); +EXPORT_TRACEPOINT_SYMBOL(android_fs_dataread_start); +EXPORT_TRACEPOINT_SYMBOL(android_fs_dataread_end); + /* * I/O completion handler for multipage BIOs. * @@ -47,6 +55,16 @@ static void mpage_end_io(struct bio *bio) struct bio_vec *bv; int i; + if (trace_android_fs_dataread_end_enabled() && + (bio_data_dir(bio) == READ)) { + struct page *first_page = bio->bi_io_vec[0].bv_page; + + if (first_page != NULL) + trace_android_fs_dataread_end(first_page->mapping->host, + page_offset(first_page), + bio->bi_iter.bi_size); + } + bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; page_endio(page, bio_data_dir(bio), bio->bi_error); @@ -57,6 +75,24 @@ static void mpage_end_io(struct bio *bio) static struct bio *mpage_bio_submit(int rw, struct bio *bio) { + if (trace_android_fs_dataread_start_enabled() && (rw == READ)) { + struct page *first_page = bio->bi_io_vec[0].bv_page; + + if (first_page != NULL) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + first_page->mapping->host); + trace_android_fs_dataread_start( + first_page->mapping->host, + page_offset(first_page), + bio->bi_iter.bi_size, + current->pid, + path, + current->comm); + } + } bio->bi_end_io = mpage_end_io; guard_bio_eod(rw, bio); submit_bio(rw, bio); diff --git a/fs/sdcardfs/dentry.c b/fs/sdcardfs/dentry.c index 971928ab6c21..8e31d1a80f0c 100644 --- a/fs/sdcardfs/dentry.c +++ b/fs/sdcardfs/dentry.c @@ -46,7 +46,8 @@ static int sdcardfs_d_revalidate(struct dentry *dentry, unsigned int flags) spin_unlock(&dentry->d_lock); /* check uninitialized obb_dentry and - * whether the base obbpath has been changed or not */ + * whether the base obbpath has been changed or not + */ if (is_obbpath_invalid(dentry)) { d_drop(dentry); return 0; @@ -76,17 +77,13 @@ static int sdcardfs_d_revalidate(struct dentry *dentry, unsigned int flags) if (dentry < lower_dentry) { spin_lock(&dentry->d_lock); - spin_lock(&lower_dentry->d_lock); + spin_lock_nested(&lower_dentry->d_lock, DENTRY_D_LOCK_NESTED); } else { spin_lock(&lower_dentry->d_lock); - spin_lock(&dentry->d_lock); + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); } - if (dentry->d_name.len != lower_dentry->d_name.len) { - __d_drop(dentry); - err = 0; - } else if (strncasecmp(dentry->d_name.name, lower_dentry->d_name.name, - dentry->d_name.len) != 0) { + if (!qstr_case_eq(&dentry->d_name, &lower_dentry->d_name)) { __d_drop(dentry); err = 0; } @@ -110,12 +107,10 @@ out: static void sdcardfs_d_release(struct dentry *dentry) { /* release and reset the lower paths */ - if(has_graft_path(dentry)) { + if (has_graft_path(dentry)) sdcardfs_put_reset_orig_path(dentry); - } sdcardfs_put_reset_lower_path(dentry); free_dentry_private_data(dentry); - return; } static int sdcardfs_hash_ci(const struct dentry *dentry, @@ -132,12 +127,10 @@ static int sdcardfs_hash_ci(const struct dentry *dentry, unsigned long hash; name = qstr->name; - //len = vfat_striptail_len(qstr); len = qstr->len; hash = init_name_hash(); while (len--) - //hash = partial_name_hash(nls_tolower(t, *name++), hash); hash = partial_name_hash(tolower(*name++), hash); qstr->hash = end_name_hash(hash); @@ -151,35 +144,25 @@ static int sdcardfs_cmp_ci(const struct dentry *parent, const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { - /* This function is copy of vfat_cmpi */ - // FIXME Should we support national language? - //struct nls_table *t = MSDOS_SB(parent->d_sb)->nls_io; - //unsigned int alen, blen; + /* FIXME Should we support national language? */ - /* A filename cannot end in '.' or we treat it like it has none */ - /* - alen = vfat_striptail_len(name); - blen = __vfat_striptail_len(len, str); - if (alen == blen) { - if (nls_strnicmp(t, name->name, str, alen) == 0) - return 0; - } - */ if (name->len == len) { - if (strncasecmp(name->name, str, len) == 0) + if (str_n_case_eq(name->name, str, len)) return 0; } return 1; } -static void sdcardfs_canonical_path(const struct path *path, struct path *actual_path) { +static void sdcardfs_canonical_path(const struct path *path, + struct path *actual_path) +{ sdcardfs_get_real_lower(path->dentry, actual_path); } const struct dentry_operations sdcardfs_ci_dops = { .d_revalidate = sdcardfs_d_revalidate, .d_release = sdcardfs_d_release, - .d_hash = sdcardfs_hash_ci, + .d_hash = sdcardfs_hash_ci, .d_compare = sdcardfs_cmp_ci, .d_canonical_path = sdcardfs_canonical_path, }; diff --git a/fs/sdcardfs/derived_perm.c b/fs/sdcardfs/derived_perm.c index 0bb442338a85..f82fedd29007 100644 --- a/fs/sdcardfs/derived_perm.c +++ b/fs/sdcardfs/derived_perm.c @@ -37,7 +37,8 @@ static void inherit_derived_state(struct inode *parent, struct inode *child) /* helper function for derived state */ void setup_derived_state(struct inode *inode, perm_t perm, userid_t userid, - uid_t uid, bool under_android, struct inode *top) + uid_t uid, bool under_android, + struct inode *top) { struct sdcardfs_inode_info *info = SDCARDFS_I(inode); @@ -50,12 +51,17 @@ void setup_derived_state(struct inode *inode, perm_t perm, userid_t userid, set_top(info, top); } -/* While renaming, there is a point where we want the path from dentry, but the name from newdentry */ -void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, const struct qstr *name) +/* While renaming, there is a point where we want the path from dentry, + * but the name from newdentry + */ +void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, + const struct qstr *name) { struct sdcardfs_inode_info *info = SDCARDFS_I(d_inode(dentry)); - struct sdcardfs_inode_info *parent_info= SDCARDFS_I(d_inode(parent)); + struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent)); appid_t appid; + unsigned long user_num; + int err; struct qstr q_Android = QSTR_LITERAL("Android"); struct qstr q_data = QSTR_LITERAL("data"); struct qstr q_obb = QSTR_LITERAL("obb"); @@ -77,58 +83,61 @@ void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, co return; /* Derive custom permissions based on parent and current node */ switch (parent_info->perm) { - case PERM_INHERIT: - case PERM_ANDROID_PACKAGE_CACHE: - /* Already inherited above */ - break; - case PERM_PRE_ROOT: - /* Legacy internal layout places users at top level */ - info->perm = PERM_ROOT; - info->userid = simple_strtoul(name->name, NULL, 10); + case PERM_INHERIT: + case PERM_ANDROID_PACKAGE_CACHE: + /* Already inherited above */ + break; + case PERM_PRE_ROOT: + /* Legacy internal layout places users at top level */ + info->perm = PERM_ROOT; + err = kstrtoul(name->name, 10, &user_num); + if (err) + info->userid = 0; + else + info->userid = user_num; + set_top(info, &info->vfs_inode); + break; + case PERM_ROOT: + /* Assume masked off by default. */ + if (qstr_case_eq(name, &q_Android)) { + /* App-specific directories inside; let anyone traverse */ + info->perm = PERM_ANDROID; + info->under_android = true; set_top(info, &info->vfs_inode); - break; - case PERM_ROOT: - /* Assume masked off by default. */ - if (qstr_case_eq(name, &q_Android)) { - /* App-specific directories inside; let anyone traverse */ - info->perm = PERM_ANDROID; - info->under_android = true; - set_top(info, &info->vfs_inode); - } - break; - case PERM_ANDROID: - if (qstr_case_eq(name, &q_data)) { - /* App-specific directories inside; let anyone traverse */ - info->perm = PERM_ANDROID_DATA; - set_top(info, &info->vfs_inode); - } else if (qstr_case_eq(name, &q_obb)) { - /* App-specific directories inside; let anyone traverse */ - info->perm = PERM_ANDROID_OBB; - info->under_obb = true; - set_top(info, &info->vfs_inode); - /* Single OBB directory is always shared */ - } else if (qstr_case_eq(name, &q_media)) { - /* App-specific directories inside; let anyone traverse */ - info->perm = PERM_ANDROID_MEDIA; - set_top(info, &info->vfs_inode); - } - break; - case PERM_ANDROID_OBB: - case PERM_ANDROID_DATA: - case PERM_ANDROID_MEDIA: - info->perm = PERM_ANDROID_PACKAGE; - appid = get_appid(name->name); - if (appid != 0 && !is_excluded(name->name, parent_info->userid)) { - info->d_uid = multiuser_get_uid(parent_info->userid, appid); - } + } + break; + case PERM_ANDROID: + if (qstr_case_eq(name, &q_data)) { + /* App-specific directories inside; let anyone traverse */ + info->perm = PERM_ANDROID_DATA; set_top(info, &info->vfs_inode); - break; - case PERM_ANDROID_PACKAGE: - if (qstr_case_eq(name, &q_cache)) { - info->perm = PERM_ANDROID_PACKAGE_CACHE; - info->under_cache = true; - } - break; + } else if (qstr_case_eq(name, &q_obb)) { + /* App-specific directories inside; let anyone traverse */ + info->perm = PERM_ANDROID_OBB; + info->under_obb = true; + set_top(info, &info->vfs_inode); + /* Single OBB directory is always shared */ + } else if (qstr_case_eq(name, &q_media)) { + /* App-specific directories inside; let anyone traverse */ + info->perm = PERM_ANDROID_MEDIA; + set_top(info, &info->vfs_inode); + } + break; + case PERM_ANDROID_OBB: + case PERM_ANDROID_DATA: + case PERM_ANDROID_MEDIA: + info->perm = PERM_ANDROID_PACKAGE; + appid = get_appid(name->name); + if (appid != 0 && !is_excluded(name->name, parent_info->userid)) + info->d_uid = multiuser_get_uid(parent_info->userid, appid); + set_top(info, &info->vfs_inode); + break; + case PERM_ANDROID_PACKAGE: + if (qstr_case_eq(name, &q_cache)) { + info->perm = PERM_ANDROID_PACKAGE_CACHE; + info->under_cache = true; + } + break; } } @@ -137,7 +146,8 @@ void get_derived_permission(struct dentry *parent, struct dentry *dentry) get_derived_permission_new(parent, dentry, &dentry->d_name); } -static appid_t get_type(const char *name) { +static appid_t get_type(const char *name) +{ const char *ext = strrchr(name, '.'); appid_t id; @@ -149,7 +159,8 @@ static appid_t get_type(const char *name) { return AID_MEDIA_RW; } -void fixup_lower_ownership(struct dentry* dentry, const char *name) { +void fixup_lower_ownership(struct dentry *dentry, const char *name) +{ struct path path; struct inode *inode; struct inode *delegated_inode = NULL; @@ -175,49 +186,49 @@ void fixup_lower_ownership(struct dentry* dentry, const char *name) { } switch (perm) { - case PERM_ROOT: - case PERM_ANDROID: - case PERM_ANDROID_DATA: - case PERM_ANDROID_MEDIA: - case PERM_ANDROID_PACKAGE: - case PERM_ANDROID_PACKAGE_CACHE: - uid = multiuser_get_uid(info->userid, uid); - break; - case PERM_ANDROID_OBB: - uid = AID_MEDIA_OBB; - break; - case PERM_PRE_ROOT: - default: - break; + case PERM_ROOT: + case PERM_ANDROID: + case PERM_ANDROID_DATA: + case PERM_ANDROID_MEDIA: + case PERM_ANDROID_PACKAGE: + case PERM_ANDROID_PACKAGE_CACHE: + uid = multiuser_get_uid(info->userid, uid); + break; + case PERM_ANDROID_OBB: + uid = AID_MEDIA_OBB; + break; + case PERM_PRE_ROOT: + default: + break; } switch (perm) { - case PERM_ROOT: - case PERM_ANDROID: - case PERM_ANDROID_DATA: - case PERM_ANDROID_MEDIA: - if (S_ISDIR(d_inode(dentry)->i_mode)) - gid = multiuser_get_uid(info->userid, AID_MEDIA_RW); - else - gid = multiuser_get_uid(info->userid, get_type(name)); - break; - case PERM_ANDROID_OBB: - gid = AID_MEDIA_OBB; - break; - case PERM_ANDROID_PACKAGE: - if (info->d_uid != 0) - gid = multiuser_get_ext_gid(info->userid, info->d_uid); - else - gid = multiuser_get_uid(info->userid, uid); - break; - case PERM_ANDROID_PACKAGE_CACHE: - if (info->d_uid != 0) - gid = multiuser_get_cache_gid(info->userid, info->d_uid); - else - gid = multiuser_get_uid(info->userid, uid); - break; - case PERM_PRE_ROOT: - default: - break; + case PERM_ROOT: + case PERM_ANDROID: + case PERM_ANDROID_DATA: + case PERM_ANDROID_MEDIA: + if (S_ISDIR(d_inode(dentry)->i_mode)) + gid = multiuser_get_uid(info->userid, AID_MEDIA_RW); + else + gid = multiuser_get_uid(info->userid, get_type(name)); + break; + case PERM_ANDROID_OBB: + gid = AID_MEDIA_OBB; + break; + case PERM_ANDROID_PACKAGE: + if (info->d_uid != 0) + gid = multiuser_get_ext_gid(info->d_uid); + else + gid = multiuser_get_uid(info->userid, uid); + break; + case PERM_ANDROID_PACKAGE_CACHE: + if (info->d_uid != 0) + gid = multiuser_get_cache_gid(info->d_uid); + else + gid = multiuser_get_uid(info->userid, uid); + break; + case PERM_PRE_ROOT: + default: + break; } sdcardfs_get_lower_path(dentry, &path); @@ -243,9 +254,11 @@ retry_deleg: if (error) pr_err("sdcardfs: Failed to touch up lower fs gid/uid.\n"); } + sdcardfs_put_lower_path(dentry, &path); } -static int descendant_may_need_fixup(struct sdcardfs_inode_info *info, struct limit_search *limit) { +static int descendant_may_need_fixup(struct sdcardfs_inode_info *info, struct limit_search *limit) +{ if (info->perm == PERM_ROOT) return (limit->flags & BY_USERID)?info->userid == limit->userid:1; if (info->perm == PERM_PRE_ROOT || info->perm == PERM_ANDROID) @@ -253,85 +266,56 @@ static int descendant_may_need_fixup(struct sdcardfs_inode_info *info, struct li return 0; } -static int needs_fixup(perm_t perm) { +static int needs_fixup(perm_t perm) +{ if (perm == PERM_ANDROID_DATA || perm == PERM_ANDROID_OBB || perm == PERM_ANDROID_MEDIA) return 1; return 0; } -void fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit) { +static void __fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit, int depth) +{ struct dentry *child; struct sdcardfs_inode_info *info; - if (!dget(dentry)) - return; + + /* + * All paths will terminate their recursion on hitting PERM_ANDROID_OBB, + * PERM_ANDROID_MEDIA, or PERM_ANDROID_DATA. This happens at a depth of + * at most 3. + */ + WARN(depth > 3, "%s: Max expected depth exceeded!\n", __func__); + spin_lock_nested(&dentry->d_lock, depth); if (!d_inode(dentry)) { - dput(dentry); + spin_unlock(&dentry->d_lock); return; } info = SDCARDFS_I(d_inode(dentry)); if (needs_fixup(info->perm)) { - spin_lock(&dentry->d_lock); list_for_each_entry(child, &dentry->d_subdirs, d_child) { - dget(child); - if (!(limit->flags & BY_NAME) || !strncasecmp(child->d_name.name, limit->name, limit->length)) { + spin_lock_nested(&child->d_lock, depth + 1); + if (!(limit->flags & BY_NAME) || qstr_case_eq(&child->d_name, &limit->name)) { if (d_inode(child)) { get_derived_permission(dentry, child); fixup_tmp_permissions(d_inode(child)); - dput(child); + spin_unlock(&child->d_lock); break; } } - dput(child); + spin_unlock(&child->d_lock); } - spin_unlock(&dentry->d_lock); - } else if (descendant_may_need_fixup(info, limit)) { - spin_lock(&dentry->d_lock); + } else if (descendant_may_need_fixup(info, limit)) { list_for_each_entry(child, &dentry->d_subdirs, d_child) { - fixup_perms_recursive(child, limit); - } - spin_unlock(&dentry->d_lock); - } - dput(dentry); -} - -void drop_recursive(struct dentry *parent) { - struct dentry *dentry; - struct sdcardfs_inode_info *info; - if (!d_inode(parent)) - return; - info = SDCARDFS_I(d_inode(parent)); - spin_lock(&parent->d_lock); - list_for_each_entry(dentry, &parent->d_subdirs, d_child) { - if (d_inode(dentry)) { - if (SDCARDFS_I(d_inode(parent))->top != SDCARDFS_I(d_inode(dentry))->top) { - drop_recursive(dentry); - d_drop(dentry); - } + __fixup_perms_recursive(child, limit, depth + 1); } } - spin_unlock(&parent->d_lock); + spin_unlock(&dentry->d_lock); } -void fixup_top_recursive(struct dentry *parent) { - struct dentry *dentry; - struct sdcardfs_inode_info *info; - - if (!d_inode(parent)) - return; - info = SDCARDFS_I(d_inode(parent)); - spin_lock(&parent->d_lock); - list_for_each_entry(dentry, &parent->d_subdirs, d_child) { - if (d_inode(dentry)) { - if (SDCARDFS_I(d_inode(parent))->top != SDCARDFS_I(d_inode(dentry))->top) { - get_derived_permission(parent, dentry); - fixup_tmp_permissions(d_inode(dentry)); - fixup_top_recursive(dentry); - } - } - } - spin_unlock(&parent->d_lock); +void fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit) +{ + __fixup_perms_recursive(dentry, limit, 0); } /* main function for updating derived permission */ @@ -339,19 +323,17 @@ inline void update_derived_permission_lock(struct dentry *dentry) { struct dentry *parent; - if(!dentry || !d_inode(dentry)) { - printk(KERN_ERR "sdcardfs: %s: invalid dentry\n", __func__); + if (!dentry || !d_inode(dentry)) { + pr_err("sdcardfs: %s: invalid dentry\n", __func__); return; } /* FIXME: * 1. need to check whether the dentry is updated or not * 2. remove the root dentry update */ - if(IS_ROOT(dentry)) { - //setup_default_pre_root_state(d_inode(dentry)); - } else { + if (!IS_ROOT(dentry)) { parent = dget_parent(dentry); - if(parent) { + if (parent) { get_derived_permission(parent, dentry); dput(parent); } @@ -363,15 +345,15 @@ int need_graft_path(struct dentry *dentry) { int ret = 0; struct dentry *parent = dget_parent(dentry); - struct sdcardfs_inode_info *parent_info= SDCARDFS_I(d_inode(parent)); + struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent)); struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); struct qstr obb = QSTR_LITERAL("obb"); - if(parent_info->perm == PERM_ANDROID && + if (parent_info->perm == PERM_ANDROID && qstr_case_eq(&dentry->d_name, &obb)) { /* /Android/obb is the base obbpath of DERIVED_UNIFIED */ - if(!(sbi->options.multiuser == false + if (!(sbi->options.multiuser == false && parent_info->userid == 0)) { ret = 1; } @@ -386,22 +368,24 @@ int is_obbpath_invalid(struct dentry *dent) struct sdcardfs_dentry_info *di = SDCARDFS_D(dent); struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dent->d_sb); char *path_buf, *obbpath_s; + int need_put = 0; + struct path lower_path; /* check the base obbpath has been changed. * this routine can check an uninitialized obb dentry as well. - * regarding the uninitialized obb, refer to the sdcardfs_mkdir() */ + * regarding the uninitialized obb, refer to the sdcardfs_mkdir() + */ spin_lock(&di->lock); - if(di->orig_path.dentry) { - if(!di->lower_path.dentry) { + if (di->orig_path.dentry) { + if (!di->lower_path.dentry) { ret = 1; } else { path_get(&di->lower_path); - //lower_parent = lock_parent(lower_path->dentry); path_buf = kmalloc(PATH_MAX, GFP_ATOMIC); - if(!path_buf) { + if (!path_buf) { ret = 1; - printk(KERN_ERR "sdcardfs: fail to allocate path_buf in %s.\n", __func__); + pr_err("sdcardfs: fail to allocate path_buf in %s.\n", __func__); } else { obbpath_s = d_path(&di->lower_path, path_buf, PATH_MAX); if (d_unhashed(di->lower_path.dentry) || @@ -411,11 +395,13 @@ int is_obbpath_invalid(struct dentry *dent) kfree(path_buf); } - //unlock_dir(lower_parent); - path_put(&di->lower_path); + pathcpy(&lower_path, &di->lower_path); + need_put = 1; } } spin_unlock(&di->lock); + if (need_put) + path_put(&lower_path); return ret; } @@ -423,13 +409,13 @@ int is_base_obbpath(struct dentry *dentry) { int ret = 0; struct dentry *parent = dget_parent(dentry); - struct sdcardfs_inode_info *parent_info= SDCARDFS_I(d_inode(parent)); + struct sdcardfs_inode_info *parent_info = SDCARDFS_I(d_inode(parent)); struct sdcardfs_sb_info *sbi = SDCARDFS_SB(dentry->d_sb); struct qstr q_obb = QSTR_LITERAL("obb"); spin_lock(&SDCARDFS_D(dentry)->lock); if (sbi->options.multiuser) { - if(parent_info->perm == PERM_PRE_ROOT && + if (parent_info->perm == PERM_PRE_ROOT && qstr_case_eq(&dentry->d_name, &q_obb)) { ret = 1; } @@ -444,7 +430,8 @@ int is_base_obbpath(struct dentry *dentry) /* The lower_path will be stored to the dentry's orig_path * and the base obbpath will be copyed to the lower_path variable. * if an error returned, there's no change in the lower_path - * returns: -ERRNO if error (0: no error) */ + * returns: -ERRNO if error (0: no error) + */ int setup_obb_dentry(struct dentry *dentry, struct path *lower_path) { int err = 0; @@ -453,23 +440,24 @@ int setup_obb_dentry(struct dentry *dentry, struct path *lower_path) /* A local obb dentry must have its own orig_path to support rmdir * and mkdir of itself. Usually, we expect that the sbi->obbpath - * is avaiable on this stage. */ + * is avaiable on this stage. + */ sdcardfs_set_orig_path(dentry, lower_path); err = kern_path(sbi->obbpath_s, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &obbpath); - if(!err) { + if (!err) { /* the obbpath base has been found */ - printk(KERN_INFO "sdcardfs: the sbi->obbpath is found\n"); pathcpy(lower_path, &obbpath); } else { /* if the sbi->obbpath is not available, we can optionally * setup the lower_path with its orig_path. * but, the current implementation just returns an error * because the sdcard daemon also regards this case as - * a lookup fail. */ - printk(KERN_INFO "sdcardfs: the sbi->obbpath is not available\n"); + * a lookup fail. + */ + pr_info("sdcardfs: the sbi->obbpath is not available\n"); } return err; } diff --git a/fs/sdcardfs/file.c b/fs/sdcardfs/file.c index 23f8cd7f8877..c0146e03fa2e 100644 --- a/fs/sdcardfs/file.c +++ b/fs/sdcardfs/file.c @@ -65,7 +65,7 @@ static ssize_t sdcardfs_write(struct file *file, const char __user *buf, /* check disk space */ if (!check_min_free_space(dentry, count, 0)) { - printk(KERN_INFO "No minimum free space.\n"); + pr_err("No minimum free space.\n"); return -ENOSPC; } @@ -113,6 +113,10 @@ static long sdcardfs_unlocked_ioctl(struct file *file, unsigned int cmd, if (lower_file->f_op->unlocked_ioctl) err = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); + /* some ioctls can change inode attributes (EXT2_IOC_SETFLAGS) */ + if (!err) + sdcardfs_copy_and_fix_attrs(file_inode(file), + file_inode(lower_file)); out: return err; } @@ -160,8 +164,7 @@ static int sdcardfs_mmap(struct file *file, struct vm_area_struct *vma) lower_file = sdcardfs_lower_file(file); if (willwrite && !lower_file->f_mapping->a_ops->writepage) { err = -EINVAL; - printk(KERN_ERR "sdcardfs: lower file system does not " - "support writeable mmap\n"); + pr_err("sdcardfs: lower file system does not support writeable mmap\n"); goto out; } @@ -173,16 +176,10 @@ static int sdcardfs_mmap(struct file *file, struct vm_area_struct *vma) if (!SDCARDFS_F(file)->lower_vm_ops) { err = lower_file->f_op->mmap(lower_file, vma); if (err) { - printk(KERN_ERR "sdcardfs: lower mmap failed %d\n", err); + pr_err("sdcardfs: lower mmap failed %d\n", err); goto out; } saved_vm_ops = vma->vm_ops; /* save: came from lower ->mmap */ - err = do_munmap(current->mm, vma->vm_start, - vma->vm_end - vma->vm_start); - if (err) { - printk(KERN_ERR "sdcardfs: do_munmap failed %d\n", err); - goto out; - } } /* @@ -216,10 +213,7 @@ static int sdcardfs_open(struct inode *inode, struct file *file) goto out_err; } - if(!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) { - printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" - " dentry: %s, task:%s\n", - __func__, dentry->d_name.name, current->comm); + if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) { err = -EACCES; goto out_err; } @@ -251,9 +245,8 @@ static int sdcardfs_open(struct inode *inode, struct file *file) if (err) kfree(SDCARDFS_F(file)); - else { + else sdcardfs_copy_and_fix_attrs(inode, sdcardfs_lower_inode(inode)); - } out_revert_cred: REVERT_CRED(saved_cred); @@ -323,6 +316,75 @@ static int sdcardfs_fasync(int fd, struct file *file, int flag) return err; } +/* + * Sdcardfs cannot use generic_file_llseek as ->llseek, because it would + * only set the offset of the upper file. So we have to implement our + * own method to set both the upper and lower file offsets + * consistently. + */ +static loff_t sdcardfs_file_llseek(struct file *file, loff_t offset, int whence) +{ + int err; + struct file *lower_file; + + err = generic_file_llseek(file, offset, whence); + if (err < 0) + goto out; + + lower_file = sdcardfs_lower_file(file); + err = generic_file_llseek(lower_file, offset, whence); + +out: + return err; +} + +/* + * Sdcardfs read_iter, redirect modified iocb to lower read_iter + */ +ssize_t sdcardfs_read_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + int err; + struct file *file = iocb->ki_filp, *lower_file; + + lower_file = sdcardfs_lower_file(file); + if (!lower_file->f_op->read_iter) { + err = -EINVAL; + goto out; + } + + get_file(lower_file); /* prevent lower_file from being released */ + iocb->ki_filp = lower_file; + err = lower_file->f_op->read_iter(iocb, iter); + /* ? wait IO finish to update atime as ecryptfs ? */ + iocb->ki_filp = file; + fput(lower_file); +out: + return err; +} + +/* + * Sdcardfs write_iter, redirect modified iocb to lower write_iter + */ +ssize_t sdcardfs_write_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + int err; + struct file *file = iocb->ki_filp, *lower_file; + + lower_file = sdcardfs_lower_file(file); + if (!lower_file->f_op->write_iter) { + err = -EINVAL; + goto out; + } + + get_file(lower_file); /* prevent lower_file from being released */ + iocb->ki_filp = lower_file; + err = lower_file->f_op->write_iter(iocb, iter); + iocb->ki_filp = file; + fput(lower_file); +out: + return err; +} + const struct file_operations sdcardfs_main_fops = { .llseek = generic_file_llseek, .read = sdcardfs_read, @@ -337,11 +399,13 @@ const struct file_operations sdcardfs_main_fops = { .release = sdcardfs_file_release, .fsync = sdcardfs_fsync, .fasync = sdcardfs_fasync, + .read_iter = sdcardfs_read_iter, + .write_iter = sdcardfs_write_iter, }; /* trimmed directory options */ const struct file_operations sdcardfs_dir_fops = { - .llseek = generic_file_llseek, + .llseek = sdcardfs_file_llseek, .read = generic_read_dir, .iterate = sdcardfs_readdir, .unlocked_ioctl = sdcardfs_unlocked_ioctl, diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index 68e615045616..f15cb11ca8fd 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -20,12 +20,13 @@ #include "sdcardfs.h" #include <linux/fs_struct.h> +#include <linux/ratelimit.h> /* Do not directly use this function. Use OVERRIDE_CRED() instead. */ -const struct cred * override_fsids(struct sdcardfs_sb_info* sbi, struct sdcardfs_inode_info *info) +const struct cred *override_fsids(struct sdcardfs_sb_info *sbi, struct sdcardfs_inode_info *info) { - struct cred * cred; - const struct cred * old_cred; + struct cred *cred; + const struct cred *old_cred; uid_t uid; cred = prepare_creds(); @@ -45,9 +46,9 @@ const struct cred * override_fsids(struct sdcardfs_sb_info* sbi, struct sdcardfs } /* Do not directly use this function, use REVERT_CRED() instead. */ -void revert_fsids(const struct cred * old_cred) +void revert_fsids(const struct cred *old_cred) { - const struct cred * cur_cred; + const struct cred *cur_cred; cur_cred = current->cred; revert_creds(old_cred); @@ -66,10 +67,7 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, struct fs_struct *saved_fs; struct fs_struct *copied_fs; - if(!check_caller_access_to_name(dir, &dentry->d_name)) { - printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" - " dentry: %s, task:%s\n", - __func__, dentry->d_name.name, current->comm); + if (!check_caller_access_to_name(dir, &dentry->d_name)) { err = -EACCES; goto out_eacces; } @@ -168,10 +166,7 @@ static int sdcardfs_unlink(struct inode *dir, struct dentry *dentry) struct path lower_path; const struct cred *saved_cred = NULL; - if(!check_caller_access_to_name(dir, &dentry->d_name)) { - printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" - " dentry: %s, task:%s\n", - __func__, dentry->d_name.name, current->comm); + if (!check_caller_access_to_name(dir, &dentry->d_name)) { err = -EACCES; goto out_eacces; } @@ -245,14 +240,15 @@ out: } #endif -static int touch(char *abs_path, mode_t mode) { +static int touch(char *abs_path, mode_t mode) +{ struct file *filp = filp_open(abs_path, O_RDWR|O_CREAT|O_EXCL|O_NOFOLLOW, mode); + if (IS_ERR(filp)) { if (PTR_ERR(filp) == -EEXIST) { return 0; - } - else { - printk(KERN_ERR "sdcardfs: failed to open(%s): %ld\n", + } else { + pr_err("sdcardfs: failed to open(%s): %ld\n", abs_path, PTR_ERR(filp)); return PTR_ERR(filp); } @@ -278,10 +274,7 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode struct qstr q_obb = QSTR_LITERAL("obb"); struct qstr q_data = QSTR_LITERAL("data"); - if(!check_caller_access_to_name(dir, &dentry->d_name)) { - printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" - " dentry: %s, task:%s\n", - __func__, dentry->d_name.name, current->comm); + if (!check_caller_access_to_name(dir, &dentry->d_name)) { err = -EACCES; goto out_eacces; } @@ -291,7 +284,7 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode /* check disk space */ if (!check_min_free_space(dentry, 0, 1)) { - printk(KERN_INFO "sdcardfs: No minimum free space.\n"); + pr_err("sdcardfs: No minimum free space.\n"); err = -ENOSPC; goto out_revert; } @@ -323,19 +316,21 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode } /* if it is a local obb dentry, setup it with the base obbpath */ - if(need_graft_path(dentry)) { + if (need_graft_path(dentry)) { err = setup_obb_dentry(dentry, &lower_path); - if(err) { + if (err) { /* if the sbi->obbpath is not available, the lower_path won't be * changed by setup_obb_dentry() but the lower path is saved to * its orig_path. this dentry will be revalidated later. - * but now, the lower_path should be NULL */ + * but now, the lower_path should be NULL + */ sdcardfs_put_reset_lower_path(dentry); /* the newly created lower path which saved to its orig_path or * the lower_path is the base obbpath. - * therefore, an additional path_get is required */ + * therefore, an additional path_get is required + */ path_get(&lower_path); } else make_nomedia_in_obb = 1; @@ -365,7 +360,7 @@ static int sdcardfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode set_fs_pwd(current->fs, &lower_path); touch_err = touch(".nomedia", 0664); if (touch_err) { - printk(KERN_ERR "sdcardfs: failed to create .nomedia in %s: %d\n", + pr_err("sdcardfs: failed to create .nomedia in %s: %d\n", lower_path.dentry->d_name.name, touch_err); goto out; } @@ -390,10 +385,7 @@ static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry) struct path lower_path; const struct cred *saved_cred = NULL; - if(!check_caller_access_to_name(dir, &dentry->d_name)) { - printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" - " dentry: %s, task:%s\n", - __func__, dentry->d_name.name, current->comm); + if (!check_caller_access_to_name(dir, &dentry->d_name)) { err = -EACCES; goto out_eacces; } @@ -402,7 +394,8 @@ static int sdcardfs_rmdir(struct inode *dir, struct dentry *dentry) OVERRIDE_CRED(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir)); /* sdcardfs_get_real_lower(): in case of remove an user's obb dentry - * the dentry on the original path should be deleted. */ + * the dentry on the original path should be deleted. + */ sdcardfs_get_real_lower(dentry, &lower_path); lower_dentry = lower_path.dentry; @@ -478,11 +471,8 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct path lower_old_path, lower_new_path; const struct cred *saved_cred = NULL; - if(!check_caller_access_to_name(old_dir, &old_dentry->d_name) || + if (!check_caller_access_to_name(old_dir, &old_dentry->d_name) || !check_caller_access_to_name(new_dir, &new_dentry->d_name)) { - printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" - " new_dentry: %s, task:%s\n", - __func__, new_dentry->d_name.name, current->comm); err = -EACCES; goto out_eacces; } @@ -528,7 +518,7 @@ static int sdcardfs_rename(struct inode *old_dir, struct dentry *old_dentry, get_derived_permission_new(new_dentry->d_parent, old_dentry, &new_dentry->d_name); fixup_tmp_permissions(d_inode(old_dentry)); fixup_lower_ownership(old_dentry, new_dentry->d_name.name); - drop_recursive(old_dentry); /* Can't fixup ownership recursively :( */ + d_invalidate(old_dentry); /* Can't fixup ownership recursively :( */ out: unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry); dput(lower_old_dir_dentry); @@ -599,7 +589,7 @@ static const char *sdcardfs_follow_link(struct dentry *dentry, void **cookie) static int sdcardfs_permission_wrn(struct inode *inode, int mask) { - WARN(1, "sdcardfs does not support permission. Use permission2.\n"); + WARN_RATELIMIT(1, "sdcardfs does not support permission. Use permission2.\n"); return -EINVAL; } @@ -652,7 +642,7 @@ static int sdcardfs_permission(struct vfsmount *mnt, struct inode *inode, int ma release_top(SDCARDFS_I(inode)); tmp.i_sb = inode->i_sb; if (IS_POSIXACL(inode)) - printk(KERN_WARNING "%s: This may be undefined behavior... \n", __func__); + pr_warn("%s: This may be undefined behavior...\n", __func__); err = generic_permission(&tmp, mask); /* XXX * Original sdcardfs code calls inode_permission(lower_inode,.. ) @@ -670,6 +660,7 @@ static int sdcardfs_permission(struct vfsmount *mnt, struct inode *inode, int ma * we check it with AID_MEDIA_RW permission */ struct inode *lower_inode; + OVERRIDE_CRED(SDCARDFS_SB(inode->sb)); lower_inode = sdcardfs_lower_inode(inode); @@ -684,7 +675,7 @@ static int sdcardfs_permission(struct vfsmount *mnt, struct inode *inode, int ma static int sdcardfs_setattr_wrn(struct dentry *dentry, struct iattr *ia) { - WARN(1, "sdcardfs does not support setattr. User setattr2.\n"); + WARN_RATELIMIT(1, "sdcardfs does not support setattr. User setattr2.\n"); return -EINVAL; } @@ -738,19 +729,16 @@ static int sdcardfs_setattr(struct vfsmount *mnt, struct dentry *dentry, struct /* prepare our own lower struct iattr (with the lower file) */ memcpy(&lower_ia, ia, sizeof(lower_ia)); /* Allow touch updating timestamps. A previous permission check ensures - * we have write access. Changes to mode, owner, and group are ignored*/ + * we have write access. Changes to mode, owner, and group are ignored + */ ia->ia_valid |= ATTR_FORCE; err = inode_change_ok(&tmp, ia); if (!err) { /* check the Android group ID */ parent = dget_parent(dentry); - if(!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) { - printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" - " dentry: %s, task:%s\n", - __func__, dentry->d_name.name, current->comm); + if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) err = -EACCES; - } dput(parent); } @@ -828,10 +816,12 @@ out_err: return err; } -static int sdcardfs_fillattr(struct vfsmount *mnt, struct inode *inode, struct kstat *stat) +static int sdcardfs_fillattr(struct vfsmount *mnt, + struct inode *inode, struct kstat *stat) { struct sdcardfs_inode_info *info = SDCARDFS_I(inode); struct inode *top = grab_top(info); + if (!top) return -EINVAL; @@ -855,33 +845,27 @@ static int sdcardfs_fillattr(struct vfsmount *mnt, struct inode *inode, struct k static int sdcardfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { - struct dentry *lower_dentry; - struct inode *inode; - struct inode *lower_inode; + struct kstat lower_stat; struct path lower_path; struct dentry *parent; int err; parent = dget_parent(dentry); - if(!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) { - printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" - " dentry: %s, task:%s\n", - __func__, dentry->d_name.name, current->comm); + if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) { dput(parent); return -EACCES; } dput(parent); - inode = d_inode(dentry); - sdcardfs_get_lower_path(dentry, &lower_path); - lower_dentry = lower_path.dentry; - lower_inode = sdcardfs_lower_inode(inode); - - sdcardfs_copy_and_fix_attrs(inode, lower_inode); - fsstack_copy_inode_size(inode, lower_inode); - - err = sdcardfs_fillattr(mnt, inode, stat); + err = vfs_getattr(&lower_path, &lower_stat); + if (err) + goto out; + sdcardfs_copy_and_fix_attrs(d_inode(dentry), + d_inode(lower_path.dentry)); + err = sdcardfs_fillattr(mnt, d_inode(dentry), stat); + stat->blocks = lower_stat.blocks; +out: sdcardfs_put_lower_path(dentry, &lower_path); return err; } diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index 9135866b7766..f10f7752f514 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -36,8 +36,7 @@ int sdcardfs_init_dentry_cache(void) void sdcardfs_destroy_dentry_cache(void) { - if (sdcardfs_dentry_cachep) - kmem_cache_destroy(sdcardfs_dentry_cachep); + kmem_cache_destroy(sdcardfs_dentry_cachep); } void free_dentry_private_data(struct dentry *dentry) @@ -73,6 +72,7 @@ static int sdcardfs_inode_test(struct inode *inode, void *candidate_data/*void * { struct inode *current_lower_inode = sdcardfs_lower_inode(inode); userid_t current_userid = SDCARDFS_I(inode)->userid; + if (current_lower_inode == ((struct inode_data *)candidate_data)->lower_inode && current_userid == ((struct inode_data *)candidate_data)->id) return 1; /* found a match */ @@ -102,7 +102,7 @@ struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode, u * instead. */ lower_inode->i_ino, /* hashval */ - sdcardfs_inode_test, /* inode comparison function */ + sdcardfs_inode_test, /* inode comparison function */ sdcardfs_inode_set, /* inode init function */ &data); /* data passed to test+set fxns */ if (!inode) { @@ -164,27 +164,25 @@ struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode, u } /* - * Connect a sdcardfs inode dentry/inode with several lower ones. This is - * the classic stackable file system "vnode interposition" action. - * - * @dentry: sdcardfs's dentry which interposes on lower one - * @sb: sdcardfs's super_block - * @lower_path: the lower path (caller does path_get/put) + * Helper interpose routine, called directly by ->lookup to handle + * spliced dentries. */ -int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, - struct path *lower_path, userid_t id) +static struct dentry *__sdcardfs_interpose(struct dentry *dentry, + struct super_block *sb, + struct path *lower_path, + userid_t id) { - int err = 0; struct inode *inode; struct inode *lower_inode; struct super_block *lower_sb; + struct dentry *ret_dentry; lower_inode = d_inode(lower_path->dentry); lower_sb = sdcardfs_lower_super(sb); /* check that the lower file system didn't cross a mount point */ if (lower_inode->i_sb != lower_sb) { - err = -EXDEV; + ret_dentry = ERR_PTR(-EXDEV); goto out; } @@ -196,14 +194,54 @@ int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, /* inherit lower inode number for sdcardfs's inode */ inode = sdcardfs_iget(sb, lower_inode, id); if (IS_ERR(inode)) { - err = PTR_ERR(inode); + ret_dentry = ERR_CAST(inode); goto out; } - d_add(dentry, inode); + ret_dentry = d_splice_alias(inode, dentry); + dentry = ret_dentry ?: dentry; update_derived_permission_lock(dentry); out: - return err; + return ret_dentry; +} + +/* + * Connect an sdcardfs inode dentry/inode with several lower ones. This is + * the classic stackable file system "vnode interposition" action. + * + * @dentry: sdcardfs's dentry which interposes on lower one + * @sb: sdcardfs's super_block + * @lower_path: the lower path (caller does path_get/put) + */ +int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, + struct path *lower_path, userid_t id) +{ + struct dentry *ret_dentry; + + ret_dentry = __sdcardfs_interpose(dentry, sb, lower_path, id); + return PTR_ERR(ret_dentry); +} + +struct sdcardfs_name_data { + struct dir_context ctx; + const struct qstr *to_find; + char *name; + bool found; +}; + +static int sdcardfs_name_match(struct dir_context *ctx, const char *name, + int namelen, loff_t offset, u64 ino, unsigned int d_type) +{ + struct sdcardfs_name_data *buf = container_of(ctx, struct sdcardfs_name_data, ctx); + struct qstr candidate = QSTR_INIT(name, namelen); + + if (qstr_case_eq(buf->to_find, &candidate)) { + memcpy(buf->name, name, namelen); + buf->name[namelen] = 0; + buf->found = true; + return 1; + } + return 0; } /* @@ -221,6 +259,8 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, struct dentry *lower_dentry; const struct qstr *name; struct path lower_path; + struct qstr dname; + struct dentry *ret_dentry = NULL; struct sdcardfs_sb_info *sbi; sbi = SDCARDFS_SB(dentry->d_sb); @@ -241,60 +281,79 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, &lower_path); /* check for other cases */ if (err == -ENOENT) { - struct dentry *child; - struct dentry *match = NULL; - mutex_lock(&d_inode(lower_dir_dentry)->i_mutex); - spin_lock(&lower_dir_dentry->d_lock); - list_for_each_entry(child, &lower_dir_dentry->d_subdirs, d_child) { - if (child && d_inode(child)) { - if (qstr_case_eq(&child->d_name, name)) { - match = dget(child); - break; - } - } + struct file *file; + const struct cred *cred = current_cred(); + + struct sdcardfs_name_data buffer = { + .ctx.actor = sdcardfs_name_match, + .to_find = name, + .name = __getname(), + .found = false, + }; + + if (!buffer.name) { + err = -ENOMEM; + goto out; + } + file = dentry_open(lower_parent_path, O_RDONLY, cred); + if (IS_ERR(file)) { + err = PTR_ERR(file); + goto put_name; } - spin_unlock(&lower_dir_dentry->d_lock); - mutex_unlock(&d_inode(lower_dir_dentry)->i_mutex); - if (match) { + err = iterate_dir(file, &buffer.ctx); + fput(file); + if (err) + goto put_name; + + if (buffer.found) err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, - match->d_name.name, 0, + buffer.name, 0, &lower_path); - dput(match); - } + else + err = -ENOENT; +put_name: + __putname(buffer.name); } /* no error: handle positive dentries */ if (!err) { /* check if the dentry is an obb dentry * if true, the lower_inode must be replaced with - * the inode of the graft path */ + * the inode of the graft path + */ - if(need_graft_path(dentry)) { + if (need_graft_path(dentry)) { /* setup_obb_dentry() - * The lower_path will be stored to the dentry's orig_path + * The lower_path will be stored to the dentry's orig_path * and the base obbpath will be copyed to the lower_path variable. * if an error returned, there's no change in the lower_path - * returns: -ERRNO if error (0: no error) */ + * returns: -ERRNO if error (0: no error) + */ err = setup_obb_dentry(dentry, &lower_path); - if(err) { + if (err) { /* if the sbi->obbpath is not available, we can optionally * setup the lower_path with its orig_path. * but, the current implementation just returns an error * because the sdcard daemon also regards this case as - * a lookup fail. */ - printk(KERN_INFO "sdcardfs: base obbpath is not available\n"); + * a lookup fail. + */ + pr_info("sdcardfs: base obbpath is not available\n"); sdcardfs_put_reset_orig_path(dentry); goto out; } } sdcardfs_set_lower_path(dentry, &lower_path); - err = sdcardfs_interpose(dentry, dentry->d_sb, &lower_path, id); - if (err) /* path_put underlying path on error */ + ret_dentry = + __sdcardfs_interpose(dentry, dentry->d_sb, &lower_path, id); + if (IS_ERR(ret_dentry)) { + err = PTR_ERR(ret_dentry); + /* path_put underlying path on error */ sdcardfs_put_reset_lower_path(dentry); + } goto out; } @@ -306,11 +365,14 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, goto out; /* instatiate a new negative dentry */ - lower_dentry = d_lookup(lower_dir_dentry, name); + dname.name = name->name; + dname.len = name->len; + dname.hash = full_name_hash(dname.name, dname.len); + lower_dentry = d_lookup(lower_dir_dentry, &dname); if (lower_dentry) goto setup_lower; - lower_dentry = d_alloc(lower_dir_dentry, name); + lower_dentry = d_alloc(lower_dir_dentry, &dname); if (!lower_dentry) { err = -ENOMEM; goto out; @@ -331,14 +393,16 @@ setup_lower: err = 0; out: - return ERR_PTR(err); + if (err) + return ERR_PTR(err); + return ret_dentry; } /* * On success: - * fills dentry object appropriate values and returns NULL. + * fills dentry object appropriate values and returns NULL. * On fail (== error) - * returns error ptr + * returns error ptr * * @dir : Parent inode. It is locked (dir->i_mutex) * @dentry : Target dentry to lookup. we should set each of fields. @@ -355,13 +419,10 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, parent = dget_parent(dentry); - if(!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) { + if (!check_caller_access_to_name(d_inode(parent), &dentry->d_name)) { ret = ERR_PTR(-EACCES); - printk(KERN_INFO "%s: need to check the caller's gid in packages.list\n" - " dentry: %s, task:%s\n", - __func__, dentry->d_name.name, current->comm); goto out_err; - } + } /* save current_cred and override it */ OVERRIDE_CRED_PTR(SDCARDFS_SB(dir->i_sb), saved_cred, SDCARDFS_I(dir)); @@ -377,9 +438,7 @@ struct dentry *sdcardfs_lookup(struct inode *dir, struct dentry *dentry, ret = __sdcardfs_lookup(dentry, flags, &lower_parent_path, SDCARDFS_I(dir)->userid); if (IS_ERR(ret)) - { goto out; - } if (ret) dentry = ret; if (d_inode(dentry)) { diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index 7a8eae29e44d..953d2156d2e9 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -29,7 +29,7 @@ enum { Opt_gid, Opt_debug, Opt_mask, - Opt_multiuser, // May need? + Opt_multiuser, Opt_userid, Opt_reserved_mb, Opt_err, @@ -72,6 +72,7 @@ static int parse_options(struct super_block *sb, char *options, int silent, while ((p = strsep(&options, ",")) != NULL) { int token; + if (!*p) continue; @@ -116,19 +117,17 @@ static int parse_options(struct super_block *sb, char *options, int silent, break; /* unknown option */ default: - if (!silent) { - printk( KERN_ERR "Unrecognized mount option \"%s\" " - "or missing value", p); - } + if (!silent) + pr_err("Unrecognized mount option \"%s\" or missing value", p); return -EINVAL; } } if (*debug) { - printk( KERN_INFO "sdcardfs : options - debug:%d\n", *debug); - printk( KERN_INFO "sdcardfs : options - uid:%d\n", + pr_info("sdcardfs : options - debug:%d\n", *debug); + pr_info("sdcardfs : options - uid:%d\n", opts->fs_low_uid); - printk( KERN_INFO "sdcardfs : options - gid:%d\n", + pr_info("sdcardfs : options - gid:%d\n", opts->fs_low_gid); } @@ -148,6 +147,7 @@ int parse_options_remount(struct super_block *sb, char *options, int silent, while ((p = strsep(&options, ",")) != NULL) { int token; + if (!*p) continue; @@ -173,22 +173,20 @@ int parse_options_remount(struct super_block *sb, char *options, int silent, case Opt_fsuid: case Opt_fsgid: case Opt_reserved_mb: - printk( KERN_WARNING "Option \"%s\" can't be changed during remount\n", p); + pr_warn("Option \"%s\" can't be changed during remount\n", p); break; /* unknown option */ default: - if (!silent) { - printk( KERN_ERR "Unrecognized mount option \"%s\" " - "or missing value", p); - } + if (!silent) + pr_err("Unrecognized mount option \"%s\" or missing value", p); return -EINVAL; } } if (debug) { - printk( KERN_INFO "sdcardfs : options - debug:%d\n", debug); - printk( KERN_INFO "sdcardfs : options - gid:%d\n", vfsopts->gid); - printk( KERN_INFO "sdcardfs : options - mask:%d\n", vfsopts->mask); + pr_info("sdcardfs : options - debug:%d\n", debug); + pr_info("sdcardfs : options - gid:%d\n", vfsopts->gid); + pr_info("sdcardfs : options - mask:%d\n", vfsopts->mask); } return 0; @@ -223,8 +221,8 @@ static struct dentry *sdcardfs_d_alloc_root(struct super_block *sb) #endif DEFINE_MUTEX(sdcardfs_super_list_lock); -LIST_HEAD(sdcardfs_super_list); EXPORT_SYMBOL_GPL(sdcardfs_super_list_lock); +LIST_HEAD(sdcardfs_super_list); EXPORT_SYMBOL_GPL(sdcardfs_super_list); /* @@ -242,31 +240,30 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb, struct sdcardfs_vfsmount_options *mnt_opt = mnt->data; struct inode *inode; - printk(KERN_INFO "sdcardfs version 2.0\n"); + pr_info("sdcardfs version 2.0\n"); if (!dev_name) { - printk(KERN_ERR - "sdcardfs: read_super: missing dev_name argument\n"); + pr_err("sdcardfs: read_super: missing dev_name argument\n"); err = -EINVAL; goto out; } - printk(KERN_INFO "sdcardfs: dev_name -> %s\n", dev_name); - printk(KERN_INFO "sdcardfs: options -> %s\n", (char *)raw_data); - printk(KERN_INFO "sdcardfs: mnt -> %p\n", mnt); + pr_info("sdcardfs: dev_name -> %s\n", dev_name); + pr_info("sdcardfs: options -> %s\n", (char *)raw_data); + pr_info("sdcardfs: mnt -> %p\n", mnt); /* parse lower path */ err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &lower_path); if (err) { - printk(KERN_ERR "sdcardfs: error accessing lower directory '%s'\n", dev_name); + pr_err("sdcardfs: error accessing lower directory '%s'\n", dev_name); goto out; } /* allocate superblock private data */ sb->s_fs_info = kzalloc(sizeof(struct sdcardfs_sb_info), GFP_KERNEL); if (!SDCARDFS_SB(sb)) { - printk(KERN_CRIT "sdcardfs: read_super: out of memory\n"); + pr_crit("sdcardfs: read_super: out of memory\n"); err = -ENOMEM; goto out_free; } @@ -275,7 +272,7 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb, /* parse options */ err = parse_options(sb, raw_data, silent, &debug, mnt_opt, &sb_info->options); if (err) { - printk(KERN_ERR "sdcardfs: invalid options\n"); + pr_err("sdcardfs: invalid options\n"); goto out_freesbi; } @@ -328,14 +325,15 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb, /* setup permission policy */ sb_info->obbpath_s = kzalloc(PATH_MAX, GFP_KERNEL); mutex_lock(&sdcardfs_super_list_lock); - if(sb_info->options.multiuser) { - setup_derived_state(d_inode(sb->s_root), PERM_PRE_ROOT, sb_info->options.fs_user_id, AID_ROOT, false, d_inode(sb->s_root)); + if (sb_info->options.multiuser) { + setup_derived_state(d_inode(sb->s_root), PERM_PRE_ROOT, + sb_info->options.fs_user_id, AID_ROOT, + false, d_inode(sb->s_root)); snprintf(sb_info->obbpath_s, PATH_MAX, "%s/obb", dev_name); - /*err = prepare_dir(sb_info->obbpath_s, - sb_info->options.fs_low_uid, - sb_info->options.fs_low_gid, 00755);*/ } else { - setup_derived_state(d_inode(sb->s_root), PERM_ROOT, sb_info->options.fs_user_id, AID_ROOT, false, d_inode(sb->s_root)); + setup_derived_state(d_inode(sb->s_root), PERM_ROOT, + sb_info->options.fs_user_id, AID_ROOT, + false, d_inode(sb->s_root)); snprintf(sb_info->obbpath_s, PATH_MAX, "%s/Android/obb", dev_name); } fixup_tmp_permissions(d_inode(sb->s_root)); @@ -344,7 +342,7 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb, mutex_unlock(&sdcardfs_super_list_lock); if (!silent) - printk(KERN_INFO "sdcardfs: mounted on top of %s type %s\n", + pr_info("sdcardfs: mounted on top of %s type %s\n", dev_name, lower_sb->s_type->name); goto out; /* all is well */ @@ -368,8 +366,10 @@ out: /* A feature which supports mount_nodev() with options */ static struct dentry *mount_nodev_with_options(struct vfsmount *mnt, - struct file_system_type *fs_type, int flags, const char *dev_name, void *data, - int (*fill_super)(struct vfsmount *, struct super_block *, const char *, void *, int)) + struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, + int (*fill_super)(struct vfsmount *, struct super_block *, + const char *, void *, int)) { int error; @@ -401,19 +401,22 @@ static struct dentry *sdcardfs_mount(struct vfsmount *mnt, raw_data, sdcardfs_read_super); } -static struct dentry *sdcardfs_mount_wrn(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) +static struct dentry *sdcardfs_mount_wrn(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data) { WARN(1, "sdcardfs does not support mount. Use mount2.\n"); return ERR_PTR(-EINVAL); } -void *sdcardfs_alloc_mnt_data(void) { +void *sdcardfs_alloc_mnt_data(void) +{ return kmalloc(sizeof(struct sdcardfs_vfsmount_options), GFP_KERNEL); } -void sdcardfs_kill_sb(struct super_block *sb) { +void sdcardfs_kill_sb(struct super_block *sb) +{ struct sdcardfs_sb_info *sbi; + if (sb->s_magic == SDCARDFS_SUPER_MAGIC) { sbi = SDCARDFS_SB(sb); mutex_lock(&sdcardfs_super_list_lock); @@ -432,6 +435,7 @@ static struct file_system_type sdcardfs_fs_type = { .kill_sb = sdcardfs_kill_sb, .fs_flags = 0, }; +MODULE_ALIAS_FS(SDCARDFS_NAME); static int __init init_sdcardfs_fs(void) { @@ -467,10 +471,15 @@ static void __exit exit_sdcardfs_fs(void) pr_info("Completed sdcardfs module unload\n"); } -MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University" - " (http://www.fsl.cs.sunysb.edu/)"); -MODULE_DESCRIPTION("Wrapfs " SDCARDFS_VERSION - " (http://wrapfs.filesystems.org/)"); +/* Original wrapfs authors */ +MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University (http://www.fsl.cs.sunysb.edu/)"); + +/* Original sdcardfs authors */ +MODULE_AUTHOR("Woojoong Lee, Daeho Jeong, Kitae Lee, Yeongjin Gil System Memory Lab., Samsung Electronics"); + +/* Current maintainer */ +MODULE_AUTHOR("Daniel Rosenberg, Google"); +MODULE_DESCRIPTION("Sdcardfs " SDCARDFS_VERSION); MODULE_LICENSE("GPL"); module_init(init_sdcardfs_fs); diff --git a/fs/sdcardfs/mmap.c b/fs/sdcardfs/mmap.c index e21f64675a80..0d4089c62c3a 100644 --- a/fs/sdcardfs/mmap.c +++ b/fs/sdcardfs/mmap.c @@ -48,34 +48,55 @@ static int sdcardfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return err; } +static int sdcardfs_page_mkwrite(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + int err = 0; + struct file *file, *lower_file; + const struct vm_operations_struct *lower_vm_ops; + struct vm_area_struct lower_vma; + + memcpy(&lower_vma, vma, sizeof(struct vm_area_struct)); + file = lower_vma.vm_file; + lower_vm_ops = SDCARDFS_F(file)->lower_vm_ops; + BUG_ON(!lower_vm_ops); + if (!lower_vm_ops->page_mkwrite) + goto out; + + lower_file = sdcardfs_lower_file(file); + /* + * XXX: vm_ops->page_mkwrite may be called in parallel. + * Because we have to resort to temporarily changing the + * vma->vm_file to point to the lower file, a concurrent + * invocation of sdcardfs_page_mkwrite could see a different + * value. In this workaround, we keep a different copy of the + * vma structure in our stack, so we never expose a different + * value of the vma->vm_file called to us, even temporarily. + * A better fix would be to change the calling semantics of + * ->page_mkwrite to take an explicit file pointer. + */ + lower_vma.vm_file = lower_file; + err = lower_vm_ops->page_mkwrite(&lower_vma, vmf); +out: + return err; +} + static ssize_t sdcardfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos) { /* - * This function returns zero on purpose in order to support direct IO. - * __dentry_open checks a_ops->direct_IO and returns EINVAL if it is null. - * - * However, this function won't be called by certain file operations - * including generic fs functions. * reads and writes are delivered to - * the lower file systems and the direct IOs will be handled by them. - * - * NOTE: exceptionally, on the recent kernels (since Linux 3.8.x), - * swap_writepage invokes this function directly. + * This function should never be called directly. We need it + * to exist, to get past a check in open_check_o_direct(), + * which is called from do_last(). */ - printk(KERN_INFO "%s, operation is not supported\n", __func__); - return 0; + return -EINVAL; } -/* - * XXX: the default address_space_ops for sdcardfs is empty. We cannot set - * our inode->i_mapping->a_ops to NULL because too many code paths expect - * the a_ops vector to be non-NULL. - */ const struct address_space_operations sdcardfs_aops = { - /* empty on purpose */ .direct_IO = sdcardfs_direct_IO, }; const struct vm_operations_struct sdcardfs_vm_ops = { .fault = sdcardfs_fault, + .page_mkwrite = sdcardfs_page_mkwrite, }; diff --git a/fs/sdcardfs/multiuser.h b/fs/sdcardfs/multiuser.h index 52bc20080904..2e89b5872314 100644 --- a/fs/sdcardfs/multiuser.h +++ b/fs/sdcardfs/multiuser.h @@ -28,22 +28,17 @@ typedef uid_t userid_t; typedef uid_t appid_t; -static inline uid_t multiuser_get_uid(userid_t user_id, appid_t app_id) { - return (user_id * AID_USER_OFFSET) + (app_id % AID_USER_OFFSET); +static inline uid_t multiuser_get_uid(userid_t user_id, appid_t app_id) +{ + return (user_id * AID_USER_OFFSET) + (app_id % AID_USER_OFFSET); } -static inline gid_t multiuser_get_cache_gid(userid_t user_id, appid_t app_id) { - if (app_id >= AID_APP_START && app_id <= AID_APP_END) { - return multiuser_get_uid(user_id, (app_id - AID_APP_START) + AID_CACHE_GID_START); - } else { - return -1; - } +static inline gid_t multiuser_get_cache_gid(uid_t uid) +{ + return uid - AID_APP_START + AID_CACHE_GID_START; } -static inline gid_t multiuser_get_ext_gid(userid_t user_id, appid_t app_id) { - if (app_id >= AID_APP_START && app_id <= AID_APP_END) { - return multiuser_get_uid(user_id, (app_id - AID_APP_START) + AID_EXT_GID_START); - } else { - return -1; - } +static inline gid_t multiuser_get_ext_gid(uid_t uid) +{ + return uid - AID_APP_START + AID_EXT_GID_START; } diff --git a/fs/sdcardfs/packagelist.c b/fs/sdcardfs/packagelist.c index d96fcde041cc..89196e31073e 100644 --- a/fs/sdcardfs/packagelist.c +++ b/fs/sdcardfs/packagelist.c @@ -20,6 +20,7 @@ #include "sdcardfs.h" #include <linux/hashtable.h> +#include <linux/ctype.h> #include <linux/delay.h> #include <linux/radix-tree.h> #include <linux/dcache.h> @@ -44,13 +45,24 @@ static DEFINE_HASHTABLE(ext_to_groupid, 8); static struct kmem_cache *hashtable_entry_cachep; -static void inline qstr_init(struct qstr *q, const char *name) { +static unsigned int full_name_case_hash(const unsigned char *name, unsigned int len) +{ + unsigned long hash = init_name_hash(); + + while (len--) + hash = partial_name_hash(tolower(*name++), hash); + return end_name_hash(hash); +} + +static inline void qstr_init(struct qstr *q, const char *name) +{ q->name = name; q->len = strlen(q->name); - q->hash = full_name_hash(q->name, q->len); + q->hash = full_name_case_hash(q->name, q->len); } -static inline int qstr_copy(const struct qstr *src, struct qstr *dest) { +static inline int qstr_copy(const struct qstr *src, struct qstr *dest) +{ dest->name = kstrdup(src->name, GFP_KERNEL); dest->hash_len = src->hash_len; return !!dest->name; @@ -78,6 +90,7 @@ static appid_t __get_appid(const struct qstr *key) appid_t get_appid(const char *key) { struct qstr q; + qstr_init(&q, key); return __get_appid(&q); } @@ -103,6 +116,7 @@ static appid_t __get_ext_gid(const struct qstr *key) appid_t get_ext_gid(const char *key) { struct qstr q; + qstr_init(&q, key); return __get_ext_gid(&q); } @@ -133,8 +147,10 @@ appid_t is_excluded(const char *key, userid_t user) /* Kernel has already enforced everything we returned through * derive_permissions_locked(), so this is used to lock down access - * even further, such as enforcing that apps hold sdcard_rw. */ -int check_caller_access_to_name(struct inode *parent_node, const struct qstr *name) { + * even further, such as enforcing that apps hold sdcard_rw. + */ +int check_caller_access_to_name(struct inode *parent_node, const struct qstr *name) +{ struct qstr q_autorun = QSTR_LITERAL("autorun.inf"); struct qstr q__android_secure = QSTR_LITERAL(".android_secure"); struct qstr q_android_secure = QSTR_LITERAL("android_secure"); @@ -149,26 +165,26 @@ int check_caller_access_to_name(struct inode *parent_node, const struct qstr *na } /* Root always has access; access for any other UIDs should always - * be controlled through packages.list. */ - if (from_kuid(&init_user_ns, current_fsuid()) == 0) { + * be controlled through packages.list. + */ + if (from_kuid(&init_user_ns, current_fsuid()) == 0) return 1; - } /* No extra permissions to enforce */ return 1; } /* This function is used when file opening. The open flags must be - * checked before calling check_caller_access_to_name() */ -int open_flags_to_access_mode(int open_flags) { - if((open_flags & O_ACCMODE) == O_RDONLY) { + * checked before calling check_caller_access_to_name() + */ +int open_flags_to_access_mode(int open_flags) +{ + if ((open_flags & O_ACCMODE) == O_RDONLY) return 0; /* R_OK */ - } else if ((open_flags & O_ACCMODE) == O_WRONLY) { + if ((open_flags & O_ACCMODE) == O_WRONLY) return 1; /* W_OK */ - } else { - /* Probably O_RDRW, but treat as default to be safe */ + /* Probably O_RDRW, but treat as default to be safe */ return 1; /* R_OK | W_OK */ - } } static struct hashtable_entry *alloc_hashtable_entry(const struct qstr *key, @@ -178,6 +194,8 @@ static struct hashtable_entry *alloc_hashtable_entry(const struct qstr *key, GFP_KERNEL); if (!ret) return NULL; + INIT_HLIST_NODE(&ret->dlist); + INIT_HLIST_NODE(&ret->hlist); if (!qstr_copy(key, &ret->key)) { kmem_cache_free(hashtable_entry_cachep, ret); @@ -249,8 +267,7 @@ static void fixup_all_perms_name(const struct qstr *key) struct sdcardfs_sb_info *sbinfo; struct limit_search limit = { .flags = BY_NAME, - .name = key->name, - .length = key->len, + .name = QSTR_INIT(key->name, key->len), }; list_for_each_entry(sbinfo, &sdcardfs_super_list, list) { if (sbinfo_has_sdcard_magic(sbinfo)) @@ -263,8 +280,7 @@ static void fixup_all_perms_name_userid(const struct qstr *key, userid_t userid) struct sdcardfs_sb_info *sbinfo; struct limit_search limit = { .flags = BY_NAME | BY_USERID, - .name = key->name, - .length = key->len, + .name = QSTR_INIT(key->name, key->len), .userid = userid, }; list_for_each_entry(sbinfo, &sdcardfs_super_list, list) { @@ -326,7 +342,6 @@ static int insert_userid_exclude_entry(const struct qstr *key, userid_t value) static void free_hashtable_entry(struct hashtable_entry *entry) { kfree(entry->key.name); - hash_del_rcu(&entry->dlist); kmem_cache_free(hashtable_entry_cachep, entry); } @@ -361,7 +376,6 @@ static void remove_packagelist_entry(const struct qstr *key) remove_packagelist_entry_locked(key); fixup_all_perms_name(key); mutex_unlock(&sdcardfs_super_list_lock); - return; } static void remove_ext_gid_entry_locked(const struct qstr *key, gid_t group) @@ -384,7 +398,6 @@ static void remove_ext_gid_entry(const struct qstr *key, gid_t group) mutex_lock(&sdcardfs_super_list_lock); remove_ext_gid_entry_locked(key, group); mutex_unlock(&sdcardfs_super_list_lock); - return; } static void remove_userid_all_entry_locked(userid_t userid) @@ -412,7 +425,6 @@ static void remove_userid_all_entry(userid_t userid) remove_userid_all_entry_locked(userid); fixup_all_perms_userid(userid); mutex_unlock(&sdcardfs_super_list_lock); - return; } static void remove_userid_exclude_entry_locked(const struct qstr *key, userid_t userid) @@ -437,7 +449,6 @@ static void remove_userid_exclude_entry(const struct qstr *key, userid_t userid) remove_userid_exclude_entry_locked(key, userid); fixup_all_perms_name_userid(key, userid); mutex_unlock(&sdcardfs_super_list_lock); - return; } static void packagelist_destroy(void) @@ -446,6 +457,7 @@ static void packagelist_destroy(void) struct hlist_node *h_t; HLIST_HEAD(free_list); int i; + mutex_lock(&sdcardfs_super_list_lock); hash_for_each_rcu(package_to_appid, i, hash_cur, hlist) { hash_del_rcu(&hash_cur->hlist); @@ -459,7 +471,7 @@ static void packagelist_destroy(void) hlist_for_each_entry_safe(hash_cur, h_t, &free_list, dlist) free_hashtable_entry(hash_cur); mutex_unlock(&sdcardfs_super_list_lock); - printk(KERN_INFO "sdcardfs: destroyed packagelist pkgld\n"); + pr_info("sdcardfs: destroyed packagelist pkgld\n"); } #define SDCARDFS_CONFIGFS_ATTR(_pfx, _name) \ @@ -575,7 +587,8 @@ static ssize_t package_details_clear_userid_store(struct config_item *item, static void package_details_release(struct config_item *item) { struct package_details *package_details = to_package_details(item); - printk(KERN_INFO "sdcardfs: removing %s\n", package_details->name.name); + + pr_info("sdcardfs: removing %s\n", package_details->name.name); remove_packagelist_entry(&package_details->name); kfree(package_details->name.name); kfree(package_details); @@ -593,7 +606,7 @@ static struct configfs_attribute *package_details_attrs[] = { }; static struct configfs_item_operations package_details_item_ops = { - .release = package_details_release, + .release = package_details_release, }; static struct config_item_type package_appid_type = { @@ -627,7 +640,7 @@ static void extension_details_release(struct config_item *item) { struct extension_details *extension_details = to_extension_details(item); - printk(KERN_INFO "sdcardfs: No longer mapping %s files to gid %d\n", + pr_info("sdcardfs: No longer mapping %s files to gid %d\n", extension_details->name.name, extension_details->num); remove_ext_gid_entry(&extension_details->name, extension_details->num); kfree(extension_details->name.name); @@ -649,6 +662,7 @@ static struct config_item *extension_details_make_item(struct config_group *grou struct extension_details *extension_details = kzalloc(sizeof(struct extension_details), GFP_KERNEL); const char *tmp; int ret; + if (!extension_details) return ERR_PTR(-ENOMEM); @@ -703,7 +717,8 @@ static struct config_group *extensions_make_group(struct config_group *group, co static void extensions_drop_group(struct config_group *group, struct config_item *item) { struct extensions_value *value = to_extensions_value(item); - printk(KERN_INFO "sdcardfs: No longer mapping any files to gid %d\n", value->num); + + pr_info("sdcardfs: No longer mapping any files to gid %d\n", value->num); kfree(value); } @@ -837,14 +852,14 @@ static int configfs_sdcardfs_init(void) { int ret, i; struct configfs_subsystem *subsys = &sdcardfs_packages; - for (i = 0; sd_default_groups[i]; i++) { + + for (i = 0; sd_default_groups[i]; i++) config_group_init(sd_default_groups[i]); - } config_group_init(&subsys->su_group); mutex_init(&subsys->su_mutex); ret = configfs_register_subsystem(subsys); if (ret) { - printk(KERN_ERR "Error %d while registering subsystem %s\n", + pr_err("Error %d while registering subsystem %s\n", ret, subsys->su_group.cg_item.ci_namebuf); } @@ -862,18 +877,17 @@ int packagelist_init(void) kmem_cache_create("packagelist_hashtable_entry", sizeof(struct hashtable_entry), 0, 0, NULL); if (!hashtable_entry_cachep) { - printk(KERN_ERR "sdcardfs: failed creating pkgl_hashtable entry slab cache\n"); + pr_err("sdcardfs: failed creating pkgl_hashtable entry slab cache\n"); return -ENOMEM; } configfs_sdcardfs_init(); - return 0; + return 0; } void packagelist_exit(void) { configfs_sdcardfs_exit(); packagelist_destroy(); - if (hashtable_entry_cachep) - kmem_cache_destroy(hashtable_entry_cachep); + kmem_cache_destroy(hashtable_entry_cachep); } diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index f3cced313108..2b67b9a8ef9f 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -29,6 +29,7 @@ #include <linux/dcache.h> #include <linux/file.h> #include <linux/fs.h> +#include <linux/aio.h> #include <linux/mm.h> #include <linux/mount.h> #include <linux/namei.h> @@ -52,7 +53,7 @@ #define SDCARDFS_ROOT_INO 1 /* useful for tracking code reachability */ -#define UDBG printk(KERN_DEFAULT "DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__) +#define UDBG pr_default("DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__) #define SDCARDFS_DIRENT_SIZE 256 @@ -86,54 +87,56 @@ } while (0) /* OVERRIDE_CRED() and REVERT_CRED() - * OVERRID_CRED() - * backup original task->cred - * and modifies task->cred->fsuid/fsgid to specified value. + * OVERRIDE_CRED() + * backup original task->cred + * and modifies task->cred->fsuid/fsgid to specified value. * REVERT_CRED() - * restore original task->cred->fsuid/fsgid. + * restore original task->cred->fsuid/fsgid. * These two macro should be used in pair, and OVERRIDE_CRED() should be * placed at the beginning of a function, right after variable declaration. */ #define OVERRIDE_CRED(sdcardfs_sbi, saved_cred, info) \ + do { \ saved_cred = override_fsids(sdcardfs_sbi, info); \ - if (!saved_cred) { return -ENOMEM; } + if (!saved_cred) \ + return -ENOMEM; \ + } while (0) #define OVERRIDE_CRED_PTR(sdcardfs_sbi, saved_cred, info) \ + do { \ saved_cred = override_fsids(sdcardfs_sbi, info); \ - if (!saved_cred) { return ERR_PTR(-ENOMEM); } + if (!saved_cred) \ + return ERR_PTR(-ENOMEM); \ + } while (0) #define REVERT_CRED(saved_cred) revert_fsids(saved_cred) -#define DEBUG_CRED() \ - printk("KAKJAGI: %s:%d fsuid %d fsgid %d\n", \ - __FUNCTION__, __LINE__, \ - (int)current->cred->fsuid, \ - (int)current->cred->fsgid); - /* Android 5.0 support */ /* Permission mode for a specific node. Controls how file permissions - * are derived for children nodes. */ + * are derived for children nodes. + */ typedef enum { - /* Nothing special; this node should just inherit from its parent. */ - PERM_INHERIT, - /* This node is one level above a normal root; used for legacy layouts - * which use the first level to represent user_id. */ - PERM_PRE_ROOT, - /* This node is "/" */ - PERM_ROOT, - /* This node is "/Android" */ - PERM_ANDROID, - /* This node is "/Android/data" */ - PERM_ANDROID_DATA, - /* This node is "/Android/obb" */ - PERM_ANDROID_OBB, - /* This node is "/Android/media" */ - PERM_ANDROID_MEDIA, - /* This node is "/Android/[data|media|obb]/[package]" */ - PERM_ANDROID_PACKAGE, - /* This node is "/Android/[data|media|obb]/[package]/cache" */ - PERM_ANDROID_PACKAGE_CACHE, + /* Nothing special; this node should just inherit from its parent. */ + PERM_INHERIT, + /* This node is one level above a normal root; used for legacy layouts + * which use the first level to represent user_id. + */ + PERM_PRE_ROOT, + /* This node is "/" */ + PERM_ROOT, + /* This node is "/Android" */ + PERM_ANDROID, + /* This node is "/Android/data" */ + PERM_ANDROID_DATA, + /* This node is "/Android/obb" */ + PERM_ANDROID_OBB, + /* This node is "/Android/media" */ + PERM_ANDROID_MEDIA, + /* This node is "/Android/[data|media|obb]/[package]" */ + PERM_ANDROID_PACKAGE, + /* This node is "/Android/[data|media|obb]/[package]/cache" */ + PERM_ANDROID_PACKAGE_CACHE, } perm_t; struct sdcardfs_sb_info; @@ -141,9 +144,9 @@ struct sdcardfs_mount_options; struct sdcardfs_inode_info; /* Do not directly use this function. Use OVERRIDE_CRED() instead. */ -const struct cred * override_fsids(struct sdcardfs_sb_info* sbi, struct sdcardfs_inode_info *info); +const struct cred *override_fsids(struct sdcardfs_sb_info *sbi, struct sdcardfs_inode_info *info); /* Do not directly use this function, use REVERT_CRED() instead. */ -void revert_fsids(const struct cred * old_cred); +void revert_fsids(const struct cred *old_cred); /* operations vectors defined in specific files */ extern const struct file_operations sdcardfs_main_fops; @@ -220,7 +223,8 @@ struct sdcardfs_sb_info { struct super_block *sb; struct super_block *lower_sb; /* derived perm policy : some of options have been added - * to sdcardfs_mount_options (Android 4.4 support) */ + * to sdcardfs_mount_options (Android 4.4 support) + */ struct sdcardfs_mount_options options; spinlock_t lock; /* protects obbpath */ char *obbpath_s; @@ -331,7 +335,7 @@ static inline void sdcardfs_put_reset_##pname(const struct dentry *dent) \ { \ struct path pname; \ spin_lock(&SDCARDFS_D(dent)->lock); \ - if(SDCARDFS_D(dent)->pname.dentry) { \ + if (SDCARDFS_D(dent)->pname.dentry) { \ pathcpy(&pname, &SDCARDFS_D(dent)->pname); \ SDCARDFS_D(dent)->pname.dentry = NULL; \ SDCARDFS_D(dent)->pname.mnt = NULL; \ @@ -347,17 +351,17 @@ SDCARDFS_DENT_FUNC(orig_path) static inline bool sbinfo_has_sdcard_magic(struct sdcardfs_sb_info *sbinfo) { - return sbinfo && sbinfo->sb && sbinfo->sb->s_magic == SDCARDFS_SUPER_MAGIC; + return sbinfo && sbinfo->sb && sbinfo->sb->s_magic == SDCARDFS_SUPER_MAGIC; } /* grab a refererence if we aren't linking to ourself */ static inline void set_top(struct sdcardfs_inode_info *info, struct inode *top) { struct inode *old_top = NULL; + BUG_ON(IS_ERR_OR_NULL(top)); - if (info->top && info->top != &info->vfs_inode) { + if (info->top && info->top != &info->vfs_inode) old_top = info->top; - } if (top != &info->vfs_inode) igrab(top); info->top = top; @@ -367,11 +371,11 @@ static inline void set_top(struct sdcardfs_inode_info *info, struct inode *top) static inline struct inode *grab_top(struct sdcardfs_inode_info *info) { struct inode *top = info->top; - if (top) { + + if (top) return igrab(top); - } else { + else return NULL; - } } static inline void release_top(struct sdcardfs_inode_info *info) @@ -379,21 +383,24 @@ static inline void release_top(struct sdcardfs_inode_info *info) iput(info->top); } -static inline int get_gid(struct vfsmount *mnt, struct sdcardfs_inode_info *info) { +static inline int get_gid(struct vfsmount *mnt, struct sdcardfs_inode_info *info) +{ struct sdcardfs_vfsmount_options *opts = mnt->data; - if (opts->gid == AID_SDCARD_RW) { + if (opts->gid == AID_SDCARD_RW) /* As an optimization, certain trusted system components only run * as owner but operate across all users. Since we're now handing * out the sdcard_rw GID only to trusted apps, we're okay relaxing * the user boundary enforcement for the default view. The UIDs - * assigned to app directories are still multiuser aware. */ + * assigned to app directories are still multiuser aware. + */ return AID_SDCARD_RW; - } else { + else return multiuser_get_uid(info->userid, opts->gid); - } } -static inline int get_mode(struct vfsmount *mnt, struct sdcardfs_inode_info *info) { + +static inline int get_mode(struct vfsmount *mnt, struct sdcardfs_inode_info *info) +{ int owner_mode; int filtered_mode; struct sdcardfs_vfsmount_options *opts = mnt->data; @@ -402,17 +409,18 @@ static inline int get_mode(struct vfsmount *mnt, struct sdcardfs_inode_info *inf if (info->perm == PERM_PRE_ROOT) { /* Top of multi-user view should always be visible to ensure - * secondary users can traverse inside. */ + * secondary users can traverse inside. + */ visible_mode = 0711; } else if (info->under_android) { /* Block "other" access to Android directories, since only apps * belonging to a specific user should be in there; we still - * leave +x open for the default view. */ - if (opts->gid == AID_SDCARD_RW) { + * leave +x open for the default view. + */ + if (opts->gid == AID_SDCARD_RW) visible_mode = visible_mode & ~0006; - } else { + else visible_mode = visible_mode & ~0007; - } } owner_mode = info->lower_inode->i_mode & 0700; filtered_mode = visible_mode & (owner_mode | (owner_mode >> 3) | (owner_mode >> 6)); @@ -437,7 +445,7 @@ static inline void sdcardfs_get_real_lower(const struct dentry *dent, /* in case of a local obb dentry * the orig_path should be returned */ - if(has_graft_path(dent)) + if (has_graft_path(dent)) sdcardfs_get_orig_path(dent, real_lower); else sdcardfs_get_lower_path(dent, real_lower); @@ -446,7 +454,7 @@ static inline void sdcardfs_get_real_lower(const struct dentry *dent, static inline void sdcardfs_put_real_lower(const struct dentry *dent, struct path *real_lower) { - if(has_graft_path(dent)) + if (has_graft_path(dent)) sdcardfs_put_orig_path(dent, real_lower); else sdcardfs_put_lower_path(dent, real_lower); @@ -459,7 +467,7 @@ extern struct list_head sdcardfs_super_list; extern appid_t get_appid(const char *app_name); extern appid_t get_ext_gid(const char *app_name); extern appid_t is_excluded(const char *app_name, userid_t userid); -extern int check_caller_access_to_name(struct inode *parent_node, const struct qstr* name); +extern int check_caller_access_to_name(struct inode *parent_node, const struct qstr *name); extern int open_flags_to_access_mode(int open_flags); extern int packagelist_init(void); extern void packagelist_exit(void); @@ -469,8 +477,7 @@ extern void packagelist_exit(void); #define BY_USERID (1 << 1) struct limit_search { unsigned int flags; - const char *name; - size_t length; + struct qstr name; userid_t userid; }; @@ -478,12 +485,10 @@ extern void setup_derived_state(struct inode *inode, perm_t perm, userid_t useri uid_t uid, bool under_android, struct inode *top); extern void get_derived_permission(struct dentry *parent, struct dentry *dentry); extern void get_derived_permission_new(struct dentry *parent, struct dentry *dentry, const struct qstr *name); -extern void drop_recursive(struct dentry *parent); -extern void fixup_top_recursive(struct dentry *parent); extern void fixup_perms_recursive(struct dentry *dentry, struct limit_search *limit); extern void update_derived_permission_lock(struct dentry *dentry); -void fixup_lower_ownership(struct dentry* dentry, const char *name); +void fixup_lower_ownership(struct dentry *dentry, const char *name); extern int need_graft_path(struct dentry *dentry); extern int is_base_obbpath(struct dentry *dentry); extern int is_obbpath_invalid(struct dentry *dentry); @@ -493,6 +498,7 @@ extern int setup_obb_dentry(struct dentry *dentry, struct path *lower_path); static inline struct dentry *lock_parent(struct dentry *dentry) { struct dentry *dir = dget_parent(dentry); + mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT); return dir; } @@ -611,6 +617,11 @@ static inline bool str_case_eq(const char *s1, const char *s2) return !strcasecmp(s1, s2); } +static inline bool str_n_case_eq(const char *s1, const char *s2, size_t len) +{ + return !strncasecmp(s1, s2, len); +} + static inline bool qstr_case_eq(const struct qstr *q1, const struct qstr *q2) { return q1->len == q2->len && str_case_eq(q1->name, q2->name); diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c index edda32b68dc0..a3393e959c63 100644 --- a/fs/sdcardfs/super.c +++ b/fs/sdcardfs/super.c @@ -36,7 +36,7 @@ static void sdcardfs_put_super(struct super_block *sb) if (!spd) return; - if(spd->obbpath_s) { + if (spd->obbpath_s) { kfree(spd->obbpath_s); path_put(&spd->obbpath); } @@ -64,7 +64,7 @@ static int sdcardfs_statfs(struct dentry *dentry, struct kstatfs *buf) if (sbi->options.reserved_mb) { /* Invalid statfs informations. */ if (buf->f_bsize == 0) { - printk(KERN_ERR "Returned block size is zero.\n"); + pr_err("Returned block size is zero.\n"); return -EINVAL; } @@ -100,8 +100,7 @@ static int sdcardfs_remount_fs(struct super_block *sb, int *flags, char *options * SILENT, but anything else left over is an error. */ if ((*flags & ~(MS_RDONLY | MS_MANDLOCK | MS_SILENT)) != 0) { - printk(KERN_ERR - "sdcardfs: remount flags 0x%x unsupported\n", *flags); + pr_err("sdcardfs: remount flags 0x%x unsupported\n", *flags); err = -EINVAL; } @@ -125,29 +124,33 @@ static int sdcardfs_remount_fs2(struct vfsmount *mnt, struct super_block *sb, * SILENT, but anything else left over is an error. */ if ((*flags & ~(MS_RDONLY | MS_MANDLOCK | MS_SILENT | MS_REMOUNT)) != 0) { - printk(KERN_ERR - "sdcardfs: remount flags 0x%x unsupported\n", *flags); + pr_err("sdcardfs: remount flags 0x%x unsupported\n", *flags); err = -EINVAL; } - printk(KERN_INFO "Remount options were %s for vfsmnt %p.\n", options, mnt); + pr_info("Remount options were %s for vfsmnt %p.\n", options, mnt); err = parse_options_remount(sb, options, *flags & ~MS_SILENT, mnt->data); return err; } -static void* sdcardfs_clone_mnt_data(void *data) { - struct sdcardfs_vfsmount_options* opt = kmalloc(sizeof(struct sdcardfs_vfsmount_options), GFP_KERNEL); - struct sdcardfs_vfsmount_options* old = data; - if(!opt) return NULL; +static void *sdcardfs_clone_mnt_data(void *data) +{ + struct sdcardfs_vfsmount_options *opt = kmalloc(sizeof(struct sdcardfs_vfsmount_options), GFP_KERNEL); + struct sdcardfs_vfsmount_options *old = data; + + if (!opt) + return NULL; opt->gid = old->gid; opt->mask = old->mask; return opt; } -static void sdcardfs_copy_mnt_data(void *data, void *newdata) { - struct sdcardfs_vfsmount_options* old = data; - struct sdcardfs_vfsmount_options* new = newdata; +static void sdcardfs_copy_mnt_data(void *data, void *newdata) +{ + struct sdcardfs_vfsmount_options *old = data; + struct sdcardfs_vfsmount_options *new = newdata; + old->gid = new->gid; old->mask = new->mask; } @@ -218,8 +221,7 @@ int sdcardfs_init_inode_cache(void) /* sdcardfs inode cache destructor */ void sdcardfs_destroy_inode_cache(void) { - if (sdcardfs_inode_cachep) - kmem_cache_destroy(sdcardfs_inode_cachep); + kmem_cache_destroy(sdcardfs_inode_cachep); } /* @@ -235,7 +237,8 @@ static void sdcardfs_umount_begin(struct super_block *sb) lower_sb->s_op->umount_begin(lower_sb); } -static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m, struct dentry *root) +static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m, + struct dentry *root) { struct sdcardfs_sb_info *sbi = SDCARDFS_SB(root->d_sb); struct sdcardfs_mount_options *opts = &sbi->options; @@ -248,7 +251,7 @@ static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m, struc if (vfsopts->gid != 0) seq_printf(m, ",gid=%u", vfsopts->gid); if (opts->multiuser) - seq_printf(m, ",multiuser"); + seq_puts(m, ",multiuser"); if (vfsopts->mask) seq_printf(m, ",mask=%u", vfsopts->mask); if (opts->fs_user_id) diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index ffb093e72b6c..6dd158a216f4 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -26,34 +26,6 @@ config SQUASHFS If unsure, say N. choice - prompt "File decompression options" - depends on SQUASHFS - help - Squashfs now supports two options for decompressing file - data. Traditionally Squashfs has decompressed into an - intermediate buffer and then memcopied it into the page cache. - Squashfs now supports the ability to decompress directly into - the page cache. - - If unsure, select "Decompress file data into an intermediate buffer" - -config SQUASHFS_FILE_CACHE - bool "Decompress file data into an intermediate buffer" - help - Decompress file data into an intermediate buffer and then - memcopy it into the page cache. - -config SQUASHFS_FILE_DIRECT - bool "Decompress files directly into the page cache" - help - Directly decompress file data into the page cache. - Doing so can significantly improve performance because - it eliminates a memcpy and it also removes the lock contention - on the single buffer. - -endchoice - -choice prompt "Decompressor parallelisation options" depends on SQUASHFS help diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 246a6f329d89..fe51f1507ed1 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -5,8 +5,7 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o squashfs-y += namei.o super.o symlink.o decompressor.o -squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o -squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o +squashfs-y += file_direct.o page_actor.o squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 0cea9b9236d0..2eb66decc5ab 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -28,9 +28,12 @@ #include <linux/fs.h> #include <linux/vfs.h> +#include <linux/bio.h> #include <linux/slab.h> #include <linux/string.h> +#include <linux/pagemap.h> #include <linux/buffer_head.h> +#include <linux/workqueue.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" @@ -38,177 +41,434 @@ #include "decompressor.h" #include "page_actor.h" -/* - * Read the metadata block length, this is stored in the first two - * bytes of the metadata block. - */ -static struct buffer_head *get_block_length(struct super_block *sb, - u64 *cur_index, int *offset, int *length) +static struct workqueue_struct *squashfs_read_wq; + +struct squashfs_read_request { + struct super_block *sb; + u64 index; + int length; + int compressed; + int offset; + u64 read_end; + struct squashfs_page_actor *output; + enum { + SQUASHFS_COPY, + SQUASHFS_DECOMPRESS, + SQUASHFS_METADATA, + } data_processing; + bool synchronous; + + /* + * If the read is synchronous, it is possible to retrieve information + * about the request by setting these pointers. + */ + int *res; + int *bytes_read; + int *bytes_uncompressed; + + int nr_buffers; + struct buffer_head **bh; + struct work_struct offload; +}; + +struct squashfs_bio_request { + struct buffer_head **bh; + int nr_buffers; +}; + +static int squashfs_bio_submit(struct squashfs_read_request *req); + +int squashfs_init_read_wq(void) { - struct squashfs_sb_info *msblk = sb->s_fs_info; - struct buffer_head *bh; + squashfs_read_wq = create_workqueue("SquashFS read wq"); + return !!squashfs_read_wq; +} + +void squashfs_destroy_read_wq(void) +{ + flush_workqueue(squashfs_read_wq); + destroy_workqueue(squashfs_read_wq); +} + +static void free_read_request(struct squashfs_read_request *req, int error) +{ + if (!req->synchronous) + squashfs_page_actor_free(req->output, error); + if (req->res) + *(req->res) = error; + kfree(req->bh); + kfree(req); +} + +static void squashfs_process_blocks(struct squashfs_read_request *req) +{ + int error = 0; + int bytes, i, length; + struct squashfs_sb_info *msblk = req->sb->s_fs_info; + struct squashfs_page_actor *actor = req->output; + struct buffer_head **bh = req->bh; + int nr_buffers = req->nr_buffers; + + for (i = 0; i < nr_buffers; ++i) { + if (!bh[i]) + continue; + wait_on_buffer(bh[i]); + if (!buffer_uptodate(bh[i])) + error = -EIO; + } + if (error) + goto cleanup; + + if (req->data_processing == SQUASHFS_METADATA) { + /* Extract the length of the metadata block */ + if (req->offset != msblk->devblksize - 1) + length = *((u16 *)(bh[0]->b_data + req->offset)); + else { + length = bh[0]->b_data[req->offset]; + length |= bh[1]->b_data[0] << 8; + } + req->compressed = SQUASHFS_COMPRESSED(length); + req->data_processing = req->compressed ? SQUASHFS_DECOMPRESS + : SQUASHFS_COPY; + length = SQUASHFS_COMPRESSED_SIZE(length); + if (req->index + length + 2 > req->read_end) { + for (i = 0; i < nr_buffers; ++i) + put_bh(bh[i]); + kfree(bh); + req->length = length; + req->index += 2; + squashfs_bio_submit(req); + return; + } + req->length = length; + req->offset = (req->offset + 2) % PAGE_SIZE; + if (req->offset < 2) { + put_bh(bh[0]); + ++bh; + --nr_buffers; + } + } + if (req->bytes_read) + *(req->bytes_read) = req->length; - bh = sb_bread(sb, *cur_index); - if (bh == NULL) - return NULL; - - if (msblk->devblksize - *offset == 1) { - *length = (unsigned char) bh->b_data[*offset]; - put_bh(bh); - bh = sb_bread(sb, ++(*cur_index)); - if (bh == NULL) - return NULL; - *length |= (unsigned char) bh->b_data[0] << 8; - *offset = 1; - } else { - *length = (unsigned char) bh->b_data[*offset] | - (unsigned char) bh->b_data[*offset + 1] << 8; - *offset += 2; - - if (*offset == msblk->devblksize) { - put_bh(bh); - bh = sb_bread(sb, ++(*cur_index)); - if (bh == NULL) - return NULL; - *offset = 0; + if (req->data_processing == SQUASHFS_COPY) { + squashfs_bh_to_actor(bh, nr_buffers, req->output, req->offset, + req->length, msblk->devblksize); + } else if (req->data_processing == SQUASHFS_DECOMPRESS) { + req->length = squashfs_decompress(msblk, bh, nr_buffers, + req->offset, req->length, actor); + if (req->length < 0) { + error = -EIO; + goto cleanup; } } - return bh; + /* Last page may have trailing bytes not filled */ + bytes = req->length % PAGE_SIZE; + if (bytes && actor->page[actor->pages - 1]) + zero_user_segment(actor->page[actor->pages - 1], bytes, + PAGE_SIZE); + +cleanup: + if (req->bytes_uncompressed) + *(req->bytes_uncompressed) = req->length; + if (error) { + for (i = 0; i < nr_buffers; ++i) + if (bh[i]) + put_bh(bh[i]); + } + free_read_request(req, error); } +static void read_wq_handler(struct work_struct *work) +{ + squashfs_process_blocks(container_of(work, + struct squashfs_read_request, offload)); +} -/* - * Read and decompress a metadata block or datablock. Length is non-zero - * if a datablock is being read (the size is stored elsewhere in the - * filesystem), otherwise the length is obtained from the first two bytes of - * the metadata block. A bit in the length field indicates if the block - * is stored uncompressed in the filesystem (usually because compression - * generated a larger block - this does occasionally happen with compression - * algorithms). - */ -int squashfs_read_data(struct super_block *sb, u64 index, int length, - u64 *next_index, struct squashfs_page_actor *output) +static void squashfs_bio_end_io(struct bio *bio) { - struct squashfs_sb_info *msblk = sb->s_fs_info; - struct buffer_head **bh; - int offset = index & ((1 << msblk->devblksize_log2) - 1); - u64 cur_index = index >> msblk->devblksize_log2; - int bytes, compressed, b = 0, k = 0, avail, i; + int i; + int error = bio->bi_error; + struct squashfs_bio_request *bio_req = bio->bi_private; + + bio_put(bio); + + for (i = 0; i < bio_req->nr_buffers; ++i) { + if (!bio_req->bh[i]) + continue; + if (!error) + set_buffer_uptodate(bio_req->bh[i]); + else + clear_buffer_uptodate(bio_req->bh[i]); + unlock_buffer(bio_req->bh[i]); + } + kfree(bio_req); +} + +static int bh_is_optional(struct squashfs_read_request *req, int idx) +{ + int start_idx, end_idx; + struct squashfs_sb_info *msblk = req->sb->s_fs_info; - bh = kcalloc(((output->length + msblk->devblksize - 1) - >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL); - if (bh == NULL) + start_idx = (idx * msblk->devblksize - req->offset) / PAGE_CACHE_SIZE; + end_idx = ((idx + 1) * msblk->devblksize - req->offset + 1) / PAGE_CACHE_SIZE; + if (start_idx >= req->output->pages) + return 1; + if (start_idx < 0) + start_idx = end_idx; + if (end_idx >= req->output->pages) + end_idx = start_idx; + return !req->output->page[start_idx] && !req->output->page[end_idx]; +} + +static int actor_getblks(struct squashfs_read_request *req, u64 block) +{ + int i; + + req->bh = kmalloc_array(req->nr_buffers, sizeof(*(req->bh)), GFP_NOIO); + if (!req->bh) return -ENOMEM; - if (length) { + for (i = 0; i < req->nr_buffers; ++i) { /* - * Datablock. + * When dealing with an uncompressed block, the actor may + * contains NULL pages. There's no need to read the buffers + * associated with these pages. */ - bytes = -offset; - compressed = SQUASHFS_COMPRESSED_BLOCK(length); - length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length); - if (next_index) - *next_index = index + length; - - TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n", - index, compressed ? "" : "un", length, output->length); - - if (length < 0 || length > output->length || - (index + length) > msblk->bytes_used) - goto read_failure; - - for (b = 0; bytes < length; b++, cur_index++) { - bh[b] = sb_getblk(sb, cur_index); - if (bh[b] == NULL) - goto block_release; - bytes += msblk->devblksize; + if (!req->compressed && bh_is_optional(req, i)) { + req->bh[i] = NULL; + continue; } - ll_rw_block(READ, b, bh); - } else { - /* - * Metadata block. - */ - if ((index + 2) > msblk->bytes_used) - goto read_failure; + req->bh[i] = sb_getblk(req->sb, block + i); + if (!req->bh[i]) { + while (--i) { + if (req->bh[i]) + put_bh(req->bh[i]); + } + return -1; + } + } + return 0; +} - bh[0] = get_block_length(sb, &cur_index, &offset, &length); - if (bh[0] == NULL) - goto read_failure; - b = 1; +static int squashfs_bio_submit(struct squashfs_read_request *req) +{ + struct bio *bio = NULL; + struct buffer_head *bh; + struct squashfs_bio_request *bio_req = NULL; + int b = 0, prev_block = 0; + struct squashfs_sb_info *msblk = req->sb->s_fs_info; - bytes = msblk->devblksize - offset; - compressed = SQUASHFS_COMPRESSED(length); - length = SQUASHFS_COMPRESSED_SIZE(length); - if (next_index) - *next_index = index + length + 2; + u64 read_start = round_down(req->index, msblk->devblksize); + u64 read_end = round_up(req->index + req->length, msblk->devblksize); + sector_t block = read_start >> msblk->devblksize_log2; + sector_t block_end = read_end >> msblk->devblksize_log2; + int offset = read_start - round_down(req->index, PAGE_SIZE); + int nr_buffers = block_end - block; + int blksz = msblk->devblksize; + int bio_max_pages = nr_buffers > BIO_MAX_PAGES ? BIO_MAX_PAGES + : nr_buffers; - TRACE("Block @ 0x%llx, %scompressed size %d\n", index, - compressed ? "" : "un", length); + /* Setup the request */ + req->read_end = read_end; + req->offset = req->index - read_start; + req->nr_buffers = nr_buffers; + if (actor_getblks(req, block) < 0) + goto getblk_failed; - if (length < 0 || length > output->length || - (index + length) > msblk->bytes_used) - goto block_release; + /* Create and submit the BIOs */ + for (b = 0; b < nr_buffers; ++b, offset += blksz) { + bh = req->bh[b]; + if (!bh || !trylock_buffer(bh)) + continue; + if (buffer_uptodate(bh)) { + unlock_buffer(bh); + continue; + } + offset %= PAGE_SIZE; - for (; bytes < length; b++) { - bh[b] = sb_getblk(sb, ++cur_index); - if (bh[b] == NULL) - goto block_release; - bytes += msblk->devblksize; + /* Append the buffer to the current BIO if it is contiguous */ + if (bio && bio_req && prev_block + 1 == b) { + if (bio_add_page(bio, bh->b_page, blksz, offset)) { + bio_req->nr_buffers += 1; + prev_block = b; + continue; + } } - ll_rw_block(READ, b - 1, bh + 1); + + /* Otherwise, submit the current BIO and create a new one */ + if (bio) + submit_bio(READ, bio); + bio_req = kcalloc(1, sizeof(struct squashfs_bio_request), + GFP_NOIO); + if (!bio_req) + goto req_alloc_failed; + bio_req->bh = &req->bh[b]; + bio = bio_alloc(GFP_NOIO, bio_max_pages); + if (!bio) + goto bio_alloc_failed; + bio->bi_bdev = req->sb->s_bdev; + bio->bi_iter.bi_sector = (block + b) + << (msblk->devblksize_log2 - 9); + bio->bi_private = bio_req; + bio->bi_end_io = squashfs_bio_end_io; + + bio_add_page(bio, bh->b_page, blksz, offset); + bio_req->nr_buffers += 1; + prev_block = b; } + if (bio) + submit_bio(READ, bio); - for (i = 0; i < b; i++) { - wait_on_buffer(bh[i]); - if (!buffer_uptodate(bh[i])) - goto block_release; + if (req->synchronous) + squashfs_process_blocks(req); + else { + INIT_WORK(&req->offload, read_wq_handler); + schedule_work(&req->offload); } + return 0; - if (compressed) { - length = squashfs_decompress(msblk, bh, b, offset, length, - output); - if (length < 0) - goto read_failure; - } else { - /* - * Block is uncompressed. - */ - int in, pg_offset = 0; - void *data = squashfs_first_page(output); - - for (bytes = length; k < b; k++) { - in = min(bytes, msblk->devblksize - offset); - bytes -= in; - while (in) { - if (pg_offset == PAGE_CACHE_SIZE) { - data = squashfs_next_page(output); - pg_offset = 0; - } - avail = min_t(int, in, PAGE_CACHE_SIZE - - pg_offset); - memcpy(data + pg_offset, bh[k]->b_data + offset, - avail); - in -= avail; - pg_offset += avail; - offset += avail; - } - offset = 0; - put_bh(bh[k]); - } - squashfs_finish_page(output); +bio_alloc_failed: + kfree(bio_req); +req_alloc_failed: + unlock_buffer(bh); + while (--nr_buffers >= b) + if (req->bh[nr_buffers]) + put_bh(req->bh[nr_buffers]); + while (--b >= 0) + if (req->bh[b]) + wait_on_buffer(req->bh[b]); +getblk_failed: + free_read_request(req, -ENOMEM); + return -ENOMEM; +} + +static int read_metadata_block(struct squashfs_read_request *req, + u64 *next_index) +{ + int ret, error, bytes_read = 0, bytes_uncompressed = 0; + struct squashfs_sb_info *msblk = req->sb->s_fs_info; + + if (req->index + 2 > msblk->bytes_used) { + free_read_request(req, -EINVAL); + return -EINVAL; + } + req->length = 2; + + /* Do not read beyond the end of the device */ + if (req->index + req->length > msblk->bytes_used) + req->length = msblk->bytes_used - req->index; + req->data_processing = SQUASHFS_METADATA; + + /* + * Reading metadata is always synchronous because we don't know the + * length in advance and the function is expected to update + * 'next_index' and return the length. + */ + req->synchronous = true; + req->res = &error; + req->bytes_read = &bytes_read; + req->bytes_uncompressed = &bytes_uncompressed; + + TRACE("Metadata block @ 0x%llx, %scompressed size %d, src size %d\n", + req->index, req->compressed ? "" : "un", bytes_read, + req->output->length); + + ret = squashfs_bio_submit(req); + if (ret) + return ret; + if (error) + return error; + if (next_index) + *next_index += 2 + bytes_read; + return bytes_uncompressed; +} + +static int read_data_block(struct squashfs_read_request *req, int length, + u64 *next_index, bool synchronous) +{ + int ret, error = 0, bytes_uncompressed = 0, bytes_read = 0; + + req->compressed = SQUASHFS_COMPRESSED_BLOCK(length); + req->length = length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length); + req->data_processing = req->compressed ? SQUASHFS_DECOMPRESS + : SQUASHFS_COPY; + + req->synchronous = synchronous; + if (synchronous) { + req->res = &error; + req->bytes_read = &bytes_read; + req->bytes_uncompressed = &bytes_uncompressed; + } + + TRACE("Data block @ 0x%llx, %scompressed size %d, src size %d\n", + req->index, req->compressed ? "" : "un", req->length, + req->output->length); + + ret = squashfs_bio_submit(req); + if (ret) + return ret; + if (synchronous) + ret = error ? error : bytes_uncompressed; + if (next_index) + *next_index += length; + return ret; +} + +/* + * Read and decompress a metadata block or datablock. Length is non-zero + * if a datablock is being read (the size is stored elsewhere in the + * filesystem), otherwise the length is obtained from the first two bytes of + * the metadata block. A bit in the length field indicates if the block + * is stored uncompressed in the filesystem (usually because compression + * generated a larger block - this does occasionally happen with compression + * algorithms). + */ +static int __squashfs_read_data(struct super_block *sb, u64 index, int length, + u64 *next_index, struct squashfs_page_actor *output, bool sync) +{ + struct squashfs_read_request *req; + + req = kcalloc(1, sizeof(struct squashfs_read_request), GFP_KERNEL); + if (!req) { + if (!sync) + squashfs_page_actor_free(output, -ENOMEM); + return -ENOMEM; + } + + req->sb = sb; + req->index = index; + req->output = output; + + if (next_index) + *next_index = index; + + if (length) + length = read_data_block(req, length, next_index, sync); + else + length = read_metadata_block(req, next_index); + + if (length < 0) { + ERROR("squashfs_read_data failed to read block 0x%llx\n", + (unsigned long long)index); + return -EIO; } - kfree(bh); return length; +} -block_release: - for (; k < b; k++) - put_bh(bh[k]); +int squashfs_read_data(struct super_block *sb, u64 index, int length, + u64 *next_index, struct squashfs_page_actor *output) +{ + return __squashfs_read_data(sb, index, length, next_index, output, + true); +} + +int squashfs_read_data_async(struct super_block *sb, u64 index, int length, + u64 *next_index, struct squashfs_page_actor *output) +{ -read_failure: - ERROR("squashfs_read_data failed to read block 0x%llx\n", - (unsigned long long) index); - kfree(bh); - return -EIO; + return __squashfs_read_data(sb, index, length, next_index, output, + false); } diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index 1cb70a0b2168..6785d086ab38 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -209,17 +209,14 @@ void squashfs_cache_put(struct squashfs_cache_entry *entry) */ void squashfs_cache_delete(struct squashfs_cache *cache) { - int i, j; + int i; if (cache == NULL) return; for (i = 0; i < cache->entries; i++) { - if (cache->entry[i].data) { - for (j = 0; j < cache->pages; j++) - kfree(cache->entry[i].data[j]); - kfree(cache->entry[i].data); - } + if (cache->entry[i].page) + free_page_array(cache->entry[i].page, cache->pages); kfree(cache->entry[i].actor); } @@ -236,7 +233,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache) struct squashfs_cache *squashfs_cache_init(char *name, int entries, int block_size) { - int i, j; + int i; struct squashfs_cache *cache = kzalloc(sizeof(*cache), GFP_KERNEL); if (cache == NULL) { @@ -268,22 +265,13 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries, init_waitqueue_head(&cache->entry[i].wait_queue); entry->cache = cache; entry->block = SQUASHFS_INVALID_BLK; - entry->data = kcalloc(cache->pages, sizeof(void *), GFP_KERNEL); - if (entry->data == NULL) { + entry->page = alloc_page_array(cache->pages, GFP_KERNEL); + if (!entry->page) { ERROR("Failed to allocate %s cache entry\n", name); goto cleanup; } - - for (j = 0; j < cache->pages; j++) { - entry->data[j] = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); - if (entry->data[j] == NULL) { - ERROR("Failed to allocate %s buffer\n", name); - goto cleanup; - } - } - - entry->actor = squashfs_page_actor_init(entry->data, - cache->pages, 0); + entry->actor = squashfs_page_actor_init(entry->page, + cache->pages, 0, NULL); if (entry->actor == NULL) { ERROR("Failed to allocate %s cache entry\n", name); goto cleanup; @@ -314,18 +302,20 @@ int squashfs_copy_data(void *buffer, struct squashfs_cache_entry *entry, return min(length, entry->length - offset); while (offset < entry->length) { - void *buff = entry->data[offset / PAGE_CACHE_SIZE] - + (offset % PAGE_CACHE_SIZE); + void *buff = kmap_atomic(entry->page[offset / PAGE_CACHE_SIZE]) + + (offset % PAGE_CACHE_SIZE); int bytes = min_t(int, entry->length - offset, PAGE_CACHE_SIZE - (offset % PAGE_CACHE_SIZE)); if (bytes >= remaining) { memcpy(buffer, buff, remaining); + kunmap_atomic(buff); remaining = 0; break; } memcpy(buffer, buff, bytes); + kunmap_atomic(buff); buffer += bytes; remaining -= bytes; offset += bytes; @@ -416,43 +406,38 @@ struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *sb, void *squashfs_read_table(struct super_block *sb, u64 block, int length) { int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - int i, res; - void *table, *buffer, **data; + struct page **page; + void *buff; + int res; struct squashfs_page_actor *actor; - table = buffer = kmalloc(length, GFP_KERNEL); - if (table == NULL) + page = alloc_page_array(pages, GFP_KERNEL); + if (!page) return ERR_PTR(-ENOMEM); - data = kcalloc(pages, sizeof(void *), GFP_KERNEL); - if (data == NULL) { - res = -ENOMEM; - goto failed; - } - - actor = squashfs_page_actor_init(data, pages, length); + actor = squashfs_page_actor_init(page, pages, length, NULL); if (actor == NULL) { res = -ENOMEM; - goto failed2; + goto failed; } - for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE) - data[i] = buffer; - res = squashfs_read_data(sb, block, length | SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor); - kfree(data); - kfree(actor); - if (res < 0) - goto failed; + goto failed2; - return table; + buff = kmalloc(length, GFP_KERNEL); + if (!buff) + goto failed2; + squashfs_actor_to_buf(actor, buff, length); + squashfs_page_actor_free(actor, 0); + free_page_array(page, pages); + return buff; failed2: - kfree(data); + squashfs_page_actor_free(actor, 0); failed: - kfree(table); + free_page_array(page, pages); return ERR_PTR(res); } diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index e9034bf6e5ae..7de35bf297aa 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -24,7 +24,8 @@ #include <linux/types.h> #include <linux/mutex.h> #include <linux/slab.h> -#include <linux/buffer_head.h> +#include <linux/highmem.h> +#include <linux/fs.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" @@ -94,40 +95,44 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id) static void *get_comp_opts(struct super_block *sb, unsigned short flags) { struct squashfs_sb_info *msblk = sb->s_fs_info; - void *buffer = NULL, *comp_opts; + void *comp_opts, *buffer = NULL; + struct page *page; struct squashfs_page_actor *actor = NULL; int length = 0; + if (!SQUASHFS_COMP_OPTS(flags)) + return squashfs_comp_opts(msblk, buffer, length); + /* * Read decompressor specific options from file system if present */ - if (SQUASHFS_COMP_OPTS(flags)) { - buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); - if (buffer == NULL) { - comp_opts = ERR_PTR(-ENOMEM); - goto out; - } - - actor = squashfs_page_actor_init(&buffer, 1, 0); - if (actor == NULL) { - comp_opts = ERR_PTR(-ENOMEM); - goto out; - } - - length = squashfs_read_data(sb, - sizeof(struct squashfs_super_block), 0, NULL, actor); - - if (length < 0) { - comp_opts = ERR_PTR(length); - goto out; - } + + page = alloc_page(GFP_KERNEL); + if (!page) + return ERR_PTR(-ENOMEM); + + actor = squashfs_page_actor_init(&page, 1, 0, NULL); + if (actor == NULL) { + comp_opts = ERR_PTR(-ENOMEM); + goto actor_error; + } + + length = squashfs_read_data(sb, + sizeof(struct squashfs_super_block), 0, NULL, actor); + + if (length < 0) { + comp_opts = ERR_PTR(length); + goto read_error; } + buffer = kmap_atomic(page); comp_opts = squashfs_comp_opts(msblk, buffer, length); + kunmap_atomic(buffer); -out: - kfree(actor); - kfree(buffer); +read_error: + squashfs_page_actor_free(actor, 0); +actor_error: + __free_page(page); return comp_opts; } diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index e5c9689062ba..6f5ef8d7e55a 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -47,12 +47,16 @@ #include <linux/string.h> #include <linux/pagemap.h> #include <linux/mutex.h> +#include <linux/mm_inline.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" #include "squashfs_fs_i.h" #include "squashfs.h" +// Backported from 4.5 +#define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) + /* * Locate cache slot in range [offset, index] for specified inode. If * there's more than one return the slot closest to index. @@ -438,6 +442,21 @@ static int squashfs_readpage_fragment(struct page *page) return res; } +static int squashfs_readpages_fragment(struct page *page, + struct list_head *readahead_pages, struct address_space *mapping) +{ + if (!page) { + page = lru_to_page(readahead_pages); + list_del(&page->lru); + if (add_to_page_cache_lru(page, mapping, page->index, + mapping_gfp_constraint(mapping, GFP_KERNEL))) { + put_page(page); + return 0; + } + } + return squashfs_readpage_fragment(page); +} + static int squashfs_readpage_sparse(struct page *page, int index, int file_end) { struct inode *inode = page->mapping->host; @@ -450,54 +469,105 @@ static int squashfs_readpage_sparse(struct page *page, int index, int file_end) return 0; } -static int squashfs_readpage(struct file *file, struct page *page) +static int squashfs_readpages_sparse(struct page *page, + struct list_head *readahead_pages, int index, int file_end, + struct address_space *mapping) { - struct inode *inode = page->mapping->host; + if (!page) { + page = lru_to_page(readahead_pages); + list_del(&page->lru); + if (add_to_page_cache_lru(page, mapping, page->index, + mapping_gfp_constraint(mapping, GFP_KERNEL))) { + put_page(page); + return 0; + } + } + return squashfs_readpage_sparse(page, index, file_end); +} + +static int __squashfs_readpages(struct file *file, struct page *page, + struct list_head *readahead_pages, unsigned int nr_pages, + struct address_space *mapping) +{ + struct inode *inode = mapping->host; struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT); int file_end = i_size_read(inode) >> msblk->block_log; int res; - void *pageaddr; - TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", - page->index, squashfs_i(inode)->start); + do { + struct page *cur_page = page ? page + : lru_to_page(readahead_pages); + int page_index = cur_page->index; + int index = page_index >> (msblk->block_log - PAGE_CACHE_SHIFT); + + if (page_index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT)) + return 1; + + if (index < file_end || squashfs_i(inode)->fragment_block == + SQUASHFS_INVALID_BLK) { + u64 block = 0; + int bsize = read_blocklist(inode, index, &block); + + if (bsize < 0) + return -1; + + if (bsize == 0) { + res = squashfs_readpages_sparse(page, + readahead_pages, index, file_end, + mapping); + } else { + res = squashfs_readpages_block(page, + readahead_pages, &nr_pages, mapping, + page_index, block, bsize); + } + } else { + res = squashfs_readpages_fragment(page, + readahead_pages, mapping); + } + if (res) + return 0; + page = NULL; + } while (readahead_pages && !list_empty(readahead_pages)); - if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT)) - goto out; + return 0; +} - if (index < file_end || squashfs_i(inode)->fragment_block == - SQUASHFS_INVALID_BLK) { - u64 block = 0; - int bsize = read_blocklist(inode, index, &block); - if (bsize < 0) - goto error_out; +static int squashfs_readpage(struct file *file, struct page *page) +{ + int ret; - if (bsize == 0) - res = squashfs_readpage_sparse(page, index, file_end); + TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", + page->index, squashfs_i(page->mapping->host)->start); + + get_page(page); + + ret = __squashfs_readpages(file, page, NULL, 1, page->mapping); + if (ret) { + flush_dcache_page(page); + if (ret < 0) + SetPageError(page); else - res = squashfs_readpage_block(page, block, bsize); - } else - res = squashfs_readpage_fragment(page); - - if (!res) - return 0; - -error_out: - SetPageError(page); -out: - pageaddr = kmap_atomic(page); - memset(pageaddr, 0, PAGE_CACHE_SIZE); - kunmap_atomic(pageaddr); - flush_dcache_page(page); - if (!PageError(page)) - SetPageUptodate(page); - unlock_page(page); + SetPageUptodate(page); + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + unlock_page(page); + put_page(page); + } return 0; } +static int squashfs_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned int nr_pages) +{ + TRACE("Entered squashfs_readpages, %u pages, first page index %lx\n", + nr_pages, lru_to_page(pages)->index); + __squashfs_readpages(file, NULL, pages, nr_pages, mapping); + return 0; +} + const struct address_space_operations squashfs_aops = { - .readpage = squashfs_readpage + .readpage = squashfs_readpage, + .readpages = squashfs_readpages, }; diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c deleted file mode 100644 index f2310d2a2019..000000000000 --- a/fs/squashfs/file_cache.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2013 - * Phillip Lougher <phillip@squashfs.org.uk> - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - */ - -#include <linux/fs.h> -#include <linux/vfs.h> -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/string.h> -#include <linux/pagemap.h> -#include <linux/mutex.h> - -#include "squashfs_fs.h" -#include "squashfs_fs_sb.h" -#include "squashfs_fs_i.h" -#include "squashfs.h" - -/* Read separately compressed datablock and memcopy into page cache */ -int squashfs_readpage_block(struct page *page, u64 block, int bsize) -{ - struct inode *i = page->mapping->host; - struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, - block, bsize); - int res = buffer->error; - - if (res) - ERROR("Unable to read page, block %llx, size %x\n", block, - bsize); - else - squashfs_copy_cache(page, buffer, buffer->length, 0); - - squashfs_cache_put(buffer); - return res; -} diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index 43e7a7eddac0..c97af4c6ccd0 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -13,6 +13,7 @@ #include <linux/string.h> #include <linux/pagemap.h> #include <linux/mutex.h> +#include <linux/mm_inline.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" @@ -20,157 +21,139 @@ #include "squashfs.h" #include "page_actor.h" -static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, - int pages, struct page **page); - -/* Read separately compressed datablock directly into page cache */ -int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) +// Backported from 4.5 +#define lru_to_page(head) (list_entry((head)->prev, struct page, lru)) +static void release_actor_pages(struct page **page, int pages, int error) { - struct inode *inode = target_page->mapping->host; - struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + int i; - int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; - int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; - int start_index = target_page->index & ~mask; - int end_index = start_index | mask; - int i, n, pages, missing_pages, bytes, res = -ENOMEM; + for (i = 0; i < pages; i++) { + if (!page[i]) + continue; + flush_dcache_page(page[i]); + if (!error) + SetPageUptodate(page[i]); + else { + SetPageError(page[i]); + zero_user_segment(page[i], 0, PAGE_CACHE_SIZE); + } + unlock_page(page[i]); + put_page(page[i]); + } + kfree(page); +} + +/* + * Create a "page actor" which will kmap and kunmap the + * page cache pages appropriately within the decompressor + */ +static struct squashfs_page_actor *actor_from_page_cache( + unsigned int actor_pages, struct page *target_page, + struct list_head *rpages, unsigned int *nr_pages, int start_index, + struct address_space *mapping) +{ struct page **page; struct squashfs_page_actor *actor; - void *pageaddr; - - if (end_index > file_end) - end_index = file_end; - - pages = end_index - start_index + 1; - - page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL); - if (page == NULL) - return res; - - /* - * Create a "page actor" which will kmap and kunmap the - * page cache pages appropriately within the decompressor - */ - actor = squashfs_page_actor_init_special(page, pages, 0); - if (actor == NULL) - goto out; - - /* Try to grab all the pages covered by the Squashfs block */ - for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) { - page[i] = (n == target_page->index) ? target_page : - grab_cache_page_nowait(target_page->mapping, n); + int i, n; + gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL); + + page = kmalloc_array(actor_pages, sizeof(void *), GFP_KERNEL); + if (!page) + return NULL; + + for (i = 0, n = start_index; i < actor_pages; i++, n++) { + if (target_page == NULL && rpages && !list_empty(rpages)) { + struct page *cur_page = lru_to_page(rpages); + + if (cur_page->index < start_index + actor_pages) { + list_del(&cur_page->lru); + --(*nr_pages); + if (add_to_page_cache_lru(cur_page, mapping, + cur_page->index, gfp)) + put_page(cur_page); + else + target_page = cur_page; + } else + rpages = NULL; + } - if (page[i] == NULL) { - missing_pages++; - continue; + if (target_page && target_page->index == n) { + page[i] = target_page; + target_page = NULL; + } else { + page[i] = grab_cache_page_nowait(mapping, n); + if (page[i] == NULL) + continue; } if (PageUptodate(page[i])) { unlock_page(page[i]); - page_cache_release(page[i]); + put_page(page[i]); page[i] = NULL; - missing_pages++; } } - if (missing_pages) { - /* - * Couldn't get one or more pages, this page has either - * been VM reclaimed, but others are still in the page cache - * and uptodate, or we're racing with another thread in - * squashfs_readpage also trying to grab them. Fall back to - * using an intermediate buffer. - */ - res = squashfs_read_cache(target_page, block, bsize, pages, - page); - if (res < 0) - goto mark_errored; - - goto out; + actor = squashfs_page_actor_init(page, actor_pages, 0, + release_actor_pages); + if (!actor) { + release_actor_pages(page, actor_pages, -ENOMEM); + kfree(page); + return NULL; } - - /* Decompress directly into the page cache buffers */ - res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); - if (res < 0) - goto mark_errored; - - /* Last page may have trailing bytes not filled */ - bytes = res % PAGE_CACHE_SIZE; - if (bytes) { - pageaddr = kmap_atomic(page[pages - 1]); - memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); - kunmap_atomic(pageaddr); - } - - /* Mark pages as uptodate, unlock and release */ - for (i = 0; i < pages; i++) { - flush_dcache_page(page[i]); - SetPageUptodate(page[i]); - unlock_page(page[i]); - if (page[i] != target_page) - page_cache_release(page[i]); - } - - kfree(actor); - kfree(page); - - return 0; - -mark_errored: - /* Decompression failed, mark pages as errored. Target_page is - * dealt with by the caller - */ - for (i = 0; i < pages; i++) { - if (page[i] == NULL || page[i] == target_page) - continue; - flush_dcache_page(page[i]); - SetPageError(page[i]); - unlock_page(page[i]); - page_cache_release(page[i]); - } - -out: - kfree(actor); - kfree(page); - return res; + return actor; } +int squashfs_readpages_block(struct page *target_page, + struct list_head *readahead_pages, + unsigned int *nr_pages, + struct address_space *mapping, + int page_index, u64 block, int bsize) -static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, - int pages, struct page **page) { - struct inode *i = target_page->mapping->host; - struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, - block, bsize); - int bytes = buffer->length, res = buffer->error, n, offset = 0; - void *pageaddr; - - if (res) { - ERROR("Unable to read page, block %llx, size %x\n", block, - bsize); - goto out; - } - - for (n = 0; n < pages && bytes > 0; n++, - bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { - int avail = min_t(int, bytes, PAGE_CACHE_SIZE); - - if (page[n] == NULL) - continue; + struct squashfs_page_actor *actor; + struct inode *inode = mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + int start_index, end_index, file_end, actor_pages, res; + int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; - pageaddr = kmap_atomic(page[n]); - squashfs_copy_data(pageaddr, buffer, offset, avail); - memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail); - kunmap_atomic(pageaddr); - flush_dcache_page(page[n]); - SetPageUptodate(page[n]); - unlock_page(page[n]); - if (page[n] != target_page) - page_cache_release(page[n]); + /* + * If readpage() is called on an uncompressed datablock, we can just + * read the pages instead of fetching the whole block. + * This greatly improves the performance when a process keep doing + * random reads because we only fetch the necessary data. + * The readahead algorithm will take care of doing speculative reads + * if necessary. + * We can't read more than 1 block even if readahead provides use more + * pages because we don't know yet if the next block is compressed or + * not. + */ + if (bsize && !SQUASHFS_COMPRESSED_BLOCK(bsize)) { + u64 block_end = block + msblk->block_size; + + block += (page_index & mask) * PAGE_CACHE_SIZE; + actor_pages = (block_end - block) / PAGE_CACHE_SIZE; + if (*nr_pages < actor_pages) + actor_pages = *nr_pages; + start_index = page_index; + bsize = min_t(int, bsize, (PAGE_CACHE_SIZE * actor_pages) + | SQUASHFS_COMPRESSED_BIT_BLOCK); + } else { + file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; + start_index = page_index & ~mask; + end_index = start_index | mask; + if (end_index > file_end) + end_index = file_end; + actor_pages = end_index - start_index + 1; } -out: - squashfs_cache_put(buffer); - return res; + actor = actor_from_page_cache(actor_pages, target_page, + readahead_pages, nr_pages, start_index, + mapping); + if (!actor) + return -ENOMEM; + + res = squashfs_read_data_async(inode->i_sb, block, bsize, NULL, + actor); + return res < 0 ? res : 0; } diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c index c31e2bc9c081..df4fa3c7ddd0 100644 --- a/fs/squashfs/lz4_wrapper.c +++ b/fs/squashfs/lz4_wrapper.c @@ -94,39 +94,17 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm, struct buffer_head **bh, int b, int offset, int length, struct squashfs_page_actor *output) { - struct squashfs_lz4 *stream = strm; - void *buff = stream->input, *data; - int avail, i, bytes = length, res; + int res; size_t dest_len = output->length; + struct squashfs_lz4 *stream = strm; - for (i = 0; i < b; i++) { - avail = min(bytes, msblk->devblksize - offset); - memcpy(buff, bh[i]->b_data + offset, avail); - buff += avail; - bytes -= avail; - offset = 0; - put_bh(bh[i]); - } - + squashfs_bh_to_buf(bh, b, stream->input, offset, length, + msblk->devblksize); res = lz4_decompress_unknownoutputsize(stream->input, length, stream->output, &dest_len); if (res) return -EIO; - - bytes = dest_len; - data = squashfs_first_page(output); - buff = stream->output; - while (data) { - if (bytes <= PAGE_CACHE_SIZE) { - memcpy(data, buff, bytes); - break; - } - memcpy(data, buff, PAGE_CACHE_SIZE); - buff += PAGE_CACHE_SIZE; - bytes -= PAGE_CACHE_SIZE; - data = squashfs_next_page(output); - } - squashfs_finish_page(output); + squashfs_buf_to_actor(stream->output, output, dest_len); return dest_len; } diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c index 244b9fbfff7b..2c844d53a59e 100644 --- a/fs/squashfs/lzo_wrapper.c +++ b/fs/squashfs/lzo_wrapper.c @@ -79,45 +79,19 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm, struct buffer_head **bh, int b, int offset, int length, struct squashfs_page_actor *output) { - struct squashfs_lzo *stream = strm; - void *buff = stream->input, *data; - int avail, i, bytes = length, res; + int res; size_t out_len = output->length; + struct squashfs_lzo *stream = strm; - for (i = 0; i < b; i++) { - avail = min(bytes, msblk->devblksize - offset); - memcpy(buff, bh[i]->b_data + offset, avail); - buff += avail; - bytes -= avail; - offset = 0; - put_bh(bh[i]); - } - + squashfs_bh_to_buf(bh, b, stream->input, offset, length, + msblk->devblksize); res = lzo1x_decompress_safe(stream->input, (size_t)length, stream->output, &out_len); if (res != LZO_E_OK) - goto failed; + return -EIO; + squashfs_buf_to_actor(stream->output, output, out_len); - res = bytes = (int)out_len; - data = squashfs_first_page(output); - buff = stream->output; - while (data) { - if (bytes <= PAGE_CACHE_SIZE) { - memcpy(data, buff, bytes); - break; - } else { - memcpy(data, buff, PAGE_CACHE_SIZE); - buff += PAGE_CACHE_SIZE; - bytes -= PAGE_CACHE_SIZE; - data = squashfs_next_page(output); - } - } - squashfs_finish_page(output); - - return res; - -failed: - return -EIO; + return out_len; } const struct squashfs_decompressor squashfs_lzo_comp_ops = { diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c index 5a1c11f56441..53863508e400 100644 --- a/fs/squashfs/page_actor.c +++ b/fs/squashfs/page_actor.c @@ -9,39 +9,11 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/pagemap.h> +#include <linux/buffer_head.h> #include "page_actor.h" -/* - * This file contains implementations of page_actor for decompressing into - * an intermediate buffer, and for decompressing directly into the - * page cache. - * - * Calling code should avoid sleeping between calls to squashfs_first_page() - * and squashfs_finish_page(). - */ - -/* Implementation of page_actor for decompressing into intermediate buffer */ -static void *cache_first_page(struct squashfs_page_actor *actor) -{ - actor->next_page = 1; - return actor->buffer[0]; -} - -static void *cache_next_page(struct squashfs_page_actor *actor) -{ - if (actor->next_page == actor->pages) - return NULL; - - return actor->buffer[actor->next_page++]; -} - -static void cache_finish_page(struct squashfs_page_actor *actor) -{ - /* empty */ -} - -struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, - int pages, int length) +struct squashfs_page_actor *squashfs_page_actor_init(struct page **page, + int pages, int length, void (*release_pages)(struct page **, int, int)) { struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); @@ -49,52 +21,133 @@ struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, return NULL; actor->length = length ? : pages * PAGE_CACHE_SIZE; - actor->buffer = buffer; + actor->page = page; actor->pages = pages; actor->next_page = 0; - actor->squashfs_first_page = cache_first_page; - actor->squashfs_next_page = cache_next_page; - actor->squashfs_finish_page = cache_finish_page; + actor->pageaddr = NULL; + actor->release_pages = release_pages; return actor; } -/* Implementation of page_actor for decompressing directly into page cache. */ -static void *direct_first_page(struct squashfs_page_actor *actor) +void squashfs_page_actor_free(struct squashfs_page_actor *actor, int error) +{ + if (!actor) + return; + + if (actor->release_pages) + actor->release_pages(actor->page, actor->pages, error); + kfree(actor); +} + +void squashfs_actor_to_buf(struct squashfs_page_actor *actor, void *buf, + int length) { - actor->next_page = 1; - return actor->pageaddr = kmap_atomic(actor->page[0]); + void *pageaddr; + int pos = 0, avail, i; + + for (i = 0; i < actor->pages && pos < length; ++i) { + avail = min_t(int, length - pos, PAGE_CACHE_SIZE); + if (actor->page[i]) { + pageaddr = kmap_atomic(actor->page[i]); + memcpy(buf + pos, pageaddr, avail); + kunmap_atomic(pageaddr); + } + pos += avail; + } } -static void *direct_next_page(struct squashfs_page_actor *actor) +void squashfs_buf_to_actor(void *buf, struct squashfs_page_actor *actor, + int length) { - if (actor->pageaddr) - kunmap_atomic(actor->pageaddr); + void *pageaddr; + int pos = 0, avail, i; + + for (i = 0; i < actor->pages && pos < length; ++i) { + avail = min_t(int, length - pos, PAGE_CACHE_SIZE); + if (actor->page[i]) { + pageaddr = kmap_atomic(actor->page[i]); + memcpy(pageaddr, buf + pos, avail); + kunmap_atomic(pageaddr); + } + pos += avail; + } +} - return actor->pageaddr = actor->next_page == actor->pages ? NULL : - kmap_atomic(actor->page[actor->next_page++]); +void squashfs_bh_to_actor(struct buffer_head **bh, int nr_buffers, + struct squashfs_page_actor *actor, int offset, int length, int blksz) +{ + void *kaddr = NULL; + int bytes = 0, pgoff = 0, b = 0, p = 0, avail, i; + + while (bytes < length) { + if (actor->page[p]) { + kaddr = kmap_atomic(actor->page[p]); + while (pgoff < PAGE_CACHE_SIZE && bytes < length) { + avail = min_t(int, blksz - offset, + PAGE_CACHE_SIZE - pgoff); + memcpy(kaddr + pgoff, bh[b]->b_data + offset, + avail); + pgoff += avail; + bytes += avail; + offset = (offset + avail) % blksz; + if (!offset) { + put_bh(bh[b]); + ++b; + } + } + kunmap_atomic(kaddr); + pgoff = 0; + } else { + for (i = 0; i < PAGE_CACHE_SIZE / blksz; ++i) { + if (bh[b]) + put_bh(bh[b]); + ++b; + } + bytes += PAGE_CACHE_SIZE; + } + ++p; + } } -static void direct_finish_page(struct squashfs_page_actor *actor) +void squashfs_bh_to_buf(struct buffer_head **bh, int nr_buffers, void *buf, + int offset, int length, int blksz) { - if (actor->pageaddr) - kunmap_atomic(actor->pageaddr); + int i, avail, bytes = 0; + + for (i = 0; i < nr_buffers && bytes < length; ++i) { + avail = min_t(int, length - bytes, blksz - offset); + if (bh[i]) { + memcpy(buf + bytes, bh[i]->b_data + offset, avail); + put_bh(bh[i]); + } + bytes += avail; + offset = 0; + } } -struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page, - int pages, int length) +void free_page_array(struct page **page, int nr_pages) { - struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); + int i; - if (actor == NULL) - return NULL; + for (i = 0; i < nr_pages; ++i) + __free_page(page[i]); + kfree(page); +} - actor->length = length ? : pages * PAGE_CACHE_SIZE; - actor->page = page; - actor->pages = pages; - actor->next_page = 0; - actor->pageaddr = NULL; - actor->squashfs_first_page = direct_first_page; - actor->squashfs_next_page = direct_next_page; - actor->squashfs_finish_page = direct_finish_page; - return actor; +struct page **alloc_page_array(int nr_pages, int gfp_mask) +{ + int i; + struct page **page; + + page = kcalloc(nr_pages, sizeof(struct page *), gfp_mask); + if (!page) + return NULL; + for (i = 0; i < nr_pages; ++i) { + page[i] = alloc_page(gfp_mask); + if (!page[i]) { + free_page_array(page, i); + return NULL; + } + } + return page; } diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h index 26dd82008b82..aa1ed790b5a3 100644 --- a/fs/squashfs/page_actor.h +++ b/fs/squashfs/page_actor.h @@ -5,77 +5,61 @@ * Phillip Lougher <phillip@squashfs.org.uk> * * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. + * the COPYING file in the top-level squashfsory. */ -#ifndef CONFIG_SQUASHFS_FILE_DIRECT struct squashfs_page_actor { - void **page; + struct page **page; + void *pageaddr; int pages; int length; int next_page; + void (*release_pages)(struct page **, int, int); }; -static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page, - int pages, int length) -{ - struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); - - if (actor == NULL) - return NULL; +extern struct squashfs_page_actor *squashfs_page_actor_init(struct page **, + int, int, void (*)(struct page **, int, int)); +extern void squashfs_page_actor_free(struct squashfs_page_actor *, int); - actor->length = length ? : pages * PAGE_CACHE_SIZE; - actor->page = page; - actor->pages = pages; - actor->next_page = 0; - return actor; -} +extern void squashfs_actor_to_buf(struct squashfs_page_actor *, void *, int); +extern void squashfs_buf_to_actor(void *, struct squashfs_page_actor *, int); +extern void squashfs_bh_to_actor(struct buffer_head **, int, + struct squashfs_page_actor *, int, int, int); +extern void squashfs_bh_to_buf(struct buffer_head **, int, void *, int, int, + int); +/* + * Calling code should avoid sleeping between calls to squashfs_first_page() + * and squashfs_finish_page(). + */ static inline void *squashfs_first_page(struct squashfs_page_actor *actor) { actor->next_page = 1; - return actor->page[0]; + return actor->pageaddr = actor->page[0] ? kmap_atomic(actor->page[0]) + : NULL; } static inline void *squashfs_next_page(struct squashfs_page_actor *actor) { - return actor->next_page == actor->pages ? NULL : - actor->page[actor->next_page++]; -} + if (!IS_ERR_OR_NULL(actor->pageaddr)) + kunmap_atomic(actor->pageaddr); -static inline void squashfs_finish_page(struct squashfs_page_actor *actor) -{ - /* empty */ -} -#else -struct squashfs_page_actor { - union { - void **buffer; - struct page **page; - }; - void *pageaddr; - void *(*squashfs_first_page)(struct squashfs_page_actor *); - void *(*squashfs_next_page)(struct squashfs_page_actor *); - void (*squashfs_finish_page)(struct squashfs_page_actor *); - int pages; - int length; - int next_page; -}; + if (actor->next_page == actor->pages) + return actor->pageaddr = ERR_PTR(-ENODATA); -extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int); -extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page - **, int, int); -static inline void *squashfs_first_page(struct squashfs_page_actor *actor) -{ - return actor->squashfs_first_page(actor); -} -static inline void *squashfs_next_page(struct squashfs_page_actor *actor) -{ - return actor->squashfs_next_page(actor); + actor->pageaddr = actor->page[actor->next_page] ? + kmap_atomic(actor->page[actor->next_page]) : NULL; + ++actor->next_page; + return actor->pageaddr; } + static inline void squashfs_finish_page(struct squashfs_page_actor *actor) { - actor->squashfs_finish_page(actor); + if (!IS_ERR_OR_NULL(actor->pageaddr)) + kunmap_atomic(actor->pageaddr); } -#endif + +extern struct page **alloc_page_array(int, int); +extern void free_page_array(struct page **, int); + #endif diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 887d6d270080..6093579c6c5d 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -28,8 +28,14 @@ #define WARNING(s, args...) pr_warn("SQUASHFS: "s, ## args) /* block.c */ +extern int squashfs_init_read_wq(void); +extern void squashfs_destroy_read_wq(void); extern int squashfs_read_data(struct super_block *, u64, int, u64 *, struct squashfs_page_actor *); +extern int squashfs_read_data(struct super_block *, u64, int, u64 *, + struct squashfs_page_actor *); +extern int squashfs_read_data_async(struct super_block *, u64, int, u64 *, + struct squashfs_page_actor *); /* cache.c */ extern struct squashfs_cache *squashfs_cache_init(char *, int, int); @@ -70,8 +76,9 @@ extern __le64 *squashfs_read_fragment_index_table(struct super_block *, void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, int); -/* file_xxx.c */ -extern int squashfs_readpage_block(struct page *, u64, int); +/* file_direct.c */ +extern int squashfs_readpages_block(struct page *, struct list_head *, + unsigned int *, struct address_space *, int, u64, int); /* id.c */ extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 1da565cb50c3..8a6995de0277 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -49,7 +49,7 @@ struct squashfs_cache_entry { int num_waiters; wait_queue_head_t wait_queue; struct squashfs_cache *cache; - void **data; + struct page **page; struct squashfs_page_actor *actor; }; diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 5056babe00df..61cd0b39ed0e 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -444,9 +444,15 @@ static int __init init_squashfs_fs(void) if (err) return err; + if (!squashfs_init_read_wq()) { + destroy_inodecache(); + return -ENOMEM; + } + err = register_filesystem(&squashfs_fs_type); if (err) { destroy_inodecache(); + squashfs_destroy_read_wq(); return err; } @@ -460,6 +466,7 @@ static void __exit exit_squashfs_fs(void) { unregister_filesystem(&squashfs_fs_type); destroy_inodecache(); + squashfs_destroy_read_wq(); } diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c index c609624e4b8a..14cd373e1897 100644 --- a/fs/squashfs/xz_wrapper.c +++ b/fs/squashfs/xz_wrapper.c @@ -55,7 +55,7 @@ static void *squashfs_xz_comp_opts(struct squashfs_sb_info *msblk, struct comp_opts *opts; int err = 0, n; - opts = kmalloc(sizeof(*opts), GFP_KERNEL); + opts = kmalloc(sizeof(*opts), GFP_ATOMIC); if (opts == NULL) { err = -ENOMEM; goto out2; @@ -136,6 +136,7 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, enum xz_ret xz_err; int avail, total = 0, k = 0; struct squashfs_xz *stream = strm; + void *buf = NULL; xz_dec_reset(stream->state); stream->buf.in_pos = 0; @@ -156,12 +157,20 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, if (stream->buf.out_pos == stream->buf.out_size) { stream->buf.out = squashfs_next_page(output); - if (stream->buf.out != NULL) { + if (!IS_ERR(stream->buf.out)) { stream->buf.out_pos = 0; total += PAGE_CACHE_SIZE; } } + if (!stream->buf.out) { + if (!buf) { + buf = kmalloc(PAGE_CACHE_SIZE, GFP_ATOMIC); + if (!buf) + goto out; + } + stream->buf.out = buf; + } xz_err = xz_dec_run(stream->state, &stream->buf); if (stream->buf.in_pos == stream->buf.in_size && k < b) @@ -173,11 +182,13 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, if (xz_err != XZ_STREAM_END || k < b) goto out; + kfree(buf); return total + stream->buf.out_pos; out: for (; k < b; k++) put_bh(bh[k]); + kfree(buf); return -EIO; } diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index 8727caba6882..09c892b5308e 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -66,6 +66,7 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, struct buffer_head **bh, int b, int offset, int length, struct squashfs_page_actor *output) { + void *buf = NULL; int zlib_err, zlib_init = 0, k = 0; z_stream *stream = strm; @@ -84,10 +85,19 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, if (stream->avail_out == 0) { stream->next_out = squashfs_next_page(output); - if (stream->next_out != NULL) + if (!IS_ERR(stream->next_out)) stream->avail_out = PAGE_CACHE_SIZE; } + if (!stream->next_out) { + if (!buf) { + buf = kmalloc(PAGE_CACHE_SIZE, GFP_ATOMIC); + if (!buf) + goto out; + } + stream->next_out = buf; + } + if (!zlib_init) { zlib_err = zlib_inflateInit(stream); if (zlib_err != Z_OK) { @@ -115,11 +125,13 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, if (k < b) goto out; + kfree(buf); return stream->total_out; out: for (; k < b; k++) put_bh(bh[k]); + kfree(buf); return -EIO; } diff --git a/fs/sync.c b/fs/sync.c index dd5d1711c7ac..452179e31c39 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -218,6 +218,7 @@ static int do_fsync(unsigned int fd, int datasync) if (f.file) { ret = vfs_fsync(f.file, datasync); fdput(f); + inc_syscfs(current); } return ret; } diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 1aabfda669b0..7183b7ea065b 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -299,6 +299,14 @@ xfs_dinode_verify( if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) return false; + /* don't allow invalid i_size */ + if (be64_to_cpu(dip->di_size) & (1ULL << 63)) + return false; + + /* No zero-length symlinks. */ + if (S_ISLNK(be16_to_cpu(dip->di_mode)) && dip->di_size == 0) + return false; + /* only version 3 or greater inodes are extensively verified here */ if (dip->di_version < 3) return true; diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index c5101a3295d8..62ba66e1c598 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -289,8 +289,10 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) if (type == ACL_TYPE_ACCESS) { umode_t mode; - + struct posix_acl *old_acl = acl; error = posix_acl_update_mode(inode, &mode, &acl); + if (!acl) + posix_acl_release(old_acl); if (error) return error; error = xfs_set_mode(inode, mode); diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index dbae6490a79a..832764ee035a 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1713,6 +1713,7 @@ xfs_swap_extents( xfs_trans_t *tp; xfs_bstat_t *sbp = &sxp->sx_stat; xfs_ifork_t *tempifp, *ifp, *tifp; + xfs_extnum_t nextents; int src_log_flags, target_log_flags; int error = 0; int aforkblks = 0; @@ -1899,7 +1900,8 @@ xfs_swap_extents( * pointer. Otherwise it's already NULL or * pointing to the extent. */ - if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) { + nextents = ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + if (nextents <= XFS_INLINE_EXTS) { ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; } @@ -1918,7 +1920,8 @@ xfs_swap_extents( * pointer. Otherwise it's already NULL or * pointing to the extent. */ - if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) { + nextents = tip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + if (nextents <= XFS_INLINE_EXTS) { tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index eb1b8c8acfcb..8146b0cf20ce 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -375,6 +375,7 @@ retry: out_free_pages: for (i = 0; i < bp->b_page_count; i++) __free_page(bp->b_pages[i]); + bp->b_flags &= ~_XBF_PAGES; return error; } |