summary | refs | log | tree | commit | diff
path: root/fs/ext4
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4')
-rw-r--r-- fs/ext4/Kconfig 1
-rw-r--r-- fs/ext4/crypto.c 12
-rw-r--r-- fs/ext4/crypto_fname.c 5
-rw-r--r-- fs/ext4/crypto_key.c 107
-rw-r--r-- fs/ext4/crypto_policy.c 14
-rw-r--r-- fs/ext4/ext4.h 11
-rw-r--r-- fs/ext4/ext4_crypto.h 8
-rw-r--r-- fs/ext4/inline.c 14
-rw-r--r-- fs/ext4/inode.c 59
-rw-r--r-- fs/ext4/ioctl.c 10
-rw-r--r-- fs/ext4/mballoc.c 28
-rw-r--r-- fs/ext4/readpage.c 47
-rw-r--r-- fs/ext4/super.c 7
-rw-r--r-- fs/ext4/xattr.c 136
-rw-r--r-- fs/ext4/xattr.h 5
15 files changed, 344 insertions, 120 deletions
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index b46e9fc64196..3c8293215603 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -106,6 +106,7 @@ config EXT4_ENCRYPTION
select CRYPTO_ECB
select CRYPTO_XTS
select CRYPTO_CTS
+ select CRYPTO_HEH
select CRYPTO_CTR
select CRYPTO_SHA256
select KEYS
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index f240cef8b326..f6096ee77662 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -457,9 +457,17 @@ errout:
return err;
}
-bool ext4_valid_contents_enc_mode(uint32_t mode)
+bool ext4_valid_enc_modes(uint32_t contents_mode, uint32_t filenames_mode) /* true iff this (contents, filenames) mode pair is an accepted combination */
{
- return (mode == EXT4_ENCRYPTION_MODE_AES_256_XTS);
+ if (contents_mode == EXT4_ENCRYPTION_MODE_AES_256_XTS) { /* AES-256-XTS contents: filenames may be AES-256-CTS or AES-256-HEH */
+ return (filenames_mode == EXT4_ENCRYPTION_MODE_AES_256_CTS ||
+ filenames_mode == EXT4_ENCRYPTION_MODE_AES_256_HEH);
+ }
+
+ if (contents_mode == EXT4_ENCRYPTION_MODE_SPECK128_256_XTS) /* Speck128/256-XTS contents: filenames must be Speck128/256-CTS */
+ return filenames_mode == EXT4_ENCRYPTION_MODE_SPECK128_256_CTS;
+
+ return false; /* every other contents mode is rejected, whatever the filenames mode */
}
/**
diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c
index 2cfe3ffc276f..5e5afb6ef71a 100644
--- a/fs/ext4/crypto_fname.c
+++ b/fs/ext4/crypto_fname.c
@@ -42,11 +42,6 @@ static void ext4_dir_crypt_complete(struct crypto_async_request *req, int res)
complete(&ecr->completion);
}
-bool ext4_valid_filenames_enc_mode(uint32_t mode)
-{
- return (mode == EXT4_ENCRYPTION_MODE_AES_256_CTS);
-}
-
static unsigned max_name_len(struct inode *inode)
{
return S_ISLNK(inode->i_mode) ? inode->i_sb->s_blocksize :
diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c
index 9308fe4b66e6..68225223ffd8 100644
--- a/fs/ext4/crypto_key.c
+++ b/fs/ext4/crypto_key.c
@@ -29,16 +29,16 @@ static void derive_crypt_complete(struct crypto_async_request *req, int rc)
}
/**
- * ext4_derive_key_aes() - Derive a key using AES-128-ECB
+ * ext4_derive_key_v1() - Derive a key using AES-128-ECB
* @deriving_key: Encryption key used for derivation.
* @source_key: Source key to which to apply derivation.
* @derived_key: Derived key.
*
- * Return: Zero on success; non-zero otherwise.
+ * Return: 0 on success, -errno on failure
*/
-static int ext4_derive_key_aes(char deriving_key[EXT4_AES_128_ECB_KEY_SIZE],
- char source_key[EXT4_AES_256_XTS_KEY_SIZE],
- char derived_key[EXT4_AES_256_XTS_KEY_SIZE])
+static int ext4_derive_key_v1(const char deriving_key[EXT4_AES_128_ECB_KEY_SIZE],
+ const char source_key[EXT4_AES_256_XTS_KEY_SIZE],
+ char derived_key[EXT4_AES_256_XTS_KEY_SIZE])
{
int res = 0;
struct ablkcipher_request *req = NULL;
@@ -83,6 +83,91 @@ out:
return res;
}
+/**
+ * ext4_derive_key_v2() - Derive a key non-reversibly
+ * @nonce: the nonce associated with the file
+ * @master_key: the master key referenced by the file
+ * @derived_key: (output) the resulting derived key
+ *
+ * This function computes the following:
+ * derived_key[0:127] = AES-256-ENCRYPT(master_key[0:255], nonce)
+ * derived_key[128:255] = AES-256-ENCRYPT(master_key[0:255], nonce ^ 0x01)
+ * derived_key[256:383] = AES-256-ENCRYPT(master_key[256:511], nonce)
+ * derived_key[384:511] = AES-256-ENCRYPT(master_key[256:511], nonce ^ 0x01)
+ *
+ * 'nonce ^ 0x01' denotes flipping the low order bit of the last byte.
+ *
+ * Unlike the v1 algorithm, the v2 algorithm is "non-reversible", meaning that
+ * compromising a derived key does not also compromise the master key.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+static int ext4_derive_key_v2(const char nonce[EXT4_KEY_DERIVATION_NONCE_SIZE],
+ const char master_key[EXT4_MAX_KEY_SIZE],
+ char derived_key[EXT4_MAX_KEY_SIZE])
+{
+ const int noncelen = EXT4_KEY_DERIVATION_NONCE_SIZE;
+ struct crypto_cipher *tfm;
+ int err; /* always assigned: the loop below runs at least once and sets it from crypto_cipher_setkey() */
+ int i;
+
+ /*
+ * Since we only use each transform for a small number of encryptions,
+ * requesting just "aes" turns out to be significantly faster than
+ * "ecb(aes)", by about a factor of two.
+ */
+ tfm = crypto_alloc_cipher("aes", 0, 0);
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ BUILD_BUG_ON(4 * EXT4_KEY_DERIVATION_NONCE_SIZE != EXT4_MAX_KEY_SIZE); /* output is exactly four nonce-sized blocks */
+ BUILD_BUG_ON(2 * EXT4_AES_256_ECB_KEY_SIZE != EXT4_MAX_KEY_SIZE); /* master key splits into exactly two AES-256 keys */
+ for (i = 0; i < 2; i++) { /* one pass per master-key half; each pass fills two output blocks */
+ memcpy(derived_key, nonce, noncelen); /* first block of this half: the nonce */
+ memcpy(derived_key + noncelen, nonce, noncelen); /* second block: the nonce again ... */
+ derived_key[2 * noncelen - 1] ^= 0x01; /* ... with the low bit of its last byte flipped ("nonce ^ 0x01") */
+ err = crypto_cipher_setkey(tfm, master_key,
+ EXT4_AES_256_ECB_KEY_SIZE);
+ if (err)
+ break; /* tfm is still freed below; derived_key has already been partly written at this point */
+ crypto_cipher_encrypt_one(tfm, derived_key, derived_key); /* encrypt both blocks in place with the current key half */
+ crypto_cipher_encrypt_one(tfm, derived_key + noncelen,
+ derived_key + noncelen);
+ master_key += EXT4_AES_256_ECB_KEY_SIZE; /* advance to the next master-key half */
+ derived_key += 2 * noncelen; /* advance to the next pair of output blocks */
+ }
+ crypto_free_cipher(tfm);
+ return err;
+}
+
+/**
+ * ext4_derive_key() - Derive a per-file key from a nonce and master key
+ * @ctx: the encryption context associated with the file
+ * @master_key: the master key referenced by the file
+ * @derived_key: (output) the resulting derived key
+ *
+ * Return: 0 on success, -errno on failure
+ */
+static int ext4_derive_key(const struct ext4_encryption_context *ctx,
+ const char master_key[EXT4_MAX_KEY_SIZE],
+ char derived_key[EXT4_MAX_KEY_SIZE])
+{
+ BUILD_BUG_ON(EXT4_AES_128_ECB_KEY_SIZE != EXT4_KEY_DERIVATION_NONCE_SIZE); /* v1 takes the nonce as its AES-128-sized deriving key */
+ BUILD_BUG_ON(EXT4_AES_256_XTS_KEY_SIZE != EXT4_MAX_KEY_SIZE); /* v1's source/derived key arrays must match the buffers passed here */
+
+ /*
+ * Although the key derivation algorithm is logically independent of the
+ * choice of encryption modes, in this kernel it is bundled with HEH
+ * encryption of filenames, which is another crypto improvement that
+ * requires an on-disk format change and requires userspace to specify
+ * different encryption policies.
+ */
+ if (ctx->filenames_encryption_mode == EXT4_ENCRYPTION_MODE_AES_256_HEH) /* only the filenames mode selects the derivation version */
+ return ext4_derive_key_v2(ctx->nonce, master_key, derived_key);
+ else
+ return ext4_derive_key_v1(ctx->nonce, master_key, derived_key); /* v1: nonce is the deriving key, master key is the source key */
+}
+
void ext4_free_crypt_info(struct ext4_crypt_info *ci)
{
if (!ci)
@@ -170,6 +255,15 @@ int ext4_get_encryption_info(struct inode *inode)
case EXT4_ENCRYPTION_MODE_AES_256_CTS:
cipher_str = "cts(cbc(aes))";
break;
+ case EXT4_ENCRYPTION_MODE_AES_256_HEH:
+ cipher_str = "heh(aes)";
+ break;
+ case EXT4_ENCRYPTION_MODE_SPECK128_256_XTS:
+ cipher_str = "xts(speck128)";
+ break;
+ case EXT4_ENCRYPTION_MODE_SPECK128_256_CTS:
+ cipher_str = "cts(cbc(speck128))";
+ break;
default:
printk_once(KERN_WARNING
"ext4: unsupported key mode %d (ino %u)\n",
@@ -224,8 +318,7 @@ int ext4_get_encryption_info(struct inode *inode)
up_read(&keyring_key->sem);
goto out;
}
- res = ext4_derive_key_aes(ctx.nonce, master_key->raw,
- raw_key);
+ res = ext4_derive_key(&ctx, master_key->raw, raw_key);
up_read(&keyring_key->sem);
if (res)
goto out;
diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c
index 77bd7bfb6329..d4b6f3c06ff0 100644
--- a/fs/ext4/crypto_policy.c
+++ b/fs/ext4/crypto_policy.c
@@ -60,16 +60,12 @@ static int ext4_create_encryption_context_from_policy(
ctx.format = EXT4_ENCRYPTION_CONTEXT_FORMAT_V1;
memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
EXT4_KEY_DESCRIPTOR_SIZE);
- if (!ext4_valid_contents_enc_mode(policy->contents_encryption_mode)) {
+ if (!ext4_valid_enc_modes(policy->contents_encryption_mode,
+ policy->filenames_encryption_mode)) {
printk(KERN_WARNING
- "%s: Invalid contents encryption mode %d\n", __func__,
- policy->contents_encryption_mode);
- return -EINVAL;
- }
- if (!ext4_valid_filenames_enc_mode(policy->filenames_encryption_mode)) {
- printk(KERN_WARNING
- "%s: Invalid filenames encryption mode %d\n", __func__,
- policy->filenames_encryption_mode);
+ "%s: Invalid encryption modes (contents %d, filenames %d)\n",
+ __func__, policy->contents_encryption_mode,
+ policy->filenames_encryption_mode);
return -EINVAL;
}
if (policy->flags & ~EXT4_POLICY_FLAGS_VALID)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index abffa2488ae9..9f6c259fc83f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -589,6 +589,9 @@ enum {
#define EXT4_ENCRYPTION_MODE_AES_256_GCM 2
#define EXT4_ENCRYPTION_MODE_AES_256_CBC 3
#define EXT4_ENCRYPTION_MODE_AES_256_CTS 4
+#define EXT4_ENCRYPTION_MODE_SPECK128_256_XTS 7
+#define EXT4_ENCRYPTION_MODE_SPECK128_256_CTS 8
+#define EXT4_ENCRYPTION_MODE_AES_256_HEH 126
#include "ext4_crypto.h"
@@ -1441,7 +1444,7 @@ struct ext4_sb_info {
struct list_head s_es_list; /* List of inodes with reclaimable extents */
long s_es_nr_inode;
struct ext4_es_stats s_es_stats;
- struct mb_cache *s_mb_cache;
+ struct mb2_cache *s_mb_cache;
spinlock_t s_es_lock ____cacheline_aligned_in_smp;
/* Ratelimit ext4 messages. */
@@ -2270,7 +2273,7 @@ int ext4_get_policy(struct inode *inode,
/* crypto.c */
extern struct kmem_cache *ext4_crypt_info_cachep;
-bool ext4_valid_contents_enc_mode(uint32_t mode);
+bool ext4_valid_enc_modes(uint32_t contents_mode, uint32_t filenames_mode);
uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size);
extern struct workqueue_struct *ext4_read_workqueue;
struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode,
@@ -2301,7 +2304,6 @@ static inline int ext4_sb_has_crypto(struct super_block *sb)
#endif
/* crypto_fname.c */
-bool ext4_valid_filenames_enc_mode(uint32_t mode);
u32 ext4_fname_crypto_round_up(u32 size, u32 blksize);
unsigned ext4_fname_encrypted_size(struct inode *inode, u32 ilen);
int ext4_fname_crypto_alloc_buffer(struct inode *inode,
@@ -2472,7 +2474,8 @@ extern int ext4_mb_add_groupinfo(struct super_block *sb,
ext4_group_t i, struct ext4_group_desc *desc);
extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
ext4_fsblk_t block, unsigned long count);
-extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
+extern int ext4_trim_fs(struct super_block *, struct fstrim_range *,
+ unsigned long blkdev_flags);
/* inode.c */
int ext4_inode_is_fast_symlink(struct inode *inode);
diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h
index 1b17b05b9f4d..f7ba3be9d7ac 100644
--- a/fs/ext4/ext4_crypto.h
+++ b/fs/ext4/ext4_crypto.h
@@ -58,8 +58,10 @@ struct ext4_encryption_context {
#define EXT4_XTS_TWEAK_SIZE 16
#define EXT4_AES_128_ECB_KEY_SIZE 16
#define EXT4_AES_256_GCM_KEY_SIZE 32
+#define EXT4_AES_256_ECB_KEY_SIZE 32
#define EXT4_AES_256_CBC_KEY_SIZE 32
#define EXT4_AES_256_CTS_KEY_SIZE 32
+#define EXT4_AES_256_HEH_KEY_SIZE 32
#define EXT4_AES_256_XTS_KEY_SIZE 64
#define EXT4_MAX_KEY_SIZE 64
@@ -120,6 +122,12 @@ static inline int ext4_encryption_key_size(int mode)
return EXT4_AES_256_CBC_KEY_SIZE;
case EXT4_ENCRYPTION_MODE_AES_256_CTS:
return EXT4_AES_256_CTS_KEY_SIZE;
+ case EXT4_ENCRYPTION_MODE_AES_256_HEH:
+ return EXT4_AES_256_HEH_KEY_SIZE;
+ case EXT4_ENCRYPTION_MODE_SPECK128_256_XTS:
+ return 64;
+ case EXT4_ENCRYPTION_MODE_SPECK128_256_CTS:
+ return 32;
default:
BUG();
}
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index df585267d3c2..792649dad953 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -18,6 +18,7 @@
#include "ext4.h"
#include "xattr.h"
#include "truncate.h"
+#include <trace/events/android_fs.h>
#define EXT4_XATTR_SYSTEM_DATA "data"
#define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS))
@@ -502,6 +503,17 @@ int ext4_readpage_inline(struct inode *inode, struct page *page)
return -EAGAIN;
}
+ if (trace_android_fs_dataread_start_enabled()) {
+ char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+ path = android_fstrace_get_pathname(pathbuf,
+ MAX_TRACE_PATHBUF_LEN,
+ inode);
+ trace_android_fs_dataread_start(inode, page_offset(page),
+ PAGE_SIZE, current->pid,
+ path, current->comm);
+ }
+
/*
* Current inline data can only exist in the 1st page,
* So for all the other pages, just set them uptodate.
@@ -513,6 +525,8 @@ int ext4_readpage_inline(struct inode *inode, struct page *page)
SetPageUptodate(page);
}
+ trace_android_fs_dataread_end(inode, page_offset(page), PAGE_SIZE);
+
up_read(&EXT4_I(inode)->xattr_sem);
unlock_page(page);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 881601691bd4..b993fbccefa4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -44,6 +44,7 @@
#include "truncate.h"
#include <trace/events/ext4.h>
+#include <trace/events/android_fs.h>
#define MPAGE_DA_EXTENT_TAIL 0x01
@@ -1033,6 +1034,16 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
pgoff_t index;
unsigned from, to;
+ if (trace_android_fs_datawrite_start_enabled()) {
+ char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+ path = android_fstrace_get_pathname(pathbuf,
+ MAX_TRACE_PATHBUF_LEN,
+ inode);
+ trace_android_fs_datawrite_start(inode, pos, len,
+ current->pid, path,
+ current->comm);
+ }
trace_ext4_write_begin(inode, pos, len, flags);
/*
* Reserve one block more for addition to orphan list in case
@@ -1170,6 +1181,7 @@ static int ext4_write_end(struct file *file,
int i_size_changed = 0;
int inline_data = ext4_has_inline_data(inode);
+ trace_android_fs_datawrite_end(inode, pos, len);
trace_ext4_write_end(inode, pos, len, copied);
if (inline_data) {
ret = ext4_write_inline_data_end(inode, pos, len,
@@ -1275,6 +1287,7 @@ static int ext4_journalled_write_end(struct file *file,
int size_changed = 0;
int inline_data = ext4_has_inline_data(inode);
+ trace_android_fs_datawrite_end(inode, pos, len);
trace_ext4_journalled_write_end(inode, pos, len, copied);
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
@@ -2762,6 +2775,16 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
len, flags, pagep, fsdata);
}
*fsdata = (void *)0;
+ if (trace_android_fs_datawrite_start_enabled()) {
+ char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+ path = android_fstrace_get_pathname(pathbuf,
+ MAX_TRACE_PATHBUF_LEN,
+ inode);
+ trace_android_fs_datawrite_start(inode, pos, len,
+ current->pid,
+ path, current->comm);
+ }
trace_ext4_da_write_begin(inode, pos, len, flags);
if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
@@ -2880,6 +2903,7 @@ static int ext4_da_write_end(struct file *file,
return ext4_write_end(file, mapping, pos,
len, copied, page, fsdata);
+ trace_android_fs_datawrite_end(inode, pos, len);
trace_ext4_da_write_end(inode, pos, len, copied);
start = pos & (PAGE_CACHE_SIZE - 1);
end = start + copied - 1;
@@ -3375,12 +3399,42 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
if (ext4_has_inline_data(inode))
return 0;
+ if (trace_android_fs_dataread_start_enabled() &&
+ (iov_iter_rw(iter) == READ)) {
+ char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+ path = android_fstrace_get_pathname(pathbuf,
+ MAX_TRACE_PATHBUF_LEN,
+ inode);
+ trace_android_fs_dataread_start(inode, offset, count,
+ current->pid, path,
+ current->comm);
+ }
+ if (trace_android_fs_datawrite_start_enabled() &&
+ (iov_iter_rw(iter) == WRITE)) {
+ char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
+
+ path = android_fstrace_get_pathname(pathbuf,
+ MAX_TRACE_PATHBUF_LEN,
+ inode);
+ trace_android_fs_datawrite_start(inode, offset, count,
+ current->pid, path,
+ current->comm);
+ }
trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
ret = ext4_ext_direct_IO(iocb, iter, offset);
else
ret = ext4_ind_direct_IO(iocb, iter, offset);
trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret);
+
+ if (trace_android_fs_dataread_start_enabled() &&
+ (iov_iter_rw(iter) == READ))
+ trace_android_fs_dataread_end(inode, offset, count);
+ if (trace_android_fs_datawrite_start_enabled() &&
+ (iov_iter_rw(iter) == WRITE))
+ trace_android_fs_datawrite_end(inode, offset, count);
+
return ret;
}
@@ -4161,8 +4215,11 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_DIRSYNC;
if (test_opt(inode->i_sb, DAX))
new_fl |= S_DAX;
+ if (flags & EXT4_ENCRYPT_FL)
+ new_fl |= S_ENCRYPTED;
inode_set_flags(inode, new_fl,
- S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
+ S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX|
+ S_ENCRYPTED);
}
/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index d4addcc5e4f1..aca311a57abd 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -587,11 +587,13 @@ resizefs_out:
return err;
}
+ case FIDTRIM:
case FITRIM:
{
struct request_queue *q = bdev_get_queue(sb->s_bdev);
struct fstrim_range range;
int ret = 0;
+ int flags = cmd == FIDTRIM ? BLKDEV_DISCARD_SECURE : 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -599,6 +601,9 @@ resizefs_out:
if (!blk_queue_discard(q))
return -EOPNOTSUPP;
+ if ((flags & BLKDEV_DISCARD_SECURE) && !blk_queue_secdiscard(q))
+ return -EOPNOTSUPP;
+
/*
* We haven't replayed the journal, so we cannot use our
* block-bitmap-guided storage zapping commands.
@@ -612,7 +617,7 @@ resizefs_out:
range.minlen = max((unsigned int)range.minlen,
q->limits.discard_granularity);
- ret = ext4_trim_fs(sb, &range);
+ ret = ext4_trim_fs(sb, &range, flags);
if (ret < 0)
return ret;
@@ -629,9 +634,6 @@ resizefs_out:
struct ext4_encryption_policy policy;
int err = 0;
- if (!ext4_has_feature_encrypt(sb))
- return -EOPNOTSUPP;
-
if (copy_from_user(&policy,
(struct ext4_encryption_policy __user *)arg,
sizeof(policy))) {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 04fab14e630c..c80223bde667 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2795,7 +2795,8 @@ int ext4_mb_release(struct super_block *sb)
}
static inline int ext4_issue_discard(struct super_block *sb,
- ext4_group_t block_group, ext4_grpblk_t cluster, int count)
+ ext4_group_t block_group, ext4_grpblk_t cluster, int count,
+ unsigned long flags)
{
ext4_fsblk_t discard_block;
@@ -2804,7 +2805,7 @@ static inline int ext4_issue_discard(struct super_block *sb,
count = EXT4_C2B(EXT4_SB(sb), count);
trace_ext4_discard_blocks(sb,
(unsigned long long) discard_block, count);
- return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
+ return sb_issue_discard(sb, discard_block, count, GFP_NOFS, flags);
}
/*
@@ -2826,7 +2827,7 @@ static void ext4_free_data_callback(struct super_block *sb,
if (test_opt(sb, DISCARD)) {
err = ext4_issue_discard(sb, entry->efd_group,
entry->efd_start_cluster,
- entry->efd_count);
+ entry->efd_count, 0);
if (err && err != -EOPNOTSUPP)
ext4_msg(sb, KERN_WARNING, "discard request in"
" group:%d block:%d count:%d failed"
@@ -4885,7 +4886,8 @@ do_more:
* them with group lock_held
*/
if (test_opt(sb, DISCARD)) {
- err = ext4_issue_discard(sb, block_group, bit, count);
+ err = ext4_issue_discard(sb, block_group, bit, count,
+ 0);
if (err && err != -EOPNOTSUPP)
ext4_msg(sb, KERN_WARNING, "discard request in"
" group:%d block:%d count:%lu failed"
@@ -5083,13 +5085,15 @@ error_return:
* @count: number of blocks to TRIM
* @group: alloc. group we are working with
* @e4b: ext4 buddy for the group
+ * @blkdev_flags: flags for the block device
*
* Trim "count" blocks starting at "start" in the "group". To assure that no
* one will allocate those blocks, mark it as used in buddy bitmap. This must
* be called with under the group lock.
*/
static int ext4_trim_extent(struct super_block *sb, int start, int count,
- ext4_group_t group, struct ext4_buddy *e4b)
+ ext4_group_t group, struct ext4_buddy *e4b,
+ unsigned long blkdev_flags)
__releases(bitlock)
__acquires(bitlock)
{
@@ -5110,7 +5114,7 @@ __acquires(bitlock)
*/
mb_mark_used(e4b, &ex);
ext4_unlock_group(sb, group);
- ret = ext4_issue_discard(sb, group, start, count);
+ ret = ext4_issue_discard(sb, group, start, count, blkdev_flags);
ext4_lock_group(sb, group);
mb_free_blocks(NULL, e4b, start, ex.fe_len);
return ret;
@@ -5123,6 +5127,7 @@ __acquires(bitlock)
* @start: first group block to examine
* @max: last group block to examine
* @minblocks: minimum extent block count
+ * @blkdev_flags: flags for the block device
*
* ext4_trim_all_free walks through group's buddy bitmap searching for free
* extents. When the free block is found, ext4_trim_extent is called to TRIM
@@ -5137,7 +5142,7 @@ __acquires(bitlock)
static ext4_grpblk_t
ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
ext4_grpblk_t start, ext4_grpblk_t max,
- ext4_grpblk_t minblocks)
+ ext4_grpblk_t minblocks, unsigned long blkdev_flags)
{
void *bitmap;
ext4_grpblk_t next, count = 0, free_count = 0;
@@ -5170,7 +5175,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
if ((next - start) >= minblocks) {
ret = ext4_trim_extent(sb, start,
- next - start, group, &e4b);
+ next - start, group, &e4b,
+ blkdev_flags);
if (ret && ret != -EOPNOTSUPP)
break;
ret = 0;
@@ -5212,6 +5218,7 @@ out:
* ext4_trim_fs() -- trim ioctl handle function
* @sb: superblock for filesystem
* @range: fstrim_range structure
+ * @blkdev_flags: flags for the block device
*
* start: First Byte to trim
* len: number of Bytes to trim from start
@@ -5220,7 +5227,8 @@ out:
* start to start+len. For each such a group ext4_trim_all_free function
* is invoked to trim all free space.
*/
-int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range,
+ unsigned long blkdev_flags)
{
struct ext4_group_info *grp;
ext4_group_t group, first_group, last_group;
@@ -5276,7 +5284,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
if (grp->bb_free >= minlen) {
cnt = ext4_trim_all_free(sb, group, first_cluster,
- end, minlen);
+ end, minlen, blkdev_flags);
if (cnt < 0) {
ret = cnt;
break;
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index bc7642f57dc8..783e33d839cf 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -45,6 +45,7 @@
#include <linux/cleancache.h>
#include "ext4.h"
+#include <trace/events/android_fs.h>
/*
* Call ext4_decrypt on every single page, reusing the encryption
@@ -86,6 +87,17 @@ static inline bool ext4_bio_encrypted(struct bio *bio)
#endif
}
+static void
+ext4_trace_read_completion(struct bio *bio) /* emit the android_fs dataread_end tracepoint for a read bio, keyed on its first page */
+{
+ struct page *first_page = bio->bi_io_vec[0].bv_page; /* only the first bio_vec is looked at */
+
+ if (first_page != NULL) /* NOTE(review): first_page->mapping is dereferenced unchecked below - confirm it cannot be NULL here */
+ trace_android_fs_dataread_end(first_page->mapping->host,
+ page_offset(first_page),
+ bio->bi_iter.bi_size); /* NOTE(review): confirm bi_size still holds the submitted length at completion time */
+}
+
/*
* I/O completion handler for multipage BIOs.
*
@@ -103,6 +115,9 @@ static void mpage_end_io(struct bio *bio)
struct bio_vec *bv;
int i;
+ if (trace_android_fs_dataread_start_enabled())
+ ext4_trace_read_completion(bio);
+
if (ext4_bio_encrypted(bio)) {
struct ext4_crypto_ctx *ctx = bio->bi_private;
@@ -130,6 +145,30 @@ static void mpage_end_io(struct bio *bio)
bio_put(bio);
}
+static void
+ext4_submit_bio_read(struct bio *bio) /* submit a read bio, first emitting the android_fs dataread_start tracepoint when it is enabled */
+{
+ if (trace_android_fs_dataread_start_enabled()) { /* skip the pathname lookup entirely when the tracepoint is off */
+ struct page *first_page = bio->bi_io_vec[0].bv_page; /* inode and offset are taken from the bio's first page */
+
+ if (first_page != NULL) { /* NOTE(review): first_page->mapping is dereferenced unchecked below - confirm it cannot be NULL here */
+ char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; /* on-stack buffer handed to android_fstrace_get_pathname() */
+
+ path = android_fstrace_get_pathname(pathbuf,
+ MAX_TRACE_PATHBUF_LEN,
+ first_page->mapping->host);
+ trace_android_fs_dataread_start(
+ first_page->mapping->host,
+ page_offset(first_page),
+ bio->bi_iter.bi_size,
+ current->pid,
+ path,
+ current->comm);
+ }
+ }
+ submit_bio(READ, bio); /* the bio is always submitted, whether or not tracing ran */
+}
+
int ext4_mpage_readpages(struct address_space *mapping,
struct list_head *pages, struct page *page,
unsigned nr_pages)
@@ -271,7 +310,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
*/
if (bio && (last_block_in_bio != blocks[0] - 1)) {
submit_and_realloc:
- submit_bio(READ, bio);
+ ext4_submit_bio_read(bio);
bio = NULL;
}
if (bio == NULL) {
@@ -303,14 +342,14 @@ int ext4_mpage_readpages(struct address_space *mapping,
if (((map.m_flags & EXT4_MAP_BOUNDARY) &&
(relative_block == map.m_len)) ||
(first_hole != blocks_per_page)) {
- submit_bio(READ, bio);
+ ext4_submit_bio_read(bio);
bio = NULL;
} else
last_block_in_bio = blocks[blocks_per_page - 1];
goto next_page;
confused:
if (bio) {
- submit_bio(READ, bio);
+ ext4_submit_bio_read(bio);
bio = NULL;
}
if (!PageUptodate(page))
@@ -323,6 +362,6 @@ int ext4_mpage_readpages(struct address_space *mapping,
}
BUG_ON(pages && !list_empty(pages));
if (bio)
- submit_bio(READ, bio);
+ ext4_submit_bio_read(bio);
return 0;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7a5a1602ee01..089e4c4c2df5 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -820,7 +820,6 @@ static void ext4_put_super(struct super_block *sb)
ext4_release_system_zone(sb);
ext4_mb_release(sb);
ext4_ext_release(sb);
- ext4_xattr_put_super(sb);
if (!(sb->s_flags & MS_RDONLY) && !aborted) {
ext4_clear_feature_journal_needs_recovery(sb);
@@ -3978,7 +3977,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
no_journal:
if (ext4_mballoc_ready) {
- sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id);
+ sbi->s_mb_cache = ext4_xattr_create_cache();
if (!sbi->s_mb_cache) {
ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
goto failed_mount_wq;
@@ -4197,6 +4196,10 @@ failed_mount4:
if (EXT4_SB(sb)->rsv_conversion_wq)
destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
failed_mount_wq:
+ if (sbi->s_mb_cache) {
+ ext4_xattr_destroy_cache(sbi->s_mb_cache);
+ sbi->s_mb_cache = NULL;
+ }
if (sbi->s_journal) {
jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 18b9213ce0bd..157187e6e060 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -53,7 +53,7 @@
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/slab.h>
-#include <linux/mbcache.h>
+#include <linux/mbcache2.h>
#include <linux/quotaops.h>
#include "ext4_jbd2.h"
#include "ext4.h"
@@ -80,10 +80,10 @@
# define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif
-static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *);
+static void ext4_xattr_cache_insert(struct mb2_cache *, struct buffer_head *);
static struct buffer_head *ext4_xattr_cache_find(struct inode *,
struct ext4_xattr_header *,
- struct mb_cache_entry **);
+ struct mb2_cache_entry **);
static void ext4_xattr_rehash(struct ext4_xattr_header *,
struct ext4_xattr_entry *);
static int ext4_xattr_list(struct dentry *dentry, char *buffer,
@@ -297,7 +297,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
struct ext4_xattr_entry *entry;
size_t size;
int error;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
name_index, name, buffer, (long)buffer_size);
@@ -444,7 +444,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
struct inode *inode = d_inode(dentry);
struct buffer_head *bh = NULL;
int error;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
ea_idebug(inode, "buffer=%p, buffer_size=%ld",
buffer, (long)buffer_size);
@@ -561,11 +561,8 @@ static void
ext4_xattr_release_block(handle_t *handle, struct inode *inode,
struct buffer_head *bh)
{
- struct mb_cache_entry *ce = NULL;
int error = 0;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
- ce = mb_cache_entry_get(ext4_mb_cache, bh->b_bdev, bh->b_blocknr);
BUFFER_TRACE(bh, "get_write_access");
error = ext4_journal_get_write_access(handle, bh);
if (error)
@@ -573,9 +570,15 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
lock_buffer(bh);
if (BHDR(bh)->h_refcount == cpu_to_le32(1)) {
+ __u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
+
ea_bdebug(bh, "refcount now=0; freeing");
- if (ce)
- mb_cache_entry_free(ce);
+ /*
+ * This must happen under buffer lock for
+ * ext4_xattr_block_set() to reliably detect freed block
+ */
+ mb2_cache_entry_delete_block(EXT4_GET_MB_CACHE(inode), hash,
+ bh->b_blocknr);
get_bh(bh);
unlock_buffer(bh);
ext4_free_blocks(handle, inode, bh, 0, 1,
@@ -583,8 +586,6 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
EXT4_FREE_BLOCKS_FORGET);
} else {
le32_add_cpu(&BHDR(bh)->h_refcount, -1);
- if (ce)
- mb_cache_entry_release(ce);
ext4_xattr_block_csum_set(inode, bh);
/*
@@ -803,17 +804,15 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
struct super_block *sb = inode->i_sb;
struct buffer_head *new_bh = NULL;
struct ext4_xattr_search *s = &bs->s;
- struct mb_cache_entry *ce = NULL;
+ struct mb2_cache_entry *ce = NULL;
int error = 0;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
#define header(x) ((struct ext4_xattr_header *)(x))
if (i->value && i->value_len > sb->s_blocksize)
return -ENOSPC;
if (s->base) {
- ce = mb_cache_entry_get(ext4_mb_cache, bs->bh->b_bdev,
- bs->bh->b_blocknr);
BUFFER_TRACE(bs->bh, "get_write_access");
error = ext4_journal_get_write_access(handle, bs->bh);
if (error)
@@ -821,10 +820,15 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
lock_buffer(bs->bh);
if (header(s->base)->h_refcount == cpu_to_le32(1)) {
- if (ce) {
- mb_cache_entry_free(ce);
- ce = NULL;
- }
+ __u32 hash = le32_to_cpu(BHDR(bs->bh)->h_hash);
+
+ /*
+ * This must happen under buffer lock for
+ * ext4_xattr_block_set() to reliably detect modified
+ * block
+ */
+ mb2_cache_entry_delete_block(ext4_mb_cache, hash,
+ bs->bh->b_blocknr);
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s, inode);
if (!error) {
@@ -847,10 +851,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
int offset = (char *)s->here - bs->bh->b_data;
unlock_buffer(bs->bh);
- if (ce) {
- mb_cache_entry_release(ce);
- ce = NULL;
- }
ea_bdebug(bs->bh, "cloning");
s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
error = -ENOMEM;
@@ -905,6 +905,31 @@ inserted:
if (error)
goto cleanup_dquot;
lock_buffer(new_bh);
+ /*
+ * We have to be careful about races with
+ * freeing or rehashing of xattr block. Once we
+ * hold buffer lock xattr block's state is
+ * stable so we can check whether the block got
+ * freed / rehashed or not. Since we unhash
+ * mbcache entry under buffer lock when freeing
+ * / rehashing xattr block, checking whether
+ * entry is still hashed is reliable.
+ */
+ if (hlist_bl_unhashed(&ce->e_hash_list)) {
+ /*
+ * Undo everything and check mbcache
+ * again.
+ */
+ unlock_buffer(new_bh);
+ dquot_free_block(inode,
+ EXT4_C2B(EXT4_SB(sb),
+ 1));
+ brelse(new_bh);
+ mb2_cache_entry_put(ext4_mb_cache, ce);
+ ce = NULL;
+ new_bh = NULL;
+ goto inserted;
+ }
le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
ea_bdebug(new_bh, "reusing; refcount now=%d",
le32_to_cpu(BHDR(new_bh)->h_refcount));
@@ -916,7 +941,8 @@ inserted:
if (error)
goto cleanup_dquot;
}
- mb_cache_entry_release(ce);
+ mb2_cache_entry_touch(ext4_mb_cache, ce);
+ mb2_cache_entry_put(ext4_mb_cache, ce);
ce = NULL;
} else if (bs->bh && s->base == bs->bh->b_data) {
/* We were modifying this block in-place. */
@@ -983,7 +1009,7 @@ getblk_failed:
cleanup:
if (ce)
- mb_cache_entry_release(ce);
+ mb2_cache_entry_put(ext4_mb_cache, ce);
brelse(new_bh);
if (!(bs->bh && s->base == bs->bh->b_data))
kfree(s->base);
@@ -1537,17 +1563,6 @@ cleanup:
}
/*
- * ext4_xattr_put_super()
- *
- * This is called when a file system is unmounted.
- */
-void
-ext4_xattr_put_super(struct super_block *sb)
-{
- mb_cache_shrink(sb->s_bdev);
-}
-
-/*
* ext4_xattr_cache_insert()
*
* Create a new entry in the extended attribute cache, and insert
@@ -1556,28 +1571,18 @@ ext4_xattr_put_super(struct super_block *sb)
* Returns 0, or a negative error number on failure.
*/
static void
-ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
+ext4_xattr_cache_insert(struct mb2_cache *ext4_mb_cache, struct buffer_head *bh)
{
__u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
- struct mb_cache_entry *ce;
int error;
- ce = mb_cache_entry_alloc(ext4_mb_cache, GFP_NOFS);
- if (!ce) {
- ea_bdebug(bh, "out of memory");
- return;
- }
- error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
+ error = mb2_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash,
+ bh->b_blocknr);
if (error) {
- mb_cache_entry_free(ce);
- if (error == -EBUSY) {
+ if (error == -EBUSY)
ea_bdebug(bh, "already in cache");
- error = 0;
- }
- } else {
+ } else
ea_bdebug(bh, "inserting [%x]", (int)hash);
- mb_cache_entry_release(ce);
- }
}
/*
@@ -1630,26 +1635,19 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1,
*/
static struct buffer_head *
ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
- struct mb_cache_entry **pce)
+ struct mb2_cache_entry **pce)
{
__u32 hash = le32_to_cpu(header->h_hash);
- struct mb_cache_entry *ce;
- struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ struct mb2_cache_entry *ce;
+ struct mb2_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
if (!header->h_hash)
return NULL; /* never share */
ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
-again:
- ce = mb_cache_entry_find_first(ext4_mb_cache, inode->i_sb->s_bdev,
- hash);
+ ce = mb2_cache_entry_find_first(ext4_mb_cache, hash);
while (ce) {
struct buffer_head *bh;
- if (IS_ERR(ce)) {
- if (PTR_ERR(ce) == -EAGAIN)
- goto again;
- break;
- }
bh = sb_bread(inode->i_sb, ce->e_block);
if (!bh) {
EXT4_ERROR_INODE(inode, "block %lu read error",
@@ -1665,7 +1663,7 @@ again:
return bh;
}
brelse(bh);
- ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
+ ce = mb2_cache_entry_find_next(ext4_mb_cache, ce);
}
return NULL;
}
@@ -1740,15 +1738,15 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header,
#define HASH_BUCKET_BITS 10
-struct mb_cache *
-ext4_xattr_create_cache(char *name)
+struct mb2_cache *
+ext4_xattr_create_cache(void)
{
- return mb_cache_create(name, HASH_BUCKET_BITS);
+ return mb2_cache_create(HASH_BUCKET_BITS);
}
-void ext4_xattr_destroy_cache(struct mb_cache *cache)
+void ext4_xattr_destroy_cache(struct mb2_cache *cache)
{
if (cache)
- mb_cache_destroy(cache);
+ mb2_cache_destroy(cache);
}
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index c000ed398555..cdc413476241 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -140,7 +140,6 @@ extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_
extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
extern void ext4_xattr_delete_inode(handle_t *, struct inode *);
-extern void ext4_xattr_put_super(struct super_block *);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle);
@@ -156,8 +155,8 @@ extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
struct ext4_xattr_info *i,
struct ext4_xattr_ibody_find *is);
-extern struct mb_cache *ext4_xattr_create_cache(char *name);
-extern void ext4_xattr_destroy_cache(struct mb_cache *);
+extern struct mb2_cache *ext4_xattr_create_cache(void);
+extern void ext4_xattr_destroy_cache(struct mb2_cache *);
#ifdef CONFIG_EXT4_FS_SECURITY
extern int ext4_init_security(handle_t *handle, struct inode *inode,