diff options
Diffstat (limited to 'fs')
37 files changed, 1768 insertions, 116 deletions
@@ -1316,7 +1316,7 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr, SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) { struct kioctx *ioctx = NULL; - unsigned long ctx; + unsigned long ctx = 0; long ret; ret = get_user(ctx, ctxp); diff --git a/fs/buffer.c b/fs/buffer.c index 4f4cd959da7c..fc35dd27cc0f 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -621,6 +621,18 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) } EXPORT_SYMBOL(mark_buffer_dirty_inode); +#ifdef CONFIG_BLK_DEV_IO_TRACE +static inline void save_dirty_task(struct page *page) +{ + /* Save the task that is dirtying this page */ + page->tsk_dirty = current; +} +#else +static inline void save_dirty_task(struct page *page) +{ +} +#endif + /* * Mark the page dirty, and set it dirty in the radix tree, and mark the inode * dirty. @@ -641,6 +653,7 @@ static void __set_page_dirty(struct page *page, struct address_space *mapping, account_page_dirtied(page, mapping, memcg); radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); + save_dirty_task(page); } spin_unlock_irqrestore(&mapping->tree_lock, flags); } @@ -1466,12 +1479,48 @@ static bool has_bh_in_lru(int cpu, void *dummy) return 0; } +static void __evict_bh_lru(void *arg) +{ + struct bh_lru *b = &get_cpu_var(bh_lrus); + struct buffer_head *bh = arg; + int i; + + for (i = 0; i < BH_LRU_SIZE; i++) { + if (b->bhs[i] == bh) { + brelse(b->bhs[i]); + b->bhs[i] = NULL; + goto out; + } + } +out: + put_cpu_var(bh_lrus); +} + +static bool bh_exists_in_lru(int cpu, void *arg) +{ + struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu); + struct buffer_head *bh = arg; + int i; + + for (i = 0; i < BH_LRU_SIZE; i++) { + if (b->bhs[i] == bh) + return 1; + } + + return 0; + +} void invalidate_bh_lrus(void) { on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1, GFP_KERNEL); } EXPORT_SYMBOL_GPL(invalidate_bh_lrus); +static void evict_bh_lrus(struct buffer_head *bh) +{ + 
on_each_cpu_cond(bh_exists_in_lru, __evict_bh_lru, bh, 1, GFP_ATOMIC); +} + void set_bh_page(struct buffer_head *bh, struct page *page, unsigned long offset) { @@ -3192,8 +3241,15 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free) do { if (buffer_write_io_error(bh) && page->mapping) set_bit(AS_EIO, &page->mapping->flags); - if (buffer_busy(bh)) - goto failed; + if (buffer_busy(bh)) { + /* + * Check if the busy failure was due to an + * outstanding LRU reference + */ + evict_bh_lrus(bh); + if (buffer_busy(bh)) + goto failed; + } bh = bh->b_this_page; } while (bh != head); diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index dcf26537c935..b4967f7aaad0 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -871,6 +871,9 @@ COMPATIBLE_IOCTL(TIOCGPTN) COMPATIBLE_IOCTL(TIOCSPTLCK) COMPATIBLE_IOCTL(TIOCSERGETLSR) COMPATIBLE_IOCTL(TIOCSIG) +COMPATIBLE_IOCTL(TIOCPMGET) +COMPATIBLE_IOCTL(TIOCPMPUT) +COMPATIBLE_IOCTL(TIOCPMACT) #ifdef TIOCSRS485 COMPATIBLE_IOCTL(TIOCSRS485) #endif diff --git a/fs/direct-io.c b/fs/direct-io.c index 01171d8a6ee9..0f1517d0b969 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -399,6 +399,7 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) if (dio->is_async && dio->rw == READ && dio->should_dirty) bio_set_pages_dirty(bio); + bio->bi_dio_inode = dio->inode; dio->bio_bdev = bio->bi_bdev; if (sdio->submit_io) { @@ -413,6 +414,19 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) sdio->logical_offset_in_bio = 0; } +struct inode *dio_bio_get_inode(struct bio *bio) +{ + struct inode *inode = NULL; + + if (bio == NULL) + return NULL; + + inode = bio->bi_dio_inode; + + return inode; +} +EXPORT_SYMBOL(dio_bio_get_inode); + /* * Release any resources in case of a failure */ diff --git a/fs/drop_caches.c b/fs/drop_caches.c index d72d52b90433..ddf319bcfccd 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -13,7 +13,7 @@ /* A global variable is a bit ugly, but it 
keeps the code simple */ int sysctl_drop_caches; -static void drop_pagecache_sb(struct super_block *sb, void *unused) +void drop_pagecache_sb(struct super_block *sb, void *unused) { struct inode *inode, *toput_inode = NULL; diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile index 49678a69947d..c29cdd20d08a 100644 --- a/fs/ecryptfs/Makefile +++ b/fs/ecryptfs/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o -ecryptfs-y := dentry.o file.o inode.o main.o super.o mmap.o read_write.o \ +ecryptfs-y := dentry.o file.o inode.o main.o super.o mmap.o read_write.o events.o \ crypto.o keystore.o kthread.o debug.o ecryptfs-$(CONFIG_ECRYPT_FS_MESSAGING) += messaging.o miscdev.o diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 80d6901493cf..cf0186fd9bfe 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -35,6 +35,7 @@ #include <linux/scatterlist.h> #include <linux/slab.h> #include <asm/unaligned.h> +#include <linux/ecryptfs.h> #include "ecryptfs_kernel.h" #define DECRYPT 0 @@ -350,9 +351,9 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)); if (unlikely(ecryptfs_verbosity > 0)) { ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n", - crypt_stat->key_size); + ecryptfs_get_key_size_to_enc_data(crypt_stat)); ecryptfs_dump_hex(crypt_stat->key, - crypt_stat->key_size); + ecryptfs_get_key_size_to_enc_data(crypt_stat)); } init_completion(&ecr.completion); @@ -371,7 +372,7 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, /* Consider doing this once, when the file is opened */ if (!(crypt_stat->flags & ECRYPTFS_KEY_SET)) { rc = crypto_ablkcipher_setkey(crypt_stat->tfm, crypt_stat->key, - crypt_stat->key_size); + ecryptfs_get_key_size_to_enc_data(crypt_stat)); if (rc) { ecryptfs_printk(KERN_ERR, "Error setting key; rc = [%d]\n", @@ -466,6 +467,30 @@ out: return rc; } +static void init_ecryption_parameters(bool *hw_crypt, bool *cipher_supported, + 
struct ecryptfs_crypt_stat *crypt_stat) +{ + if (!hw_crypt || !cipher_supported) + return; + + *cipher_supported = false; + *hw_crypt = false; + + if (get_events() && get_events()->is_cipher_supported_cb) { + *cipher_supported = + get_events()->is_cipher_supported_cb(crypt_stat); + if (*cipher_supported) { + + /** + * we should apply external algorithm + * assume that is_hw_crypt() callback is supplied + */ + if (get_events()->is_hw_crypt_cb) + *hw_crypt = get_events()->is_hw_crypt_cb(); + } + } +} + /** * ecryptfs_encrypt_page * @page: Page mapped from the eCryptfs inode for the file; contains @@ -491,11 +516,18 @@ int ecryptfs_encrypt_page(struct page *page) loff_t extent_offset; loff_t lower_offset; int rc = 0; + bool is_hw_crypt; + bool is_cipher_supported; + ecryptfs_inode = page->mapping->host; crypt_stat = &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)); + + init_ecryption_parameters(&is_hw_crypt, + &is_cipher_supported, crypt_stat); + enc_extent_page = alloc_page(GFP_USER); if (!enc_extent_page) { rc = -ENOMEM; @@ -503,24 +535,51 @@ int ecryptfs_encrypt_page(struct page *page) "encrypted extent\n"); goto out; } - - for (extent_offset = 0; - extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); - extent_offset++) { - rc = crypt_extent(crypt_stat, enc_extent_page, page, - extent_offset, ENCRYPT); - if (rc) { - printk(KERN_ERR "%s: Error encrypting extent; " - "rc = [%d]\n", __func__, rc); - goto out; + if (is_hw_crypt) { + /* no need for encryption */ + } else { + for (extent_offset = 0; + extent_offset < + (PAGE_CACHE_SIZE / crypt_stat->extent_size); + extent_offset++) { + + if (is_cipher_supported) { + if (!get_events()->encrypt_cb) { + rc = -EPERM; + goto out; + } + rc = get_events()->encrypt_cb(page, + enc_extent_page, + ecryptfs_inode_to_lower( + ecryptfs_inode), + extent_offset); + } else { + rc = crypt_extent(crypt_stat, + enc_extent_page, page, + extent_offset, ENCRYPT); + } + if (rc) { +
ecryptfs_printk(KERN_ERR, + "%s: Error encrypting; rc = [%d]\n", + __func__, rc); + goto out; + } } } lower_offset = lower_offset_for_page(crypt_stat, page); - enc_extent_virt = kmap(enc_extent_page); + if (is_hw_crypt) + enc_extent_virt = kmap(page); + else + enc_extent_virt = kmap(enc_extent_page); + rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, lower_offset, PAGE_CACHE_SIZE); - kunmap(enc_extent_page); + if (!is_hw_crypt) + kunmap(enc_extent_page); + else + kunmap(page); + if (rc < 0) { ecryptfs_printk(KERN_ERR, "Error attempting to write lower page; rc = [%d]\n", @@ -559,6 +618,8 @@ int ecryptfs_decrypt_page(struct page *page) unsigned long extent_offset; loff_t lower_offset; int rc = 0; + bool is_cipher_supported; + bool is_hw_crypt; ecryptfs_inode = page->mapping->host; crypt_stat = @@ -577,13 +638,33 @@ int ecryptfs_decrypt_page(struct page *page) goto out; } + init_ecryption_parameters(&is_hw_crypt, + &is_cipher_supported, crypt_stat); + + if (is_hw_crypt) { + rc = 0; + return rc; + } + for (extent_offset = 0; extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); extent_offset++) { - rc = crypt_extent(crypt_stat, page, page, + if (is_cipher_supported) { + if (!get_events()->decrypt_cb) { + rc = -EPERM; + goto out; + } + + rc = get_events()->decrypt_cb(page, page, + ecryptfs_inode_to_lower(ecryptfs_inode), + extent_offset); + + } else + rc = crypt_extent(crypt_stat, page, page, extent_offset, DECRYPT); + if (rc) { - printk(KERN_ERR "%s: Error encrypting extent; " + ecryptfs_printk(KERN_ERR, "%s: Error decrypting extent;" "rc = [%d]\n", __func__, rc); goto out; } @@ -612,7 +693,7 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat) "Initializing cipher [%s]; strlen = [%d]; " "key_size_bits = [%zd]\n", crypt_stat->cipher, (int)strlen(crypt_stat->cipher), - crypt_stat->key_size << 3); + ecryptfs_get_key_size_to_enc_data(crypt_stat) << 3); mutex_lock(&crypt_stat->cs_tfm_mutex); if (crypt_stat->tfm) { rc = 0; @@ -694,7 +775,7 
@@ int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat) goto out; } rc = ecryptfs_calculate_md5(dst, crypt_stat, crypt_stat->key, - crypt_stat->key_size); + ecryptfs_get_key_size_to_enc_data(crypt_stat)); if (rc) { ecryptfs_printk(KERN_WARNING, "Error attempting to compute " "MD5 while generating root IV\n"); @@ -721,6 +802,31 @@ static void ecryptfs_generate_new_key(struct ecryptfs_crypt_stat *crypt_stat) } } +static int ecryptfs_generate_new_salt(struct ecryptfs_crypt_stat *crypt_stat) +{ + size_t salt_size = 0; + + salt_size = ecryptfs_get_salt_size_for_cipher(crypt_stat); + + if (0 == salt_size) + return 0; + + if (!ecryptfs_check_space_for_salt(crypt_stat->key_size, salt_size)) { + ecryptfs_printk(KERN_WARNING, "not enough space for salt\n"); + crypt_stat->flags |= ECRYPTFS_SECURITY_WARNING; + return -EINVAL; + } + + get_random_bytes(crypt_stat->key + crypt_stat->key_size, salt_size); + if (unlikely(ecryptfs_verbosity > 0)) { + ecryptfs_printk(KERN_DEBUG, "Generated new session salt:\n"); + ecryptfs_dump_hex(crypt_stat->key + crypt_stat->key_size, + salt_size); + } + + return 0; +} + /** * ecryptfs_copy_mount_wide_flags_to_inode_flags * @crypt_stat: The inode's cryptographic context @@ -823,7 +929,6 @@ int ecryptfs_new_file_context(struct inode *ecryptfs_inode) struct ecryptfs_mount_crypt_stat *mount_crypt_stat = &ecryptfs_superblock_to_private( ecryptfs_inode->i_sb)->mount_crypt_stat; - int cipher_name_len; int rc = 0; ecryptfs_set_default_crypt_stat_vals(crypt_stat, mount_crypt_stat); @@ -837,15 +942,19 @@ int ecryptfs_new_file_context(struct inode *ecryptfs_inode) "to the inode key sigs; rc = [%d]\n", rc); goto out; } - cipher_name_len = - strlen(mount_crypt_stat->global_default_cipher_name); - memcpy(crypt_stat->cipher, + strlcpy(crypt_stat->cipher, mount_crypt_stat->global_default_cipher_name, - cipher_name_len); - crypt_stat->cipher[cipher_name_len] = '\0'; + sizeof(crypt_stat->cipher)); + + strlcpy(crypt_stat->cipher_mode, + 
mount_crypt_stat->global_default_cipher_mode, + sizeof(crypt_stat->cipher_mode)); + crypt_stat->key_size = mount_crypt_stat->global_default_cipher_key_size; ecryptfs_generate_new_key(crypt_stat); + ecryptfs_generate_new_salt(crypt_stat); + rc = ecryptfs_init_crypt_ctx(crypt_stat); if (rc) ecryptfs_printk(KERN_ERR, "Error initializing cryptographic " @@ -971,7 +1080,8 @@ ecryptfs_cipher_code_str_map[] = { {"twofish", RFC2440_CIPHER_TWOFISH}, {"cast6", RFC2440_CIPHER_CAST_6}, {"aes", RFC2440_CIPHER_AES_192}, - {"aes", RFC2440_CIPHER_AES_256} + {"aes", RFC2440_CIPHER_AES_256}, + {"aes_xts", RFC2440_CIPHER_AES_XTS_256} }; /** @@ -999,6 +1109,11 @@ u8 ecryptfs_code_for_cipher_string(char *cipher_name, size_t key_bytes) case 32: code = RFC2440_CIPHER_AES_256; } + } else if (strcmp(cipher_name, "aes_xts") == 0) { + switch (key_bytes) { + case 32: + code = RFC2440_CIPHER_AES_XTS_256; + } } else { for (i = 0; i < ARRAY_SIZE(ecryptfs_cipher_code_str_map); i++) if (strcmp(cipher_name, map[i].cipher_str) == 0) { @@ -1038,9 +1153,24 @@ int ecryptfs_read_and_validate_header_region(struct inode *inode) u8 file_size[ECRYPTFS_SIZE_AND_MARKER_BYTES]; u8 *marker = file_size + ECRYPTFS_FILE_SIZE_BYTES; int rc; + unsigned int ra_pages_org; + struct file *lower_file = NULL; + + if (!inode) + return -EIO; + lower_file = ecryptfs_inode_to_private(inode)->lower_file; + if (!lower_file) + return -EIO; + + /* disable the read-ahead mechanism for a while */ + ra_pages_org = lower_file->f_ra.ra_pages; + lower_file->f_ra.ra_pages = 0; rc = ecryptfs_read_lower(file_size, 0, ECRYPTFS_SIZE_AND_MARKER_BYTES, inode); + lower_file->f_ra.ra_pages = ra_pages_org; + /* restore the read-ahead mechanism */ + if (rc < ECRYPTFS_SIZE_AND_MARKER_BYTES) return rc >= 0 ?
-EINVAL : rc; rc = ecryptfs_validate_marker(marker); @@ -1430,6 +1560,11 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) struct ecryptfs_mount_crypt_stat *mount_crypt_stat = &ecryptfs_superblock_to_private( ecryptfs_dentry->d_sb)->mount_crypt_stat; + unsigned int ra_pages_org; + struct file *lower_file = + ecryptfs_inode_to_private(ecryptfs_inode)->lower_file; + if (!lower_file) + return -EIO; ecryptfs_copy_mount_wide_flags_to_inode_flags(crypt_stat, mount_crypt_stat); @@ -1441,8 +1576,14 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) __func__); goto out; } + /* disable the read-ahead mechanism */ + ra_pages_org = lower_file->f_ra.ra_pages; + lower_file->f_ra.ra_pages = 0; + rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size, ecryptfs_inode); + lower_file->f_ra.ra_pages = ra_pages_org; /* restore it back */ + if (rc >= 0) rc = ecryptfs_read_headers_virt(page_virt, crypt_stat, ecryptfs_dentry, diff --git a/fs/ecryptfs/debug.c b/fs/ecryptfs/debug.c index 3d2bdf546ec6..0556af1adfb7 100644 --- a/fs/ecryptfs/debug.c +++ b/fs/ecryptfs/debug.c @@ -119,3 +119,32 @@ void ecryptfs_dump_hex(char *data, int bytes) printk("\n"); } +void ecryptfs_dump_salt_hex(char *data, int key_size, + const struct ecryptfs_crypt_stat *crypt_stat) +{ + size_t salt_size = ecryptfs_get_salt_size_for_cipher(crypt_stat); + + if (0 == salt_size) + return; + + if (!ecryptfs_check_space_for_salt(key_size, salt_size)) + return; + + ecryptfs_printk(KERN_DEBUG, "Decrypted session salt key:\n"); + ecryptfs_dump_hex(data + key_size, salt_size); +} + +void ecryptfs_dump_cipher(struct ecryptfs_crypt_stat *stat) +{ + if (!stat) + return; + + if (stat->cipher) + ecryptfs_printk(KERN_DEBUG, + "ecryptfs cipher is %s\n", stat->cipher); + + if (stat->cipher_mode) + ecryptfs_printk(KERN_DEBUG, "ecryptfs cipher mode is %s\n", + stat->cipher_mode); + +} diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 7b39260c7bba..89e7aa5f178a 100644 ---
a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -244,6 +244,7 @@ struct ecryptfs_crypt_stat { struct mutex cs_tfm_mutex; struct mutex cs_hash_tfm_mutex; struct mutex cs_mutex; + unsigned char cipher_mode[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1]; }; /* inode private data. */ @@ -344,6 +345,8 @@ struct ecryptfs_mount_crypt_stat { unsigned char global_default_fn_cipher_name[ ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1]; char global_default_fnek_sig[ECRYPTFS_SIG_SIZE_HEX + 1]; + unsigned char global_default_cipher_mode[ECRYPTFS_MAX_CIPHER_NAME_SIZE + + 1]; }; /* superblock private data. */ @@ -526,6 +529,53 @@ ecryptfs_dentry_to_lower_path(struct dentry *dentry) return &((struct ecryptfs_dentry_info *)dentry->d_fsdata)->lower_path; } +/** + * Given a cipher and mode strings, the function + * concatenates them to create a new string of + * <cipher>_<mode> format. + */ +static inline unsigned char *ecryptfs_get_full_cipher( + unsigned char *cipher, unsigned char *mode, + unsigned char *final, size_t final_size) +{ + memset(final, 0, final_size); + + if (strlen(mode) > 0) { + snprintf(final, final_size, "%s_%s", cipher, mode); + return final; + } + + return cipher; +} + +/** + * Given a <cipher>[_<mode>] formatted string, the function + * extracts cipher string and/or mode string. + * Note: the passed cipher and/or mode strings will be null-terminated. + */ +static inline void ecryptfs_parse_full_cipher( + char *s, char *cipher, char *mode) +{ + char input[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1+1]; + /* +1 for '_'; +1 for '\0' */ + char *p; + char *input_p = input; + + if (s == NULL || cipher == NULL) + return; + + memset(input, 0, sizeof(input)); + strlcpy(input, s, sizeof(input)); + + p = strsep(&input_p, "_"); + strlcpy(cipher, p, ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1); + + + /* check if mode is specified */ + if (input_p != NULL && mode != NULL) + strlcpy(mode, input_p, ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1); +} + #define ecryptfs_printk(type, fmt, arg...) 
\ __ecryptfs_printk(type "%s: " fmt, __func__, ## arg); __printf(1, 2) @@ -574,6 +624,10 @@ int ecryptfs_encrypt_and_encode_filename( const char *name, size_t name_size); struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry); void ecryptfs_dump_hex(char *data, int bytes); +void ecryptfs_dump_salt_hex(char *data, int key_size, + const struct ecryptfs_crypt_stat *crypt_stat); +extern void ecryptfs_dump_cipher(struct ecryptfs_crypt_stat *stat); + int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg, int sg_size); int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat); @@ -717,4 +771,33 @@ int ecryptfs_set_f_namelen(long *namelen, long lower_namelen, int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat, loff_t offset); +void clean_inode_pages(struct address_space *mapping, + pgoff_t start, pgoff_t end); + +void ecryptfs_drop_pagecache_sb(struct super_block *sb, void *unused); + +void ecryptfs_free_events(void); + +void ecryptfs_freepage(struct page *page); + +struct ecryptfs_events *get_events(void); + +size_t ecryptfs_get_salt_size_for_cipher( + const struct ecryptfs_crypt_stat *crypt_stat); + +size_t ecryptfs_get_salt_size_for_cipher_mount( + const struct ecryptfs_mount_crypt_stat *mount_crypt_stat); + +size_t ecryptfs_get_key_size_to_enc_data( + const struct ecryptfs_crypt_stat *crypt_stat); + +size_t ecryptfs_get_key_size_to_store_key( + const struct ecryptfs_crypt_stat *crypt_stat); + +size_t ecryptfs_get_key_size_to_restore_key(size_t stored_key_size, + const struct ecryptfs_crypt_stat *crypt_stat); + +bool ecryptfs_check_space_for_salt(const size_t key_size, + const size_t salt_size); + #endif /* #ifndef ECRYPTFS_KERNEL_H */ diff --git a/fs/ecryptfs/events.c b/fs/ecryptfs/events.c new file mode 100644 index 000000000000..12e26c683cf6 --- /dev/null +++ b/fs/ecryptfs/events.c @@ -0,0 +1,393 @@ +/** + * eCryptfs: Linux filesystem encryption layer + * Copyright (c) 2015-2016, The Linux Foundation. 
All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/string.h> +#include <linux/ecryptfs.h> +#include <linux/mutex.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/pagemap.h> +#include <linux/random.h> +#include "ecryptfs_kernel.h" + +static DEFINE_MUTEX(events_mutex); +struct ecryptfs_events *events_ptr = NULL; +static int handle; + +void ecryptfs_free_events(void) +{ + mutex_lock(&events_mutex); + if (events_ptr != NULL) { + kfree(events_ptr); + events_ptr = NULL; + } + + mutex_unlock(&events_mutex); +} + +/** + * Register to ecryptfs events, by passing callback + * functions to be called upon events occurrence. + * The function returns a handle to be passed + * to unregister function.
+ */ +int ecryptfs_register_to_events(const struct ecryptfs_events *ops) +{ + int ret_value = 0; + + if (!ops) + return -EINVAL; + + mutex_lock(&events_mutex); + + if (events_ptr != NULL) { + ecryptfs_printk(KERN_ERR, + "already registered!\n"); + ret_value = -EPERM; + goto out; + } + events_ptr = + kzalloc(sizeof(struct ecryptfs_events), GFP_KERNEL); + + if (!events_ptr) { + ecryptfs_printk(KERN_ERR, "malloc failure\n"); + ret_value = -ENOMEM; + goto out; + } + /* copy the callbacks */ + events_ptr->open_cb = ops->open_cb; + events_ptr->release_cb = ops->release_cb; + events_ptr->encrypt_cb = ops->encrypt_cb; + events_ptr->decrypt_cb = ops->decrypt_cb; + events_ptr->is_cipher_supported_cb = + ops->is_cipher_supported_cb; + events_ptr->is_hw_crypt_cb = ops->is_hw_crypt_cb; + events_ptr->get_salt_key_size_cb = ops->get_salt_key_size_cb; + + get_random_bytes(&handle, sizeof(handle)); + ret_value = handle; + +out: + mutex_unlock(&events_mutex); + return ret_value; +} + +/** + * Unregister from ecryptfs events. + */ +int ecryptfs_unregister_from_events(int user_handle) +{ + int ret_value = 0; + + mutex_lock(&events_mutex); + + if (!events_ptr) { + ret_value = -EINVAL; + goto out; + } + if (user_handle != handle) { + ret_value = ECRYPTFS_INVALID_EVENTS_HANDLE; + goto out; + } + + kfree(events_ptr); + events_ptr = NULL; + +out: + mutex_unlock(&events_mutex); + return ret_value; +} + +/** + * This function decides whether the passed file offset + * belongs to ecryptfs metadata or not. + * The caller must pass ecryptfs data, which was received in one + * of the callback invocations. 
+ */ +bool ecryptfs_is_page_in_metadata(const void *data, pgoff_t offset) +{ + + struct ecryptfs_crypt_stat *stat = NULL; + bool ret = true; + + if (!data) { + ecryptfs_printk(KERN_ERR, "ecryptfs_is_page_in_metadata: invalid data parameter\n"); + ret = false; + goto end; + } + stat = (struct ecryptfs_crypt_stat *)data; + + if (stat->flags & ECRYPTFS_METADATA_IN_XATTR) { + ret = false; + goto end; + } + + if (offset >= (stat->metadata_size/PAGE_CACHE_SIZE)) { + ret = false; + goto end; + } +end: + return ret; +} + +/** + * Given two ecryptfs data, the function + * decides whether they are equal. + */ +inline bool ecryptfs_is_data_equal(const void *data1, const void *data2) +{ + /* pointer comparison*/ + return data1 == data2; +} + +/** + * Given ecryptfs data, the function + * returns appropriate key size. + */ +size_t ecryptfs_get_key_size(const void *data) +{ + + struct ecryptfs_crypt_stat *stat = NULL; + + if (!data) + return 0; + + stat = (struct ecryptfs_crypt_stat *)data; + return stat->key_size; +} + +/** + * Given ecryptfs data, the function + * returns appropriate salt size. + * + * !!! crypt_stat cipher name and mode must be initialized + */ +size_t ecryptfs_get_salt_size(const void *data) +{ + if (!data) { + ecryptfs_printk(KERN_ERR, + "ecryptfs_get_salt_size: invalid data parameter\n"); + return 0; + } + + return ecryptfs_get_salt_size_for_cipher(data); + +} + +/** + * Given ecryptfs data and cipher string, the function + * returns true if provided cipher and the one in ecryptfs match. 
+ */ +bool ecryptfs_cipher_match(const void *data, + const unsigned char *cipher, size_t cipher_size) +{ + unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1]; + const unsigned char *ecryptfs_cipher = NULL; + struct ecryptfs_crypt_stat *stat = NULL; + + if (!data || !cipher) { + ecryptfs_printk(KERN_ERR, + "ecryptfs_get_cipher: invalid data parameter\n"); + return false; + } + + if (!cipher_size || cipher_size > sizeof(final)) { + ecryptfs_printk(KERN_ERR, + "ecryptfs_get_cipher: cipher_size\n"); + return false; + } + + stat = (struct ecryptfs_crypt_stat *)data; + ecryptfs_cipher = ecryptfs_get_full_cipher(stat->cipher, + stat->cipher_mode, + final, sizeof(final)); + + if (!ecryptfs_cipher) { + ecryptfs_printk(KERN_ERR, + "ecryptfs_get_cipher: internal error while parsing cipher\n"); + return false; + } + + if (strcmp(ecryptfs_cipher, cipher)) { + if (ecryptfs_verbosity > 0) + ecryptfs_dump_cipher(stat); + + return false; + } + + return true; +} + +/** + * Given ecryptfs data, the function + * returns file encryption key. + */ +const unsigned char *ecryptfs_get_key(const void *data) +{ + + struct ecryptfs_crypt_stat *stat = NULL; + + if (!data) { + ecryptfs_printk(KERN_ERR, + "ecryptfs_get_key: invalid data parameter\n"); + return NULL; + } + stat = (struct ecryptfs_crypt_stat *)data; + return stat->key; +} + +/** + * Given ecryptfs data, the function + * returns file encryption salt. 
+ */ +const unsigned char *ecryptfs_get_salt(const void *data) +{ + struct ecryptfs_crypt_stat *stat = NULL; + + if (!data) { + ecryptfs_printk(KERN_ERR, + "ecryptfs_get_salt: invalid data parameter\n"); + return NULL; + } + stat = (struct ecryptfs_crypt_stat *)data; + return stat->key + ecryptfs_get_salt_size(data); +} + +/** + * Returns ecryptfs events pointer + */ +inline struct ecryptfs_events *get_events(void) +{ + return events_ptr; +} + +/** + * If external crypto module requires salt in addition to key, + * we store it as part of key array (if there is enough space) + * Checks whether a salt key can fit into array allocated for + * regular key + */ +bool ecryptfs_check_space_for_salt(const size_t key_size, + const size_t salt_size) +{ + if ((salt_size + key_size) > ECRYPTFS_MAX_KEY_BYTES) + return false; + + return true; +} + +/* + * If there is salt that is used by external crypto module, it is stored + * in the same array where regular key is. Salt is going to be used by + * external crypto module only, so for all internal crypto operations salt + * should be ignored. + * + * Get key size in cases where it is going to be used for data encryption + * or for all other general purposes + */ +size_t ecryptfs_get_key_size_to_enc_data( + const struct ecryptfs_crypt_stat *crypt_stat) +{ + if (!crypt_stat) + return 0; + + return crypt_stat->key_size; +} + +/* + * If there is salt that is used by external crypto module, it is stored + * in the same array where regular key is. Salt is going to be used by + * external crypto module only, but we still need to save and restore it + * (in encrypted form) as part of ecryptfs header along with the regular + * key. + * + * Get key size in cases where it is going to be stored persistently + * + * !!! 
crypt_stat cipher name and mode must be initialized + */ +size_t ecryptfs_get_key_size_to_store_key( + const struct ecryptfs_crypt_stat *crypt_stat) +{ + size_t salt_size = 0; + + if (!crypt_stat) + return 0; + + salt_size = ecryptfs_get_salt_size(crypt_stat); + + if (!ecryptfs_check_space_for_salt(crypt_stat->key_size, salt_size)) { + ecryptfs_printk(KERN_WARNING, + "ecryptfs_get_key_size_to_store_key: not enough space for salt\n"); + return crypt_stat->key_size; + } + + return crypt_stat->key_size + salt_size; +} + +/* + * If there is salt that is used by external crypto module, it is stored + * in the same array where regular key is. Salt is going to be used by + * external crypto module only, but we still need to save and restore it + * (in encrypted form) as part of ecryptfs header along with the regular + * key. + * + * Get key size in cases where it is going to be restored from storage + * + * !!! crypt_stat cipher name and mode must be initialized + */ +size_t ecryptfs_get_key_size_to_restore_key(size_t stored_key_size, + const struct ecryptfs_crypt_stat *crypt_stat) +{ + size_t salt_size = 0; + + if (!crypt_stat) + return 0; + + salt_size = ecryptfs_get_salt_size_for_cipher(crypt_stat); + + if (salt_size >= stored_key_size) { + ecryptfs_printk(KERN_WARNING, + "ecryptfs_get_key_size_to_restore_key: salt %zu >= stred size %zu\n", + salt_size, stored_key_size); + + return stored_key_size; + } + + return stored_key_size - salt_size; +} + +/** + * Given crypt_stat, the function returns appropriate salt size. + */ +size_t ecryptfs_get_salt_size_for_cipher( + const struct ecryptfs_crypt_stat *crypt_stat) +{ + if (!get_events() || !(get_events()->get_salt_key_size_cb)) + return 0; + + return get_events()->get_salt_key_size_cb(crypt_stat); +} + +/** + * Given mount_crypt_stat, the function returns appropriate salt size. 
+ */ +size_t ecryptfs_get_salt_size_for_cipher_mount( + const struct ecryptfs_mount_crypt_stat *crypt_stat) +{ + if (!get_events() || !(get_events()->get_salt_key_size_cb)) + return 0; + + return get_events()->get_salt_key_size_cb(crypt_stat); +} + diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index feef8a9c4de7..8c536c02e295 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -31,6 +31,7 @@ #include <linux/security.h> #include <linux/compat.h> #include <linux/fs_stack.h> +#include <linux/ecryptfs.h> #include "ecryptfs_kernel.h" /** @@ -184,6 +185,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file) int rc = 0; struct ecryptfs_crypt_stat *crypt_stat = NULL; struct dentry *ecryptfs_dentry = file->f_path.dentry; + int ret; + + /* Private value of ecryptfs_dentry allocated in * ecryptfs_lookup() */ struct ecryptfs_file_info *file_info; @@ -231,12 +235,31 @@ static int ecryptfs_open(struct inode *inode, struct file *file) rc = 0; goto out; } + rc = read_or_initialize_metadata(ecryptfs_dentry); if (rc) goto out_put; ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = " "[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino, (unsigned long long)i_size_read(inode)); + + if (get_events() && get_events()->open_cb) { + + ret = vfs_fsync(file, false); + + if (ret) + ecryptfs_printk(KERN_ERR, + "failed to sync file ret = %d.\n", ret); + + get_events()->open_cb(ecryptfs_inode_to_lower(inode), + crypt_stat); + + if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) { + truncate_inode_pages(inode->i_mapping, 0); + truncate_inode_pages( + ecryptfs_inode_to_lower(inode)->i_mapping, 0); + } + } goto out; out_put: ecryptfs_put_lower_file(inode); @@ -264,6 +287,7 @@ static int ecryptfs_release(struct inode *inode, struct file *file) ecryptfs_put_lower_file(inode); kmem_cache_free(ecryptfs_file_info_cache, ecryptfs_file_to_private(file)); + return 0; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index e2e47ba5d313..cb3ecf442d96 100644 
--- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -261,12 +261,15 @@ out: * * Returns zero on success; non-zero on error condition */ + + static int ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, umode_t mode, bool excl) { struct inode *ecryptfs_inode; int rc; + struct ecryptfs_crypt_stat *crypt_stat; ecryptfs_inode = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode); @@ -276,6 +279,7 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, rc = PTR_ERR(ecryptfs_inode); goto out; } + /* At this point, a file exists on "disk"; we need to make sure * that this on disk file is prepared to be an ecryptfs file */ rc = ecryptfs_initialize_file(ecryptfs_dentry, ecryptfs_inode); @@ -288,6 +292,13 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, goto out; } unlock_new_inode(ecryptfs_inode); + + crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; + if (get_events() && get_events()->open_cb) + get_events()->open_cb( + ecryptfs_inode_to_lower(ecryptfs_inode), + crypt_stat); + d_instantiate(ecryptfs_dentry, ecryptfs_inode); out: return rc; diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 6bd67e2011f0..8319b776a461 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -315,7 +315,8 @@ write_tag_66_packet(char *signature, u8 cipher_code, * | File Encryption Key Size | 1 or 2 bytes | * | File Encryption Key | arbitrary | */ - data_len = (5 + ECRYPTFS_SIG_SIZE_HEX + crypt_stat->key_size); + data_len = (5 + ECRYPTFS_SIG_SIZE_HEX + + ecryptfs_get_key_size_to_store_key(crypt_stat)); *packet = kmalloc(data_len, GFP_KERNEL); message = *packet; if (!message) { @@ -335,8 +336,9 @@ write_tag_66_packet(char *signature, u8 cipher_code, memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX); i += ECRYPTFS_SIG_SIZE_HEX; /* The encrypted key includes 1 byte cipher code and 2 byte checksum */ - rc = ecryptfs_write_packet_length(&message[i], 
crypt_stat->key_size + 3, - &packet_size_len); + rc = ecryptfs_write_packet_length(&message[i], + ecryptfs_get_key_size_to_store_key(crypt_stat) + 3, + &packet_size_len); if (rc) { ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet " "header; cannot generate packet length\n"); @@ -344,9 +346,10 @@ write_tag_66_packet(char *signature, u8 cipher_code, } i += packet_size_len; message[i++] = cipher_code; - memcpy(&message[i], crypt_stat->key, crypt_stat->key_size); - i += crypt_stat->key_size; - for (j = 0; j < crypt_stat->key_size; j++) + memcpy(&message[i], crypt_stat->key, + ecryptfs_get_key_size_to_store_key(crypt_stat)); + i += ecryptfs_get_key_size_to_store_key(crypt_stat); + for (j = 0; j < ecryptfs_get_key_size_to_store_key(crypt_stat); j++) checksum += crypt_stat->key[j]; message[i++] = (checksum / 256) % 256; message[i++] = (checksum % 256); @@ -918,6 +921,7 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size, struct ecryptfs_parse_tag_70_packet_silly_stack *s; struct key *auth_tok_key = NULL; int rc = 0; + char full_cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE]; (*packet_size) = 0; (*filename_size) = 0; @@ -977,12 +981,13 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size, s->fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX] = '\0'; (*packet_size) += ECRYPTFS_SIG_SIZE; s->cipher_code = data[(*packet_size)++]; - rc = ecryptfs_cipher_code_to_string(s->cipher_string, s->cipher_code); + rc = ecryptfs_cipher_code_to_string(full_cipher, s->cipher_code); if (rc) { printk(KERN_WARNING "%s: Cipher code [%d] is invalid\n", __func__, s->cipher_code); goto out; } + ecryptfs_parse_full_cipher(full_cipher, s->cipher_string, 0); rc = ecryptfs_find_auth_tok_for_sig(&auth_tok_key, &s->auth_tok, mount_crypt_stat, s->fnek_sig_hex); @@ -1151,6 +1156,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, char *payload = NULL; size_t payload_len = 0; int rc; + char full_cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE]; rc = 
ecryptfs_get_auth_tok_sig(&auth_tok_sig, auth_tok); if (rc) { @@ -1184,21 +1190,31 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, rc); goto out; } - auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY; - memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key, - auth_tok->session_key.decrypted_key_size); - crypt_stat->key_size = auth_tok->session_key.decrypted_key_size; - rc = ecryptfs_cipher_code_to_string(crypt_stat->cipher, cipher_code); + + rc = ecryptfs_cipher_code_to_string(full_cipher, cipher_code); if (rc) { ecryptfs_printk(KERN_ERR, "Cipher code [%d] is invalid\n", cipher_code) - goto out; + goto out; } + + auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY; + memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key, + auth_tok->session_key.decrypted_key_size); + crypt_stat->key_size = ecryptfs_get_key_size_to_restore_key( + auth_tok->session_key.decrypted_key_size, crypt_stat); + + ecryptfs_parse_full_cipher(full_cipher, + crypt_stat->cipher, crypt_stat->cipher_mode); + crypt_stat->flags |= ECRYPTFS_KEY_VALID; if (ecryptfs_verbosity > 0) { ecryptfs_printk(KERN_DEBUG, "Decrypted session key:\n"); ecryptfs_dump_hex(crypt_stat->key, crypt_stat->key_size); + + ecryptfs_dump_salt_hex(crypt_stat->key, crypt_stat->key_size, + crypt_stat); } out: kfree(msg); @@ -1380,6 +1396,7 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, struct ecryptfs_auth_tok_list_item *auth_tok_list_item; size_t length_size; int rc = 0; + char full_cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE]; (*packet_size) = 0; (*new_auth_tok) = NULL; @@ -1453,10 +1470,13 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, rc = -EINVAL; goto out_free; } - rc = ecryptfs_cipher_code_to_string(crypt_stat->cipher, + rc = ecryptfs_cipher_code_to_string(full_cipher, (u16)data[(*packet_size)]); if (rc) goto out_free; + ecryptfs_parse_full_cipher(full_cipher, + crypt_stat->cipher, crypt_stat->cipher_mode); + /* A little extra work to 
differentiate among the AES key * sizes; see RFC2440 */ switch(data[(*packet_size)++]) { @@ -1465,7 +1485,10 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat, break; default: crypt_stat->key_size = - (*new_auth_tok)->session_key.encrypted_key_size; + ecryptfs_get_key_size_to_restore_key( + (*new_auth_tok)->session_key.encrypted_key_size, + crypt_stat); + } rc = ecryptfs_init_crypt_ctx(crypt_stat); if (rc) @@ -1713,7 +1736,7 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, mutex_lock(tfm_mutex); rc = crypto_blkcipher_setkey( desc.tfm, auth_tok->token.password.session_key_encryption_key, - crypt_stat->key_size); + auth_tok->token.password.session_key_encryption_key_bytes); if (unlikely(rc < 0)) { mutex_unlock(tfm_mutex); printk(KERN_ERR "Error setting key for crypto context\n"); @@ -1736,6 +1759,8 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, crypt_stat->key_size); ecryptfs_dump_hex(crypt_stat->key, crypt_stat->key_size); + ecryptfs_dump_salt_hex(crypt_stat->key, crypt_stat->key_size, + crypt_stat); } out: return rc; @@ -1972,12 +1997,17 @@ pki_encrypt_session_key(struct key *auth_tok_key, size_t payload_len = 0; struct ecryptfs_message *msg; int rc; + unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1]; rc = write_tag_66_packet(auth_tok->token.private_key.signature, - ecryptfs_code_for_cipher_string( - crypt_stat->cipher, - crypt_stat->key_size), - crypt_stat, &payload, &payload_len); + ecryptfs_code_for_cipher_string( + ecryptfs_get_full_cipher( + crypt_stat->cipher, + crypt_stat->cipher_mode, + final, sizeof(final)), + ecryptfs_get_key_size_to_enc_data( + crypt_stat)), + crypt_stat, &payload, &payload_len); up_write(&(auth_tok_key->sem)); key_put(auth_tok_key); if (rc) { @@ -2035,7 +2065,7 @@ write_tag_1_packet(char *dest, size_t *remaining_bytes, ecryptfs_from_hex(key_rec->sig, auth_tok->token.private_key.signature, ECRYPTFS_SIG_SIZE); encrypted_session_key_valid = 0; - for (i = 0; i 
< crypt_stat->key_size; i++) + for (i = 0; i < ecryptfs_get_key_size_to_store_key(crypt_stat); i++) encrypted_session_key_valid |= auth_tok->session_key.encrypted_key[i]; if (encrypted_session_key_valid) { @@ -2189,6 +2219,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, u8 cipher_code; size_t packet_size_length; size_t max_packet_size; + unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1]; struct ecryptfs_mount_crypt_stat *mount_crypt_stat = crypt_stat->mount_crypt_stat; struct blkcipher_desc desc = { @@ -2221,13 +2252,14 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, mount_crypt_stat->global_default_cipher_key_size; if (auth_tok->session_key.encrypted_key_size == 0) auth_tok->session_key.encrypted_key_size = - crypt_stat->key_size; + ecryptfs_get_key_size_to_store_key(crypt_stat); if (crypt_stat->key_size == 24 && strcmp("aes", crypt_stat->cipher) == 0) { memset((crypt_stat->key + 24), 0, 8); auth_tok->session_key.encrypted_key_size = 32; } else - auth_tok->session_key.encrypted_key_size = crypt_stat->key_size; + auth_tok->session_key.encrypted_key_size = + ecryptfs_get_key_size_to_store_key(crypt_stat); key_rec->enc_key_size = auth_tok->session_key.encrypted_key_size; encrypted_session_key_valid = 0; @@ -2251,8 +2283,8 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, auth_tok->token.password. 
session_key_encryption_key_bytes); memcpy(session_key_encryption_key, - auth_tok->token.password.session_key_encryption_key, - crypt_stat->key_size); + auth_tok->token.password.session_key_encryption_key, + auth_tok->token.password.session_key_encryption_key_bytes); ecryptfs_printk(KERN_DEBUG, "Cached session key encryption key:\n"); if (ecryptfs_verbosity > 0) @@ -2285,7 +2317,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, } mutex_lock(tfm_mutex); rc = crypto_blkcipher_setkey(desc.tfm, session_key_encryption_key, - crypt_stat->key_size); + auth_tok->token.password.session_key_encryption_key_bytes); if (rc < 0) { mutex_unlock(tfm_mutex); ecryptfs_printk(KERN_ERR, "Error setting key for crypto " @@ -2294,7 +2326,9 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, } rc = 0; ecryptfs_printk(KERN_DEBUG, "Encrypting [%zd] bytes of the key\n", - crypt_stat->key_size); + crypt_stat->key_size); + ecryptfs_printk(KERN_DEBUG, "Encrypting [%zd] bytes of the salt key\n", + ecryptfs_get_salt_size_for_cipher(crypt_stat)); rc = crypto_blkcipher_encrypt(&desc, dst_sg, src_sg, (*key_rec).enc_key_size); mutex_unlock(tfm_mutex); @@ -2343,8 +2377,10 @@ encrypted_session_key_set: dest[(*packet_size)++] = 0x04; /* version 4 */ /* TODO: Break from RFC2440 so that arbitrary ciphers can be * specified with strings */ - cipher_code = ecryptfs_code_for_cipher_string(crypt_stat->cipher, - crypt_stat->key_size); + cipher_code = ecryptfs_code_for_cipher_string( + ecryptfs_get_full_cipher(crypt_stat->cipher, + crypt_stat->cipher_mode, final, sizeof(final)), + crypt_stat->key_size); if (cipher_code == 0) { ecryptfs_printk(KERN_WARNING, "Unable to generate code for " "cipher [%s]\n", crypt_stat->cipher); diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 4f4d0474bee9..b591e6772f1b 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -156,16 +156,42 @@ int ecryptfs_get_lower_file(struct dentry *dentry, struct inode *inode) void ecryptfs_put_lower_file(struct 
inode *inode) { + int ret = 0; struct ecryptfs_inode_info *inode_info; + bool clear_cache_needed = false; inode_info = ecryptfs_inode_to_private(inode); if (atomic_dec_and_mutex_lock(&inode_info->lower_file_count, &inode_info->lower_file_mutex)) { + + if (get_events() && get_events()->is_hw_crypt_cb && + get_events()->is_hw_crypt_cb()) + clear_cache_needed = true; + + if (clear_cache_needed) { + ret = vfs_fsync(inode_info->lower_file, false); + + if (ret) + pr_err("failed to sync file ret = %d.\n", ret); + } + filemap_write_and_wait(inode->i_mapping); fput(inode_info->lower_file); inode_info->lower_file = NULL; mutex_unlock(&inode_info->lower_file_mutex); + + if (clear_cache_needed) { + truncate_inode_pages_fill_zero(inode->i_mapping, 0); + truncate_inode_pages_fill_zero( + ecryptfs_inode_to_lower(inode)->i_mapping, 0); + } + + if (get_events() && get_events()->release_cb) + get_events()->release_cb( + ecryptfs_inode_to_lower(inode)); } + + } enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, @@ -280,6 +306,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, char *cipher_key_bytes_src; char *fn_cipher_key_bytes_src; u8 cipher_code; + unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1]; *check_ruid = 0; @@ -309,12 +336,14 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, case ecryptfs_opt_ecryptfs_cipher: cipher_name_src = args[0].from; cipher_name_dst = - mount_crypt_stat-> - global_default_cipher_name; - strncpy(cipher_name_dst, cipher_name_src, - ECRYPTFS_MAX_CIPHER_NAME_SIZE); - cipher_name_dst[ECRYPTFS_MAX_CIPHER_NAME_SIZE] = '\0'; + mount_crypt_stat->global_default_cipher_name; + + ecryptfs_parse_full_cipher(cipher_name_src, + mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_mode); + cipher_name_set = 1; + break; case ecryptfs_opt_ecryptfs_key_bytes: cipher_key_bytes_src = args[0].from; @@ -411,24 +440,35 @@ static int ecryptfs_parse_options(struct 
ecryptfs_sb_info *sbi, char *options, strcpy(mount_crypt_stat->global_default_cipher_name, ECRYPTFS_DEFAULT_CIPHER); } + if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) && !fn_cipher_name_set) strcpy(mount_crypt_stat->global_default_fn_cipher_name, mount_crypt_stat->global_default_cipher_name); + if (!cipher_key_bytes_set) mount_crypt_stat->global_default_cipher_key_size = 0; + if ((mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) && !fn_cipher_key_bytes_set) mount_crypt_stat->global_default_fn_cipher_key_bytes = mount_crypt_stat->global_default_cipher_key_size; cipher_code = ecryptfs_code_for_cipher_string( - mount_crypt_stat->global_default_cipher_name, + ecryptfs_get_full_cipher( + mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_mode, + final, sizeof(final)), mount_crypt_stat->global_default_cipher_key_size); if (!cipher_code) { - ecryptfs_printk(KERN_ERR, - "eCryptfs doesn't support cipher: %s", - mount_crypt_stat->global_default_cipher_name); + ecryptfs_printk( + KERN_ERR, + "eCryptfs doesn't support cipher: %s and key size %zu", + ecryptfs_get_full_cipher( + mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_mode, + final, sizeof(final)), + mount_crypt_stat->global_default_cipher_key_size); rc = -EINVAL; goto out; } @@ -488,6 +528,7 @@ static struct file_system_type ecryptfs_fs_type; * @dev_name: The path to mount over * @raw_data: The options passed into the kernel */ + static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data) { @@ -557,6 +598,11 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags ecryptfs_set_superblock_lower(s, path.dentry->d_sb); + + if (get_events() && get_events()->is_hw_crypt_cb && + get_events()->is_hw_crypt_cb()) + drop_pagecache_sb(ecryptfs_superblock_to_lower(s), 0); + /** * Set the POSIX ACL flag based on whether they're enabled in 
the lower * mount. @@ -895,6 +941,7 @@ static void __exit ecryptfs_exit(void) do_sysfs_unregistration(); unregister_filesystem(&ecryptfs_fs_type); ecryptfs_free_kmem_caches(); + ecryptfs_free_events(); } MODULE_AUTHOR("Michael A. Halcrow <mhalcrow@us.ibm.com>"); diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index caba848ac763..bdbc72d52438 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -552,10 +552,16 @@ static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block) return rc; } +void ecryptfs_freepage(struct page *page) +{ + zero_user(page, 0, PAGE_CACHE_SIZE); +} + const struct address_space_operations ecryptfs_aops = { .writepage = ecryptfs_writepage, .readpage = ecryptfs_readpage, .write_begin = ecryptfs_write_begin, .write_end = ecryptfs_write_end, .bmap = ecryptfs_bmap, + .freepage = ecryptfs_freepage, }; diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index afa1b81c3418..25e436ddcf8e 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -69,6 +69,9 @@ static void ecryptfs_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); struct ecryptfs_inode_info *inode_info; + if (inode == NULL) + return; + inode_info = ecryptfs_inode_to_private(inode); kmem_cache_free(ecryptfs_inode_info_cache, inode_info); @@ -88,9 +91,12 @@ static void ecryptfs_destroy_inode(struct inode *inode) struct ecryptfs_inode_info *inode_info; inode_info = ecryptfs_inode_to_private(inode); + BUG_ON(inode_info->lower_file); + ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat); call_rcu(&inode->i_rcu, ecryptfs_i_callback); + } /** @@ -149,6 +155,9 @@ static int ecryptfs_show_options(struct seq_file *m, struct dentry *root) struct ecryptfs_mount_crypt_stat *mount_crypt_stat = &ecryptfs_superblock_to_private(sb)->mount_crypt_stat; struct ecryptfs_global_auth_tok *walker; + unsigned char final[2*ECRYPTFS_MAX_CIPHER_NAME_SIZE+1]; + + memset(final, 0, sizeof(final)); 
mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex); list_for_each_entry(walker, @@ -162,7 +171,10 @@ static int ecryptfs_show_options(struct seq_file *m, struct dentry *root) mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); seq_printf(m, ",ecryptfs_cipher=%s", - mount_crypt_stat->global_default_cipher_name); + ecryptfs_get_full_cipher( + mount_crypt_stat->global_default_cipher_name, + mount_crypt_stat->global_default_cipher_mode, + final, sizeof(final))); if (mount_crypt_stat->global_default_cipher_key_size) seq_printf(m, ",ecryptfs_key_bytes=%zd", diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index 1a0835073663..1acac7fd21b2 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -469,3 +469,59 @@ uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size) return size; return 0; } + +/* + * Validate dentries for encrypted directories to make sure we aren't + * potentially caching stale data after a key has been added or + * removed. + */ +static int ext4_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct inode *dir = d_inode(dentry->d_parent); + struct ext4_crypt_info *ci = EXT4_I(dir)->i_crypt_info; + int dir_has_key, cached_with_key; + + if (!ext4_encrypted_inode(dir)) + return 0; + + if (ci && ci->ci_keyring_key && + (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED) | + (1 << KEY_FLAG_DEAD)))) + ci = NULL; + + /* this should eventually be an flag in d_flags */ + cached_with_key = dentry->d_fsdata != NULL; + dir_has_key = (ci != NULL); + + /* + * If the dentry was cached without the key, and it is a + * negative dentry, it might be a valid name. We can't check + * if the key has since been made available due to locking + * reasons, so we fail the validation so ext4_lookup() can do + * this check. + * + * We also fail the validation if the dentry was created with + * the key present, but we no longer have the key, or vice versa. 
+ */ + if ((!cached_with_key && d_is_negative(dentry)) || + (!cached_with_key && dir_has_key) || + (cached_with_key && !dir_has_key)) { +#if 0 /* Revalidation debug */ + char buf[80]; + char *cp = simple_dname(dentry, buf, sizeof(buf)); + + if (IS_ERR(cp)) + cp = (char *) "???"; + pr_err("revalidate: %s %p %d %d %d\n", cp, dentry->d_fsdata, + cached_with_key, d_is_negative(dentry), + dir_has_key); +#endif + return 0; + } + return 1; +} + +const struct dentry_operations ext4_encrypted_d_ops = { + .d_revalidate = ext4_d_revalidate, +}; diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 1d1bca74f844..33f5e2a50cf8 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -111,6 +111,12 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) int dir_has_error = 0; struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}; + if (ext4_encrypted_inode(inode)) { + err = ext4_get_encryption_info(inode); + if (err && err != -ENOKEY) + return err; + } + if (is_dx_dir(inode)) { err = ext4_dx_readdir(file, ctx); if (err != ERR_BAD_DX_DIR) { @@ -157,8 +163,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) index, 1); file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; bh = ext4_bread(NULL, inode, map.m_lblk, 0); - if (IS_ERR(bh)) - return PTR_ERR(bh); + if (IS_ERR(bh)) { + err = PTR_ERR(bh); + bh = NULL; + goto errout; + } } if (!bh) { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 786cb51cab56..c1b4f6ab2148 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2258,6 +2258,7 @@ struct page *ext4_encrypt(struct inode *inode, struct page *plaintext_page); int ext4_decrypt(struct page *page); int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex); +extern const struct dentry_operations ext4_encrypted_d_ops; #ifdef CONFIG_EXT4_FS_ENCRYPTION int ext4_init_crypto(void); @@ -3010,8 +3011,7 @@ extern int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos, struct page *page); extern int ext4_try_add_inline_entry(handle_t 
*handle, struct ext4_filename *fname, - struct dentry *dentry, - struct inode *inode); + struct inode *dir, struct inode *inode); extern int ext4_try_create_inline_dir(handle_t *handle, struct inode *parent, struct inode *inode); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index d884989cc83d..dfe3b9bafc0d 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -995,12 +995,11 @@ void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh, */ static int ext4_add_dirent_to_inline(handle_t *handle, struct ext4_filename *fname, - struct dentry *dentry, + struct inode *dir, struct inode *inode, struct ext4_iloc *iloc, void *inline_start, int inline_size) { - struct inode *dir = d_inode(dentry->d_parent); int err; struct ext4_dir_entry_2 *de; @@ -1245,12 +1244,11 @@ out: * the new created block. */ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, - struct dentry *dentry, struct inode *inode) + struct inode *dir, struct inode *inode) { int ret, inline_size; void *inline_start; struct ext4_iloc iloc; - struct inode *dir = d_inode(dentry->d_parent); ret = ext4_get_inode_loc(dir, &iloc); if (ret) @@ -1264,7 +1262,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, EXT4_INLINE_DOTDOT_SIZE; inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; - ret = ext4_add_dirent_to_inline(handle, fname, dentry, inode, &iloc, + ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc, inline_start, inline_size); if (ret != -ENOSPC) goto out; @@ -1285,7 +1283,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, if (inline_size) { inline_start = ext4_get_inline_xattr_pos(dir, &iloc); - ret = ext4_add_dirent_to_inline(handle, fname, dentry, + ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc, inline_start, inline_size); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index a969ab39f302..c9aad3b8951f 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -273,7 
+273,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, struct ext4_filename *fname, struct ext4_dir_entry_2 **res_dir); static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, - struct dentry *dentry, struct inode *inode); + struct inode *dir, struct inode *inode); /* checksumming functions */ void initialize_dirent_tail(struct ext4_dir_entry_tail *t, @@ -1558,6 +1558,24 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi struct ext4_dir_entry_2 *de; struct buffer_head *bh; + if (ext4_encrypted_inode(dir)) { + int res = ext4_get_encryption_info(dir); + + /* + * This should be a properly defined flag for + * dentry->d_flags when we uplift this to the VFS. + * d_fsdata is set to (void *) 1 if if the dentry is + * created while the directory was encrypted and we + * don't have access to the key. + */ + dentry->d_fsdata = NULL; + if (ext4_encryption_info(dir)) + dentry->d_fsdata = (void *) 1; + d_set_d_op(dentry, &ext4_encrypted_d_ops); + if (res && res != -ENOKEY) + return ERR_PTR(res); + } + if (dentry->d_name.len > EXT4_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); @@ -1928,10 +1946,9 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname, * directory, and adds the dentry to the indexed directory. 
*/ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, - struct dentry *dentry, + struct inode *dir, struct inode *inode, struct buffer_head *bh) { - struct inode *dir = d_inode(dentry->d_parent); struct buffer_head *bh2; struct dx_root *root; struct dx_frame frames[2], *frame; @@ -2086,8 +2103,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, return retval; if (ext4_has_inline_data(dir)) { - retval = ext4_try_add_inline_entry(handle, &fname, - dentry, inode); + retval = ext4_try_add_inline_entry(handle, &fname, dir, inode); if (retval < 0) goto out; if (retval == 1) { @@ -2097,7 +2113,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, } if (is_dx(dir)) { - retval = ext4_dx_add_entry(handle, &fname, dentry, inode); + retval = ext4_dx_add_entry(handle, &fname, dir, inode); if (!retval || (retval != ERR_BAD_DX_DIR)) goto out; ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); @@ -2119,7 +2135,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, if (blocks == 1 && !dx_fallback && ext4_has_feature_dir_index(sb)) { - retval = make_indexed_dir(handle, &fname, dentry, + retval = make_indexed_dir(handle, &fname, dir, inode, bh); bh = NULL; /* make_indexed_dir releases bh */ goto out; @@ -2154,12 +2170,11 @@ out: * Returns 0 for success, or a negative error value */ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, - struct dentry *dentry, struct inode *inode) + struct inode *dir, struct inode *inode) { struct dx_frame frames[2], *frame; struct dx_entry *entries, *at; struct buffer_head *bh; - struct inode *dir = d_inode(dentry->d_parent); struct super_block *sb = dir->i_sb; struct ext4_dir_entry_2 *de; int err; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 6b6b3e751f8c..06fd5f7f993d 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -232,6 +232,27 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh) return error; } +static int 
+__xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header, + void *end, const char *function, unsigned int line) +{ + struct ext4_xattr_entry *entry = IFIRST(header); + int error = -EFSCORRUPTED; + + if (((void *) header >= end) || + (header->h_magic != le32_to_cpu(EXT4_XATTR_MAGIC))) + goto errout; + error = ext4_xattr_check_names(entry, end, entry); +errout: + if (error) + __ext4_error_inode(inode, function, line, 0, + "corrupted in-inode xattr"); + return error; +} + +#define xattr_check_inode(inode, header, end) \ + __xattr_check_inode((inode), (header), (end), __func__, __LINE__) + static inline int ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size) { @@ -343,7 +364,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, header = IHDR(inode, raw_inode); entry = IFIRST(header); end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; - error = ext4_xattr_check_names(entry, end, entry); + error = xattr_check_inode(inode, header, end); if (error) goto cleanup; error = ext4_xattr_find_entry(&entry, name_index, name, @@ -474,7 +495,7 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) raw_inode = ext4_raw_inode(&iloc); header = IHDR(inode, raw_inode); end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; - error = ext4_xattr_check_names(IFIRST(header), end, IFIRST(header)); + error = xattr_check_inode(inode, header, end); if (error) goto cleanup; error = ext4_xattr_list_entries(dentry, IFIRST(header), @@ -990,8 +1011,7 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, is->s.here = is->s.first; is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { - error = ext4_xattr_check_names(IFIRST(header), is->s.end, - IFIRST(header)); + error = xattr_check_inode(inode, header, is->s.end); if (error) return error; /* Find the named attribute. 
*/ @@ -1288,6 +1308,10 @@ retry: last = entry; total_ino = sizeof(struct ext4_xattr_ibody_header); + error = xattr_check_inode(inode, header, end); + if (error) + goto cleanup; + free = ext4_xattr_free_space(last, &min_offs, base, &total_ino); if (free >= new_extra_isize) { entry = IFIRST(header); diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile index e95eeb445e58..3805040bee46 100644 --- a/fs/fuse/Makefile +++ b/fs/fuse/Makefile @@ -5,4 +5,4 @@ obj-$(CONFIG_FUSE_FS) += fuse.o obj-$(CONFIG_CUSE) += cuse.o -fuse-objs := dev.o dir.o file.o inode.o control.o +fuse-objs := dev.o dir.o file.o inode.o control.o passthrough.o diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 175fcdeabe4c..1398674f0614 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -7,6 +7,7 @@ */ #include "fuse_i.h" +#include "fuse_passthrough.h" #include <linux/init.h> #include <linux/module.h> @@ -569,9 +570,14 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args) args->out.numargs * sizeof(struct fuse_arg)); fuse_request_send(fc, req); ret = req->out.h.error; - if (!ret && args->out.argvar) { - BUG_ON(args->out.numargs != 1); - ret = req->out.args[0].size; + if (!ret) { + if (args->out.argvar) { + BUG_ON(args->out.numargs != 1); + ret = req->out.args[0].size; + } + + if (req->passthrough_filp != NULL) + args->out.passthrough_filp = req->passthrough_filp; } fuse_put_request(fc, req); @@ -1937,6 +1943,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, err = copy_out_args(cs, &req->out, nbytes); fuse_copy_finish(cs); + fuse_setup_passthrough(fc, req); spin_lock(&fpq->lock); clear_bit(FR_LOCKED, &req->flags); if (!fpq->connected) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 5e2e08712d3b..640f66719314 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -428,6 +428,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, args.out.args[0].value = &outentry; args.out.args[1].size = sizeof(outopen); args.out.args[1].value = &outopen; + 
args.out.passthrough_filp = NULL; err = fuse_simple_request(fc, &args); if (err) goto out_free_ff; @@ -439,6 +440,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, ff->fh = outopen.fh; ff->nodeid = outentry.nodeid; ff->open_flags = outopen.open_flags; + if (args.out.passthrough_filp != NULL) + ff->passthrough_filp = args.out.passthrough_filp; inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, &outentry.attr, entry_attr_timeout(&outentry), 0); if (!inode) { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index c2e340d6ec6e..461dcf5e4526 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -7,6 +7,7 @@ */ #include "fuse_i.h" +#include "fuse_passthrough.h" #include <linux/pagemap.h> #include <linux/slab.h> @@ -21,8 +22,10 @@ static const struct file_operations fuse_direct_io_file_operations; static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, - int opcode, struct fuse_open_out *outargp) + int opcode, struct fuse_open_out *outargp, + struct file **passthrough_filpp) { + int ret_val; struct fuse_open_in inarg; FUSE_ARGS(args); @@ -38,8 +41,14 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, args.out.numargs = 1; args.out.args[0].size = sizeof(*outargp); args.out.args[0].value = outargp; + args.out.passthrough_filp = NULL; - return fuse_simple_request(fc, &args); + ret_val = fuse_simple_request(fc, &args); + + if (args.out.passthrough_filp != NULL) + *passthrough_filpp = args.out.passthrough_filp; + + return ret_val; } struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) @@ -50,6 +59,10 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) if (unlikely(!ff)) return NULL; + ff->passthrough_filp = NULL; + ff->passthrough_enabled = 0; + if (fc->passthrough) + ff->passthrough_enabled = 1; ff->fc = fc; ff->reserved_req = fuse_request_alloc(0); if (unlikely(!ff->reserved_req)) { @@ -117,6 +130,7 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, bool 
isdir) { struct fuse_file *ff; + struct file *passthrough_filp = NULL; int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; ff = fuse_file_alloc(fc); @@ -129,10 +143,12 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, struct fuse_open_out outarg; int err; - err = fuse_send_open(fc, nodeid, file, opcode, &outarg); + err = fuse_send_open(fc, nodeid, file, opcode, &outarg, + &(passthrough_filp)); if (!err) { ff->fh = outarg.fh; ff->open_flags = outarg.open_flags; + ff->passthrough_filp = passthrough_filp; } else if (err != -ENOSYS || isdir) { fuse_file_free(ff); @@ -252,6 +268,8 @@ void fuse_release_common(struct file *file, int opcode) if (unlikely(!ff)) return; + fuse_passthrough_release(ff); + req = ff->reserved_req; fuse_prepare_release(ff, file->f_flags, opcode); @@ -903,8 +921,10 @@ out: static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { + ssize_t ret_val; struct inode *inode = iocb->ki_filp->f_mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_file *ff = iocb->ki_filp->private_data; /* * In auto invalidate mode, always update attributes on read. 
@@ -919,7 +939,12 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) return err; } - return generic_file_read_iter(iocb, to); + if (ff && ff->passthrough_enabled && ff->passthrough_filp) + ret_val = fuse_passthrough_read_iter(iocb, to); + else + ret_val = generic_file_read_iter(iocb, to); + + return ret_val; } static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff, @@ -1151,6 +1176,7 @@ static ssize_t fuse_perform_write(struct file *file, static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; + struct fuse_file *ff = file->private_data; struct address_space *mapping = file->f_mapping; ssize_t written = 0; ssize_t written_buffered = 0; @@ -1184,8 +1210,14 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (err) goto out; + if (ff && ff->passthrough_enabled && ff->passthrough_filp) { + written = fuse_passthrough_write_iter(iocb, from); + goto out; + } + if (iocb->ki_flags & IOCB_DIRECT) { loff_t pos = iocb->ki_pos; + written = generic_file_direct_write(iocb, from, pos); if (written < 0 || !iov_iter_count(from)) goto out; @@ -2049,6 +2081,9 @@ static const struct vm_operations_struct fuse_file_vm_ops = { static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) { + struct fuse_file *ff = file->private_data; + + ff->passthrough_enabled = 0; if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) fuse_link_write_file(file); @@ -2059,6 +2094,9 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma) { + struct fuse_file *ff = file->private_data; + + ff->passthrough_enabled = 0; /* Can't provide the coherency needed for MAP_SHARED */ if (vma->vm_flags & VM_MAYSHARE) return -ENODEV; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 604cd42dafef..0cbeea6ee831 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ 
-158,6 +158,10 @@ struct fuse_file { /** Has flock been performed on this file? */ bool flock:1; + + /* the read write file */ + struct file *passthrough_filp; + bool passthrough_enabled; }; /** One input argument of a request */ @@ -237,6 +241,7 @@ struct fuse_args { unsigned argvar:1; unsigned numargs; struct fuse_arg args[2]; + struct file *passthrough_filp; } out; }; @@ -383,6 +388,9 @@ struct fuse_req { /** Request is stolen from fuse_file->reserved_req */ struct file *stolen_file; + + /** fuse passthrough file */ + struct file *passthrough_filp; }; struct fuse_iqueue { @@ -540,6 +548,9 @@ struct fuse_conn { /** write-back cache policy (default is write-through) */ unsigned writeback_cache:1; + /** passthrough IO. */ + unsigned passthrough:1; + /* * The following bitfields are only for optimization purposes * and hence races in setting them will not cause malfunction diff --git a/fs/fuse/fuse_passthrough.h b/fs/fuse/fuse_passthrough.h new file mode 100644 index 000000000000..62f12c12ffec --- /dev/null +++ b/fs/fuse/fuse_passthrough.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#ifndef _FS_FUSE_PASSTHROUGH_H +#define _FS_FUSE_PASSTHROUGH_H + +#include "fuse_i.h" + +#include <linux/fuse.h> +#include <linux/file.h> + +void fuse_setup_passthrough(struct fuse_conn *fc, struct fuse_req *req); + +ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *to); + +ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *from); + +void fuse_passthrough_release(struct fuse_file *ff); + +#endif /* _FS_FUSE_PASSTHROUGH_H */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 2913db2a5b99..33ec874302cb 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -860,6 +860,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->conn_error = 1; else { unsigned long ra_pages; + struct super_block *sb = fc->sb; process_init_limits(fc, arg); @@ -898,6 +899,13 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->async_dio = 1; if (arg->flags & FUSE_WRITEBACK_CACHE) fc->writeback_cache = 1; + if (arg->flags & FUSE_PASSTHROUGH) { + fc->passthrough = 1; + /* Prevent further stacking */ + sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; + pr_info("FUSE: Pass through is enabled [%s : %d]!\n", + current->comm, current->pid); + } if (arg->time_gran && arg->time_gran <= 1000000000) fc->sb->s_time_gran = arg->time_gran; } else { diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c new file mode 100644 index 000000000000..e8671942c2a0 --- /dev/null +++ b/fs/fuse/passthrough.c @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + */ + +#include "fuse_passthrough.h" + +#include <linux/aio.h> +#include <linux/fs_stack.h> + +void fuse_setup_passthrough(struct fuse_conn *fc, struct fuse_req *req) +{ + int daemon_fd, fs_stack_depth; + unsigned open_out_index; + struct file *passthrough_filp; + struct inode *passthrough_inode; + struct super_block *passthrough_sb; + struct fuse_open_out *open_out; + + req->passthrough_filp = NULL; + + if (!(fc->passthrough)) + return; + + if ((req->in.h.opcode != FUSE_OPEN) && + (req->in.h.opcode != FUSE_CREATE)) + return; + + open_out_index = req->in.numargs - 1; + + BUG_ON(open_out_index != 0 && open_out_index != 1); + BUG_ON(req->out.args[open_out_index].size != sizeof(*open_out)); + + open_out = req->out.args[open_out_index].value; + + daemon_fd = (int)open_out->passthrough_fd; + if (daemon_fd < 0) + return; + + passthrough_filp = fget_raw(daemon_fd); + if (!passthrough_filp) + return; + + passthrough_inode = file_inode(passthrough_filp); + passthrough_sb = passthrough_inode->i_sb; + fs_stack_depth = passthrough_sb->s_stack_depth + 1; + + /* If we reached the stacking limit go through regular io */ + if (fs_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { + /* Release the passthrough file. 
*/ + fput(passthrough_filp); + pr_err("FUSE: maximum fs stacking depth exceeded, cannot use passthrough for this file\n"); + return; + } + req->passthrough_filp = passthrough_filp; +} +
+/*
+ * Run the I/O on ff->passthrough_filp with iocb->ki_filp temporarily swapped,
+ * then copy size/times to the FUSE inode. -EIO if the lower op is missing.
+ */
+static ssize_t fuse_passthrough_read_write_iter(struct kiocb *iocb,
+		struct iov_iter *iter, int do_write)
+{
+	ssize_t ret_val = -EIO;
+	struct file *fuse_file = iocb->ki_filp;
+	struct fuse_file *ff = fuse_file->private_data;
+	struct file *passthrough_filp = ff->passthrough_filp;
+	struct inode *fuse_inode = fuse_file->f_path.dentry->d_inode;
+	struct inode *passthrough_inode;
+
+	/* lock passthrough file to prevent it from being released */
+	get_file(passthrough_filp);
+	iocb->ki_filp = passthrough_filp;
+	passthrough_inode = file_inode(passthrough_filp);
+
+	if (do_write) {
+		if (!passthrough_filp->f_op->write_iter)
+			goto out;
+		ret_val = passthrough_filp->f_op->write_iter(iocb, iter);
+
+		if (ret_val >= 0 || ret_val == -EIOCBQUEUED) {
+			fsstack_copy_inode_size(fuse_inode, passthrough_inode);
+			fsstack_copy_attr_times(fuse_inode, passthrough_inode);
+		}
+	} else {
+		if (!passthrough_filp->f_op->read_iter)
+			goto out;
+		ret_val = passthrough_filp->f_op->read_iter(iocb, iter);
+		if (ret_val >= 0 || ret_val == -EIOCBQUEUED)
+			fsstack_copy_attr_atime(fuse_inode, passthrough_inode);
+	}
+out:
+	/* every exit must restore the caller's file and drop our reference */
+	iocb->ki_filp = fuse_file;
+	fput(passthrough_filp);
+
+	return ret_val;
+}
+ +ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + return fuse_passthrough_read_write_iter(iocb, to, 0); +} + +ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + return fuse_passthrough_read_write_iter(iocb, from, 1); +} + +void fuse_passthrough_release(struct fuse_file *ff) +{ + if (!(ff->passthrough_filp)) + return; + + /* Release the passthrough file. 
*/ + fput(ff->passthrough_filp); + ff->passthrough_filp = NULL; +} diff --git a/fs/namei.c b/fs/namei.c index d8ee4da93650..558ea922a515 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2662,8 +2662,14 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (error) return error; error = dir->i_op->create(dir, dentry, mode, want_excl); + if (error) + return error; + error = security_inode_post_create(dir, dentry, mode); + if (error) + return error; if (!error) fsnotify_create(dir, dentry); + return error; } EXPORT_SYMBOL(vfs_create); @@ -3518,8 +3524,16 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) return error; error = dir->i_op->mknod(dir, dentry, mode, dev); + if (error) + return error; + + error = security_inode_post_create(dir, dentry, mode); + if (error) + return error; + if (!error) fsnotify_create(dir, dentry); + return error; } EXPORT_SYMBOL(vfs_mknod); diff --git a/fs/proc/base.c b/fs/proc/base.c index 57df8a52e780..98b4b03dfe31 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1409,6 +1409,204 @@ static const struct file_operations proc_pid_sched_operations = { #endif +/* + * Print out various scheduling related per-task fields: + */ + +#ifdef CONFIG_SMP + +static int sched_wake_up_idle_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *p; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + seq_printf(m, "%d\n", sched_get_wake_up_idle(p)); + + put_task_struct(p); + + return 0; +} + +static ssize_t +sched_wake_up_idle_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file_inode(file); + struct task_struct *p; + char buffer[PROC_NUMBUF]; + int wake_up_idle, err; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out; + } + + err = kstrtoint(strstrip(buffer), 0, 
&wake_up_idle); + if (err) + goto out; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + err = sched_set_wake_up_idle(p, wake_up_idle); + + put_task_struct(p); + +out: + return err < 0 ? err : count; +} + +static int sched_wake_up_idle_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_wake_up_idle_show, inode); +} + +static const struct file_operations proc_pid_sched_wake_up_idle_operations = { + .open = sched_wake_up_idle_open, + .read = seq_read, + .write = sched_wake_up_idle_write, + .llseek = seq_lseek, + .release = single_release, +}; + +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_SCHED_HMP + +static int sched_init_task_load_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *p; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + seq_printf(m, "%d\n", sched_get_init_task_load(p)); + + put_task_struct(p); + + return 0; +} + +static ssize_t +sched_init_task_load_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file_inode(file); + struct task_struct *p; + char buffer[PROC_NUMBUF]; + int init_task_load, err; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out; + } + + err = kstrtoint(strstrip(buffer), 0, &init_task_load); + if (err) + goto out; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + err = sched_set_init_task_load(p, init_task_load); + + put_task_struct(p); + +out: + return err < 0 ? 
err : count; +} + +static int sched_init_task_load_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_init_task_load_show, inode); +} + +static const struct file_operations proc_pid_sched_init_task_load_operations = { + .open = sched_init_task_load_open, + .read = seq_read, + .write = sched_init_task_load_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static int sched_group_id_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *p; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + seq_printf(m, "%d\n", sched_get_group_id(p)); + + put_task_struct(p); + + return 0; +} + +static ssize_t +sched_group_id_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file_inode(file); + struct task_struct *p; + char buffer[PROC_NUMBUF]; + int group_id, err; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out; + } + + err = kstrtoint(strstrip(buffer), 0, &group_id); + if (err) + goto out; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + err = sched_set_group_id(p, group_id); + + put_task_struct(p); + +out: + return err < 0 ? 
err : count; +} + +static int sched_group_id_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_group_id_show, inode); +} + +static const struct file_operations proc_pid_sched_group_id_operations = { + .open = sched_group_id_open, + .read = seq_read, + .write = sched_group_id_write, + .llseek = seq_lseek, + .release = single_release, +}; + +#endif /* CONFIG_SCHED_HMP */ + #ifdef CONFIG_SCHED_AUTOGROUP /* * Print out autogroup related information: @@ -2743,6 +2941,13 @@ static const struct pid_entry tgid_base_stuff[] = { ONE("status", S_IRUGO, proc_pid_status), ONE("personality", S_IRUSR, proc_pid_personality), ONE("limits", S_IRUGO, proc_pid_limits), +#ifdef CONFIG_SMP + REG("sched_wake_up_idle", S_IRUGO|S_IWUSR, proc_pid_sched_wake_up_idle_operations), +#endif +#ifdef CONFIG_SCHED_HMP + REG("sched_init_task_load", S_IRUGO|S_IWUSR, proc_pid_sched_init_task_load_operations), + REG("sched_group_id", S_IRUGO|S_IWUGO, proc_pid_sched_group_id_operations), +#endif #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif @@ -2767,6 +2972,9 @@ static const struct pid_entry tgid_base_stuff[] = { REG("mounts", S_IRUGO, proc_mounts_operations), REG("mountinfo", S_IRUGO, proc_mountinfo_operations), REG("mountstats", S_IRUSR, proc_mountstats_operations), +#ifdef CONFIG_PROCESS_RECLAIM + REG("reclaim", S_IWUSR, proc_reclaim_operations), +#endif #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, proc_clear_refs_operations), REG("smaps", S_IRUGO, proc_pid_smaps_operations), diff --git a/fs/proc/internal.h b/fs/proc/internal.h index aa2781095bd1..ef2b01533c97 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -209,6 +209,7 @@ struct pde_opener { extern const struct inode_operations proc_link_inode_operations; extern const struct inode_operations proc_pid_link_inode_operations; +extern const struct file_operations proc_reclaim_operations; extern void proc_init_inodecache(void); extern struct inode 
*proc_get_inode(struct super_block *, struct proc_dir_entry *); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 67aa7e63a5c1..ee79fadfc6e7 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -14,6 +14,8 @@ #include <linux/swapops.h> #include <linux/mmu_notifier.h> #include <linux/page_idle.h> +#include <linux/mm_inline.h> +#include <linux/ctype.h> #include <asm/elf.h> #include <asm/uaccess.h> @@ -1429,6 +1431,238 @@ const struct file_operations proc_pagemap_operations = { }; #endif /* CONFIG_PROC_PAGE_MONITOR */ +#ifdef CONFIG_PROCESS_RECLAIM +static int reclaim_pte_range(pmd_t *pmd, unsigned long addr, + unsigned long end, struct mm_walk *walk) +{ + struct reclaim_param *rp = walk->private; + struct vm_area_struct *vma = rp->vma; + pte_t *pte, ptent; + spinlock_t *ptl; + struct page *page; + LIST_HEAD(page_list); + int isolated; + int reclaimed; + + split_huge_page_pmd(vma, addr, pmd); + if (pmd_trans_unstable(pmd) || !rp->nr_to_reclaim) + return 0; +cont: + isolated = 0; + pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); + for (; addr != end; pte++, addr += PAGE_SIZE) { + ptent = *pte; + if (!pte_present(ptent)) + continue; + + page = vm_normal_page(vma, addr, ptent); + if (!page) + continue; + + if (isolate_lru_page(page)) + continue; + + list_add(&page->lru, &page_list); + inc_zone_page_state(page, NR_ISOLATED_ANON + + page_is_file_cache(page)); + isolated++; + rp->nr_scanned++; + if ((isolated >= SWAP_CLUSTER_MAX) || !rp->nr_to_reclaim) + break; + } + pte_unmap_unlock(pte - 1, ptl); + reclaimed = reclaim_pages_from_list(&page_list, vma); + rp->nr_reclaimed += reclaimed; + rp->nr_to_reclaim -= reclaimed; + if (rp->nr_to_reclaim < 0) + rp->nr_to_reclaim = 0; + + if (rp->nr_to_reclaim && (addr != end)) + goto cont; + + cond_resched(); + return 0; +} + +enum reclaim_type { + RECLAIM_FILE, + RECLAIM_ANON, + RECLAIM_ALL, + RECLAIM_RANGE, +}; + +struct reclaim_param reclaim_task_anon(struct task_struct *task, + int nr_to_reclaim) +{ + struct 
mm_struct *mm; + struct vm_area_struct *vma; + struct mm_walk reclaim_walk = {}; + struct reclaim_param rp; + + rp.nr_reclaimed = 0; + rp.nr_scanned = 0; + get_task_struct(task); + mm = get_task_mm(task); + if (!mm) + goto out; + + reclaim_walk.mm = mm; + reclaim_walk.pmd_entry = reclaim_pte_range; + + rp.nr_to_reclaim = nr_to_reclaim; + reclaim_walk.private = &rp; + + down_read(&mm->mmap_sem); + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (is_vm_hugetlb_page(vma)) + continue; + + if (vma->vm_file) + continue; + + if (!rp.nr_to_reclaim) + break; + + rp.vma = vma; + walk_page_range(vma->vm_start, vma->vm_end, + &reclaim_walk); + } + + flush_tlb_mm(mm); + up_read(&mm->mmap_sem); + mmput(mm); +out: + put_task_struct(task); + return rp; +} + +static ssize_t reclaim_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task; + char buffer[200]; + struct mm_struct *mm; + struct vm_area_struct *vma; + enum reclaim_type type; + char *type_buf; + struct mm_walk reclaim_walk = {}; + unsigned long start = 0; + unsigned long end = 0; + struct reclaim_param rp; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + + if (copy_from_user(buffer, buf, count)) + return -EFAULT; + + type_buf = strstrip(buffer); + if (!strcmp(type_buf, "file")) + type = RECLAIM_FILE; + else if (!strcmp(type_buf, "anon")) + type = RECLAIM_ANON; + else if (!strcmp(type_buf, "all")) + type = RECLAIM_ALL; + else if (isdigit(*type_buf)) + type = RECLAIM_RANGE; + else + goto out_err; + + if (type == RECLAIM_RANGE) { + char *token; + unsigned long long len, len_in, tmp; + token = strsep(&type_buf, " "); + if (!token) + goto out_err; + tmp = memparse(token, &token); + if (tmp & ~PAGE_MASK || tmp > ULONG_MAX) + goto out_err; + start = tmp; + + token = strsep(&type_buf, " "); + if (!token) + goto out_err; + len_in = memparse(token, &token); + len = (len_in + ~PAGE_MASK) & PAGE_MASK; + if (len > 
ULONG_MAX)
+			goto out_err;
+		/*
+		 * Check to see whether len was rounded up from small -ve
+		 * to zero.
+		 */
+		if (len_in && !len)
+			goto out_err;
+
+		end = start + len;
+		if (end < start)
+			goto out_err;
+	}
+
+	task = get_proc_task(file->f_path.dentry->d_inode);
+	if (!task)
+		return -ESRCH;
+
+	mm = get_task_mm(task);
+	if (!mm)
+		goto out;
+
+	reclaim_walk.mm = mm;
+	reclaim_walk.pmd_entry = reclaim_pte_range;
+
+	rp.nr_to_reclaim = INT_MAX; /* "no limit"; ~0 is -1 if the field is signed */
+	rp.nr_reclaimed = 0;
+	rp.nr_scanned = 0;
+	reclaim_walk.private = &rp;
+
+	down_read(&mm->mmap_sem);
+	if (type == RECLAIM_RANGE) {
+		/* Step in the for-header so "continue" cannot spin forever. */
+		for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
+			if (vma->vm_start > end)
+				break;
+			if (is_vm_hugetlb_page(vma))
+				continue;
+
+			rp.vma = vma;
+			walk_page_range(max(vma->vm_start, start),
+					min(vma->vm_end, end),
+					&reclaim_walk);
+		}
+	} else {
+		for (vma = mm->mmap; vma; vma = vma->vm_next) {
+			if (is_vm_hugetlb_page(vma))
+				continue;
+
+			if (type == RECLAIM_ANON && vma->vm_file)
+				continue;
+
+			if (type == RECLAIM_FILE && !vma->vm_file)
+				continue;
+
+			rp.vma = vma;
+			walk_page_range(vma->vm_start, vma->vm_end,
+				&reclaim_walk);
+		}
+	}
+
+	flush_tlb_mm(mm);
+	up_read(&mm->mmap_sem);
+	mmput(mm);
+out:
+	put_task_struct(task);
+	return count;
+
+out_err:
+	return -EINVAL;
+}
+ +const struct file_operations proc_reclaim_operations = { + .write = reclaim_write, + .llseek = noop_llseek, +}; +#endif + #ifdef CONFIG_NUMA struct numa_maps { diff --git a/fs/timerfd.c b/fs/timerfd.c index 053818dd6c18..815e5348f048 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -49,7 +49,8 @@ static DEFINE_SPINLOCK(cancel_lock); static inline bool isalarm(struct timerfd_ctx *ctx) { return ctx->clockid == CLOCK_REALTIME_ALARM || - ctx->clockid == CLOCK_BOOTTIME_ALARM; + ctx->clockid == CLOCK_BOOTTIME_ALARM || + ctx->clockid == CLOCK_POWEROFF_ALARM; } /* @@ -133,7 +134,8 @@ static bool timerfd_canceled(struct timerfd_ctx *ctx) static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) { if ((ctx->clockid == 
CLOCK_REALTIME || - ctx->clockid == CLOCK_REALTIME_ALARM) && + ctx->clockid == CLOCK_REALTIME_ALARM || + ctx->clockid == CLOCK_POWEROFF_ALARM) && (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) { if (!ctx->might_cancel) { ctx->might_cancel = true; @@ -164,6 +166,7 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags, enum hrtimer_mode htmode; ktime_t texp; int clockid = ctx->clockid; + enum alarmtimer_type type; htmode = (flags & TFD_TIMER_ABSTIME) ? HRTIMER_MODE_ABS: HRTIMER_MODE_REL; @@ -174,10 +177,8 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags, ctx->tintv = timespec_to_ktime(ktmr->it_interval); if (isalarm(ctx)) { - alarm_init(&ctx->t.alarm, - ctx->clockid == CLOCK_REALTIME_ALARM ? - ALARM_REALTIME : ALARM_BOOTTIME, - timerfd_alarmproc); + type = clock2alarm(ctx->clockid); + alarm_init(&ctx->t.alarm, type, timerfd_alarmproc); } else { hrtimer_init(&ctx->t.tmr, clockid, htmode); hrtimer_set_expires(&ctx->t.tmr, texp); @@ -377,6 +378,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) { int ufd; struct timerfd_ctx *ctx; + enum alarmtimer_type type; /* Check the TFD_* constants for consistency. */ BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC); @@ -387,7 +389,8 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) clockid != CLOCK_REALTIME && clockid != CLOCK_REALTIME_ALARM && clockid != CLOCK_BOOTTIME && - clockid != CLOCK_BOOTTIME_ALARM)) + clockid != CLOCK_BOOTTIME_ALARM && + clockid != CLOCK_POWEROFF_ALARM)) return -EINVAL; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); @@ -397,13 +400,12 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) init_waitqueue_head(&ctx->wqh); ctx->clockid = clockid; - if (isalarm(ctx)) - alarm_init(&ctx->t.alarm, - ctx->clockid == CLOCK_REALTIME_ALARM ? 
- ALARM_REALTIME : ALARM_BOOTTIME, - timerfd_alarmproc); - else + if (isalarm(ctx)) { + type = clock2alarm(ctx->clockid); + alarm_init(&ctx->t.alarm, type, timerfd_alarmproc); + } else { hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS); + } ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 }); @@ -475,6 +477,10 @@ static int do_timerfd_settime(int ufd, int flags, ret = timerfd_setup(ctx, flags, new); spin_unlock_irq(&ctx->wqh.lock); + + if (ctx->clockid == CLOCK_POWEROFF_ALARM) + set_power_on_alarm(); + fdput(f); return ret; } diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index 92a8491a8f8c..c0a95e393347 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h @@ -34,6 +34,12 @@ * node. We use "r5" hash borrowed from reiserfs. */ +/* + * Lots of the key helpers require a struct ubifs_info *c as the first parameter. + * But we are not using it at all currently. That's designed for future extensions of + * different c->key_format. But right now, there is only one key type, UBIFS_SIMPLE_KEY_FMT. + */ + #ifndef __UBIFS_KEY_H__ #define __UBIFS_KEY_H__ diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index e8b01b721e99..e53292d0c21b 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -267,7 +267,7 @@ static int check_namespace(const struct qstr *nm) if (!strncmp(nm->name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) { - if (nm->name[sizeof(XATTR_TRUSTED_PREFIX) - 1] == '\0') + if (nm->name[XATTR_TRUSTED_PREFIX_LEN] == '\0') return -EINVAL; type = TRUSTED_XATTR; } else if (!strncmp(nm->name, XATTR_USER_PREFIX, @@ -277,7 +277,7 @@ static int check_namespace(const struct qstr *nm) type = USER_XATTR; } else if (!strncmp(nm->name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) { - if (nm->name[sizeof(XATTR_SECURITY_PREFIX) - 1] == '\0') + if (nm->name[XATTR_SECURITY_PREFIX_LEN] == '\0') return -EINVAL; type = SECURITY_XATTR; } else |