summaryrefslogtreecommitdiff
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--fs/btrfs/disk-io.c230
1 files changed, 136 insertions, 94 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 22e98e04c2ea..7cda51995c1e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -46,6 +46,10 @@
#include "check-integrity.h"
#include "rcu-string.h"
+#ifdef CONFIG_X86
+#include <asm/cpufeature.h>
+#endif
+
static struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
static void free_fs_root(struct btrfs_root *root);
@@ -217,26 +221,16 @@ static struct extent_map *btree_get_extent(struct inode *inode,
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
if (ret == -EEXIST) {
- u64 failed_start = em->start;
- u64 failed_len = em->len;
-
free_extent_map(em);
em = lookup_extent_mapping(em_tree, start, len);
- if (em) {
- ret = 0;
- } else {
- em = lookup_extent_mapping(em_tree, failed_start,
- failed_len);
- ret = -EIO;
- }
+ if (!em)
+ em = ERR_PTR(-EIO);
} else if (ret) {
free_extent_map(em);
- em = NULL;
+ em = ERR_PTR(ret);
}
write_unlock(&em_tree->lock);
- if (ret)
- em = ERR_PTR(ret);
out:
return em;
}
@@ -439,10 +433,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
WARN_ON(1);
return 0;
}
- if (eb->pages[0] != page) {
- WARN_ON(1);
- return 0;
- }
if (!PageUptodate(page)) {
WARN_ON(1);
return 0;
@@ -869,10 +859,22 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
}
+static int check_async_write(struct inode *inode, unsigned long bio_flags)
+{
+ if (bio_flags & EXTENT_BIO_TREE_LOG)
+ return 0;
+#ifdef CONFIG_X86
+ if (cpu_has_xmm4_2)
+ return 0;
+#endif
+ return 1;
+}
+
static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset)
{
+ int async = check_async_write(inode, bio_flags);
int ret;
if (!(rw & REQ_WRITE)) {
@@ -887,6 +889,12 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
return ret;
return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
mirror_num, 0);
+ } else if (!async) {
+ ret = btree_csum_one_bio(bio);
+ if (ret)
+ return ret;
+ return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
+ mirror_num, 0);
}
/*
@@ -1168,8 +1176,8 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
atomic_set(&root->log_commit[0], 0);
atomic_set(&root->log_commit[1], 0);
atomic_set(&root->log_writers, 0);
+ atomic_set(&root->log_batch, 0);
atomic_set(&root->orphan_inodes, 0);
- root->log_batch = 0;
root->log_transid = 0;
root->last_log_commit = 0;
extent_io_tree_init(&root->dirty_log_pages,
@@ -1667,9 +1675,10 @@ static int transaction_kthread(void *arg)
spin_unlock(&root->fs_info->trans_lock);
/* If the file system is aborted, this will always fail. */
- trans = btrfs_join_transaction(root);
+ trans = btrfs_attach_transaction(root);
if (IS_ERR(trans)) {
- cannot_commit = true;
+ if (PTR_ERR(trans) != -ENOENT)
+ cannot_commit = true;
goto sleep;
}
if (transid == trans->transid) {
@@ -1994,13 +2003,11 @@ int open_ctree(struct super_block *sb,
INIT_LIST_HEAD(&fs_info->trans_list);
INIT_LIST_HEAD(&fs_info->dead_roots);
INIT_LIST_HEAD(&fs_info->delayed_iputs);
- INIT_LIST_HEAD(&fs_info->hashers);
INIT_LIST_HEAD(&fs_info->delalloc_inodes);
INIT_LIST_HEAD(&fs_info->ordered_operations);
INIT_LIST_HEAD(&fs_info->caching_block_groups);
spin_lock_init(&fs_info->delalloc_lock);
spin_lock_init(&fs_info->trans_lock);
- spin_lock_init(&fs_info->ref_cache_lock);
spin_lock_init(&fs_info->fs_roots_radix_lock);
spin_lock_init(&fs_info->delayed_iput_lock);
spin_lock_init(&fs_info->defrag_inodes_lock);
@@ -2014,12 +2021,15 @@ int open_ctree(struct super_block *sb,
INIT_LIST_HEAD(&fs_info->space_info);
INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
btrfs_mapping_init(&fs_info->mapping_tree);
- btrfs_init_block_rsv(&fs_info->global_block_rsv);
- btrfs_init_block_rsv(&fs_info->delalloc_block_rsv);
- btrfs_init_block_rsv(&fs_info->trans_block_rsv);
- btrfs_init_block_rsv(&fs_info->chunk_block_rsv);
- btrfs_init_block_rsv(&fs_info->empty_block_rsv);
- btrfs_init_block_rsv(&fs_info->delayed_block_rsv);
+ btrfs_init_block_rsv(&fs_info->global_block_rsv,
+ BTRFS_BLOCK_RSV_GLOBAL);
+ btrfs_init_block_rsv(&fs_info->delalloc_block_rsv,
+ BTRFS_BLOCK_RSV_DELALLOC);
+ btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS);
+ btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK);
+ btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY);
+ btrfs_init_block_rsv(&fs_info->delayed_block_rsv,
+ BTRFS_BLOCK_RSV_DELOPS);
atomic_set(&fs_info->nr_async_submits, 0);
atomic_set(&fs_info->async_delalloc_pages, 0);
atomic_set(&fs_info->async_submit_draining, 0);
@@ -2491,6 +2501,8 @@ retry_root_backup:
printk(KERN_ERR "Failed to read block groups: %d\n", ret);
goto fail_block_groups;
}
+ fs_info->num_tolerated_disk_barrier_failures =
+ btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
"btrfs-cleaner");
@@ -2874,12 +2886,10 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
printk_in_rcu("btrfs: disabling barriers on dev %s\n",
rcu_str_deref(device->name));
device->nobarriers = 1;
- }
- if (!bio_flagged(bio, BIO_UPTODATE)) {
+ } else if (!bio_flagged(bio, BIO_UPTODATE)) {
ret = -EIO;
- if (!bio_flagged(bio, BIO_EOPNOTSUPP))
- btrfs_dev_stat_inc_and_print(device,
- BTRFS_DEV_STAT_FLUSH_ERRS);
+ btrfs_dev_stat_inc_and_print(device,
+ BTRFS_DEV_STAT_FLUSH_ERRS);
}
/* drop the reference from the wait == 0 run */
@@ -2918,14 +2928,15 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
{
struct list_head *head;
struct btrfs_device *dev;
- int errors = 0;
+ int errors_send = 0;
+ int errors_wait = 0;
int ret;
/* send down all the barriers */
head = &info->fs_devices->devices;
list_for_each_entry_rcu(dev, head, dev_list) {
if (!dev->bdev) {
- errors++;
+ errors_send++;
continue;
}
if (!dev->in_fs_metadata || !dev->writeable)
@@ -2933,13 +2944,13 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
ret = write_dev_flush(dev, 0);
if (ret)
- errors++;
+ errors_send++;
}
/* wait for all the barriers */
list_for_each_entry_rcu(dev, head, dev_list) {
if (!dev->bdev) {
- errors++;
+ errors_wait++;
continue;
}
if (!dev->in_fs_metadata || !dev->writeable)
@@ -2947,13 +2958,87 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
ret = write_dev_flush(dev, 1);
if (ret)
- errors++;
+ errors_wait++;
}
- if (errors)
+ if (errors_send > info->num_tolerated_disk_barrier_failures ||
+ errors_wait > info->num_tolerated_disk_barrier_failures)
return -EIO;
return 0;
}
+int btrfs_calc_num_tolerated_disk_barrier_failures(
+ struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_ioctl_space_info space;
+ struct btrfs_space_info *sinfo;
+ u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
+ BTRFS_BLOCK_GROUP_SYSTEM,
+ BTRFS_BLOCK_GROUP_METADATA,
+ BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
+ int num_types = 4;
+ int i;
+ int c;
+ int num_tolerated_disk_barrier_failures =
+ (int)fs_info->fs_devices->num_devices;
+
+ for (i = 0; i < num_types; i++) {
+ struct btrfs_space_info *tmp;
+
+ sinfo = NULL;
+ rcu_read_lock();
+ list_for_each_entry_rcu(tmp, &fs_info->space_info, list) {
+ if (tmp->flags == types[i]) {
+ sinfo = tmp;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ if (!sinfo)
+ continue;
+
+ down_read(&sinfo->groups_sem);
+ for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
+ if (!list_empty(&sinfo->block_groups[c])) {
+ u64 flags;
+
+ btrfs_get_block_group_info(
+ &sinfo->block_groups[c], &space);
+ if (space.total_bytes == 0 ||
+ space.used_bytes == 0)
+ continue;
+ flags = space.flags;
+ /*
+ * return
+ * 0: if dup, single or RAID0 is configured for
+ * any of metadata, system or data, else
+ * 1: if RAID5 is configured, or if RAID1 or
+ * RAID10 is configured and only two mirrors
+ * are used, else
+ * 2: if RAID6 is configured, else
+ * num_mirrors - 1: if RAID1 or RAID10 is
+ * configured and more than
+ * 2 mirrors are used.
+ */
+ if (num_tolerated_disk_barrier_failures > 0 &&
+ ((flags & (BTRFS_BLOCK_GROUP_DUP |
+ BTRFS_BLOCK_GROUP_RAID0)) ||
+ ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)
+ == 0)))
+ num_tolerated_disk_barrier_failures = 0;
+ else if (num_tolerated_disk_barrier_failures > 1
+ &&
+ (flags & (BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID10)))
+ num_tolerated_disk_barrier_failures = 1;
+ }
+ }
+ up_read(&sinfo->groups_sem);
+ }
+
+ return num_tolerated_disk_barrier_failures;
+}
+
int write_all_supers(struct btrfs_root *root, int max_mirrors)
{
struct list_head *head;
@@ -2976,8 +3061,16 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
head = &root->fs_info->fs_devices->devices;
- if (do_barriers)
- barrier_all_devices(root->fs_info);
+ if (do_barriers) {
+ ret = barrier_all_devices(root->fs_info);
+ if (ret) {
+ mutex_unlock(
+ &root->fs_info->fs_devices->device_list_mutex);
+ btrfs_error(root->fs_info, ret,
+ "errors while submitting device barriers.");
+ return ret;
+ }
+ }
list_for_each_entry_rcu(dev, head, dev_list) {
if (!dev->bdev) {
@@ -3211,10 +3304,6 @@ int close_ctree(struct btrfs_root *root)
printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
(unsigned long long)fs_info->delalloc_bytes);
}
- if (fs_info->total_ref_cache_size) {
- printk(KERN_INFO "btrfs: at umount reference cache size %llu\n",
- (unsigned long long)fs_info->total_ref_cache_size);
- }
free_extent_buffer(fs_info->extent_root->node);
free_extent_buffer(fs_info->extent_root->commit_root);
@@ -3360,52 +3449,6 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
return btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
}
-int btree_lock_page_hook(struct page *page, void *data,
- void (*flush_fn)(void *))
-{
- struct inode *inode = page->mapping->host;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_buffer *eb;
-
- /*
- * We culled this eb but the page is still hanging out on the mapping,
- * carry on.
- */
- if (!PagePrivate(page))
- goto out;
-
- eb = (struct extent_buffer *)page->private;
- if (!eb) {
- WARN_ON(1);
- goto out;
- }
- if (page != eb->pages[0])
- goto out;
-
- if (!btrfs_try_tree_write_lock(eb)) {
- flush_fn(data);
- btrfs_tree_lock(eb);
- }
- btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
-
- if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
- spin_lock(&root->fs_info->delalloc_lock);
- if (root->fs_info->dirty_metadata_bytes >= eb->len)
- root->fs_info->dirty_metadata_bytes -= eb->len;
- else
- WARN_ON(1);
- spin_unlock(&root->fs_info->delalloc_lock);
- }
-
- btrfs_tree_unlock(eb);
-out:
- if (!trylock_page(page)) {
- flush_fn(data);
- lock_page(page);
- }
- return 0;
-}
-
static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
int read_only)
{
@@ -3608,7 +3651,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
while (1) {
ret = find_first_extent_bit(dirty_pages, start, &start, &end,
- mark);
+ mark, NULL);
if (ret)
break;
@@ -3663,7 +3706,7 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
again:
while (1) {
ret = find_first_extent_bit(unpin, 0, &start, &end,
- EXTENT_DIRTY);
+ EXTENT_DIRTY, NULL);
if (ret)
break;
@@ -3800,7 +3843,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
}
static struct extent_io_ops btree_extent_io_ops = {
- .write_cache_pages_lock_hook = btree_lock_page_hook,
.readpage_end_io_hook = btree_readpage_end_io_hook,
.readpage_io_failed_hook = btree_io_failed_hook,
.submit_bio_hook = btree_submit_bio_hook,