From 0253f40ef9a709a1af39ce38b1d998af090f8127 Mon Sep 17 00:00:00 2001 From: "jeff.liu" Date: Sat, 27 Oct 2012 12:06:39 +0000 Subject: Btrfs: Remove the invalid shrink size check up from btrfs_shrink_dev() Remove an invalid size check up from btrfs_shrink_dev(). The new size cannot be larger than device->total_bytes, as that was already verified before getting here (i.e. new_size < old_size). Remove invalid check up for btrfs_shrink_dev(). Signed-off-by: Jie Liu Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8fcf9a59c28d..14c0d2e0790c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1409,7 +1409,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, btrfs_commit_transaction(trans, root); } else if (new_size < old_size) { ret = btrfs_shrink_device(device, new_size); - } + } /* equal, nothing need to do */ out_free: kfree(vol_args); -- cgit v1.2.3 From 109f2365f1928af241b2ccbd0f6ba0b93d911288 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 5 Nov 2012 12:42:09 +0000 Subject: Btrfs: fix a double free on pending snapshots in error handling When creating a snapshot, failing to commit a transaction can end up aborting the transaction, followed by a cleanup for it, where we'll free all snapshots pending to disk. So we check for that and avoid a double free of the pending snapshots.
Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 14c0d2e0790c..e262cd8c4a7d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -571,8 +571,12 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, ret = btrfs_commit_transaction(trans, root->fs_info->extent_root); } - if (ret) + if (ret) { + /* cleanup_transaction has freed this for us */ + if (trans->aborted) + pending_snapshot = NULL; goto fail; + } ret = pending_snapshot->error; if (ret) -- cgit v1.2.3 From aa1b8cd409f05e1489ec77ff219eff6ed4b801b8 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Mon, 5 Nov 2012 17:03:39 +0100 Subject: Btrfs: pass fs_info instead of root A small number of functions that are used in a device replace procedure when the operation is resumed at mount time are unable to pass the same root pointer that would be used in the regular (ioctl) context. And since the root pointer is not required, only the fs_info is, the root pointer argument is replaced with the fs_info pointer argument. 
Signed-off-by: Stefan Behrens Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 11 ++++---- fs/btrfs/disk-io.c | 4 +-- fs/btrfs/ioctl.c | 8 +++--- fs/btrfs/scrub.c | 76 ++++++++++++++++++++++++------------------------------ fs/btrfs/super.c | 2 +- fs/btrfs/volumes.c | 23 +++++++++-------- fs/btrfs/volumes.h | 2 +- 7 files changed, 60 insertions(+), 66 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f9a078661ebc..f8bb62c82b0c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3540,15 +3540,16 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, struct btrfs_pending_snapshot *pending); /* scrub.c */ -int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, - struct btrfs_scrub_progress *progress, int readonly); +int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, + u64 end, struct btrfs_scrub_progress *progress, + int readonly); void btrfs_scrub_pause(struct btrfs_root *root); void btrfs_scrub_pause_super(struct btrfs_root *root); void btrfs_scrub_continue(struct btrfs_root *root); void btrfs_scrub_continue_super(struct btrfs_root *root); -int __btrfs_scrub_cancel(struct btrfs_fs_info *info); -int btrfs_scrub_cancel(struct btrfs_root *root); -int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev); +int btrfs_scrub_cancel(struct btrfs_fs_info *info); +int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info, + struct btrfs_device *dev); int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid); int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, struct btrfs_scrub_progress *progress); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ba2b931fd8f6..42a8024e935f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3283,9 +3283,9 @@ int close_ctree(struct btrfs_root *root) smp_mb(); /* pause restriper - we want to resume on mount */ - btrfs_pause_balance(root->fs_info); + btrfs_pause_balance(fs_info); - 
btrfs_scrub_cancel(root); + btrfs_scrub_cancel(fs_info); /* wait for any defraggers to finish */ wait_event(fs_info->transaction_wait, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index e262cd8c4a7d..b40b827f93e7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1343,7 +1343,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, printk(KERN_INFO "btrfs: resizing devid %llu\n", (unsigned long long)devid); } - device = btrfs_find_device(root, devid, NULL, NULL); + device = btrfs_find_device(root->fs_info, devid, NULL, NULL); if (!device) { printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", (unsigned long long)devid); @@ -2332,7 +2332,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) s_uuid = di_args->uuid; mutex_lock(&fs_devices->device_list_mutex); - dev = btrfs_find_device(root, di_args->devid, s_uuid, NULL); + dev = btrfs_find_device(root->fs_info, di_args->devid, s_uuid, NULL); mutex_unlock(&fs_devices->device_list_mutex); if (!dev) { @@ -3089,7 +3089,7 @@ static long btrfs_ioctl_scrub(struct btrfs_root *root, void __user *arg) if (IS_ERR(sa)) return PTR_ERR(sa); - ret = btrfs_scrub_dev(root, sa->devid, sa->start, sa->end, + ret = btrfs_scrub_dev(root->fs_info, sa->devid, sa->start, sa->end, &sa->progress, sa->flags & BTRFS_SCRUB_READONLY); if (copy_to_user(arg, sa, sizeof(*sa))) @@ -3104,7 +3104,7 @@ static long btrfs_ioctl_scrub_cancel(struct btrfs_root *root, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - return btrfs_scrub_cancel(root); + return btrfs_scrub_cancel(root->fs_info); } static long btrfs_ioctl_scrub_progress(struct btrfs_root *root, diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 894bb2732fcc..6cf23f4f7bb7 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2262,9 +2262,8 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, /* * get a reference count on fs_info->scrub_workers. 
start worker if necessary */ -static noinline_for_stack int scrub_workers_get(struct btrfs_root *root) +static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info) { - struct btrfs_fs_info *fs_info = root->fs_info; int ret = 0; mutex_lock(&fs_info->scrub_lock); @@ -2283,10 +2282,8 @@ out: return ret; } -static noinline_for_stack void scrub_workers_put(struct btrfs_root *root) +static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) { - struct btrfs_fs_info *fs_info = root->fs_info; - mutex_lock(&fs_info->scrub_lock); if (--fs_info->scrub_workers_refcnt == 0) btrfs_stop_workers(&fs_info->scrub_workers); @@ -2294,29 +2291,29 @@ static noinline_for_stack void scrub_workers_put(struct btrfs_root *root) mutex_unlock(&fs_info->scrub_lock); } - -int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, - struct btrfs_scrub_progress *progress, int readonly) +int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, + u64 end, struct btrfs_scrub_progress *progress, + int readonly) { struct scrub_ctx *sctx; - struct btrfs_fs_info *fs_info = root->fs_info; int ret; struct btrfs_device *dev; - if (btrfs_fs_closing(root->fs_info)) + if (btrfs_fs_closing(fs_info)) return -EINVAL; /* * check some assumptions */ - if (root->nodesize != root->leafsize) { + if (fs_info->chunk_root->nodesize != fs_info->chunk_root->leafsize) { printk(KERN_ERR "btrfs_scrub: size assumption nodesize == leafsize (%d == %d) fails\n", - root->nodesize, root->leafsize); + fs_info->chunk_root->nodesize, + fs_info->chunk_root->leafsize); return -EINVAL; } - if (root->nodesize > BTRFS_STRIPE_LEN) { + if (fs_info->chunk_root->nodesize > BTRFS_STRIPE_LEN) { /* * in this case scrub is unable to calculate the checksum * the way scrub is implemented. 
Do not handle this @@ -2324,15 +2321,16 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, */ printk(KERN_ERR "btrfs_scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails\n", - root->nodesize, BTRFS_STRIPE_LEN); + fs_info->chunk_root->nodesize, BTRFS_STRIPE_LEN); return -EINVAL; } - if (root->sectorsize != PAGE_SIZE) { + if (fs_info->chunk_root->sectorsize != PAGE_SIZE) { /* not supported for data w/o checksums */ printk(KERN_ERR "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lld) fails\n", - root->sectorsize, (unsigned long long)PAGE_SIZE); + fs_info->chunk_root->sectorsize, + (unsigned long long)PAGE_SIZE); return -EINVAL; } @@ -2352,37 +2350,37 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, return -EINVAL; } - ret = scrub_workers_get(root); + ret = scrub_workers_get(fs_info); if (ret) return ret; - mutex_lock(&root->fs_info->fs_devices->device_list_mutex); - dev = btrfs_find_device(root, devid, NULL, NULL); + mutex_lock(&fs_info->fs_devices->device_list_mutex); + dev = btrfs_find_device(fs_info, devid, NULL, NULL); if (!dev || dev->missing) { - mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); - scrub_workers_put(root); + mutex_unlock(&fs_info->fs_devices->device_list_mutex); + scrub_workers_put(fs_info); return -ENODEV; } mutex_lock(&fs_info->scrub_lock); if (!dev->in_fs_metadata) { mutex_unlock(&fs_info->scrub_lock); - mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); - scrub_workers_put(root); - return -ENODEV; + mutex_unlock(&fs_info->fs_devices->device_list_mutex); + scrub_workers_put(fs_info); + return -EIO; } if (dev->scrub_device) { mutex_unlock(&fs_info->scrub_lock); - mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); - scrub_workers_put(root); + mutex_unlock(&fs_info->fs_devices->device_list_mutex); + scrub_workers_put(fs_info); return -EINPROGRESS; } sctx = scrub_setup_ctx(dev); if (IS_ERR(sctx)) { 
mutex_unlock(&fs_info->scrub_lock); - mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); - scrub_workers_put(root); + mutex_unlock(&fs_info->fs_devices->device_list_mutex); + scrub_workers_put(fs_info); return PTR_ERR(sctx); } sctx->readonly = readonly; @@ -2390,7 +2388,7 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, atomic_inc(&fs_info->scrubs_running); mutex_unlock(&fs_info->scrub_lock); - mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); + mutex_unlock(&fs_info->fs_devices->device_list_mutex); down_read(&fs_info->scrub_super_lock); ret = scrub_supers(sctx, dev); @@ -2413,7 +2411,7 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, mutex_unlock(&fs_info->scrub_lock); scrub_free_ctx(sctx); - scrub_workers_put(root); + scrub_workers_put(fs_info); return ret; } @@ -2453,9 +2451,8 @@ void btrfs_scrub_continue_super(struct btrfs_root *root) up_write(&root->fs_info->scrub_super_lock); } -int __btrfs_scrub_cancel(struct btrfs_fs_info *fs_info) +int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info) { - mutex_lock(&fs_info->scrub_lock); if (!atomic_read(&fs_info->scrubs_running)) { mutex_unlock(&fs_info->scrub_lock); @@ -2475,14 +2472,9 @@ int __btrfs_scrub_cancel(struct btrfs_fs_info *fs_info) return 0; } -int btrfs_scrub_cancel(struct btrfs_root *root) +int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info, + struct btrfs_device *dev) { - return __btrfs_scrub_cancel(root->fs_info); -} - -int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev) -{ - struct btrfs_fs_info *fs_info = root->fs_info; struct scrub_ctx *sctx; mutex_lock(&fs_info->scrub_lock); @@ -2514,12 +2506,12 @@ int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid) * does not go away in cancel_dev. 
FIXME: find a better solution */ mutex_lock(&fs_info->fs_devices->device_list_mutex); - dev = btrfs_find_device(root, devid, NULL, NULL); + dev = btrfs_find_device(fs_info, devid, NULL, NULL); if (!dev) { mutex_unlock(&fs_info->fs_devices->device_list_mutex); return -ENODEV; } - ret = btrfs_scrub_cancel_dev(root, dev); + ret = btrfs_scrub_cancel_dev(fs_info, dev); mutex_unlock(&fs_info->fs_devices->device_list_mutex); return ret; @@ -2532,7 +2524,7 @@ int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, struct scrub_ctx *sctx = NULL; mutex_lock(&root->fs_info->fs_devices->device_list_mutex); - dev = btrfs_find_device(root, devid, NULL, NULL); + dev = btrfs_find_device(root->fs_info, devid, NULL, NULL); if (dev) sctx = dev->scrub_device; if (sctx) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index acd2df85bed5..a1a6c296ddcd 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -116,7 +116,7 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info) if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { sb->s_flags |= MS_RDONLY; printk(KERN_INFO "btrfs is forced readonly\n"); - __btrfs_scrub_cancel(fs_info); + btrfs_scrub_cancel(fs_info); // WARN_ON(1); } } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6cd8a32c4484..d2c0bccca607 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1398,7 +1398,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) disk_super = (struct btrfs_super_block *)bh->b_data; devid = btrfs_stack_device_id(&disk_super->dev_item); dev_uuid = disk_super->dev_item.uuid; - device = btrfs_find_device(root, devid, dev_uuid, + device = btrfs_find_device(root->fs_info, devid, dev_uuid, disk_super->fsid); if (!device) { ret = -ENOENT; @@ -1435,7 +1435,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) spin_unlock(&root->fs_info->free_chunk_lock); device->in_fs_metadata = 0; - btrfs_scrub_cancel_dev(root, device); + btrfs_scrub_cancel_dev(root->fs_info, device); /* * the device list 
mutex makes sure that we don't change @@ -1492,7 +1492,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) * at this point, the device is zero sized. We want to * remove it from the devices list and zero out the old super */ - if (clear_super) { + if (clear_super && disk_super) { /* make sure this device isn't detected as part of * the FS anymore */ @@ -1540,7 +1540,7 @@ int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path, disk_super = (struct btrfs_super_block *)bh->b_data; devid = btrfs_stack_device_id(&disk_super->dev_item); dev_uuid = disk_super->dev_item.uuid; - *device = btrfs_find_device(root, devid, dev_uuid, + *device = btrfs_find_device(root->fs_info, devid, dev_uuid, disk_super->fsid); brelse(bh); if (!*device) @@ -1699,7 +1699,8 @@ next_slot: read_extent_buffer(leaf, fs_uuid, (unsigned long)btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE); - device = btrfs_find_device(root, devid, dev_uuid, fs_uuid); + device = btrfs_find_device(root->fs_info, devid, dev_uuid, + fs_uuid); BUG_ON(!device); /* Logic error */ if (device->fs_devices->seeding) { @@ -4463,13 +4464,13 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, return 0; } -struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, +struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid, u8 *uuid, u8 *fsid) { struct btrfs_device *device; struct btrfs_fs_devices *cur_devices; - cur_devices = root->fs_info->fs_devices; + cur_devices = fs_info->fs_devices; while (cur_devices) { if (!fsid || !memcmp(cur_devices->fsid, fsid, BTRFS_UUID_SIZE)) { @@ -4567,8 +4568,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, read_extent_buffer(leaf, uuid, (unsigned long) btrfs_stripe_dev_uuid_nr(chunk, i), BTRFS_UUID_SIZE); - map->stripes[i].dev = btrfs_find_device(root, devid, uuid, - NULL); + map->stripes[i].dev = btrfs_find_device(root->fs_info, devid, + uuid, NULL); if (!map->stripes[i].dev && 
!btrfs_test_opt(root, DEGRADED)) { kfree(map); free_extent_map(em); @@ -4686,7 +4687,7 @@ static int read_one_dev(struct btrfs_root *root, return ret; } - device = btrfs_find_device(root, devid, dev_uuid, fs_uuid); + device = btrfs_find_device(root->fs_info, devid, dev_uuid, fs_uuid); if (!device || !device->bdev) { if (!btrfs_test_opt(root, DEGRADED)) return -EIO; @@ -5078,7 +5079,7 @@ int btrfs_get_dev_stats(struct btrfs_root *root, int i; mutex_lock(&fs_devices->device_list_mutex); - dev = btrfs_find_device(root, stats->devid, NULL, NULL); + dev = btrfs_find_device(root->fs_info, stats->devid, NULL, NULL); mutex_unlock(&fs_devices->device_list_mutex); if (!dev) { diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 7eaaf4e61959..802e2ba02f09 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -281,7 +281,7 @@ void btrfs_cleanup_fs_uuids(void); int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); int btrfs_grow_device(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 new_size); -struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, +struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid, u8 *uuid, u8 *fsid); int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); int btrfs_init_new_device(struct btrfs_root *root, char *path); -- cgit v1.2.3 From 5ac00addc7ac09110995fe967071d191b5981cc1 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Mon, 5 Nov 2012 17:54:08 +0100 Subject: Btrfs: disallow mutually exclusive admin operations from user mode Btrfs admin operations that are manually started from user mode and that cannot be executed at the same time return -EINPROGRESS. A common way to enter and leave this locked section is introduced since it used to be specific to the balance operation. 
Signed-off-by: Stefan Behrens Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/ioctl.c | 53 ++++++++++++++++++++++++++++++++++++----------------- fs/btrfs/volumes.c | 2 ++ 3 files changed, 40 insertions(+), 17 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 147406d0f9a9..e9dc78014f09 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1527,6 +1527,8 @@ struct btrfs_fs_info { /* device replace state */ struct btrfs_dev_replace dev_replace; + + atomic_t mutually_exclusive_operation_running; }; /* diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b40b827f93e7..26f46dad3b0e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1317,13 +1317,13 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - mutex_lock(&root->fs_info->volume_mutex); - if (root->fs_info->balance_ctl) { - printk(KERN_INFO "btrfs: balance in progress\n"); - ret = -EINVAL; - goto out; + if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, + 1)) { + pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); + return -EINPROGRESS; } + mutex_lock(&root->fs_info->volume_mutex); vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) { ret = PTR_ERR(vol_args); @@ -1419,6 +1419,7 @@ out_free: kfree(vol_args); out: mutex_unlock(&root->fs_info->volume_mutex); + atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); return ret; } @@ -2160,9 +2161,17 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) if (btrfs_root_readonly(root)) return -EROFS; + if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, + 1)) { + pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); + return -EINPROGRESS; + } ret = mnt_want_write_file(file); - if (ret) + if (ret) { + atomic_set(&root->fs_info->mutually_exclusive_operation_running, + 0); return ret; + } switch 
(inode->i_mode & S_IFMT) { case S_IFDIR: @@ -2214,6 +2223,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) } out: mnt_drop_write_file(file); + atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); return ret; } @@ -2225,13 +2235,13 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - mutex_lock(&root->fs_info->volume_mutex); - if (root->fs_info->balance_ctl) { - printk(KERN_INFO "btrfs: balance in progress\n"); - ret = -EINVAL; - goto out; + if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, + 1)) { + pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); + return -EINPROGRESS; } + mutex_lock(&root->fs_info->volume_mutex); vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) { ret = PTR_ERR(vol_args); @@ -2244,6 +2254,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) kfree(vol_args); out: mutex_unlock(&root->fs_info->volume_mutex); + atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); return ret; } @@ -2258,13 +2269,13 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) if (root->fs_info->sb->s_flags & MS_RDONLY) return -EROFS; - mutex_lock(&root->fs_info->volume_mutex); - if (root->fs_info->balance_ctl) { - printk(KERN_INFO "btrfs: balance in progress\n"); - ret = -EINVAL; - goto out; + if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, + 1)) { + pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); + return -EINPROGRESS; } + mutex_lock(&root->fs_info->volume_mutex); vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) { ret = PTR_ERR(vol_args); @@ -2277,6 +2288,7 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) kfree(vol_args); out: mutex_unlock(&root->fs_info->volume_mutex); + 
atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); return ret; } @@ -3319,6 +3331,7 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) struct btrfs_ioctl_balance_args *bargs; struct btrfs_balance_control *bctl; int ret; + int need_to_clear_lock = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -3354,10 +3367,13 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) bargs = NULL; } - if (fs_info->balance_ctl) { + if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, + 1)) { + pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); ret = -EINPROGRESS; goto out_bargs; } + need_to_clear_lock = 1; bctl = kzalloc(sizeof(*bctl), GFP_NOFS); if (!bctl) { @@ -3391,6 +3407,9 @@ do_balance: out_bargs: kfree(bargs); out: + if (need_to_clear_lock) + atomic_set(&root->fs_info->mutually_exclusive_operation_running, + 0); mutex_unlock(&fs_info->balance_mutex); mutex_unlock(&fs_info->volume_mutex); mnt_drop_write_file(file); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d2c0bccca607..33ca36b37a6a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2952,6 +2952,7 @@ static int balance_kthread(void *data) ret = btrfs_balance(fs_info->balance_ctl, NULL); } + atomic_set(&fs_info->mutually_exclusive_operation_running, 0); mutex_unlock(&fs_info->balance_mutex); mutex_unlock(&fs_info->volume_mutex); @@ -2974,6 +2975,7 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info) return 0; } + WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)); tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); if (IS_ERR(tsk)) return PTR_ERR(tsk); -- cgit v1.2.3 From 63a212abc2315972b245f93cb11ae3acf3c0b513 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Mon, 5 Nov 2012 18:29:28 +0100 Subject: Btrfs: disallow some operations on the device replace target device This patch adds some code to disallow operations on the device that is used as the target 
for the device replace operation. Signed-off-by: Stefan Behrens Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 3 ++- fs/btrfs/ioctl.c | 8 +++++++- fs/btrfs/scrub.c | 14 +++++++++----- fs/btrfs/super.c | 3 ++- fs/btrfs/volumes.c | 41 ++++++++++++++++++++++++++++++++--------- fs/btrfs/volumes.h | 1 + 7 files changed, 54 insertions(+), 18 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e9dc78014f09..746cb6aa1f62 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3649,7 +3649,7 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, /* scrub.c */ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, u64 end, struct btrfs_scrub_progress *progress, - int readonly); + int readonly, int is_dev_replace); void btrfs_scrub_pause(struct btrfs_root *root); void btrfs_scrub_pause_super(struct btrfs_root *root); void btrfs_scrub_continue(struct btrfs_root *root); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b4d438f6c2b3..98af8379895a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7468,7 +7468,8 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) * check to make sure we can actually find a chunk with enough * space to fit our block group in. 
*/ - if (device->total_bytes > device->bytes_used + min_free) { + if (device->total_bytes > device->bytes_used + min_free && + !device->is_tgtdev_for_dev_replace) { ret = find_free_dev_extent(device, min_free, &dev_offset, NULL); if (!ret) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 26f46dad3b0e..e54b5e50c927 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1375,6 +1375,11 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, } } + if (device->is_tgtdev_for_dev_replace) { + ret = -EINVAL; + goto out_free; + } + old_size = device->total_bytes; if (mod < 0) { @@ -3102,7 +3107,8 @@ static long btrfs_ioctl_scrub(struct btrfs_root *root, void __user *arg) return PTR_ERR(sa); ret = btrfs_scrub_dev(root->fs_info, sa->devid, sa->start, sa->end, - &sa->progress, sa->flags & BTRFS_SCRUB_READONLY); + &sa->progress, sa->flags & BTRFS_SCRUB_READONLY, + 0); if (copy_to_user(arg, sa, sizeof(*sa))) ret = -EFAULT; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 6cf23f4f7bb7..460e30bb1884 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -116,6 +116,9 @@ struct scrub_ctx { u32 sectorsize; u32 nodesize; u32 leafsize; + + int is_dev_replace; + /* * statistics */ @@ -284,7 +287,7 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx) } static noinline_for_stack -struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev) +struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) { struct scrub_ctx *sctx; int i; @@ -296,6 +299,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev) sctx = kzalloc(sizeof(*sctx), GFP_NOFS); if (!sctx) goto nomem; + sctx->is_dev_replace = is_dev_replace; sctx->pages_per_bio = pages_per_bio; sctx->curr = -1; sctx->dev_root = dev->dev_root; @@ -2293,7 +2297,7 @@ static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, u64 end, struct btrfs_scrub_progress *progress, - int 
readonly) + int readonly, int is_dev_replace) { struct scrub_ctx *sctx; int ret; @@ -2356,14 +2360,14 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, mutex_lock(&fs_info->fs_devices->device_list_mutex); dev = btrfs_find_device(fs_info, devid, NULL, NULL); - if (!dev || dev->missing) { + if (!dev || (dev->missing && !is_dev_replace)) { mutex_unlock(&fs_info->fs_devices->device_list_mutex); scrub_workers_put(fs_info); return -ENODEV; } mutex_lock(&fs_info->scrub_lock); - if (!dev->in_fs_metadata) { + if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) { mutex_unlock(&fs_info->scrub_lock); mutex_unlock(&fs_info->fs_devices->device_list_mutex); scrub_workers_put(fs_info); @@ -2376,7 +2380,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, scrub_workers_put(fs_info); return -EINPROGRESS; } - sctx = scrub_setup_ctx(dev); + sctx = scrub_setup_ctx(dev, is_dev_replace); if (IS_ERR(sctx)) { mutex_unlock(&fs_info->scrub_lock); mutex_unlock(&fs_info->fs_devices->device_list_mutex); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ef2415896b06..837ad2d27853 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1354,7 +1354,8 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) min_stripe_size = BTRFS_STRIPE_LEN; list_for_each_entry(device, &fs_devices->devices, dev_list) { - if (!device->in_fs_metadata || !device->bdev) + if (!device->in_fs_metadata || !device->bdev || + device->is_tgtdev_for_dev_replace) continue; avail_space = device->total_bytes - device->bytes_used; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 33ca36b37a6a..31f7af878d96 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -518,8 +518,9 @@ again: /* This is the initialized path, it is safe to release the devices. 
*/ list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { if (device->in_fs_metadata) { - if (!latest_transid || - device->generation > latest_transid) { + if (!device->is_tgtdev_for_dev_replace && + (!latest_transid || + device->generation > latest_transid)) { latest_devid = device->devid; latest_transid = device->generation; latest_bdev = device->bdev; @@ -814,7 +815,7 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, *length = 0; - if (start >= device->total_bytes) + if (start >= device->total_bytes || device->is_tgtdev_for_dev_replace) return 0; path = btrfs_alloc_path(); @@ -931,7 +932,7 @@ int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, max_hole_size = 0; hole_size = 0; - if (search_start >= search_end) { + if (search_start >= search_end || device->is_tgtdev_for_dev_replace) { ret = -ENOSPC; goto error; } @@ -1114,6 +1115,7 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_key key; WARN_ON(!device->in_fs_metadata); + WARN_ON(device->is_tgtdev_for_dev_replace); path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -1375,7 +1377,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) * is held. 
*/ list_for_each_entry(tmp, devices, dev_list) { - if (tmp->in_fs_metadata && !tmp->bdev) { + if (tmp->in_fs_metadata && + !tmp->is_tgtdev_for_dev_replace && + !tmp->bdev) { device = tmp; break; } @@ -1406,6 +1410,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) } } + if (device->is_tgtdev_for_dev_replace) { + pr_err("btrfs: unable to remove the dev_replace target dev\n"); + ret = -EINVAL; + goto error_brelse; + } + if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) { printk(KERN_ERR "btrfs: unable to remove the only writeable " "device\n"); @@ -1425,6 +1435,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) if (ret) goto error_undo; + /* + * TODO: the superblock still includes this device in its num_devices + * counter although write_all_supers() is not locked out. This + * could give a filesystem state which requires a degraded mount. + */ ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); if (ret) goto error_undo; @@ -1808,6 +1823,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) device->dev_root = root->fs_info->dev_root; device->bdev = bdev; device->in_fs_metadata = 1; + device->is_tgtdev_for_dev_replace = 0; device->mode = FMODE_EXCL; set_blocksize(device->bdev, 4096); @@ -1971,7 +1987,8 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans, if (!device->writeable) return -EACCES; - if (new_size <= device->total_bytes) + if (new_size <= device->total_bytes || + device->is_tgtdev_for_dev_replace) return -EINVAL; btrfs_set_super_total_bytes(super_copy, old_total + diff); @@ -2600,7 +2617,8 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) size_to_free = div_factor(old_size, 1); size_to_free = min(size_to_free, (u64)1 * 1024 * 1024); if (!device->writeable || - device->total_bytes - device->bytes_used > size_to_free) + device->total_bytes - device->bytes_used > size_to_free || + device->is_tgtdev_for_dev_replace) continue; ret = 
btrfs_shrink_device(device, old_size - size_to_free); @@ -3132,6 +3150,9 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) u64 old_size = device->total_bytes; u64 diff = device->total_bytes - new_size; + if (device->is_tgtdev_for_dev_replace) + return -EINVAL; + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -3401,7 +3422,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, continue; } - if (!device->in_fs_metadata) + if (!device->in_fs_metadata || + device->is_tgtdev_for_dev_replace) continue; if (device->total_bytes > device->bytes_used) @@ -4612,6 +4634,7 @@ static void fill_device_from_item(struct extent_buffer *leaf, device->io_align = btrfs_device_io_align(leaf, dev_item); device->io_width = btrfs_device_io_width(leaf, dev_item); device->sector_size = btrfs_device_sector_size(leaf, dev_item); + device->is_tgtdev_for_dev_replace = 0; ptr = (unsigned long)btrfs_device_uuid(dev_item); read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); @@ -4722,7 +4745,7 @@ static int read_one_dev(struct btrfs_root *root, fill_device_from_item(leaf, dev_item, device); device->dev_root = root->fs_info->dev_root; device->in_fs_metadata = 1; - if (device->writeable) { + if (device->writeable && !device->is_tgtdev_for_dev_replace) { device->fs_devices->total_rw_bytes += device->total_bytes; spin_lock(&root->fs_info->free_chunk_lock); root->fs_info->free_chunk_space += device->total_bytes - diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 802e2ba02f09..8fd5a4d8acc8 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -50,6 +50,7 @@ struct btrfs_device { int in_fs_metadata; int missing; int can_discard; + int is_tgtdev_for_dev_replace; spinlock_t io_lock; -- cgit v1.2.3 From 3f6bcfbd4149875662773eb40a62294cddf215d4 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Tue, 6 Nov 2012 15:08:53 +0100 Subject: Btrfs: add support for device replace ioctls This is the commit that allows to start the device replace 
procedure. An ioctl() interface is added that supports starting and canceling the device replace procedure, and to retrieve the status and progress. Signed-off-by: Stefan Behrens Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/ioctl.h | 7 ++++--- 2 files changed, 52 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index e54b5e50c927..9a71fec86152 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -55,6 +55,7 @@ #include "backref.h" #include "rcu-string.h" #include "send.h" +#include "dev-replace.h" /* Mask out flags that are inappropriate for the given type of inode. */ static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) @@ -3171,6 +3172,51 @@ static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root, return ret; } +static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg) +{ + struct btrfs_ioctl_dev_replace_args *p; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + p = memdup_user(arg, sizeof(*p)); + if (IS_ERR(p)) + return PTR_ERR(p); + + switch (p->cmd) { + case BTRFS_IOCTL_DEV_REPLACE_CMD_START: + if (atomic_xchg( + &root->fs_info->mutually_exclusive_operation_running, + 1)) { + pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); + ret = -EINPROGRESS; + } else { + ret = btrfs_dev_replace_start(root, p); + atomic_set( + &root->fs_info->mutually_exclusive_operation_running, + 0); + } + break; + case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS: + btrfs_dev_replace_status(root->fs_info, p); + ret = 0; + break; + case BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL: + ret = btrfs_dev_replace_cancel(root->fs_info, p); + break; + default: + ret = -EINVAL; + break; + } + + if (copy_to_user(arg, p, sizeof(*p))) + ret = -EFAULT; + + kfree(p); + return ret; +} + static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) { int ret = 0; @@ -3826,6 +3872,8 @@ long 
btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_qgroup_create(root, argp); case BTRFS_IOC_QGROUP_LIMIT: return btrfs_ioctl_qgroup_limit(root, argp); + case BTRFS_IOC_DEV_REPLACE: + return btrfs_ioctl_dev_replace(root, argp); } return -ENOTTY; diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 62006ba02719..dabca9cc8c2e 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -30,6 +30,8 @@ struct btrfs_ioctl_vol_args { char name[BTRFS_PATH_NAME_MAX + 1]; }; +#define BTRFS_DEVICE_PATH_NAME_MAX 1024 + #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) #define BTRFS_SUBVOL_RDONLY (1ULL << 1) #define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) @@ -127,10 +129,10 @@ struct btrfs_ioctl_scrub_args { #define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID 1 struct btrfs_ioctl_dev_replace_start_params { __u64 srcdevid; /* in, if 0, use srcdev_name instead */ - __u8 srcdev_name[BTRFS_PATH_NAME_MAX + 1]; /* in */ - __u8 tgtdev_name[BTRFS_PATH_NAME_MAX + 1]; /* in */ __u64 cont_reading_from_srcdev_mode; /* in, see #define * above */ + __u8 srcdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ + __u8 tgtdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ }; #define BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED 0 @@ -165,7 +167,6 @@ struct btrfs_ioctl_dev_replace_args { __u64 spare[64]; }; -#define BTRFS_DEVICE_PATH_NAME_MAX 1024 struct btrfs_ioctl_dev_info_args { __u64 devid; /* in/out */ __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */ -- cgit v1.2.3 From 9a8c28bec1b40e934ed28149b7eaa7d2fafed92d Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Mon, 26 Nov 2012 08:40:43 +0000 Subject: Btrfs: pass root object into btrfs_ioctl_{start, wait}_sync() Since we have gotten the root in the caller, just pass it into btrfs_ioctl_{start, wait}_sync() directly. 
Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9a71fec86152..5022e62e63a8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3058,9 +3058,9 @@ long btrfs_ioctl_trans_end(struct file *file) return 0; } -static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp) +static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root, + void __user *argp) { - struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; struct btrfs_trans_handle *trans; u64 transid; int ret; @@ -3081,9 +3081,9 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp return 0; } -static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp) +static noinline long btrfs_ioctl_wait_sync(struct btrfs_root *root, + void __user *argp) { - struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; u64 transid; if (argp) { @@ -3843,9 +3843,9 @@ long btrfs_ioctl(struct file *file, unsigned int btrfs_sync_fs(file->f_dentry->d_sb, 1); return 0; case BTRFS_IOC_START_SYNC: - return btrfs_ioctl_start_sync(file, argp); + return btrfs_ioctl_start_sync(root, argp); case BTRFS_IOC_WAIT_SYNC: - return btrfs_ioctl_wait_sync(file, argp); + return btrfs_ioctl_wait_sync(root, argp); case BTRFS_IOC_SCRUB: return btrfs_ioctl_scrub(root, argp); case BTRFS_IOC_SCRUB_CANCEL: -- cgit v1.2.3 From ff7c1d33551862c86f7737fe88edc3e499d291e6 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Mon, 26 Nov 2012 08:41:29 +0000 Subject: Btrfs: don't start a new transaction when starting sync If there is no running transaction in the fs, we needn't start a new one when we want to start sync. 
Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 14 ++++++++++---- fs/btrfs/transaction.c | 13 ++++++++----- 2 files changed, 18 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5022e62e63a8..7b1f614f51f6 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3065,16 +3065,22 @@ static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root, u64 transid; int ret; - trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) - return PTR_ERR(trans); + trans = btrfs_attach_transaction(root); + if (IS_ERR(trans)) { + if (PTR_ERR(trans) != -ENOENT) + return PTR_ERR(trans); + + /* No running transaction, don't bother */ + transid = root->fs_info->last_trans_committed; + goto out; + } transid = trans->transid; ret = btrfs_commit_transaction_async(trans, root, 0); if (ret) { btrfs_end_transaction(trans, root); return ret; } - +out: if (argp) if (copy_to_user(argp, &transid, sizeof(transid))) return -EFAULT; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index bcc6b65be3b0..8db401fa2f8f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1307,9 +1307,10 @@ static void do_async_commit(struct work_struct *work) * We've got freeze protection passed with the transaction. * Tell lockdep about it. */ - rwsem_acquire_read( - &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], - 0, 1, _THIS_IP_); + if (ac->newtrans->type < TRANS_JOIN_NOLOCK) + rwsem_acquire_read( + &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], + 0, 1, _THIS_IP_); current->journal_info = ac->newtrans; @@ -1347,8 +1348,10 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, * Tell lockdep we've released the freeze rwsem, since the * async commit thread will be the one to unlock it. 
*/ - rwsem_release(&root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], - 1, _THIS_IP_); + if (trans->type < TRANS_JOIN_NOLOCK) + rwsem_release( + &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], + 1, _THIS_IP_); schedule_delayed_work(&ac->work, 0); -- cgit v1.2.3 From 3c04ce01053413007b9df88313b8b8e17272b57b Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Mon, 26 Nov 2012 08:43:07 +0000 Subject: Btrfs: get write access when setting the default subvolume When we want to set the default subvolume, we must get write access, or we will change the R/O file system. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7b1f614f51f6..10bc65ed736c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2843,12 +2843,19 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) struct btrfs_disk_key disk_key; u64 objectid = 0; u64 dir_id; + int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&objectid, argp, sizeof(objectid))) - return -EFAULT; + ret = mnt_want_write_file(file); + if (ret) + return ret; + + if (copy_from_user(&objectid, argp, sizeof(objectid))) { + ret = -EFAULT; + goto out; + } if (!objectid) objectid = root->root_key.objectid; @@ -2858,21 +2865,28 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) location.offset = (u64)-1; new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); - if (IS_ERR(new_root)) - return PTR_ERR(new_root); + if (IS_ERR(new_root)) { + ret = PTR_ERR(new_root); + goto out; + } - if (btrfs_root_refs(&new_root->root_item) == 0) - return -ENOENT; + if (btrfs_root_refs(&new_root->root_item) == 0) { + ret = -ENOENT; + goto out; + } path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + if (!path) { + ret = -ENOMEM; + goto out; + }
path->leave_spinning = 1; trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { btrfs_free_path(path); - return PTR_ERR(trans); + ret = PTR_ERR(trans); + goto out; } dir_id = btrfs_super_root_dir(root->fs_info->super_copy); @@ -2883,7 +2897,8 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) btrfs_end_transaction(trans, root); printk(KERN_ERR "Umm, you don't have the default dir item, " "this isn't going to work\n"); - return -ENOENT; + ret = -ENOENT; + goto out; } btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key); @@ -2893,8 +2908,9 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL); btrfs_end_transaction(trans, root); - - return 0; +out: + mnt_drop_write_file(file); + return ret; } void btrfs_get_block_group_info(struct list_head *groups_list, -- cgit v1.2.3 From 198605a8e2077f174c9834c97b836f535e4e56dd Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Mon, 26 Nov 2012 08:43:45 +0000 Subject: Btrfs: get write access when doing resize fs Steps to reproduce: # mkfs.btrfs # mount -o ro # mount -o ro # mount -o remount,rw # umount # btrfs fi resize 10g We re-sized a R/O filesystem. The reason is that we just check the R/O flag of the super block object. It is not enough, because the kernel may set the R/O flag only for the mount point. We need invoke mnt_want_write_file() to do a full check. 
Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 10bc65ed736c..2be49b4c82d6 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1298,12 +1298,13 @@ out_ra: return ret; } -static noinline int btrfs_ioctl_resize(struct btrfs_root *root, +static noinline int btrfs_ioctl_resize(struct file *file, void __user *arg) { u64 new_size; u64 old_size; u64 devid = 1; + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct btrfs_ioctl_vol_args *vol_args; struct btrfs_trans_handle *trans; struct btrfs_device *device = NULL; @@ -1318,6 +1319,10 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, if (!capable(CAP_SYS_ADMIN)) return -EPERM; + ret = mnt_want_write_file(file); + if (ret) + return ret; + if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 1)) { pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); @@ -1425,6 +1430,7 @@ out_free: kfree(vol_args); out: mutex_unlock(&root->fs_info->volume_mutex); + mnt_drop_write_file(file); atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); return ret; } @@ -3832,7 +3838,7 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_DEFRAG_RANGE: return btrfs_ioctl_defrag(file, argp); case BTRFS_IOC_RESIZE: - return btrfs_ioctl_resize(root, argp); + return btrfs_ioctl_resize(file, argp); case BTRFS_IOC_ADD_DEV: return btrfs_ioctl_add_dev(root, argp); case BTRFS_IOC_RM_DEV: -- cgit v1.2.3 From da24927b1e1925da5c1885cb483231dabe027e15 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Mon, 26 Nov 2012 08:44:50 +0000 Subject: Btrfs: get write access when removing a device Steps to reproduce: # mkfs.btrfs -d single -m single # mount -o ro # mount -o ro # mount -o remount,rw # umount # btrfs device delete We can remove a device from a R/O filesystem. 
The reason is that we just check the R/O flag of the super block object. It is not enough, because the kernel may set the R/O flag only for the mount point. We need to invoke mnt_want_write_file() to do a full check. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2be49b4c82d6..ee36009f8aa1 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2270,20 +2270,23 @@ out: return ret; } -static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) +static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) { + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct btrfs_ioctl_vol_args *vol_args; int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (root->fs_info->sb->s_flags & MS_RDONLY) - return -EROFS; + ret = mnt_want_write_file(file); + if (ret) + return ret; if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 1)) { pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); + mnt_drop_write_file(file); return -EINPROGRESS; } @@ -2300,6 +2303,7 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) kfree(vol_args); out: mutex_unlock(&root->fs_info->volume_mutex); + mnt_drop_write_file(file); atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); return ret; } @@ -3842,7 +3846,7 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_ADD_DEV: return btrfs_ioctl_add_dev(root, argp); case BTRFS_IOC_RM_DEV: - return btrfs_ioctl_rm_dev(root, argp); + return btrfs_ioctl_rm_dev(file, argp); case BTRFS_IOC_FS_INFO: return btrfs_ioctl_fs_info(root, argp); case BTRFS_IOC_DEV_INFO: -- cgit v1.2.3 From b8e95489bf0ddf767e4bd38f537e0adad16ee830 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Mon, 26 Nov 2012 08:48:01 +0000 Subject: Btrfs: get write access for scrub We need to get
write access for scrub, or we will modify the R/O fs. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ee36009f8aa1..12b18c01b911 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3127,10 +3127,11 @@ static noinline long btrfs_ioctl_wait_sync(struct btrfs_root *root, return btrfs_wait_for_commit(root, transid); } -static long btrfs_ioctl_scrub(struct btrfs_root *root, void __user *arg) +static long btrfs_ioctl_scrub(struct file *file, void __user *arg) { - int ret; + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct btrfs_ioctl_scrub_args *sa; + int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -3139,6 +3140,12 @@ static long btrfs_ioctl_scrub(struct btrfs_root *root, void __user *arg) if (IS_ERR(sa)) return PTR_ERR(sa); + if (!(sa->flags & BTRFS_SCRUB_READONLY)) { + ret = mnt_want_write_file(file); + if (ret) + goto out; + } + ret = btrfs_scrub_dev(root->fs_info, sa->devid, sa->start, sa->end, &sa->progress, sa->flags & BTRFS_SCRUB_READONLY, 0); @@ -3146,6 +3153,9 @@ static long btrfs_ioctl_scrub(struct btrfs_root *root, void __user *arg) if (copy_to_user(arg, sa, sizeof(*sa))) ret = -EFAULT; + if (!(sa->flags & BTRFS_SCRUB_READONLY)) + mnt_drop_write_file(file); +out: kfree(sa); return ret; } @@ -3879,7 +3889,7 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_WAIT_SYNC: return btrfs_ioctl_wait_sync(root, argp); case BTRFS_IOC_SCRUB: - return btrfs_ioctl_scrub(root, argp); + return btrfs_ioctl_scrub(file, argp); case BTRFS_IOC_SCRUB_CANCEL: return btrfs_ioctl_scrub_cancel(root, argp); case BTRFS_IOC_SCRUB_PROGRESS: -- cgit v1.2.3 From 905b0dda06a064db08b8a814e968786ff3c4cc19 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Mon, 26 Nov 2012 08:50:11 +0000 Subject: Btrfs: get write access for qgroup operations We need get write access for 
qgroup operations, or we will modify the R/O fs. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 73 +++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 25 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 12b18c01b911..657d83ca9dea 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3558,8 +3558,9 @@ out: return ret; } -static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg) +static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) { + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct btrfs_ioctl_quota_ctl_args *sa; struct btrfs_trans_handle *trans = NULL; int ret; @@ -3568,12 +3569,15 @@ static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (root->fs_info->sb->s_flags & MS_RDONLY) - return -EROFS; + ret = mnt_want_write_file(file); + if (ret) + return ret; sa = memdup_user(arg, sizeof(*sa)); - if (IS_ERR(sa)) - return PTR_ERR(sa); + if (IS_ERR(sa)) { + ret = PTR_ERR(sa); + goto drop_write; + } if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) { trans = btrfs_start_transaction(root, 2); @@ -3606,14 +3610,16 @@ static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg) if (err && !ret) ret = err; } - out: kfree(sa); +drop_write: + mnt_drop_write_file(file); return ret; } -static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg) +static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) { + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct btrfs_ioctl_qgroup_assign_args *sa; struct btrfs_trans_handle *trans; int ret; @@ -3622,12 +3628,15 @@ static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (root->fs_info->sb->s_flags & MS_RDONLY) - return -EROFS; + ret = 
mnt_want_write_file(file); + if (ret) + return ret; sa = memdup_user(arg, sizeof(*sa)); - if (IS_ERR(sa)) - return PTR_ERR(sa); + if (IS_ERR(sa)) { + ret = PTR_ERR(sa); + goto drop_write; + } trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { @@ -3650,11 +3659,14 @@ static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg) out: kfree(sa); +drop_write: + mnt_drop_write_file(file); return ret; } -static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg) +static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) { + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct btrfs_ioctl_qgroup_create_args *sa; struct btrfs_trans_handle *trans; int ret; @@ -3663,12 +3675,15 @@ static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (root->fs_info->sb->s_flags & MS_RDONLY) - return -EROFS; + ret = mnt_want_write_file(file); + if (ret) + return ret; sa = memdup_user(arg, sizeof(*sa)); - if (IS_ERR(sa)) - return PTR_ERR(sa); + if (IS_ERR(sa)) { + ret = PTR_ERR(sa); + goto drop_write; + } trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { @@ -3690,11 +3705,14 @@ static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg) out: kfree(sa); +drop_write: + mnt_drop_write_file(file); return ret; } -static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg) +static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg) { + struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; struct btrfs_ioctl_qgroup_limit_args *sa; struct btrfs_trans_handle *trans; int ret; @@ -3704,12 +3722,15 @@ static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (root->fs_info->sb->s_flags & MS_RDONLY) - return -EROFS; + ret = mnt_want_write_file(file); + if (ret) + return ret; sa = memdup_user(arg, 
sizeof(*sa)); - if (IS_ERR(sa)) - return PTR_ERR(sa); + if (IS_ERR(sa)) { + ret = PTR_ERR(sa); + goto drop_write; + } trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { @@ -3732,6 +3753,8 @@ static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg) out: kfree(sa); +drop_write: + mnt_drop_write_file(file); return ret; } @@ -3907,13 +3930,13 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_GET_DEV_STATS: return btrfs_ioctl_get_dev_stats(root, argp); case BTRFS_IOC_QUOTA_CTL: - return btrfs_ioctl_quota_ctl(root, argp); + return btrfs_ioctl_quota_ctl(file, argp); case BTRFS_IOC_QGROUP_ASSIGN: - return btrfs_ioctl_qgroup_assign(root, argp); + return btrfs_ioctl_qgroup_assign(file, argp); case BTRFS_IOC_QGROUP_CREATE: - return btrfs_ioctl_qgroup_create(root, argp); + return btrfs_ioctl_qgroup_create(file, argp); case BTRFS_IOC_QGROUP_LIMIT: - return btrfs_ioctl_qgroup_limit(root, argp); + return btrfs_ioctl_qgroup_limit(file, argp); case BTRFS_IOC_DEV_REPLACE: return btrfs_ioctl_dev_replace(root, argp); } -- cgit v1.2.3 From 9c52057c698fb96f8f07e7a4bcf4801a092bda89 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 17 Dec 2012 14:26:57 -0500 Subject: Btrfs: fix hash overflow handling The handling for directory crc hash overflows was fairly obscure, split_leaf returns EOVERFLOW when we try to extend the item and that is supposed to bubble up to userland. For a while it did so, but along the way we added better handling of errors and forced the FS readonly if we hit IO errors during the directory insertion. Along the way, we started testing only for EEXIST and the EOVERFLOW case was dropped. The end result is that we may force the FS readonly if we catch a directory hash bucket overflow. This fixes a few problem spots. First I add tests for EOVERFLOW in the places where we can safely just return the error up the chain. 
btrfs_rename is harder though, because it tries to insert the new directory item only after it has already unlinked anything the rename was going to overwrite. Rather than adding very complex logic, I added a helper to test for the hash overflow case early while it is still safe to bail out. Snapshot and subvolume creation had a similar problem, so they are using the new helper now too. Signed-off-by: Chris Mason Reported-by: Pascal Junod --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/dir-item.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/inode.c | 24 +++++++++++++++++++- fs/btrfs/ioctl.c | 10 +++++++++ fs/btrfs/transaction.c | 2 +- 5 files changed, 95 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 44d9bc87e863..547b7b05727f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3283,6 +3283,8 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans, struct btrfs_root *root); /* dir-item.c */ +int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, + const char *name, int name_len); int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, struct inode *dir, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index c1a074d0696f..502c2158167c 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -213,6 +213,65 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, return btrfs_match_dir_item_name(root, path, name, name_len); } +int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, + const char *name, int name_len) +{ + int ret; + struct btrfs_key key; + struct btrfs_dir_item *di; + int data_size; + struct extent_buffer *leaf; + int slot; + struct btrfs_path *path; + + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = dir; + btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); + key.offset = btrfs_name_hash(name, name_len); + + ret = 
btrfs_search_slot(NULL, root, &key, path, 0, 0); + + /* return back any errors */ + if (ret < 0) + goto out; + + /* nothing found, we're safe */ + if (ret > 0) { + ret = 0; + goto out; + } + + /* we found an item, look for our name in the item */ + di = btrfs_match_dir_item_name(root, path, name, name_len); + if (di) { + /* our exact name was found */ + ret = -EEXIST; + goto out; + } + + /* + * see if there is room in the item to insert this + * name + */ + data_size = sizeof(*di) + name_len + sizeof(struct btrfs_item); + leaf = path->nodes[0]; + slot = path->slots[0]; + if (data_size + btrfs_item_size_nr(leaf, slot) + + sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root)) { + ret = -EOVERFLOW; + } else { + /* plenty of insertion room */ + ret = 0; + } +out: + btrfs_free_path(path); + return ret; +} + /* * lookup a directory item based on index. 'dir' is the objectid * we're searching in, and 'mod' tells us if you plan on deleting the diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2e6918c85b72..e95b1f90a1f6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4885,7 +4885,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, ret = btrfs_insert_dir_item(trans, root, name, name_len, parent_inode, &key, btrfs_inode_type(inode), index); - if (ret == -EEXIST) + if (ret == -EEXIST || ret == -EOVERFLOW) goto fail_dir_item; else if (ret) { btrfs_abort_transaction(trans, root, ret); @@ -7336,6 +7336,28 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (S_ISDIR(old_inode->i_mode) && new_inode && new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) return -ENOTEMPTY; + + + /* check for collisions, even if the name isn't there */ + ret = btrfs_check_dir_item_collision(root, new_dir->i_ino, + new_dentry->d_name.name, + new_dentry->d_name.len); + + if (ret) { + if (ret == -EEXIST) { + /* we shouldn't get + * eexist without a new_inode */ + if (!new_inode) { + WARN_ON(1); + return ret; + } + } else { + /* maybe -EOVERFLOW */ + return ret; + } 
+ } + ret = 0; + /* * we're using rename to replace one file with another. * and the replacement file is large. Start IO on it now so diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 657d83ca9dea..d4608ab72b79 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -710,6 +710,16 @@ static noinline int btrfs_mksubvol(struct path *parent, if (error) goto out_dput; + /* + * even if this name doesn't exist, we may get hash collisions. + * check for them now when we can safely fail + */ + error = btrfs_check_dir_item_collision(BTRFS_I(dir)->root, + dir->i_ino, name, + namelen); + if (error) + goto out_dput; + down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index e6509b92433b..87fac9a21ea5 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1190,7 +1190,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, parent_inode, &key, BTRFS_FT_DIR, index); /* We have check then name at the beginning, so it is impossible. 
*/ - BUG_ON(ret == -EEXIST); + BUG_ON(ret == -EEXIST || ret == -EOVERFLOW); if (ret) { btrfs_abort_transaction(trans, root, ret); goto fail; -- cgit v1.2.3 From 213490b301773ea9c6fb89a86424a6901fcdd069 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 11 Sep 2012 08:33:50 -0600 Subject: Btrfs: fix a bug of per-file nocow Users report a bug, the reproducer is: $ mkfs.btrfs /dev/loop0 $ mount /dev/loop0 /mnt/btrfs/ $ mkdir /mnt/btrfs/dir $ chattr +C /mnt/btrfs/dir/ $ dd if=/dev/zero of=/mnt/btrfs/dir/foo bs=4K count=10; $ lsattr /mnt/btrfs/dir/foo ---------------C- /mnt/btrfs/dir/foo $ filefrag /mnt/btrfs/dir/foo /mnt/btrfs/dir/foo: 1 extent found ---> an extent $ dd if=/dev/zero of=/mnt/btrfs/dir/foo bs=4K count=1 seek=5 conv=notrunc,nocreat; sync $ filefrag /mnt/btrfs/dir/foo /mnt/btrfs/dir/foo: 3 extents found ---> with nocow, btrfs breaks the extent into three parts The newly created file should not only inherit the NODATACOW flag, but also honor the NODATASUM flag, because we must do COW on a file extent with checksum.
Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 3 +-- fs/btrfs/ioctl.c | 5 ++++- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e95b1f90a1f6..67ed24ae86bb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4818,8 +4818,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if (S_ISREG(mode)) { if (btrfs_test_opt(root, NODATASUM)) BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; - if (btrfs_test_opt(root, NODATACOW) || - (BTRFS_I(dir)->flags & BTRFS_INODE_NODATACOW)) + if (btrfs_test_opt(root, NODATACOW)) BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d4608ab72b79..7624212ae926 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -141,8 +141,11 @@ void btrfs_inherit_iflags(struct inode *inode, struct inode *dir) BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS; } - if (flags & BTRFS_INODE_NODATACOW) + if (flags & BTRFS_INODE_NODATACOW) { BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; + if (S_ISREG(inode->i_mode)) + BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; + } btrfs_update_iflags(inode); } -- cgit v1.2.3