Diffstat (limited to 'drivers/md/dm-snap.c')
-rw-r--r-- | drivers/md/dm-snap.c | 178 |
1 file changed, 119 insertions(+), 59 deletions(-)
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 2a855e5429ab..55e158553700 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -19,7 +19,6 @@
 #include <linux/vmalloc.h>
 #include <linux/log2.h>
 #include <linux/dm-kcopyd.h>
-#include <linux/semaphore.h>
 
 #include "dm.h"
 
@@ -48,7 +47,7 @@ struct dm_exception_table {
 };
 
 struct dm_snapshot {
-	struct rw_semaphore lock;
+	struct mutex lock;
 
 	struct dm_dev *origin;
 	struct dm_dev *cow;
@@ -106,8 +105,8 @@ struct dm_snapshot {
 	/* The on disk metadata handler */
 	struct dm_exception_store *store;
 
-	/* Maximum number of in-flight COW jobs. */
-	struct semaphore cow_count;
+	unsigned in_progress;
+	wait_queue_head_t in_progress_wait;
 
 	struct dm_kcopyd_client *kcopyd_client;
 
@@ -158,8 +157,8 @@ struct dm_snapshot {
  */
 #define DEFAULT_COW_THRESHOLD 2048
 
-static int cow_threshold = DEFAULT_COW_THRESHOLD;
-module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644);
+static unsigned cow_threshold = DEFAULT_COW_THRESHOLD;
+module_param_named(snapshot_cow_threshold, cow_threshold, uint, 0644);
 MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write");
 
 DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
@@ -456,9 +455,9 @@ static int __find_snapshots_sharing_cow(struct dm_snapshot *snap,
 		if (!bdev_equal(s->cow->bdev, snap->cow->bdev))
 			continue;
 
-		down_read(&s->lock);
+		mutex_lock(&s->lock);
 		active = s->active;
-		up_read(&s->lock);
+		mutex_unlock(&s->lock);
 
 		if (active) {
 			if (snap_src)
@@ -926,7 +925,7 @@ static int remove_single_exception_chunk(struct dm_snapshot *s)
 	int r;
 	chunk_t old_chunk = s->first_merging_chunk + s->num_merging_chunks - 1;
 
-	down_write(&s->lock);
+	mutex_lock(&s->lock);
 
 	/*
 	 * Process chunks (and associated exceptions) in reverse order
@@ -941,7 +940,7 @@ static int remove_single_exception_chunk(struct dm_snapshot *s)
 
 	b = __release_queued_bios_after_merge(s);
 out:
-	up_write(&s->lock);
+	mutex_unlock(&s->lock);
 	if (b)
 		flush_bios(b);
 
@@ -1000,9 +999,9 @@ static void snapshot_merge_next_chunks(struct dm_snapshot *s)
 		if (linear_chunks < 0) {
 			DMERR("Read error in exception store: "
 			      "shutting down merge");
-			down_write(&s->lock);
+			mutex_lock(&s->lock);
 			s->merge_failed = 1;
-			up_write(&s->lock);
+			mutex_unlock(&s->lock);
 		}
 		goto shut;
 	}
@@ -1043,10 +1042,10 @@ static void snapshot_merge_next_chunks(struct dm_snapshot *s)
 		previous_count = read_pending_exceptions_done_count();
 	}
 
-	down_write(&s->lock);
+	mutex_lock(&s->lock);
 	s->first_merging_chunk = old_chunk;
 	s->num_merging_chunks = linear_chunks;
-	up_write(&s->lock);
+	mutex_unlock(&s->lock);
 
 	/* Wait until writes to all 'linear_chunks' drain */
 	for (i = 0; i < linear_chunks; i++)
@@ -1088,10 +1087,10 @@ static void merge_callback(int read_err, unsigned long write_err, void *context)
 	return;
 
 shut:
-	down_write(&s->lock);
+	mutex_lock(&s->lock);
 	s->merge_failed = 1;
 	b = __release_queued_bios_after_merge(s);
-	up_write(&s->lock);
+	mutex_unlock(&s->lock);
 	error_bios(b);
 
 	merge_shutdown(s);
@@ -1137,7 +1136,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		origin_mode = FMODE_WRITE;
 	}
 
-	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	s = kzalloc(sizeof(*s), GFP_KERNEL);
 	if (!s) {
 		ti->error = "Cannot allocate private snapshot structure";
 		r = -ENOMEM;
@@ -1190,7 +1189,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	s->exception_start_sequence = 0;
 	s->exception_complete_sequence = 0;
 	INIT_LIST_HEAD(&s->out_of_order_list);
-	init_rwsem(&s->lock);
+	mutex_init(&s->lock);
 	INIT_LIST_HEAD(&s->list);
 	spin_lock_init(&s->pe_lock);
 	s->state_bits = 0;
@@ -1206,7 +1205,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad_hash_tables;
 	}
 
-	sema_init(&s->cow_count, (cow_threshold > 0) ? cow_threshold : INT_MAX);
+	init_waitqueue_head(&s->in_progress_wait);
 
 	s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
 	if (IS_ERR(s->kcopyd_client)) {
@@ -1357,9 +1356,9 @@ static void snapshot_dtr(struct dm_target *ti)
 	/* Check whether exception handover must be cancelled */
 	(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
 	if (snap_src && snap_dest && (s == snap_src)) {
-		down_write(&snap_dest->lock);
+		mutex_lock(&snap_dest->lock);
 		snap_dest->valid = 0;
-		up_write(&snap_dest->lock);
+		mutex_unlock(&snap_dest->lock);
 		DMERR("Cancelling snapshot handover.");
 	}
 	up_read(&_origins_lock);
@@ -1390,13 +1389,62 @@ static void snapshot_dtr(struct dm_target *ti)
 
 	dm_exception_store_destroy(s->store);
 
+	mutex_destroy(&s->lock);
+
 	dm_put_device(ti, s->cow);
 	dm_put_device(ti, s->origin);
 
+	WARN_ON(s->in_progress);
+
 	kfree(s);
 }
 
+static void account_start_copy(struct dm_snapshot *s)
+{
+	spin_lock(&s->in_progress_wait.lock);
+	s->in_progress++;
+	spin_unlock(&s->in_progress_wait.lock);
+}
+
+static void account_end_copy(struct dm_snapshot *s)
+{
+	spin_lock(&s->in_progress_wait.lock);
+	BUG_ON(!s->in_progress);
+	s->in_progress--;
+	if (likely(s->in_progress <= cow_threshold) &&
+	    unlikely(waitqueue_active(&s->in_progress_wait)))
+		wake_up_locked(&s->in_progress_wait);
+	spin_unlock(&s->in_progress_wait.lock);
+}
+
+static bool wait_for_in_progress(struct dm_snapshot *s, bool unlock_origins)
+{
+	if (unlikely(s->in_progress > cow_threshold)) {
+		spin_lock(&s->in_progress_wait.lock);
+		if (likely(s->in_progress > cow_threshold)) {
+			/*
+			 * NOTE: this throttle doesn't account for whether
+			 * the caller is servicing an IO that will trigger a COW
+			 * so excess throttling may result for chunks not required
+			 * to be COW'd.  But if cow_threshold was reached, extra
+			 * throttling is unlikely to negatively impact performance.
+			 */
+			DECLARE_WAITQUEUE(wait, current);
+			__add_wait_queue(&s->in_progress_wait, &wait);
+			__set_current_state(TASK_UNINTERRUPTIBLE);
+			spin_unlock(&s->in_progress_wait.lock);
+			if (unlock_origins)
+				up_read(&_origins_lock);
+			io_schedule();
+			remove_wait_queue(&s->in_progress_wait, &wait);
+			return false;
+		}
+		spin_unlock(&s->in_progress_wait.lock);
+	}
+	return true;
+}
+
 /*
  * Flush a list of buffers.
  */
@@ -1412,7 +1460,7 @@ static void flush_bios(struct bio *bio)
 	}
 }
 
-static int do_origin(struct dm_dev *origin, struct bio *bio);
+static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit);
 
 /*
  * Flush a list of buffers.
@@ -1425,7 +1473,7 @@ static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio)
 	while (bio) {
 		n = bio->bi_next;
 		bio->bi_next = NULL;
-		r = do_origin(s->origin, bio);
+		r = do_origin(s->origin, bio, false);
 		if (r == DM_MAPIO_REMAPPED)
 			generic_make_request(bio);
 		bio = n;
@@ -1477,7 +1525,7 @@ static void pending_complete(void *context, int success)
 
 	if (!success) {
 		/* Read/write error - snapshot is unusable */
-		down_write(&s->lock);
+		mutex_lock(&s->lock);
 		__invalidate_snapshot(s, -EIO);
 		error = 1;
 		goto out;
@@ -1485,14 +1533,14 @@ static void pending_complete(void *context, int success)
 
 	e = alloc_completed_exception(GFP_NOIO);
 	if (!e) {
-		down_write(&s->lock);
+		mutex_lock(&s->lock);
 		__invalidate_snapshot(s, -ENOMEM);
 		error = 1;
 		goto out;
 	}
 	*e = pe->e;
 
-	down_write(&s->lock);
+	mutex_lock(&s->lock);
 	if (!s->valid) {
 		free_completed_exception(e);
 		error = 1;
@@ -1517,7 +1565,7 @@ out:
 		full_bio->bi_end_io = pe->full_bio_end_io;
 	increment_pending_exceptions_done_count();
 
-	up_write(&s->lock);
+	mutex_unlock(&s->lock);
 
 	/* Submit any pending write bios */
 	if (error) {
@@ -1579,7 +1627,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context)
 		}
 		list_add(&pe->out_of_order_entry, lh);
 	}
-	up(&s->cow_count);
+	account_end_copy(s);
 }
 
 /*
@@ -1603,7 +1651,7 @@ static void start_copy(struct dm_snap_pending_exception *pe)
 	dest.count = src.count;
 
 	/* Hand over to kcopyd */
-	down(&s->cow_count);
+	account_start_copy(s);
 	dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe);
 }
 
@@ -1623,7 +1671,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe,
 	pe->full_bio = bio;
 	pe->full_bio_end_io = bio->bi_end_io;
 
-	down(&s->cow_count);
+	account_start_copy(s);
 	callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
 						   copy_callback, pe);
 
@@ -1714,9 +1762,12 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
 	if (!s->valid)
 		return -EIO;
 
-	/* FIXME: should only take write lock if we need
-	 * to copy an exception */
-	down_write(&s->lock);
+	if (bio_data_dir(bio) == WRITE) {
+		while (unlikely(!wait_for_in_progress(s, false)))
+			; /* wait_for_in_progress() has slept */
+	}
+
+	mutex_lock(&s->lock);
 
 	if (!s->valid || (unlikely(s->snapshot_overflowed) &&
 	    bio_rw(bio) == WRITE)) {
 		r = -EIO;
 		goto out_unlock;
@@ -1738,9 +1789,9 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
 	if (bio_rw(bio) == WRITE) {
 		pe = __lookup_pending_exception(s, chunk);
 		if (!pe) {
-			up_write(&s->lock);
+			mutex_unlock(&s->lock);
 			pe = alloc_pending_exception(s);
-			down_write(&s->lock);
+			mutex_lock(&s->lock);
 
 			if (!s->valid || s->snapshot_overflowed) {
 				free_pending_exception(pe);
@@ -1775,7 +1826,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
 		    bio->bi_iter.bi_size ==
 		    (s->store->chunk_size << SECTOR_SHIFT)) {
 			pe->started = 1;
-			up_write(&s->lock);
+			mutex_unlock(&s->lock);
 			start_full_bio(pe, bio);
 			goto out;
 		}
@@ -1785,7 +1836,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
 		if (!pe->started) {
 			/* this is protected by snap->lock */
 			pe->started = 1;
-			up_write(&s->lock);
+			mutex_unlock(&s->lock);
 			start_copy(pe);
 			goto out;
 		}
@@ -1795,7 +1846,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
 	}
 
 out_unlock:
-	up_write(&s->lock);
+	mutex_unlock(&s->lock);
 out:
 	return r;
 }
@@ -1831,7 +1882,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
 
 	chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
 
-	down_write(&s->lock);
+	mutex_lock(&s->lock);
 
 	/* Full merging snapshots are redirected to the origin */
 	if (!s->valid)
@@ -1862,12 +1913,12 @@ redirect_to_origin:
 	bio->bi_bdev = s->origin->bdev;
 
 	if (bio_rw(bio) == WRITE) {
-		up_write(&s->lock);
-		return do_origin(s->origin, bio);
+		mutex_unlock(&s->lock);
+		return do_origin(s->origin, bio, false);
 	}
 
 out_unlock:
-	up_write(&s->lock);
+	mutex_unlock(&s->lock);
 
 	return r;
 }
@@ -1898,7 +1949,7 @@ static int snapshot_preresume(struct dm_target *ti)
 	down_read(&_origins_lock);
 	(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
 	if (snap_src && snap_dest) {
-		down_read(&snap_src->lock);
+		mutex_lock(&snap_src->lock);
 		if (s == snap_src) {
 			DMERR("Unable to resume snapshot source until "
 			      "handover completes.");
@@ -1908,7 +1959,7 @@ static int snapshot_preresume(struct dm_target *ti)
 			      "source is suspended.");
 			r = -EINVAL;
 		}
-		up_read(&snap_src->lock);
+		mutex_unlock(&snap_src->lock);
 	}
 	up_read(&_origins_lock);
 
@@ -1954,11 +2005,11 @@ static void snapshot_resume(struct dm_target *ti)
 
 	(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
 	if (snap_src && snap_dest) {
-		down_write(&snap_src->lock);
-		down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING);
+		mutex_lock(&snap_src->lock);
+		mutex_lock_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING);
 		__handover_exceptions(snap_src, snap_dest);
-		up_write(&snap_dest->lock);
-		up_write(&snap_src->lock);
+		mutex_unlock(&snap_dest->lock);
+		mutex_unlock(&snap_src->lock);
 	}
 
 	up_read(&_origins_lock);
@@ -1973,9 +2024,9 @@ static void snapshot_resume(struct dm_target *ti)
 	/* Now we have correct chunk size, reregister */
 	reregister_snapshot(s);
 
-	down_write(&s->lock);
+	mutex_lock(&s->lock);
 	s->active = 1;
-	up_write(&s->lock);
+	mutex_unlock(&s->lock);
 }
 
 static uint32_t get_origin_minimum_chunksize(struct block_device *bdev)
@@ -2015,7 +2066,7 @@ static void snapshot_status(struct dm_target *ti, status_type_t type,
 	switch (type) {
 	case STATUSTYPE_INFO:
 
-		down_write(&snap->lock);
+		mutex_lock(&snap->lock);
 
 		if (!snap->valid)
 			DMEMIT("Invalid");
@@ -2040,7 +2091,7 @@ static void snapshot_status(struct dm_target *ti, status_type_t type,
 				DMEMIT("Unknown");
 		}
 
-		up_write(&snap->lock);
+		mutex_unlock(&snap->lock);
 
 		break;
 
@@ -2106,7 +2157,7 @@ static int __origin_write(struct list_head *snapshots, sector_t sector,
 		if (dm_target_is_snapshot_merge(snap->ti))
 			continue;
 
-		down_write(&snap->lock);
+		mutex_lock(&snap->lock);
 
 		/* Only deal with valid and active snapshots */
 		if (!snap->valid || !snap->active)
@@ -2133,9 +2184,9 @@ static int __origin_write(struct list_head *snapshots, sector_t sector,
 
 		pe = __lookup_pending_exception(snap, chunk);
 		if (!pe) {
-			up_write(&snap->lock);
+			mutex_unlock(&snap->lock);
 			pe = alloc_pending_exception(snap);
-			down_write(&snap->lock);
+			mutex_lock(&snap->lock);
 
 			if (!snap->valid) {
 				free_pending_exception(pe);
@@ -2178,7 +2229,7 @@ static int __origin_write(struct list_head *snapshots, sector_t sector,
 		}
 
 next_snapshot:
-		up_write(&snap->lock);
+		mutex_unlock(&snap->lock);
 
 		if (pe_to_start_now) {
 			start_copy(pe_to_start_now);
@@ -2199,15 +2250,24 @@ next_snapshot:
 /*
  * Called on a write from the origin driver.
  */
-static int do_origin(struct dm_dev *origin, struct bio *bio)
+static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit)
 {
 	struct origin *o;
 	int r = DM_MAPIO_REMAPPED;
 
+again:
 	down_read(&_origins_lock);
 	o = __lookup_origin(origin->bdev);
-	if (o)
+	if (o) {
+		if (limit) {
+			struct dm_snapshot *s;
+			list_for_each_entry(s, &o->snapshots, list)
+				if (unlikely(!wait_for_in_progress(s, true)))
+					goto again;
+		}
+
 		r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio);
+	}
 	up_read(&_origins_lock);
 
 	return r;
@@ -2320,7 +2380,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio)
 		dm_accept_partial_bio(bio, available_sectors);
 
 	/* Only tell snapshots if this is a write */
-	return do_origin(o->dev, bio);
+	return do_origin(o->dev, bio, true);
 }
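The core of this change is replacing the cow_count semaphore with an open-coded throttle: account_start_copy()/account_end_copy() keep a count of in-flight COW jobs under the wait queue's own spinlock, and wait_for_in_progress() puts a writer to sleep while the count exceeds cow_threshold, returning false after sleeping so the caller re-checks the condition from scratch. Below is a minimal user-space analogue of that pattern using POSIX threads; all names (COW_THRESHOLD, copy_start, copy_end, copy_throttle, throttle_lock, throttle_cv) are illustrative, not part of the patch, and a condition variable stands in for the kernel's open-coded __add_wait_queue()/io_schedule() sequence.

#include <pthread.h>
#include <stdbool.h>

#define COW_THRESHOLD 2048

static pthread_mutex_t throttle_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  throttle_cv   = PTHREAD_COND_INITIALIZER;
static unsigned int    in_progress;	/* in-flight copy jobs */

/* Mirrors account_start_copy(): bump the count before handing off a job. */
static void copy_start(void)
{
	pthread_mutex_lock(&throttle_lock);
	in_progress++;
	pthread_mutex_unlock(&throttle_lock);
}

/* Mirrors account_end_copy(): drop the count and wake throttled writers
 * once we are back at or below the threshold. */
static void copy_end(void)
{
	pthread_mutex_lock(&throttle_lock);
	in_progress--;
	if (in_progress <= COW_THRESHOLD)
		pthread_cond_broadcast(&throttle_cv);
	pthread_mutex_unlock(&throttle_lock);
}

/* Mirrors wait_for_in_progress(): sleep one round when over the threshold
 * and return false so the caller loops, like the
 * "while (unlikely(!wait_for_in_progress(s, false)))" in snapshot_map(). */
static bool copy_throttle(void)
{
	bool under = true;

	pthread_mutex_lock(&throttle_lock);
	if (in_progress > COW_THRESHOLD) {
		pthread_cond_wait(&throttle_cv, &throttle_lock);
		under = false;
	}
	pthread_mutex_unlock(&throttle_lock);
	return under;
}

Returning false after a single sleep, rather than looping internally, matters in the kernel version: it gives the caller a chance to re-evaluate its surroundings (locks held, snapshot list contents) before trying again.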
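The unlock_origins parameter and the "goto again" loop in do_origin() are the deadlock-avoidance half of the change: a writer never sleeps in the throttle while holding _origins_lock; it drops the reader lock before blocking and restarts the lookup, since the snapshot list may have changed while it slept. A sketch of the same pattern, continuing the user-space analogue above (origins_rwlock and origin_write are again illustrative):

static pthread_rwlock_t origins_rwlock = PTHREAD_RWLOCK_INITIALIZER;

/* Mirrors do_origin(..., limit=true). */
static void origin_write(void)
{
again:
	pthread_rwlock_rdlock(&origins_rwlock);

	pthread_mutex_lock(&throttle_lock);
	if (in_progress > COW_THRESHOLD) {
		/* Never block while holding the outer reader lock; drop it
		 * first so other threads can still take it, then redo the
		 * lookup from the top once we are woken. */
		pthread_rwlock_unlock(&origins_rwlock);
		pthread_cond_wait(&throttle_cv, &throttle_lock);
		pthread_mutex_unlock(&throttle_lock);
		goto again;
	}
	pthread_mutex_unlock(&throttle_lock);

	/* ... dispatch the write to the snapshots under the reader lock ... */

	pthread_rwlock_unlock(&origins_rwlock);
}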