summaryrefslogtreecommitdiff
path: root/drivers/md/dm-snap.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm-snap.c')
-rw-r--r--drivers/md/dm-snap.c178
1 files changed, 119 insertions, 59 deletions
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 2a855e5429ab..55e158553700 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -19,7 +19,6 @@
#include <linux/vmalloc.h>
#include <linux/log2.h>
#include <linux/dm-kcopyd.h>
-#include <linux/semaphore.h>
#include "dm.h"
@@ -48,7 +47,7 @@ struct dm_exception_table {
};
struct dm_snapshot {
- struct rw_semaphore lock;
+ struct mutex lock;
struct dm_dev *origin;
struct dm_dev *cow;
@@ -106,8 +105,8 @@ struct dm_snapshot {
/* The on disk metadata handler */
struct dm_exception_store *store;
- /* Maximum number of in-flight COW jobs. */
- struct semaphore cow_count;
+ unsigned in_progress;
+ wait_queue_head_t in_progress_wait;
struct dm_kcopyd_client *kcopyd_client;
@@ -158,8 +157,8 @@ struct dm_snapshot {
*/
#define DEFAULT_COW_THRESHOLD 2048
-static int cow_threshold = DEFAULT_COW_THRESHOLD;
-module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644);
+static unsigned cow_threshold = DEFAULT_COW_THRESHOLD;
+module_param_named(snapshot_cow_threshold, cow_threshold, uint, 0644);
MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write");
DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
@@ -456,9 +455,9 @@ static int __find_snapshots_sharing_cow(struct dm_snapshot *snap,
if (!bdev_equal(s->cow->bdev, snap->cow->bdev))
continue;
- down_read(&s->lock);
+ mutex_lock(&s->lock);
active = s->active;
- up_read(&s->lock);
+ mutex_unlock(&s->lock);
if (active) {
if (snap_src)
@@ -926,7 +925,7 @@ static int remove_single_exception_chunk(struct dm_snapshot *s)
int r;
chunk_t old_chunk = s->first_merging_chunk + s->num_merging_chunks - 1;
- down_write(&s->lock);
+ mutex_lock(&s->lock);
/*
* Process chunks (and associated exceptions) in reverse order
@@ -941,7 +940,7 @@ static int remove_single_exception_chunk(struct dm_snapshot *s)
b = __release_queued_bios_after_merge(s);
out:
- up_write(&s->lock);
+ mutex_unlock(&s->lock);
if (b)
flush_bios(b);
@@ -1000,9 +999,9 @@ static void snapshot_merge_next_chunks(struct dm_snapshot *s)
if (linear_chunks < 0) {
DMERR("Read error in exception store: "
"shutting down merge");
- down_write(&s->lock);
+ mutex_lock(&s->lock);
s->merge_failed = 1;
- up_write(&s->lock);
+ mutex_unlock(&s->lock);
}
goto shut;
}
@@ -1043,10 +1042,10 @@ static void snapshot_merge_next_chunks(struct dm_snapshot *s)
previous_count = read_pending_exceptions_done_count();
}
- down_write(&s->lock);
+ mutex_lock(&s->lock);
s->first_merging_chunk = old_chunk;
s->num_merging_chunks = linear_chunks;
- up_write(&s->lock);
+ mutex_unlock(&s->lock);
/* Wait until writes to all 'linear_chunks' drain */
for (i = 0; i < linear_chunks; i++)
@@ -1088,10 +1087,10 @@ static void merge_callback(int read_err, unsigned long write_err, void *context)
return;
shut:
- down_write(&s->lock);
+ mutex_lock(&s->lock);
s->merge_failed = 1;
b = __release_queued_bios_after_merge(s);
- up_write(&s->lock);
+ mutex_unlock(&s->lock);
error_bios(b);
merge_shutdown(s);
@@ -1137,7 +1136,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
origin_mode = FMODE_WRITE;
}
- s = kmalloc(sizeof(*s), GFP_KERNEL);
+ s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s) {
ti->error = "Cannot allocate private snapshot structure";
r = -ENOMEM;
@@ -1190,7 +1189,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
s->exception_start_sequence = 0;
s->exception_complete_sequence = 0;
INIT_LIST_HEAD(&s->out_of_order_list);
- init_rwsem(&s->lock);
+ mutex_init(&s->lock);
INIT_LIST_HEAD(&s->list);
spin_lock_init(&s->pe_lock);
s->state_bits = 0;
@@ -1206,7 +1205,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad_hash_tables;
}
- sema_init(&s->cow_count, (cow_threshold > 0) ? cow_threshold : INT_MAX);
+ init_waitqueue_head(&s->in_progress_wait);
s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
if (IS_ERR(s->kcopyd_client)) {
@@ -1357,9 +1356,9 @@ static void snapshot_dtr(struct dm_target *ti)
/* Check whether exception handover must be cancelled */
(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
if (snap_src && snap_dest && (s == snap_src)) {
- down_write(&snap_dest->lock);
+ mutex_lock(&snap_dest->lock);
snap_dest->valid = 0;
- up_write(&snap_dest->lock);
+ mutex_unlock(&snap_dest->lock);
DMERR("Cancelling snapshot handover.");
}
up_read(&_origins_lock);
@@ -1390,13 +1389,62 @@ static void snapshot_dtr(struct dm_target *ti)
dm_exception_store_destroy(s->store);
+ mutex_destroy(&s->lock);
+
dm_put_device(ti, s->cow);
dm_put_device(ti, s->origin);
+ WARN_ON(s->in_progress);
+
kfree(s);
}
+static void account_start_copy(struct dm_snapshot *s)
+{
+ spin_lock(&s->in_progress_wait.lock);
+ s->in_progress++;
+ spin_unlock(&s->in_progress_wait.lock);
+}
+
+static void account_end_copy(struct dm_snapshot *s)
+{
+ spin_lock(&s->in_progress_wait.lock);
+ BUG_ON(!s->in_progress);
+ s->in_progress--;
+ if (likely(s->in_progress <= cow_threshold) &&
+ unlikely(waitqueue_active(&s->in_progress_wait)))
+ wake_up_locked(&s->in_progress_wait);
+ spin_unlock(&s->in_progress_wait.lock);
+}
+
+static bool wait_for_in_progress(struct dm_snapshot *s, bool unlock_origins)
+{
+ if (unlikely(s->in_progress > cow_threshold)) {
+ spin_lock(&s->in_progress_wait.lock);
+ if (likely(s->in_progress > cow_threshold)) {
+ /*
+ * NOTE: this throttle doesn't account for whether
+ * the caller is servicing an IO that will trigger a COW
+ * so excess throttling may result for chunks not required
+ * to be COW'd. But if cow_threshold was reached, extra
+ * throttling is unlikely to negatively impact performance.
+ */
+ DECLARE_WAITQUEUE(wait, current);
+ __add_wait_queue(&s->in_progress_wait, &wait);
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock(&s->in_progress_wait.lock);
+ if (unlock_origins)
+ up_read(&_origins_lock);
+ io_schedule();
+ remove_wait_queue(&s->in_progress_wait, &wait);
+ return false;
+ }
+ spin_unlock(&s->in_progress_wait.lock);
+ }
+ return true;
+}
+
/*
* Flush a list of buffers.
*/
@@ -1412,7 +1460,7 @@ static void flush_bios(struct bio *bio)
}
}
-static int do_origin(struct dm_dev *origin, struct bio *bio);
+static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit);
/*
* Flush a list of buffers.
@@ -1425,7 +1473,7 @@ static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio)
while (bio) {
n = bio->bi_next;
bio->bi_next = NULL;
- r = do_origin(s->origin, bio);
+ r = do_origin(s->origin, bio, false);
if (r == DM_MAPIO_REMAPPED)
generic_make_request(bio);
bio = n;
@@ -1477,7 +1525,7 @@ static void pending_complete(void *context, int success)
if (!success) {
/* Read/write error - snapshot is unusable */
- down_write(&s->lock);
+ mutex_lock(&s->lock);
__invalidate_snapshot(s, -EIO);
error = 1;
goto out;
@@ -1485,14 +1533,14 @@ static void pending_complete(void *context, int success)
e = alloc_completed_exception(GFP_NOIO);
if (!e) {
- down_write(&s->lock);
+ mutex_lock(&s->lock);
__invalidate_snapshot(s, -ENOMEM);
error = 1;
goto out;
}
*e = pe->e;
- down_write(&s->lock);
+ mutex_lock(&s->lock);
if (!s->valid) {
free_completed_exception(e);
error = 1;
@@ -1517,7 +1565,7 @@ out:
full_bio->bi_end_io = pe->full_bio_end_io;
increment_pending_exceptions_done_count();
- up_write(&s->lock);
+ mutex_unlock(&s->lock);
/* Submit any pending write bios */
if (error) {
@@ -1579,7 +1627,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context)
}
list_add(&pe->out_of_order_entry, lh);
}
- up(&s->cow_count);
+ account_end_copy(s);
}
/*
@@ -1603,7 +1651,7 @@ static void start_copy(struct dm_snap_pending_exception *pe)
dest.count = src.count;
/* Hand over to kcopyd */
- down(&s->cow_count);
+ account_start_copy(s);
dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe);
}
@@ -1623,7 +1671,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe,
pe->full_bio = bio;
pe->full_bio_end_io = bio->bi_end_io;
- down(&s->cow_count);
+ account_start_copy(s);
callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
copy_callback, pe);
@@ -1714,9 +1762,12 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
if (!s->valid)
return -EIO;
- /* FIXME: should only take write lock if we need
- * to copy an exception */
- down_write(&s->lock);
+ if (bio_data_dir(bio) == WRITE) {
+ while (unlikely(!wait_for_in_progress(s, false)))
+ ; /* wait_for_in_progress() has slept */
+ }
+
+ mutex_lock(&s->lock);
if (!s->valid || (unlikely(s->snapshot_overflowed) && bio_rw(bio) == WRITE)) {
r = -EIO;
@@ -1738,9 +1789,9 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
if (bio_rw(bio) == WRITE) {
pe = __lookup_pending_exception(s, chunk);
if (!pe) {
- up_write(&s->lock);
+ mutex_unlock(&s->lock);
pe = alloc_pending_exception(s);
- down_write(&s->lock);
+ mutex_lock(&s->lock);
if (!s->valid || s->snapshot_overflowed) {
free_pending_exception(pe);
@@ -1775,7 +1826,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
bio->bi_iter.bi_size ==
(s->store->chunk_size << SECTOR_SHIFT)) {
pe->started = 1;
- up_write(&s->lock);
+ mutex_unlock(&s->lock);
start_full_bio(pe, bio);
goto out;
}
@@ -1785,7 +1836,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
if (!pe->started) {
/* this is protected by snap->lock */
pe->started = 1;
- up_write(&s->lock);
+ mutex_unlock(&s->lock);
start_copy(pe);
goto out;
}
@@ -1795,7 +1846,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
}
out_unlock:
- up_write(&s->lock);
+ mutex_unlock(&s->lock);
out:
return r;
}
@@ -1831,7 +1882,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
- down_write(&s->lock);
+ mutex_lock(&s->lock);
/* Full merging snapshots are redirected to the origin */
if (!s->valid)
@@ -1862,12 +1913,12 @@ redirect_to_origin:
bio->bi_bdev = s->origin->bdev;
if (bio_rw(bio) == WRITE) {
- up_write(&s->lock);
- return do_origin(s->origin, bio);
+ mutex_unlock(&s->lock);
+ return do_origin(s->origin, bio, false);
}
out_unlock:
- up_write(&s->lock);
+ mutex_unlock(&s->lock);
return r;
}
@@ -1898,7 +1949,7 @@ static int snapshot_preresume(struct dm_target *ti)
down_read(&_origins_lock);
(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
if (snap_src && snap_dest) {
- down_read(&snap_src->lock);
+ mutex_lock(&snap_src->lock);
if (s == snap_src) {
DMERR("Unable to resume snapshot source until "
"handover completes.");
@@ -1908,7 +1959,7 @@ static int snapshot_preresume(struct dm_target *ti)
"source is suspended.");
r = -EINVAL;
}
- up_read(&snap_src->lock);
+ mutex_unlock(&snap_src->lock);
}
up_read(&_origins_lock);
@@ -1954,11 +2005,11 @@ static void snapshot_resume(struct dm_target *ti)
(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
if (snap_src && snap_dest) {
- down_write(&snap_src->lock);
- down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING);
+ mutex_lock(&snap_src->lock);
+ mutex_lock_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING);
__handover_exceptions(snap_src, snap_dest);
- up_write(&snap_dest->lock);
- up_write(&snap_src->lock);
+ mutex_unlock(&snap_dest->lock);
+ mutex_unlock(&snap_src->lock);
}
up_read(&_origins_lock);
@@ -1973,9 +2024,9 @@ static void snapshot_resume(struct dm_target *ti)
/* Now we have correct chunk size, reregister */
reregister_snapshot(s);
- down_write(&s->lock);
+ mutex_lock(&s->lock);
s->active = 1;
- up_write(&s->lock);
+ mutex_unlock(&s->lock);
}
static uint32_t get_origin_minimum_chunksize(struct block_device *bdev)
@@ -2015,7 +2066,7 @@ static void snapshot_status(struct dm_target *ti, status_type_t type,
switch (type) {
case STATUSTYPE_INFO:
- down_write(&snap->lock);
+ mutex_lock(&snap->lock);
if (!snap->valid)
DMEMIT("Invalid");
@@ -2040,7 +2091,7 @@ static void snapshot_status(struct dm_target *ti, status_type_t type,
DMEMIT("Unknown");
}
- up_write(&snap->lock);
+ mutex_unlock(&snap->lock);
break;
@@ -2106,7 +2157,7 @@ static int __origin_write(struct list_head *snapshots, sector_t sector,
if (dm_target_is_snapshot_merge(snap->ti))
continue;
- down_write(&snap->lock);
+ mutex_lock(&snap->lock);
/* Only deal with valid and active snapshots */
if (!snap->valid || !snap->active)
@@ -2133,9 +2184,9 @@ static int __origin_write(struct list_head *snapshots, sector_t sector,
pe = __lookup_pending_exception(snap, chunk);
if (!pe) {
- up_write(&snap->lock);
+ mutex_unlock(&snap->lock);
pe = alloc_pending_exception(snap);
- down_write(&snap->lock);
+ mutex_lock(&snap->lock);
if (!snap->valid) {
free_pending_exception(pe);
@@ -2178,7 +2229,7 @@ static int __origin_write(struct list_head *snapshots, sector_t sector,
}
next_snapshot:
- up_write(&snap->lock);
+ mutex_unlock(&snap->lock);
if (pe_to_start_now) {
start_copy(pe_to_start_now);
@@ -2199,15 +2250,24 @@ next_snapshot:
/*
* Called on a write from the origin driver.
*/
-static int do_origin(struct dm_dev *origin, struct bio *bio)
+static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit)
{
struct origin *o;
int r = DM_MAPIO_REMAPPED;
+again:
down_read(&_origins_lock);
o = __lookup_origin(origin->bdev);
- if (o)
+ if (o) {
+ if (limit) {
+ struct dm_snapshot *s;
+ list_for_each_entry(s, &o->snapshots, list)
+ if (unlikely(!wait_for_in_progress(s, true)))
+ goto again;
+ }
+
r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio);
+ }
up_read(&_origins_lock);
return r;
@@ -2320,7 +2380,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio)
dm_accept_partial_bio(bio, available_sectors);
/* Only tell snapshots if this is a write */
- return do_origin(o->dev, bio);
+ return do_origin(o->dev, bio, true);
}
/*