f2fs: use rw_sem instead of fs_lock(locks mutex)

The fs_locks is used to block other ops(ex, recovery) when doing checkpoint. And each other operate routine(besides checkpoint) needs to acquire a fs_lock, there is a terrible problem here, if these are too many concurrency threads acquiring fs_lock, so that they will block each other and may lead to some performance problem, but this is not the phenomenon we want to see. Though there are some optimization patches introduced to enhance the usage of fs_lock, but the thorough solution is using a *rw_sem* to replace the fs_lock. Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other, this can avoid the problem described above completely. Because of the weakness of rw_sem, the above change may introduce a potential problem that the checkpoint thread might get starved if other threads are intensively locking the read semaphore for I/O.(Pointed out by Xu Jin) In order to avoid this, a wait_list is introduced, the appending read semaphore ops will be dropped into the wait_list if checkpoint thread is waiting for write semaphore, and will be waked up when checkpoint thread gives up write semaphore. Thanks to Kim's previous review and test, and will be very glad to see other guys' performance tests about this patch. V2: -fix the potential starvation problem. -use more suitable func name suggested by Xu Jin. Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com> [Jaegeuk Kim: adjust minor coding standard] Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
author: Gu Zheng <guz.fnst@cn.fujitsu.com> 2013-09-27 18:08:30 +0800
committer: Jaegeuk Kim <jaegeuk.kim@samsung.com> 2013-10-07 11:33:05 +0900
commit: e479556bfdd136669854292eb57ed0139d7253d5 (patch)
tree: 95772ba1ac8cf1e79c89145daf40c417814896da /fs/f2fs/f2fs.h
parent: 2e5558f4a5cf16a7394fd5770087303db8912c66 (diff)
1 files changed, 22 insertions, 42 deletions
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7fd99d8bd2ff..a955a59dfdbe 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -18,6 +18,8 @@
 #include <linux/crc32.h>
 #include <linux/magic.h>
 #include <linux/kobject.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
 
 /*
  * For mount options
@@ -318,14 +320,6 @@ enum count_type {
 };
 
 /*
- * Uses as sbi->fs_lock[NR_GLOBAL_LOCKS].
- * The checkpoint procedure blocks all the locks in this fs_lock array.
- * Some FS operations grab free locks, and if there is no free lock,
- * then wait to grab a lock in a round-robin manner.
- */
-#define NR_GLOBAL_LOCKS	8
-
-/*
  * The below are the page types of bios used in submti_bio().
  * The available types are:
  * DATA			User data pages. It operates as async mode.
@@ -365,10 +359,10 @@ struct f2fs_sb_info {
 	struct f2fs_checkpoint *ckpt;		/* raw checkpoint pointer */
 	struct inode *meta_inode;		/* cache meta blocks */
 	struct mutex cp_mutex;			/* checkpoint procedure lock */
-	struct mutex fs_lock[NR_GLOBAL_LOCKS];	/* blocking FS operations */
+	struct rw_semaphore cp_rwsem;		/* blocking FS operations */
+	wait_queue_head_t cp_wait;		/* checkpoint wait queue */
 	struct mutex node_write;		/* locking node writes */
 	struct mutex writepages;		/* mutex for writepages() */
-	unsigned char next_lock_num;		/* round-robin global locks */
 	int por_doing;				/* recovery is doing or not */
 	int on_build_free_nids;			/* build_free_nids is doing */
 
@@ -520,48 +514,34 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
 	cp->ckpt_flags = cpu_to_le32(ckpt_flags);
 }
 
-static inline void mutex_lock_all(struct f2fs_sb_info *sbi)
+static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
 {
-	int i;
-
-	for (i = 0; i < NR_GLOBAL_LOCKS; i++) {
-		/*
-		 * This is the only time we take multiple fs_lock[]
-		 * instances; the order is immaterial since we
-		 * always hold cp_mutex, which serializes multiple
-		 * such operations.
-		 */
-		mutex_lock_nest_lock(&sbi->fs_lock[i], &sbi->cp_mutex);
-	}
+	/*
+	 * If the checkpoint thread is waiting for cp_rwsem, add cuurent task
+	 * into wait list to avoid the checkpoint thread starvation
+	 */
+	while (!list_empty(&sbi->cp_rwsem.wait_list))
+		wait_event_interruptible(sbi->cp_wait,
+				list_empty(&sbi->cp_rwsem.wait_list));
+	down_read(&sbi->cp_rwsem);
 }
 
-static inline void mutex_unlock_all(struct f2fs_sb_info *sbi)
+static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
 {
-	int i = 0;
-	for (; i < NR_GLOBAL_LOCKS; i++)
-		mutex_unlock(&sbi->fs_lock[i]);
+	up_read(&sbi->cp_rwsem);
 }
 
-static inline int mutex_lock_op(struct f2fs_sb_info *sbi)
+static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
 {
-	unsigned char next_lock;
-	int i = 0;
-
-	for (; i < NR_GLOBAL_LOCKS; i++)
-		if (mutex_trylock(&sbi->fs_lock[i]))
-			return i;
-
-	next_lock = sbi->next_lock_num++ % NR_GLOBAL_LOCKS;
-	mutex_lock(&sbi->fs_lock[next_lock]);
-	return next_lock;
+	down_write_nest_lock(&sbi->cp_rwsem, &sbi->cp_mutex);
 }
 
-static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, int ilock)
+static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
 {
-	if (ilock < 0)
-		return;
-	BUG_ON(ilock >= NR_GLOBAL_LOCKS);
-	mutex_unlock(&sbi->fs_lock[ilock]);
+	up_write(&sbi->cp_rwsem);
+
+	/* wake up all tasks blocked by checkpoint */
+	wake_up_all(&sbi->cp_wait);
 }
 
 /*
author	Gu Zheng <guz.fnst@cn.fujitsu.com>	2013-09-27 18:08:30 +0800
committer	Jaegeuk Kim <jaegeuk.kim@samsung.com>	2013-10-07 11:33:05 +0900
commit	e479556bfdd136669854292eb57ed0139d7253d5 (patch)
tree	95772ba1ac8cf1e79c89145daf40c417814896da /fs/f2fs/f2fs.h
parent	2e5558f4a5cf16a7394fd5770087303db8912c66 (diff)