mm: swap: don't delay swap free for fast swap devices

There are a couple of issues with swapcache usage when ZRAM is used as the swap device. 1) The kernel does swap readahead, which can be around 6 to 8 pages depending on total RAM; this is not required for zram since accesses are fast. 2) The kernel delays freeing of the swapcache in expectation of a later hit, which again is useless in the case of zram. 3) This is not related to swapcache, but to zram usage itself. As mentioned in (2), the kernel delays freeing of the swapcache, but along with that it also delays freeing of the zram compressed page, i.e. there can be 2 copies, though one is compressed. This patch addresses these issues using two new flags, QUEUE_FLAG_FAST and SWP_FAST, which indicate that accesses to the device will be fast and cheap, and instruct the swap layer to free up swap space aggressively and not to do readahead. Change-Id: I5d2d5176a5f9420300bb2f843f6ecbdb25ea80e4 Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
author: Vinayak Menon <vinmenon@codeaurora.org> 2015-02-25 19:43:59 +0530
committer: David Keitel <dkeitel@codeaurora.org> 2016-03-22 11:03:52 -0700
commit: e5ce54a9cbc60d691e15e8e72ec310ab99735906 (patch)
tree: 4456aede35a36a68d20f89f6e4fece080583d9b4
parent: 0a8bf43567ab42f45f17773e53d623d746961743 (diff)
7 files changed, 50 insertions, 10 deletions
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 47915d736f8d..da246c90e27a 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1232,6 +1232,7 @@ static int zram_add(void)
 	zram->disk->private_data = zram;
 	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
 
+	__set_bit(QUEUE_FLAG_FAST, &zram->queue->queue_flags);
 	/* Actual capacity set using syfs (/sys/block/zram<id>/disksize */
 	set_capacity(zram->disk, 0);
 	/* zram devices sort of resembles non-rotational disks */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0adfa9e76f64..4c1110ed0881 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -489,6 +489,7 @@ struct request_queue {
 #define QUEUE_FLAG_INIT_DONE   20	/* queue is initialized */
 #define QUEUE_FLAG_NO_SG_MERGE 21	/* don't attempt to merge SG segments*/
 #define QUEUE_FLAG_POLL	       22	/* IO polling enabled if set */
+#define QUEUE_FLAG_FAST        23	/* fast block device (e.g. ram based) */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
@@ -577,6 +578,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 #define blk_queue_discard(q)	test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
 #define blk_queue_secdiscard(q)	(blk_queue_discard(q) && \
 	test_bit(QUEUE_FLAG_SECDISCARD, &(q)->queue_flags))
+#define blk_queue_fast(q)	test_bit(QUEUE_FLAG_FAST, &(q)->queue_flags)
 
 #define blk_noretry_request(rq) \
 	((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 7ba7dccaf0e7..b142d27b3edb 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -151,7 +151,8 @@ enum {
 	SWP_AREA_DISCARD = (1 << 8),	/* single-time swap area discards */
 	SWP_PAGE_DISCARD = (1 << 9),	/* freed swap page-cluster discards */
 					/* add others here before... */
-	SWP_SCANNING	= (1 << 10),	/* refcount in scan_swap_map */
+	SWP_FAST	= (1 << 10),	/* blkdev access is fast and cheap */
+	SWP_SCANNING	= (1 << 11),	/* refcount in scan_swap_map */
 };
 
 #define SWAP_CLUSTER_MAX 32UL
@@ -415,10 +416,18 @@ extern struct page *swapin_readahead(swp_entry_t, gfp_t,
 /* linux/mm/swapfile.c */
 extern atomic_long_t nr_swap_pages;
 extern long total_swap_pages;
+extern bool is_swap_fast(swp_entry_t entry);
 
 /* Swap 50% full? Release swapcache more aggressively.. */
-static inline bool vm_swap_full(void)
+static inline bool vm_swap_full(struct swap_info_struct *si)
 {
+	/*
+	 * If the swap device is fast, return true
+	 * not to delay swap free.
+	 */
+	if (si->flags & SWP_FAST)
+		return true;
+
 	return atomic_long_read(&nr_swap_pages) * 2 < total_swap_pages;
 }
 
@@ -454,7 +463,7 @@ struct backing_dev_info;
 #define get_nr_swap_pages()			0L
 #define total_swap_pages			0L
 #define total_swapcache_pages()			0UL
-#define vm_swap_full()				0
+#define vm_swap_full(si)			0
 
 #define si_swapinfo(val) \
 	do { (val)->freeswap = (val)->totalswap = 0; } while (0)
diff --git a/mm/memory.c b/mm/memory.c
index c387430f06c3..82e570f7561d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2578,7 +2578,8 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	swap_free(entry);
-	if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
+	if ((PageSwapCache(page) && vm_swap_full(page_swap_info(page))) ||
+		(vma->vm_flags & VM_LOCKED) || PageMlocked(page))
 		try_to_free_swap(page);
 	unlock_page(page);
 	if (page != swapcache) {
diff --git a/mm/swap_state.c b/mm/swap_state.c
index d504adb7fa5f..bfb0d7d6433e 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -469,7 +469,8 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 	unsigned long entry_offset = swp_offset(entry);
 	unsigned long offset = entry_offset;
 	unsigned long start_offset, end_offset;
-	unsigned long mask;
+	unsigned long mask = is_swap_fast(entry) ? 0 :
+				(1UL << page_cluster) - 1;
 	struct blk_plug plug;
 
 	mask = swapin_nr_pages(offset) - 1;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 58877312cf6b..6522b8bbe178 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -91,6 +91,26 @@ static inline unsigned char swap_count(unsigned char ent)
 	return ent & ~SWAP_HAS_CACHE;	/* may include SWAP_HAS_CONT flag */
 }
 
+bool is_swap_fast(swp_entry_t entry)	/* true iff the swap_info for entry's type has SWP_FAST set */
+{
+	struct swap_info_struct *p;
+	unsigned long type;
+
+	if (non_swap_entry(entry))	/* non_swap_entry() entries are never reported as fast */
+		return false;
+
+	type = swp_type(entry);
+	if (type >= nr_swapfiles)	/* type is not below nr_swapfiles: do not index swap_info[] */
+		return false;
+
+	p = swap_info[type];
+
+	if (p->flags & SWP_FAST)	/* swapon sets SWP_FAST when blk_queue_fast() holds for the bdev queue */
+		return true;
+
+	return false;
+}
+
 /* returns 1 if swap entry is freed */
 static int
 __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
@@ -564,7 +584,7 @@ checks:
 		scan_base = offset = si->lowest_bit;
 
 	/* reuse swap entry of cache-only swap if not busy. */
-	if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
+	if (vm_swap_full(si) && si->swap_map[offset] == SWAP_HAS_CACHE) {
 		int swap_was_freed;
 		spin_unlock(&si->lock);
 		swap_was_freed = __try_to_reclaim_swap(si, offset);
@@ -604,7 +624,8 @@ scan:
 			spin_lock(&si->lock);
 			goto checks;
 		}
-		if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
+		if (vm_swap_full(si) &&
+			si->swap_map[offset] == SWAP_HAS_CACHE) {
 			spin_lock(&si->lock);
 			goto checks;
 		}
@@ -619,7 +640,8 @@ scan:
 			spin_lock(&si->lock);
 			goto checks;
 		}
-		if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
+		if (vm_swap_full(si) &&
+			si->swap_map[offset] == SWAP_HAS_CACHE) {
 			spin_lock(&si->lock);
 			goto checks;
 		}
@@ -1008,7 +1030,8 @@ int free_swap_and_cache(swp_entry_t entry)
 		 * Also recheck PageSwapCache now page is locked (above).
 		 */
 		if (PageSwapCache(page) && !PageWriteback(page) &&
-				(!page_mapped(page) || vm_swap_full())) {
+				(!page_mapped(page) ||
+				vm_swap_full(page_swap_info(page)))) {
 			delete_from_swap_cache(page);
 			SetPageDirty(page);
 		}
@@ -2531,6 +2554,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 				pr_err("swapon: discard_swap(%p): %d\n",
 					p, err);
 		}
+
+		if (blk_queue_fast(bdev_get_queue(p->bdev)))
+			p->flags |= SWP_FAST;
 	}
 
 	mutex_lock(&swapon_mutex);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 194c534da9ba..65fd3374784c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1269,7 +1269,7 @@ cull_mlocked:
 
 activate_locked:
 		/* Not a candidate for swapping, so reclaim swap space. */
-		if (PageSwapCache(page) && vm_swap_full())
+		if (PageSwapCache(page) && vm_swap_full(page_swap_info(page)))
 			try_to_free_swap(page);
 		VM_BUG_ON_PAGE(PageActive(page), page);
 		SetPageActive(page);
author	Vinayak Menon <vinmenon@codeaurora.org>	2015-02-25 19:43:59 +0530
committer	David Keitel <dkeitel@codeaurora.org>	2016-03-22 11:03:52 -0700
commit	e5ce54a9cbc60d691e15e8e72ec310ab99735906 (patch)
tree	4456aede35a36a68d20f89f6e4fece080583d9b4
parent	0a8bf43567ab42f45f17773e53d623d746961743 (diff)