mm: zram: fix swapcache issue with zram writeback

After a zram backing device is set up, zram is treated as an async IO
swap device rather than a sync IO one, because written-back (wb) pages
require real disk IO on swap-in. As a result, the swapcache-skipping
optimization for fast swap devices no longer applies in that case.

Refactor this optimization: skip the swapcache by checking the sync IO
state of each swap slot via ioctl. The ioctl hook is reused because we
must not break the GKI ABI.
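
The new check path, roughly (a reviewer's sketch, not literal code from
the hunks below):

	do_swap_page()                                   /* mm/memory.c */
	  -> swap_slot_has_sync_io(entry)                /* mm/page_io.c, new */
	       -> disk->fops->ioctl(bdev, 0, SWP_SYNCHRONOUS_IO,
	                            swp_offset(entry))
	            -> zram_ioctl()   /* 0 for a ZRAM_WB slot, 1 otherwise */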

Change-Id: Ie4316c9a43f481ccd4dff6b90c63da108bed916c
Signed-off-by: lulei1 <lulei1@motorola.com>
Reviewed-on: https://gerrit.mot.com/2156790
SLTApproved: Slta Waiver
SME-Granted: SME Approvals Granted
Tested-by: Jira Key
Reviewed-by: Zhangqing Huang <huangzq2@motorola.com>
Reviewed-by: Guolin Wang <wanggl3@mt.com>
Reviewed-by: Xiangpo Zhao <zhaoxp3@motorola.com>
Submit-Approved: Jira Key
Author: lulei1 <lulei1@motorola.com>
Date: 2021-12-29 14:13:56 +08:00
Committed-by: caoqian4
Parent: 71b2333ed3
Commit: 4ac6be2863

5 changed files with 81 additions and 10 deletions

drivers/block/zram/zram_drv.c

@@ -25,6 +25,7 @@
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/backing-dev.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
@@ -1751,6 +1752,28 @@ static void zram_slot_free_notify(struct block_device *bdev,
	zram_slot_unlock(zram, index);
}

/* Moto lulei1: check the sync_io state of a swap slot;
 * return 0 for a written-back page, 1 otherwise.
 */
static int zram_ioctl(struct block_device *bdev, fmode_t mode,
		unsigned int cmd, unsigned long index)
{
	struct zram *zram;
	int has_sync_io = 1;

	if (cmd != SWP_SYNCHRONOUS_IO)
		return -EINVAL;

#ifdef CONFIG_ZRAM_WRITEBACK
	zram = bdev->bd_disk->private_data;
	zram_slot_lock(zram, index);
	has_sync_io = zram_test_flag(zram, index, ZRAM_WB) ? 0 : 1;
	zram_slot_unlock(zram, index);
#endif

	return has_sync_io;
}

static int zram_rw_page(struct block_device *bdev, sector_t sector,
		struct page *page, unsigned int op)
{
@@ -1943,6 +1966,7 @@ static int zram_open(struct block_device *bdev, fmode_t mode)
static const struct block_device_operations zram_devops = {
	.open = zram_open,
	.swap_slot_free_notify = zram_slot_free_notify,
	.ioctl = zram_ioctl,
	.rw_page = zram_rw_page,
	.owner = THIS_MODULE
};
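
For context (not part of this diff): a slot only carries ZRAM_WB after the
writeback path has flushed it to the backing device. A simplified sketch of
that bookkeeping, assuming the upstream writeback_store() in zram_drv.c
(surrounding logic elided):

	zram_slot_lock(zram, index);
	zram_free_page(zram, index);
	zram_set_flag(zram, index, ZRAM_WB);
	zram_set_element(zram, index, blk_idx);
	zram_slot_unlock(zram, index);

So testing ZRAM_WB in zram_ioctl() is exactly "swap-in of this slot needs
real disk IO".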

include/linux/swap.h

@@ -403,7 +403,7 @@ extern void end_swap_bio_write(struct bio *bio);
extern int __swap_writepage(struct page *page, struct writeback_control *wbc,
		bio_end_io_t end_write_func);
extern int swap_set_page_dirty(struct page *page);
extern bool swap_slot_has_sync_io(swp_entry_t entry);
int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
		unsigned long nr_pages, sector_t start_block);
int generic_swapfile_activate(struct swap_info_struct *, struct file *,

mm/memory.c

@@ -3253,8 +3253,14 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
	 * and can then take the readahead path instead of SWP_SYNCHRONOUS_IO.
	 */
	si = swp_swap_info(entry);
	/* Moto lulei1: check sync_io per page when zram wb is enabled.
	 * Zram writeback removes the SWP_SYNCHRONOUS_IO flag, since a
	 * written-back page needs disk IO when it is swapped in.
	 */
	if (si->flags & SWP_SYNCHRONOUS_IO && __swap_count(si, entry) == 1)
		skip_swapcache = true;
	else if (__swap_count(si, entry) == 1 && swap_slot_has_sync_io(entry))
		skip_swapcache = true;

	page = lookup_swap_cache(entry, vma, vmf->address);
	swapcache = page;
@@ -3282,7 +3288,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
			ret = VM_FAULT_RETRY;
			goto out;
		} else {
-			page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
+			page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE | __GFP_CMA,
				vmf);
			swapcache = page;
		}
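
For context, the fast path that skip_swapcache guards looks roughly like
this in kernels of this era (a simplified sketch of the existing
SWP_SYNCHRONOUS_IO branch in do_swap_page(); exact helper names vary by
kernel version):

	if (skip_swapcache) {
		/* read straight into a fresh page, bypassing the swapcache */
		page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address);
		if (page) {
			__SetPageLocked(page);
			__SetPageSwapBacked(page);
			set_page_private(page, entry.val);
			lru_cache_add_anon(page);
			swap_readpage(page, true); /* must finish synchronously */
		}
	}

This is only safe when the read really is synchronous, hence the new
per-slot check above.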

mm/page_io.c

@@ -74,6 +74,22 @@ void end_swap_bio_write(struct bio *bio)
	bio_put(bio);
}

/* Moto lulei1: check sync_io state on a swap entry */
bool swap_slot_has_sync_io(swp_entry_t entry)
{
	struct swap_info_struct *sis;
	struct gendisk *disk;

	sis = swp_swap_info(entry);
	disk = sis->bdev->bd_disk;
	if (disk->fops->ioctl) {
		return disk->fops->ioctl(sis->bdev, 0,
				SWP_SYNCHRONOUS_IO, swp_offset(entry)) == 1;
	}

	return (sis->flags & SWP_SYNCHRONOUS_IO) == SWP_SYNCHRONOUS_IO;
}

static void swap_slot_free_notify(struct page *page)
{
	struct swap_info_struct *sis;
@@ -359,6 +375,7 @@ int swap_readpage(struct page *page, bool synchronous)
	blk_qc_t qc;
	struct gendisk *disk;
	unsigned long pflags;
	swp_entry_t entry;

	VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page);
	VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -387,15 +404,24 @@ int swap_readpage(struct page *page, bool synchronous)
		goto out;
	}

-	ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
-	if (!ret) {
-		if (trylock_page(page)) {
-			swap_slot_free_notify(page);
-			unlock_page(page);
-		}
-		count_vm_event(PSWPIN);
-		goto out;
+	/* Moto lulei1: use rw_page for a regular zram page but submit_bio for
+	 * a zram wb page. Reading a zram wb page is an async operation, so we
+	 * cannot free the slot immediately here; instead we read the page via
+	 * submit_bio and free the slot in end_swap_bio_read (a bio chain links
+	 * this bio with the one the zram driver issues for the wb page).
+	 */
+	entry.val = page_private(page);
+	if (swap_slot_has_sync_io(entry)) {
+		ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
+		if (!ret) {
+			if (trylock_page(page)) {
+				swap_slot_free_notify(page);
+				unlock_page(page);
+			}
+			count_vm_event(PSWPIN);
+			goto out;
+		}
	}

	ret = 0;
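
When swap_slot_has_sync_io() returns false, execution falls through to the
existing submit_bio tail of swap_readpage(), roughly (a simplified sketch of
the mm/page_io.c read path of this era, not the literal patched code):

	ret = 0;
	bio = bio_alloc(GFP_KERNEL, 1);
	bio_set_dev(bio, sis->bdev);
	bio->bi_iter.bi_sector = swap_page_sector(page);
	bio->bi_end_io = end_swap_bio_read;
	bio_add_page(bio, page, PAGE_SIZE, 0);
	bio_set_op_attrs(bio, REQ_OP_READ, 0);
	count_vm_event(PSWPIN);
	submit_bio(bio);

Per the comment above, for a wb page the slot is then freed in
end_swap_bio_read() once the chained IO completes.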

mm/swap_state.c

@@ -584,6 +584,10 @@ struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
	struct vm_area_struct *vma = vmf->vma;
	unsigned long addr = vmf->address;

	/* Moto lulei1: don't readahead sync io pages */
	if (swap_slot_has_sync_io(entry))
		goto skip;

	mask = swapin_nr_pages(offset) - 1;
	if (!mask)
		goto skip;
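
For reference, the skip label these checks jump to already degrades to a
single-page read in the upstream code, roughly:

	skip:
		return read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll);

so a sync-io slot is simply faulted in on its own, bypassing the readahead
window and the plugged batch below. swap_vma_readahead() gets the same
effect by forcing ra_info.win = 1 before jumping to its skip label.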
@@ -599,6 +603,9 @@ struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
	blk_start_plug(&plug);
	for (offset = start_offset; offset <= end_offset ; offset++) {
		/* Moto lulei1: don't readahead sync io pages */
		if (swap_slot_has_sync_io(swp_entry(swp_type(entry), offset)))
			continue;

		/* Ok, do the async read-ahead now */
		page = __read_swap_cache_async(
			swp_entry(swp_type(entry), offset),
@@ -747,6 +754,11 @@ static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
	unsigned int i;
	bool page_allocated;
	struct vma_swap_readahead ra_info = {0,};

	/* Moto lulei1: don't readahead sync io pages */
	if (swap_slot_has_sync_io(fentry)) {
		ra_info.win = 1;
		goto skip;
	}

	swap_ra_info(vmf, &ra_info);
	if (ra_info.win == 1)
@@ -763,6 +775,9 @@ static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
		entry = pte_to_swp_entry(pentry);
		if (unlikely(non_swap_entry(entry)))
			continue;
		/* Moto lulei1: don't readahead sync io pages */
		if (swap_slot_has_sync_io(entry))
			continue;

		page = __read_swap_cache_async(entry, gfp_mask, vma,
				vmf->address, &page_allocated);
		if (!page)