From 35be952ae0ccfab860ce1dc45f07d749456d7551 Mon Sep 17 00:00:00 2001
From: Mark Salyzyn
Date: Fri, 10 Apr 2020 17:12:59 -0700
Subject: [PATCH] Revert "BACKPORT: mm: reclaim small amounts of memory when
 an external fragmentation event occurs"

This reverts commit 5cbbeadd5a497713db13c82bbb3ae410f50ce8a7.

Reason for revert: revert customized code

Bug: 140544941
Test: boot
Signed-off-by: Minchan Kim
Signed-off-by: Martin Liu
Signed-off-by: Mark Salyzyn
Signed-off-by: Suren Baghdasaryan
Change-Id: I65735f27f6a44a112957bcec07e2f63f2d8ccff6
---
 Documentation/sysctl/vm.txt |  21 ------
 include/linux/mm.h          |   1 -
 include/linux/mmzone.h      |  11 ++-
 kernel/sysctl.c             |   8 ---
 mm/page_alloc.c             |  43 +-----
 mm/vmscan.c                 | 133 +++---------------------------
 6 files changed, 15 insertions(+), 202 deletions(-)

diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index e33b6808db6b..a48baf202265 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -64,7 +64,6 @@ Currently, these files are in /proc/sys/vm:
 - swappiness
 - user_reserve_kbytes
 - vfs_cache_pressure
-- watermark_boost_factor
 - watermark_scale_factor
 - zone_reclaim_mode
 
@@ -873,26 +872,6 @@ ten times more freeable objects than there are.
 
 =============================================================
 
-watermark_boost_factor:
-
-This factor controls the level of reclaim when memory is being fragmented.
-It defines the percentage of the high watermark of a zone that will be
-reclaimed if pages of different mobility are being mixed within pageblocks.
-The intent is that compaction has less work to do in the future and to
-increase the success rate of future high-order allocations such as SLUB
-allocations, THP and hugetlbfs pages.
-
-To make it sensible with respect to the watermark_scale_factor parameter,
-the unit is in fractions of 10,000. The default value of 15,000 means
-that up to 150% of the high watermark will be reclaimed in the event of
-a pageblock being mixed due to fragmentation. The level of reclaim is
-determined by the number of fragmentation events that occurred in the
-recent past. If this value is smaller than a pageblock then a pageblocks
-worth of pages will be reclaimed (e.g. 2MB on 64-bit x86). A boost factor
-of 0 will disable the feature.
-
-=============================================================
-
 watermark_scale_factor:
 
 This factor controls the aggressiveness of kswapd. It defines the
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c06305ce27d5..a3ece90256c0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2239,7 +2239,6 @@ extern void zone_pcp_reset(struct zone *zone);
 
 /* page_alloc.c */
 extern int min_free_kbytes;
-extern int watermark_boost_factor;
 extern int watermark_scale_factor;
 
 /* nommu.c */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c4b5c29060aa..0a75544d130a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -274,10 +274,10 @@ enum zone_watermarks {
 	NR_WMARK
 };
 
-#define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost)
-#define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost)
-#define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost)
-#define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost)
+#define min_wmark_pages(z) (z->_watermark[WMARK_MIN])
+#define low_wmark_pages(z) (z->_watermark[WMARK_LOW])
+#define high_wmark_pages(z) (z->_watermark[WMARK_HIGH])
+#define wmark_pages(z, i) (z->_watermark[i])
 
 struct per_cpu_pages {
 	int count;		/* number of pages in the list */
@@ -369,7 +369,6 @@ struct zone {
 
 	/* zone watermarks, access with *_wmark_pages(zone) macros */
 	unsigned long _watermark[NR_WMARK];
-	unsigned long watermark_boost;
 
 	unsigned long nr_reserved_highatomic;
 
@@ -897,8 +896,6 @@ static inline int is_highmem(struct zone *zone)
 struct ctl_table;
 int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
-int watermark_boost_factor_sysctl_handler(struct ctl_table *, int,
-					void __user *, size_t *, loff_t *);
 int watermark_scale_factor_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
 extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 62949e56064a..521c38e9ac14 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1494,14 +1494,6 @@ static struct ctl_table vm_table[] = {
 		.proc_handler	= min_free_kbytes_sysctl_handler,
 		.extra1		= &zero,
 	},
-	{
-		.procname	= "watermark_boost_factor",
-		.data		= &watermark_boost_factor,
-		.maxlen		= sizeof(watermark_boost_factor),
-		.mode		= 0644,
-		.proc_handler	= watermark_boost_factor_sysctl_handler,
-		.extra1		= &zero,
-	},
 	{
 		.procname	= "watermark_scale_factor",
 		.data		= &watermark_scale_factor,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 56f82c3052c7..4d2cd58a62d3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -318,7 +318,6 @@ compound_page_dtor * const compound_page_dtors[] = {
  */
 int min_free_kbytes = 1024;
 int user_min_free_kbytes = -1;
-int watermark_boost_factor __read_mostly = 15000;
 int watermark_scale_factor = 10;
 
 /*
@@ -2219,21 +2218,6 @@ static bool can_steal_fallback(unsigned int order, int start_mt)
 	return false;
 }
 
-static inline void boost_watermark(struct zone *zone)
-{
-	unsigned long max_boost;
-
-	if (!watermark_boost_factor)
-		return;
-
-	max_boost = mult_frac(zone->_watermark[WMARK_HIGH],
-			watermark_boost_factor, 10000);
-	max_boost = max(pageblock_nr_pages, max_boost);
-
-	zone->watermark_boost = min(zone->watermark_boost + pageblock_nr_pages,
-		max_boost);
-}
-
 /*
  * This function implements actual steal behaviour. If order is large enough,
  * we can steal whole pageblock. If not, we first move freepages in this
@@ -2243,7 +2227,7 @@ static inline void boost_watermark(struct zone *zone)
  * itself, so pages freed in the future will be put on the correct free list.
  */
 static void steal_suitable_fallback(struct zone *zone, struct page *page,
-		unsigned int alloc_flags, int start_type, bool whole_block)
+		int start_type, bool whole_block)
 {
 	unsigned int current_order = page_order(page);
 	struct free_area *area;
@@ -2265,15 +2249,6 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,
 		goto single_page;
 	}
 
-	/*
-	 * Boost watermarks to increase reclaim pressure to reduce the
-	 * likelihood of future fallbacks. Wake kswapd now as the node
-	 * may be balanced overall and kswapd will not wake naturally.
-	 */
-	boost_watermark(zone);
-	if (alloc_flags & ALLOC_KSWAPD)
-		wakeup_kswapd(zone, 0, 0, zone_idx(zone));
-
 	/* We are not allowed to try stealing from the whole block */
 	if (!whole_block)
 		goto single_page;
@@ -2557,8 +2532,7 @@ do_steal:
 	page = list_first_entry(&area->free_list[fallback_mt],
 							struct page, lru);
 
-	steal_suitable_fallback(zone, page, alloc_flags, start_migratetype,
-								can_steal);
+	steal_suitable_fallback(zone, page, start_migratetype, can_steal);
 
 	trace_mm_page_alloc_extfrag(page, order, current_order,
 		start_migratetype, fallback_mt);
@@ -7594,7 +7568,6 @@ static void __setup_per_zone_wmarks(void)
 					low + min;
 		zone->_watermark[WMARK_HIGH] = min_wmark_pages(zone) +
 					low + min * 2;
-		zone->watermark_boost = 0;
 
 		spin_unlock_irqrestore(&zone->lock, flags);
 	}
@@ -7695,18 +7668,6 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
 	return 0;
 }
 
-int watermark_boost_factor_sysctl_handler(struct ctl_table *table, int write,
-	void __user *buffer, size_t *length, loff_t *ppos)
-{
-	int rc;
-
-	rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
-	if (rc)
-		return rc;
-
-	return 0;
-}
-
 int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
 	void __user *buffer, size_t *length, loff_t *ppos)
 {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 84d1b7488a48..32b2424a3203 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -87,9 +87,6 @@ struct scan_control {
 	/* Can pages be swapped as part of reclaim? */
 	unsigned int may_swap:1;
 
-	/* e.g. boosted watermark reclaim leaves slabs alone */
-	unsigned int may_shrinkslab:1;
-
 	/*
 	 * Cgroups are not reclaimed below their configured memory.low,
 	 * unless we threaten to OOM. If any cgroups are skipped due to
@@ -2742,10 +2739,8 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 			shrink_node_memcg(pgdat, memcg, sc, &lru_pages);
 			node_lru_pages += lru_pages;
 
-			if (sc->may_shrinkslab) {
-				shrink_slab(sc->gfp_mask, pgdat->node_id,
+			shrink_slab(sc->gfp_mask, pgdat->node_id,
 				    memcg, sc->priority);
-			}
 
 			/* Record the group's reclaim efficiency */
 			vmpressure(sc->gfp_mask, memcg, false,
@@ -3223,7 +3218,6 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 		.may_writepage = !laptop_mode,
 		.may_unmap = 1,
 		.may_swap = 1,
-		.may_shrinkslab = 1,
 	};
 
 	/*
@@ -3268,7 +3262,6 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
 		.may_unmap = 1,
 		.reclaim_idx = MAX_NR_ZONES - 1,
 		.may_swap = !noswap,
-		.may_shrinkslab = 1,
 	};
 	unsigned long lru_pages;
 
@@ -3315,7 +3308,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 		.may_writepage = !laptop_mode,
 		.may_unmap = 1,
 		.may_swap = may_swap,
-		.may_shrinkslab = 1,
 	};
 
 	/*
@@ -3366,30 +3358,6 @@ static void age_active_anon(struct pglist_data *pgdat,
 	} while (memcg);
 }
 
-static bool pgdat_watermark_boosted(pg_data_t *pgdat, int classzone_idx)
-{
-	int i;
-	struct zone *zone;
-
-	/*
-	 * Check for watermark boosts top-down as the higher zones
-	 * are more likely to be boosted. Both watermarks and boosts
-	 * should not be checked at the time time as reclaim would
-	 * start prematurely when there is no boosting and a lower
-	 * zone is balanced.
-	 */
-	for (i = classzone_idx; i >= 0; i--) {
-		zone = pgdat->node_zones + i;
-		if (!managed_zone(zone))
-			continue;
-
-		if (zone->watermark_boost)
-			return true;
-	}
-
-	return false;
-}
-
 /*
  * Returns true if there is an eligible zone balanced for the request order
  * and classzone_idx
@@ -3400,10 +3368,6 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx)
 	unsigned long mark = -1;
 	struct zone *zone;
 
-	/*
-	 * Check watermarks bottom-up as lower zones are more likely to
-	 * meet watermarks.
-	 */
 	for (i = 0; i <= classzone_idx; i++) {
 		zone = pgdat->node_zones + i;
 
@@ -3532,14 +3496,14 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
 	unsigned long nr_soft_reclaimed;
 	unsigned long nr_soft_scanned;
 	unsigned long pflags;
-	unsigned long nr_boost_reclaim;
-	unsigned long zone_boosts[MAX_NR_ZONES] = { 0, };
-	bool boosted;
 	struct zone *zone;
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
 		.order = order,
+		.priority = DEF_PRIORITY,
+		.may_writepage = !laptop_mode,
 		.may_unmap = 1,
+		.may_swap = 1,
 	};
 
 	psi_memstall_enter(&pflags);
@@ -3547,28 +3511,9 @@
 
 	count_vm_event(PAGEOUTRUN);
 
-	/*
-	 * Account for the reclaim boost. Note that the zone boost is left in
-	 * place so that parallel allocations that are near the watermark will
-	 * stall or direct reclaim until kswapd is finished.
-	 */
-	nr_boost_reclaim = 0;
-	for (i = 0; i <= classzone_idx; i++) {
-		zone = pgdat->node_zones + i;
-		if (!managed_zone(zone))
-			continue;
-
-		nr_boost_reclaim += zone->watermark_boost;
-		zone_boosts[i] = zone->watermark_boost;
-	}
-	boosted = nr_boost_reclaim;
-
-restart:
-	sc.priority = DEF_PRIORITY;
 	do {
 		unsigned long nr_reclaimed = sc.nr_reclaimed;
 		bool raise_priority = true;
-		bool balanced;
 		bool ret;
 
 		sc.reclaim_idx = classzone_idx;
@@ -3595,40 +3540,13 @@ restart:
 		}
 
 		/*
-		 * If the pgdat is imbalanced then ignore boosting and preserve
-		 * the watermarks for a later time and restart. Note that the
-		 * zone watermarks will be still reset at the end of balancing
-		 * on the grounds that the normal reclaim should be enough to
-		 * re-evaluate if boosting is required when kswapd next wakes.
+		 * Only reclaim if there are no eligible zones. Note that
+		 * sc.reclaim_idx is not used as buffer_heads_over_limit may
+		 * have adjusted it.
 		 */
-		balanced = pgdat_balanced(pgdat, sc.order, classzone_idx);
-		if (!balanced && nr_boost_reclaim) {
-			nr_boost_reclaim = 0;
-			goto restart;
-		}
-
-		/*
-		 * If boosting is not active then only reclaim if there are no
-		 * eligible zones. Note that sc.reclaim_idx is not used as
-		 * buffer_heads_over_limit may have adjusted it.
-		 */
-		if (!nr_boost_reclaim && balanced)
+		if (pgdat_balanced(pgdat, sc.order, classzone_idx))
 			goto out;
 
-		/* Limit the priority of boosting to avoid reclaim writeback */
-		if (nr_boost_reclaim && sc.priority == DEF_PRIORITY - 2)
-			raise_priority = false;
-
-		/*
-		 * Do not writeback or swap pages for boosted reclaim. The
-		 * intent is to relieve pressure not issue sub-optimal IO
-		 * from reclaim context. If no pages are reclaimed, the
-		 * reclaim will be aborted.
-		 */
-		sc.may_writepage = !laptop_mode && !nr_boost_reclaim;
-		sc.may_swap = !nr_boost_reclaim;
-		sc.may_shrinkslab = !nr_boost_reclaim;
-
 		/*
 		 * Do some background aging of the anon list, to give
 		 * pages a chance to be referenced before reclaiming. All
@@ -3680,16 +3598,6 @@ restart:
 		 * progress in reclaiming pages
 		 */
 		nr_reclaimed = sc.nr_reclaimed - nr_reclaimed;
-		nr_boost_reclaim -= min(nr_boost_reclaim, nr_reclaimed);
-
-		/*
-		 * If reclaim made no progress for a boost, stop reclaim as
-		 * IO cannot be queued and it could be an infinite loop in
-		 * extreme circumstances.
-		 */
-		if (nr_boost_reclaim && !nr_reclaimed)
-			break;
-
 		if (raise_priority || !nr_reclaimed)
 			sc.priority--;
 	} while (sc.priority >= 1);
@@ -3698,28 +3606,6 @@ restart:
 		pgdat->kswapd_failures++;
 
 out:
-	/* If reclaim was boosted, account for the reclaim done in this pass */
-	if (boosted) {
-		unsigned long flags;
-
-		for (i = 0; i <= classzone_idx; i++) {
-			if (!zone_boosts[i])
-				continue;
-
-			/* Increments are under the zone lock */
-			zone = pgdat->node_zones + i;
-			spin_lock_irqsave(&zone->lock, flags);
-			zone->watermark_boost -= min(zone->watermark_boost, zone_boosts[i]);
-			spin_unlock_irqrestore(&zone->lock, flags);
-		}
-
-		/*
-		 * As there is now likely space, wakeup kcompact to defragment
-		 * pageblocks.
-		 */
-		wakeup_kcompactd(pgdat, pageblock_order, classzone_idx);
-	}
-
 	snapshot_refaults(NULL, pgdat);
 	__fs_reclaim_release();
 	psi_memstall_leave(&pflags);
@@ -3951,8 +3837,7 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
 
 	/* Hopeless node, leave it to direct reclaim if possible */
 	if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ||
-	    (pgdat_balanced(pgdat, order, classzone_idx) &&
-	     !pgdat_watermark_boosted(pgdat, classzone_idx))) {
+	    pgdat_balanced(pgdat, order, classzone_idx)) {
 		/*
 		 * There may be plenty of free memory available, but it's too
 		 * fragmented for high-order allocations. Wake up kcompactd