diff options
author | Muchun Song <songmuchun@bytedance.com> | 2021-02-04 18:32:06 -0800 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2021-03-03 16:44:22 +0100 |
commit | ea279e9f078eb581d7f5344c93aeb7076a19fe11 (patch) | |
tree | 22620b851ec0928dd79ca81fca80b1b807b92658 /mm | |
parent | f502ef682ee136bbeece188db52096fe69b1d7f2 (diff) |
mm: hugetlb: fix a race between freeing and dissolving the page
commit 7ffddd499ba6122b1a07828f023d1d67629aa017 upstream
There is a race condition between __free_huge_page()
and dissolve_free_huge_page().
CPU0: CPU1:
// page_count(page) == 1
put_page(page)
__free_huge_page(page)
dissolve_free_huge_page(page)
spin_lock(&hugetlb_lock)
// PageHuge(page) && !page_count(page)
update_and_free_page(page)
// page is freed to the buddy
spin_unlock(&hugetlb_lock)
spin_lock(&hugetlb_lock)
clear_page_huge_active(page)
enqueue_huge_page(page)
// It is wrong, the page is already freed
spin_unlock(&hugetlb_lock)
The race window is between put_page() and dissolve_free_huge_page().
We should make sure that the page is already on the free list when it is
dissolved.
As a result __free_huge_page would corrupt page(s) already in the buddy
allocator.
Link: https://lkml.kernel.org/r/20210115124942.46403-4-songmuchun@bytedance.com
Fixes: c8721bbbdd36 ("mm: memory-hotplug: enable memory hotplug to handle hugepage")
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
[sudip: adjust context]
Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/hugetlb.c | 39 |
1 files changed, 39 insertions, 0 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7c3ecac8aeb3..13a4f3fe2d91 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -66,6 +66,21 @@ DEFINE_SPINLOCK(hugetlb_lock); static int num_fault_mutexes; struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp; +static inline bool PageHugeFreed(struct page *head) +{ + return page_private(head + 4) == -1UL; +} + +static inline void SetPageHugeFreed(struct page *head) +{ + set_page_private(head + 4, -1UL); +} + +static inline void ClearPageHugeFreed(struct page *head) +{ + set_page_private(head + 4, 0); +} + /* Forward declaration */ static int hugetlb_acct_memory(struct hstate *h, long delta); @@ -841,6 +856,7 @@ static void enqueue_huge_page(struct hstate *h, struct page *page) list_move(&page->lru, &h->hugepage_freelists[nid]); h->free_huge_pages++; h->free_huge_pages_node[nid]++; + SetPageHugeFreed(page); } static struct page *dequeue_huge_page_node(struct hstate *h, int nid) @@ -858,6 +874,7 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid) return NULL; list_move(&page->lru, &h->hugepage_activelist); set_page_refcounted(page); + ClearPageHugeFreed(page); h->free_huge_pages--; h->free_huge_pages_node[nid]--; return page; @@ -1266,6 +1283,7 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) set_hugetlb_cgroup(page, NULL); h->nr_huge_pages++; h->nr_huge_pages_node[nid]++; + ClearPageHugeFreed(page); spin_unlock(&hugetlb_lock); put_page(page); /* free it into the hugepage allocator */ } @@ -1424,11 +1442,32 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed, */ static void dissolve_free_huge_page(struct page *page) { +retry: spin_lock(&hugetlb_lock); if (PageHuge(page) && !page_count(page)) { struct page *head = compound_head(page); struct hstate *h = page_hstate(head); int nid = page_to_nid(head); + + /* + * We should make sure that the page is already on the free list + * when it is dissolved. + */ + if (unlikely(!PageHugeFreed(head))) { + spin_unlock(&hugetlb_lock); + cond_resched(); + + /* + * Theoretically, we should return -EBUSY when we + * encounter this race. In fact, we have a chance + * to successfully dissolve the page if we do a + * retry. Because the race window is quite small. + * If we seize this opportunity, it is an optimization + * for increasing the success rate of dissolving page. + */ + goto retry; + } + list_del(&head->lru); h->free_huge_pages--; h->free_huge_pages_node[nid]--; |