0e5c9f39f6
I don't think we need to call hugetlb_clean_stale_pgtable() anymore in 2.6.13 because of the rework with free_pgtables(). It now collect all the pte page at the time of munmap. It used to only collect page table pages when entire one pgd can be freed and left with staled pte pages. Not anymore with 2.6.13. This function will never be called and We should turn it into a BUG_ON. I also spotted two problems here, not Adam's fault :-) (1) in huge_pte_alloc(), it looks like a bug to me that pud is not checked before calling pmd_alloc() (2) in hugetlb_clean_stale_pgtable(), it also missed a call to pmd_free_tlb. I think a tlb flush is required to flush the mapping for the page table itself when we clear out the pmd pointing to a pte page. However, since hugetlb_clean_stale_pgtable() is never called, so it won't trigger the bug. Signed-off-by: Ken Chen <kenneth.w.chen@intel.com> Cc: Adam Litke <agl@us.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
289 lines
6.6 KiB
C
289 lines
6.6 KiB
C
/*
|
|
* IA-32 Huge TLB Page Support for Kernel.
|
|
*
|
|
* Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
|
|
*/
|
|
|
|
#include <linux/config.h>
|
|
#include <linux/init.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/hugetlb.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/smp_lock.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/err.h>
|
|
#include <linux/sysctl.h>
|
|
#include <asm/mman.h>
|
|
#include <asm/tlb.h>
|
|
#include <asm/tlbflush.h>
|
|
|
|
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
|
|
{
|
|
pgd_t *pgd;
|
|
pud_t *pud;
|
|
pte_t *pte = NULL;
|
|
|
|
pgd = pgd_offset(mm, addr);
|
|
pud = pud_alloc(mm, pgd, addr);
|
|
if (pud)
|
|
pte = (pte_t *) pmd_alloc(mm, pud, addr);
|
|
BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
|
|
|
|
return pte;
|
|
}
|
|
|
|
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
|
|
{
|
|
pgd_t *pgd;
|
|
pud_t *pud;
|
|
pmd_t *pmd = NULL;
|
|
|
|
pgd = pgd_offset(mm, addr);
|
|
if (pgd_present(*pgd)) {
|
|
pud = pud_offset(pgd, addr);
|
|
if (pud_present(*pud))
|
|
pmd = pmd_offset(pud, addr);
|
|
}
|
|
return (pte_t *) pmd;
|
|
}
|
|
|
|
/*
|
|
* This function checks for proper alignment of input addr and len parameters.
|
|
*/
|
|
int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
|
|
{
|
|
if (len & ~HPAGE_MASK)
|
|
return -EINVAL;
|
|
if (addr & ~HPAGE_MASK)
|
|
return -EINVAL;
|
|
return 0;
|
|
}
|
|
|
|
#if 0 /* This is just for testing */
|
|
struct page *
|
|
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
|
|
{
|
|
unsigned long start = address;
|
|
int length = 1;
|
|
int nr;
|
|
struct page *page;
|
|
struct vm_area_struct *vma;
|
|
|
|
vma = find_vma(mm, addr);
|
|
if (!vma || !is_vm_hugetlb_page(vma))
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
pte = huge_pte_offset(mm, address);
|
|
|
|
/* hugetlb should be locked, and hence, prefaulted */
|
|
WARN_ON(!pte || pte_none(*pte));
|
|
|
|
page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
|
|
|
|
WARN_ON(!PageCompound(page));
|
|
|
|
return page;
|
|
}
|
|
|
|
int pmd_huge(pmd_t pmd)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
struct page *
|
|
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
|
|
pmd_t *pmd, int write)
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
#else
|
|
|
|
struct page *
|
|
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
|
|
{
|
|
return ERR_PTR(-EINVAL);
|
|
}
|
|
|
|
int pmd_huge(pmd_t pmd)
|
|
{
|
|
return !!(pmd_val(pmd) & _PAGE_PSE);
|
|
}
|
|
|
|
struct page *
|
|
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
|
|
pmd_t *pmd, int write)
|
|
{
|
|
struct page *page;
|
|
|
|
page = pte_page(*(pte_t *)pmd);
|
|
if (page)
|
|
page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
|
|
return page;
|
|
}
|
|
#endif
|
|
|
|
/* x86_64 also uses this file */
|
|
|
|
#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
|
|
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
|
|
unsigned long addr, unsigned long len,
|
|
unsigned long pgoff, unsigned long flags)
|
|
{
|
|
struct mm_struct *mm = current->mm;
|
|
struct vm_area_struct *vma;
|
|
unsigned long start_addr;
|
|
|
|
if (len > mm->cached_hole_size) {
|
|
start_addr = mm->free_area_cache;
|
|
} else {
|
|
start_addr = TASK_UNMAPPED_BASE;
|
|
mm->cached_hole_size = 0;
|
|
}
|
|
|
|
full_search:
|
|
addr = ALIGN(start_addr, HPAGE_SIZE);
|
|
|
|
for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
|
|
/* At this point: (!vma || addr < vma->vm_end). */
|
|
if (TASK_SIZE - len < addr) {
|
|
/*
|
|
* Start a new search - just in case we missed
|
|
* some holes.
|
|
*/
|
|
if (start_addr != TASK_UNMAPPED_BASE) {
|
|
start_addr = TASK_UNMAPPED_BASE;
|
|
mm->cached_hole_size = 0;
|
|
goto full_search;
|
|
}
|
|
return -ENOMEM;
|
|
}
|
|
if (!vma || addr + len <= vma->vm_start) {
|
|
mm->free_area_cache = addr + len;
|
|
return addr;
|
|
}
|
|
if (addr + mm->cached_hole_size < vma->vm_start)
|
|
mm->cached_hole_size = vma->vm_start - addr;
|
|
addr = ALIGN(vma->vm_end, HPAGE_SIZE);
|
|
}
|
|
}
|
|
|
|
static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
|
|
unsigned long addr0, unsigned long len,
|
|
unsigned long pgoff, unsigned long flags)
|
|
{
|
|
struct mm_struct *mm = current->mm;
|
|
struct vm_area_struct *vma, *prev_vma;
|
|
unsigned long base = mm->mmap_base, addr = addr0;
|
|
unsigned long largest_hole = mm->cached_hole_size;
|
|
int first_time = 1;
|
|
|
|
/* don't allow allocations above current base */
|
|
if (mm->free_area_cache > base)
|
|
mm->free_area_cache = base;
|
|
|
|
if (len <= largest_hole) {
|
|
largest_hole = 0;
|
|
mm->free_area_cache = base;
|
|
}
|
|
try_again:
|
|
/* make sure it can fit in the remaining address space */
|
|
if (mm->free_area_cache < len)
|
|
goto fail;
|
|
|
|
/* either no address requested or cant fit in requested address hole */
|
|
addr = (mm->free_area_cache - len) & HPAGE_MASK;
|
|
do {
|
|
/*
|
|
* Lookup failure means no vma is above this address,
|
|
* i.e. return with success:
|
|
*/
|
|
if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
|
|
return addr;
|
|
|
|
/*
|
|
* new region fits between prev_vma->vm_end and
|
|
* vma->vm_start, use it:
|
|
*/
|
|
if (addr + len <= vma->vm_start &&
|
|
(!prev_vma || (addr >= prev_vma->vm_end))) {
|
|
/* remember the address as a hint for next time */
|
|
mm->cached_hole_size = largest_hole;
|
|
return (mm->free_area_cache = addr);
|
|
} else {
|
|
/* pull free_area_cache down to the first hole */
|
|
if (mm->free_area_cache == vma->vm_end) {
|
|
mm->free_area_cache = vma->vm_start;
|
|
mm->cached_hole_size = largest_hole;
|
|
}
|
|
}
|
|
|
|
/* remember the largest hole we saw so far */
|
|
if (addr + largest_hole < vma->vm_start)
|
|
largest_hole = vma->vm_start - addr;
|
|
|
|
/* try just below the current vma->vm_start */
|
|
addr = (vma->vm_start - len) & HPAGE_MASK;
|
|
} while (len <= vma->vm_start);
|
|
|
|
fail:
|
|
/*
|
|
* if hint left us with no space for the requested
|
|
* mapping then try again:
|
|
*/
|
|
if (first_time) {
|
|
mm->free_area_cache = base;
|
|
largest_hole = 0;
|
|
first_time = 0;
|
|
goto try_again;
|
|
}
|
|
/*
|
|
* A failed mmap() very likely causes application failure,
|
|
* so fall back to the bottom-up function here. This scenario
|
|
* can happen with large stack limits and large mmap()
|
|
* allocations.
|
|
*/
|
|
mm->free_area_cache = TASK_UNMAPPED_BASE;
|
|
mm->cached_hole_size = ~0UL;
|
|
addr = hugetlb_get_unmapped_area_bottomup(file, addr0,
|
|
len, pgoff, flags);
|
|
|
|
/*
|
|
* Restore the topdown base:
|
|
*/
|
|
mm->free_area_cache = base;
|
|
mm->cached_hole_size = ~0UL;
|
|
|
|
return addr;
|
|
}
|
|
|
|
unsigned long
|
|
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
|
|
unsigned long len, unsigned long pgoff, unsigned long flags)
|
|
{
|
|
struct mm_struct *mm = current->mm;
|
|
struct vm_area_struct *vma;
|
|
|
|
if (len & ~HPAGE_MASK)
|
|
return -EINVAL;
|
|
if (len > TASK_SIZE)
|
|
return -ENOMEM;
|
|
|
|
if (addr) {
|
|
addr = ALIGN(addr, HPAGE_SIZE);
|
|
vma = find_vma(mm, addr);
|
|
if (TASK_SIZE - len >= addr &&
|
|
(!vma || addr + len <= vma->vm_start))
|
|
return addr;
|
|
}
|
|
if (mm->get_unmapped_area == arch_get_unmapped_area)
|
|
return hugetlb_get_unmapped_area_bottomup(file, addr, len,
|
|
pgoff, flags);
|
|
else
|
|
return hugetlb_get_unmapped_area_topdown(file, addr, len,
|
|
pgoff, flags);
|
|
}
|
|
|
|
#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/
|
|
|