71fe804b6d
This patch replaces the mempolicy mode, mode_flags, and nodemask in the shmem_sb_info struct with a struct mempolicy pointer, initialized to NULL. This removes dependency on the details of mempolicy from shmem.c and hugetlbfs inode.c and simplifies the interfaces. mpol_parse_str() in mempolicy.c is changed to return, via a pointer to a pointer arg, a struct mempolicy pointer on success. For MPOL_DEFAULT, the returned pointer is NULL. Further, mpol_parse_str() now takes a 'no_context' argument that causes the input nodemask to be stored in the w.user_nodemask of the created mempolicy for use when the mempolicy is installed in a tmpfs inode shared policy tree. At that time, any cpuset contextualization is applied to the original input nodemask. This preserves the previous behavior where the input nodemask was stored in the superblock. We can think of the returned mempolicy as "context free". Because mpol_parse_str() is now calling mpol_new(), we can remove from mpol_to_str() the semantic checks that mpol_new() already performs. Add 'no_context' parameter to mpol_to_str() to specify that it should format the nodemask in w.user_nodemask for 'bind' and 'interleave' policies. Change mpol_shared_policy_init() to take a pointer to a "context free" struct mempolicy and to create a new, "contextualized" mempolicy using the mode, mode_flags and user_nodemask from the input mempolicy. Note: we know that the mempolicy passed to mpol_to_str() or mpol_shared_policy_init() from a tmpfs superblock is "context free". This is currently the only instance thereof. However, if we found more uses for this concept, and introduced any ambiguity as to whether a mempolicy was context free or not, we could add another internal mode flag to identify context free mempolicies. Then, we could remove the 'no_context' argument from mpol_to_str(). Added shmem_get_sbmpol() to return a reference counted superblock mempolicy, if one exists, to pass to mpol_shared_policy_init(). We must add the reference under the sb stat_lock to prevent races with replacement of the mpol by remount. This reference is removed in mpol_shared_policy_init(). [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: another build fix] [akpm@linux-foundation.org: yet another build fix] Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Cc: Christoph Lameter <clameter@sgi.com> Cc: David Rientjes <rientjes@google.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Andi Kleen <ak@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
340 lines
8.4 KiB
C
340 lines
8.4 KiB
C
#ifndef _LINUX_MEMPOLICY_H
|
|
#define _LINUX_MEMPOLICY_H 1
|
|
|
|
#include <linux/errno.h>
|
|
|
|
/*
|
|
* NUMA memory policies for Linux.
|
|
* Copyright 2003,2004 Andi Kleen SuSE Labs
|
|
*/
|
|
|
|
/*
|
|
* Both the MPOL_* mempolicy mode and the MPOL_F_* optional mode flags are
|
|
* passed by the user to either set_mempolicy() or mbind() in an 'int' actual.
|
|
* The MPOL_MODE_FLAGS macro determines the legal set of optional mode flags.
|
|
*/
|
|
|
|
/* Policies */
|
|
enum {
|
|
MPOL_DEFAULT,
|
|
MPOL_PREFERRED,
|
|
MPOL_BIND,
|
|
MPOL_INTERLEAVE,
|
|
MPOL_MAX, /* always last member of enum */
|
|
};
|
|
|
|
/* Flags for set_mempolicy */
|
|
#define MPOL_F_STATIC_NODES (1 << 15)
|
|
#define MPOL_F_RELATIVE_NODES (1 << 14)
|
|
|
|
/*
|
|
* MPOL_MODE_FLAGS is the union of all possible optional mode flags passed to
|
|
* either set_mempolicy() or mbind().
|
|
*/
|
|
#define MPOL_MODE_FLAGS (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES)
|
|
|
|
/* Flags for get_mempolicy */
|
|
#define MPOL_F_NODE (1<<0) /* return next IL mode instead of node mask */
|
|
#define MPOL_F_ADDR (1<<1) /* look up vma using address */
|
|
#define MPOL_F_MEMS_ALLOWED (1<<2) /* return allowed memories */
|
|
|
|
/* Flags for mbind */
|
|
#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */
|
|
#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */
|
|
#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */
|
|
#define MPOL_MF_INTERNAL (1<<3) /* Internal flags start here */
|
|
|
|
/*
|
|
* Internal flags that share the struct mempolicy flags word with
|
|
* "mode flags". These flags are allocated from bit 0 up, as they
|
|
* are never OR'ed into the mode in mempolicy API arguments.
|
|
*/
|
|
#define MPOL_F_SHARED (1 << 0) /* identify shared policies */
|
|
#define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
#include <linux/mmzone.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/rbtree.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/nodemask.h>
|
|
|
|
struct mm_struct;
|
|
|
|
#ifdef CONFIG_NUMA
|
|
|
|
/*
|
|
* Describe a memory policy.
|
|
*
|
|
* A mempolicy can be either associated with a process or with a VMA.
|
|
* For VMA related allocations the VMA policy is preferred, otherwise
|
|
* the process policy is used. Interrupts ignore the memory policy
|
|
* of the current process.
|
|
*
|
|
* Locking policy for interlave:
|
|
* In process context there is no locking because only the process accesses
|
|
* its own state. All vma manipulation is somewhat protected by a down_read on
|
|
* mmap_sem.
|
|
*
|
|
* Freeing policy:
|
|
* Mempolicy objects are reference counted. A mempolicy will be freed when
|
|
* mpol_put() decrements the reference count to zero.
|
|
*
|
|
* Duplicating policy objects:
|
|
* mpol_dup() allocates a new mempolicy and copies the specified mempolicy
|
|
* to the new storage. The reference count of the new object is initialized
|
|
* to 1, representing the caller of mpol_dup().
|
|
*/
|
|
struct mempolicy {
|
|
atomic_t refcnt;
|
|
unsigned short mode; /* See MPOL_* above */
|
|
unsigned short flags; /* See set_mempolicy() MPOL_F_* above */
|
|
union {
|
|
short preferred_node; /* preferred */
|
|
nodemask_t nodes; /* interleave/bind */
|
|
/* undefined for default */
|
|
} v;
|
|
union {
|
|
nodemask_t cpuset_mems_allowed; /* relative to these nodes */
|
|
nodemask_t user_nodemask; /* nodemask passed by user */
|
|
} w;
|
|
};
|
|
|
|
/*
|
|
* Support for managing mempolicy data objects (clone, copy, destroy)
|
|
* The default fast path of a NULL MPOL_DEFAULT policy is always inlined.
|
|
*/
|
|
|
|
extern void __mpol_put(struct mempolicy *pol);
|
|
static inline void mpol_put(struct mempolicy *pol)
|
|
{
|
|
if (pol)
|
|
__mpol_put(pol);
|
|
}
|
|
|
|
/*
|
|
* Does mempolicy pol need explicit unref after use?
|
|
* Currently only needed for shared policies.
|
|
*/
|
|
static inline int mpol_needs_cond_ref(struct mempolicy *pol)
|
|
{
|
|
return (pol && (pol->flags & MPOL_F_SHARED));
|
|
}
|
|
|
|
static inline void mpol_cond_put(struct mempolicy *pol)
|
|
{
|
|
if (mpol_needs_cond_ref(pol))
|
|
__mpol_put(pol);
|
|
}
|
|
|
|
extern struct mempolicy *__mpol_cond_copy(struct mempolicy *tompol,
|
|
struct mempolicy *frompol);
|
|
static inline struct mempolicy *mpol_cond_copy(struct mempolicy *tompol,
|
|
struct mempolicy *frompol)
|
|
{
|
|
if (!frompol)
|
|
return frompol;
|
|
return __mpol_cond_copy(tompol, frompol);
|
|
}
|
|
|
|
extern struct mempolicy *__mpol_dup(struct mempolicy *pol);
|
|
static inline struct mempolicy *mpol_dup(struct mempolicy *pol)
|
|
{
|
|
if (pol)
|
|
pol = __mpol_dup(pol);
|
|
return pol;
|
|
}
|
|
|
|
#define vma_policy(vma) ((vma)->vm_policy)
|
|
#define vma_set_policy(vma, pol) ((vma)->vm_policy = (pol))
|
|
|
|
static inline void mpol_get(struct mempolicy *pol)
|
|
{
|
|
if (pol)
|
|
atomic_inc(&pol->refcnt);
|
|
}
|
|
|
|
extern int __mpol_equal(struct mempolicy *a, struct mempolicy *b);
|
|
static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b)
|
|
{
|
|
if (a == b)
|
|
return 1;
|
|
return __mpol_equal(a, b);
|
|
}
|
|
|
|
/*
|
|
* Tree of shared policies for a shared memory region.
|
|
* Maintain the policies in a pseudo mm that contains vmas. The vmas
|
|
* carry the policy. As a special twist the pseudo mm is indexed in pages, not
|
|
* bytes, so that we can work with shared memory segments bigger than
|
|
* unsigned long.
|
|
*/
|
|
|
|
struct sp_node {
|
|
struct rb_node nd;
|
|
unsigned long start, end;
|
|
struct mempolicy *policy;
|
|
};
|
|
|
|
struct shared_policy {
|
|
struct rb_root root;
|
|
spinlock_t lock;
|
|
};
|
|
|
|
void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol);
|
|
int mpol_set_shared_policy(struct shared_policy *info,
|
|
struct vm_area_struct *vma,
|
|
struct mempolicy *new);
|
|
void mpol_free_shared_policy(struct shared_policy *p);
|
|
struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
|
|
unsigned long idx);
|
|
|
|
extern void numa_default_policy(void);
|
|
extern void numa_policy_init(void);
|
|
extern void mpol_rebind_task(struct task_struct *tsk,
|
|
const nodemask_t *new);
|
|
extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
|
|
extern void mpol_fix_fork_child_flag(struct task_struct *p);
|
|
|
|
extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
|
|
unsigned long addr, gfp_t gfp_flags,
|
|
struct mempolicy **mpol, nodemask_t **nodemask);
|
|
extern unsigned slab_node(struct mempolicy *policy);
|
|
|
|
extern enum zone_type policy_zone;
|
|
|
|
static inline void check_highest_zone(enum zone_type k)
|
|
{
|
|
if (k > policy_zone && k != ZONE_MOVABLE)
|
|
policy_zone = k;
|
|
}
|
|
|
|
int do_migrate_pages(struct mm_struct *mm,
|
|
const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags);
|
|
|
|
|
|
#ifdef CONFIG_TMPFS
|
|
extern int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context);
|
|
|
|
extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
|
|
int no_context);
|
|
#endif
|
|
#else
|
|
|
|
struct mempolicy {};
|
|
|
|
static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b)
|
|
{
|
|
return 1;
|
|
}
|
|
|
|
static inline void mpol_put(struct mempolicy *p)
|
|
{
|
|
}
|
|
|
|
static inline void mpol_cond_put(struct mempolicy *pol)
|
|
{
|
|
}
|
|
|
|
static inline struct mempolicy *mpol_cond_copy(struct mempolicy *to,
|
|
struct mempolicy *from)
|
|
{
|
|
return from;
|
|
}
|
|
|
|
static inline void mpol_get(struct mempolicy *pol)
|
|
{
|
|
}
|
|
|
|
static inline struct mempolicy *mpol_dup(struct mempolicy *old)
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
struct shared_policy {};
|
|
|
|
static inline int mpol_set_shared_policy(struct shared_policy *info,
|
|
struct vm_area_struct *vma,
|
|
struct mempolicy *new)
|
|
{
|
|
return -EINVAL;
|
|
}
|
|
|
|
static inline void mpol_shared_policy_init(struct shared_policy *sp,
|
|
struct mempolicy *mpol)
|
|
{
|
|
}
|
|
|
|
static inline void mpol_free_shared_policy(struct shared_policy *p)
|
|
{
|
|
}
|
|
|
|
static inline struct mempolicy *
|
|
mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
#define vma_policy(vma) NULL
|
|
#define vma_set_policy(vma, pol) do {} while(0)
|
|
|
|
static inline void numa_policy_init(void)
|
|
{
|
|
}
|
|
|
|
static inline void numa_default_policy(void)
|
|
{
|
|
}
|
|
|
|
static inline void mpol_rebind_task(struct task_struct *tsk,
|
|
const nodemask_t *new)
|
|
{
|
|
}
|
|
|
|
static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
|
|
{
|
|
}
|
|
|
|
static inline void mpol_fix_fork_child_flag(struct task_struct *p)
|
|
{
|
|
}
|
|
|
|
static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
|
|
unsigned long addr, gfp_t gfp_flags,
|
|
struct mempolicy **mpol, nodemask_t **nodemask)
|
|
{
|
|
*mpol = NULL;
|
|
*nodemask = NULL;
|
|
return node_zonelist(0, gfp_flags);
|
|
}
|
|
|
|
static inline int do_migrate_pages(struct mm_struct *mm,
|
|
const nodemask_t *from_nodes,
|
|
const nodemask_t *to_nodes, int flags)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static inline void check_highest_zone(int k)
|
|
{
|
|
}
|
|
|
|
#ifdef CONFIG_TMPFS
|
|
static inline int mpol_parse_str(char *str, struct mempolicy **mpol,
|
|
int no_context)
|
|
{
|
|
return 1; /* error */
|
|
}
|
|
|
|
static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
|
|
int no_context)
|
|
{
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
#endif /* CONFIG_NUMA */
|
|
#endif /* __KERNEL__ */
|
|
|
|
#endif
|