2005-10-30 02:16:52 +01:00
|
|
|
#ifndef __LINUX_MEMORY_HOTPLUG_H
|
|
|
|
#define __LINUX_MEMORY_HOTPLUG_H
|
|
|
|
|
|
|
|
#include <linux/mmzone.h>
|
|
|
|
#include <linux/spinlock.h>
|
2005-10-30 02:16:54 +01:00
|
|
|
#include <linux/notifier.h>
|
2005-10-30 02:16:52 +01:00
|
|
|
|
2006-03-07 00:42:49 +01:00
|
|
|
struct page;
|
|
|
|
struct zone;
|
|
|
|
struct pglist_data;
|
2008-04-28 11:12:01 +02:00
|
|
|
struct mem_section;
|
2006-03-07 00:42:49 +01:00
|
|
|
|
2005-10-30 02:16:52 +01:00
|
|
|
#ifdef CONFIG_MEMORY_HOTPLUG
|
memory hotplug: register section/node id to free
This patch set is to free pages which is allocated by bootmem for
memory-hotremove. Some structures of memory management are allocated by
bootmem. ex) memmap, etc.
To remove memory physically, some of them must be freed according to
circumstance. This patch set makes basis to free those pages, and free
memmaps.
Basic my idea is using remain members of struct page to remember information
of users of bootmem (section number or node id). When the section is
removing, kernel can confirm it. By this information, some issues can be
solved.
1) When the memmap of removing section is allocated on other
section by bootmem, it should/can be free.
2) When the memmap of removing section is allocated on the
same section, it shouldn't be freed. Because the section has to be
logical memory offlined already and all pages must be isolated against
page allocator. If it is freed, page allocator may use it which will
be removed physically soon.
3) When removing section has other section's memmap,
kernel will be able to show easily which section should be removed
before it for user. (Not implemented yet)
4) When the above case 2), the page isolation will be able to check and skip
memmap's page when logical memory offline (offline_pages()).
Current page isolation code fails in this case because this page is
just reserved page and it can't distinguish this pages can be
removed or not. But, it will be able to do by this patch.
(Not implemented yet.)
5) The node information like pgdat has similar issues. But, this
will be able to be solved too by this.
(Not implemented yet, but, remembering node id in the pages.)
Fortunately, current bootmem allocator just keeps PageReserved flags,
and doesn't use any other members of page struct. The users of
bootmem doesn't use them too.
This patch:
This is to register information which is node or section's id. Kernel can
distinguish which node/section uses the pages allocated by bootmem. This is
basis for hot-remove sections or nodes.
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-28 11:13:31 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Magic numbers for free bootmem.
|
|
|
|
* The normal smallest mapcount is -1. These values are smaller than that
* (read as 32-bit signed values they are -2, -3 and -4).
|
|
|
|
*/
|
|
|
|
#define SECTION_INFO 0xfffffffe
|
|
|
|
#define MIX_INFO 0xfffffffd
|
|
|
|
#define NODE_INFO 0xfffffffc
|
|
|
|
|
2005-10-30 02:16:52 +01:00
|
|
|
/*
|
|
|
|
* pgdat resizing functions
|
|
|
|
*/
|
|
|
|
/*
 * Take pgdat->node_size_lock with spin_lock_irqsave().
 * The flags value is kept in *flags so the caller can hand the same
 * pointer to pgdat_resize_unlock() afterwards.
 */
static inline
|
|
|
|
void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags)
|
|
|
|
{
|
|
|
|
spin_lock_irqsave(&pgdat->node_size_lock, *flags);
|
|
|
|
}
|
|
|
|
/*
 * Release pgdat->node_size_lock with spin_unlock_irqrestore(),
 * passing back the value found in *flags.
 */
static inline
|
|
|
|
void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags)
|
|
|
|
{
|
2005-10-30 02:16:53 +01:00
|
|
|
spin_unlock_irqrestore(&pgdat->node_size_lock, *flags);
|
2005-10-30 02:16:52 +01:00
|
|
|
}
|
|
|
|
/* Initialise pgdat->node_size_lock with spin_lock_init(). */
static inline
|
|
|
|
void pgdat_resize_init(struct pglist_data *pgdat)
|
|
|
|
{
|
|
|
|
spin_lock_init(&pgdat->node_size_lock);
|
|
|
|
}
|
2005-10-30 02:16:53 +01:00
|
|
|
/*
|
|
|
|
* Zone resizing functions
|
|
|
|
*/
|
|
|
|
/*
 * Return the result of read_seqbegin() on zone->span_seqlock.
 * NOTE(review): the value is presumably meant to be passed as 'iv'
 * to zone_span_seqretry() - confirm against callers.
 */
static inline unsigned zone_span_seqbegin(struct zone *zone)
|
|
|
|
{
|
|
|
|
return read_seqbegin(&zone->span_seqlock);
|
|
|
|
}
|
|
|
|
/*
 * Return the result of read_seqretry() on zone->span_seqlock for the
 * sequence value 'iv'.
 */
static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
|
|
|
|
{
|
|
|
|
return read_seqretry(&zone->span_seqlock, iv);
|
|
|
|
}
|
|
|
|
/* Take the write side of zone->span_seqlock with write_seqlock(). */
static inline void zone_span_writelock(struct zone *zone)
|
|
|
|
{
|
|
|
|
write_seqlock(&zone->span_seqlock);
|
|
|
|
}
|
|
|
|
/* Release the write side of zone->span_seqlock with write_sequnlock(). */
static inline void zone_span_writeunlock(struct zone *zone)
|
|
|
|
{
|
|
|
|
write_sequnlock(&zone->span_seqlock);
|
|
|
|
}
|
|
|
|
/* Initialise zone->span_seqlock with seqlock_init(). */
static inline void zone_seqlock_init(struct zone *zone)
|
|
|
|
{
|
|
|
|
seqlock_init(&zone->span_seqlock);
|
|
|
|
}
|
2005-10-30 02:16:54 +01:00
|
|
|
extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages);
|
|
|
|
extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
|
|
|
|
extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
|
|
|
|
/* need some defines for these for archs that don't support it */
|
|
|
|
extern void online_page(struct page *page);
|
|
|
|
/* VM interface that may be used by firmware interface */
|
|
|
|
extern int online_pages(unsigned long, unsigned long);
|
2007-10-16 10:26:12 +02:00
|
|
|
extern void __offline_isolated_pages(unsigned long, unsigned long);
|
2007-10-16 10:26:14 +02:00
|
|
|
extern int offline_pages(unsigned long, unsigned long, unsigned long);
|
|
|
|
|
2005-10-30 02:16:54 +01:00
|
|
|
/* reasonably generic interface to expand the physical pages in a zone */
|
|
|
|
extern int __add_pages(struct zone *zone, unsigned long start_pfn,
|
|
|
|
unsigned long nr_pages);
|
2008-04-28 11:12:01 +02:00
|
|
|
extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
|
|
|
|
unsigned long nr_pages);
|
2006-06-27 11:53:30 +02:00
|
|
|
|
|
|
|
#ifdef CONFIG_NUMA
|
|
|
|
extern int memory_add_physaddr_to_nid(u64 start);
|
|
|
|
#else
|
|
|
|
/*
 * !CONFIG_NUMA version: 'start' is ignored and node id 0 is always
 * returned.
 */
static inline int memory_add_physaddr_to_nid(u64 start)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2006-06-27 11:53:32 +02:00
|
|
|
#ifdef CONFIG_HAVE_ARCH_NODEDATA_EXTENSION
|
|
|
|
/*
|
|
|
|
* For supporting node-hotadd, we have to allocate a new pgdat.
|
|
|
|
*
|
|
|
|
* If an arch has generic style NODE_DATA(),
|
|
|
|
* node_data[nid] = kzalloc() works well. But it depends on the architecture.
|
|
|
|
*
|
|
|
|
* In general, generic_alloc_nodedata() is used.
|
|
|
|
* Now, arch_free_nodedata() is just defined for error path of node_hot_add.
|
|
|
|
*
|
|
|
|
*/
|
2006-06-27 11:53:40 +02:00
|
|
|
extern pg_data_t *arch_alloc_nodedata(int nid);
|
|
|
|
extern void arch_free_nodedata(pg_data_t *pgdat);
|
[PATCH] pgdat allocation and update for ia64 of memory hotplug: update pgdat address array
This is to refresh node_data[] array for ia64. As I mentioned previous
patches, ia64 has copies of information of pgdat address array on each node as
per node data.
At v2 of node_add, this function used stop_machine_run() to update them. (I
wished that they were copied safely as much as possible.) But, in this patch,
this arrays are just copied simply, and set node_online_map bit after
completion of pgdat initialization.
So, kernel must touch NODE_DATA() macro after checking node_online_map().
(Current code has already done it.) This is more simple way for just
hot-add.....
Note : It will be problem when hot-remove will occur,
because, even if online_map bit is set, kernel may
touch NODE_DATA() due to race condition. :-(
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-27 11:53:39 +02:00
|
|
|
extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat);
|
2006-06-27 11:53:32 +02:00
|
|
|
|
|
|
|
#else /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */
|
|
|
|
|
|
|
|
#define arch_alloc_nodedata(nid) generic_alloc_nodedata(nid)
|
|
|
|
#define arch_free_nodedata(pgdat) generic_free_nodedata(pgdat)
|
|
|
|
|
|
|
|
#ifdef CONFIG_NUMA
|
|
|
|
/*
|
|
|
|
* If CONFIG_HAVE_ARCH_NODEDATA_EXTENSION=n, this func is used to allocate pgdat.
|
|
|
|
* XXX: kmalloc_node() can't work well to get new node's memory at this time.
|
|
|
|
* Because, pgdat for the new node is not allocated/initialized yet itself.
|
|
|
|
* To use new node's memory, more consideration will be necessary.
|
|
|
|
*
* The expansion is a statement expression whose value is the result of
* kzalloc(sizeof(pg_data_t), GFP_KERNEL); the 'nid' argument is not used.
*/
|
|
|
|
#define generic_alloc_nodedata(nid) \
|
|
|
|
({ \
|
|
|
|
kzalloc(sizeof(pg_data_t), GFP_KERNEL); \
|
|
|
|
})
|
|
|
|
/*
|
|
|
|
* This definition is just for error path in node hotadd.
|
|
|
|
* For node hotremove, we have to replace this.
|
|
|
|
*
* Expands to a plain kfree() of the given pgdat.
*/
|
|
|
|
#define generic_free_nodedata(pgdat) kfree(pgdat)
|
|
|
|
|
2006-06-27 11:53:33 +02:00
|
|
|
extern pg_data_t *node_data[];
|
|
|
|
/*
 * CONFIG_NUMA version without the arch nodedata extension: store 'pgdat'
 * in node_data[nid]. No range check on 'nid' is done here.
 */
static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
|
|
|
|
{
|
|
|
|
node_data[nid] = pgdat;
|
|
|
|
}
|
|
|
|
|
2006-06-27 11:53:32 +02:00
|
|
|
#else /* !CONFIG_NUMA */
|
|
|
|
|
|
|
|
/* never called */
|
|
|
|
/*
 * !CONFIG_NUMA placeholder: calls BUG(); the NULL return follows it.
 * 'nid' is not used.
 */
static inline pg_data_t *generic_alloc_nodedata(int nid)
|
|
|
|
{
|
|
|
|
BUG();
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
/* !CONFIG_NUMA placeholder: empty body, 'pgdat' is not touched. */
static inline void generic_free_nodedata(pg_data_t *pgdat)
|
|
|
|
{
|
|
|
|
}
|
2006-06-27 11:53:33 +02:00
|
|
|
/* !CONFIG_NUMA placeholder: empty body, nothing is recorded. */
static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
|
|
|
|
{
|
|
|
|
}
|
2006-06-27 11:53:32 +02:00
|
|
|
#endif /* CONFIG_NUMA */
|
|
|
|
#endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */
|
|
|
|
|
memory hotplug: register section/node id to free
This patch set is to free pages which is allocated by bootmem for
memory-hotremove. Some structures of memory management are allocated by
bootmem. ex) memmap, etc.
To remove memory physically, some of them must be freed according to
circumstance. This patch set makes basis to free those pages, and free
memmaps.
Basic my idea is using remain members of struct page to remember information
of users of bootmem (section number or node id). When the section is
removing, kernel can confirm it. By this information, some issues can be
solved.
1) When the memmap of removing section is allocated on other
section by bootmem, it should/can be free.
2) When the memmap of removing section is allocated on the
same section, it shouldn't be freed. Because the section has to be
logical memory offlined already and all pages must be isolated against
page allocator. If it is freed, page allocator may use it which will
be removed physically soon.
3) When removing section has other section's memmap,
kernel will be able to show easily which section should be removed
before it for user. (Not implemented yet)
4) When the above case 2), the page isolation will be able to check and skip
memmap's page when logical memory offline (offline_pages()).
Current page isolation code fails in this case because this page is
just reserved page and it can't distinguish this pages can be
removed or not. But, it will be able to do by this patch.
(Not implemented yet.)
5) The node information like pgdat has similar issues. But, this
will be able to be solved too by this.
(Not implemented yet, but, remembering node id in the pages.)
Fortunately, current bootmem allocator just keeps PageReserved flags,
and doesn't use any other members of page struct. The users of
bootmem doesn't use them too.
This patch:
This is to register information which is node or section's id. Kernel can
distinguish which node/section uses the pages allocated by bootmem. This is
basis for hot-remove sections or nodes.
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-28 11:13:31 +02:00
|
|
|
#ifdef CONFIG_SPARSEMEM_VMEMMAP
|
|
|
|
/* CONFIG_SPARSEMEM_VMEMMAP version: empty body, does nothing. */
static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
/* CONFIG_SPARSEMEM_VMEMMAP version: empty body, does nothing. */
static inline void put_page_bootmem(struct page *page)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
extern void register_page_bootmem_info_node(struct pglist_data *pgdat);
|
|
|
|
extern void put_page_bootmem(struct page *page);
|
|
|
|
#endif
|
|
|
|
|
2005-10-30 02:16:52 +01:00
|
|
|
#else /* ! CONFIG_MEMORY_HOTPLUG */
|
|
|
|
/*
|
|
|
|
* Stub functions for when hotplug is off
|
|
|
|
*/
|
|
|
|
/* Hotplug-off stub: takes no lock and leaves *f untouched. */
static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {}
|
|
|
|
/* Hotplug-off stub: empty body, nothing to release. */
static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {}
|
|
|
|
/* Hotplug-off stub: empty body, nothing is initialised. */
static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
|
2005-10-30 02:16:53 +01:00
|
|
|
|
|
|
|
/* Hotplug-off stub: 'zone' is ignored and 0 is always returned. */
static inline unsigned zone_span_seqbegin(struct zone *zone)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/* Hotplug-off stub: always returns 0; 'zone' and 'iv' are ignored. */
static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/* Hotplug-off stub: empty body, no lock is taken. */
static inline void zone_span_writelock(struct zone *zone) {}
|
|
|
|
/* Hotplug-off stub: empty body, nothing to release. */
static inline void zone_span_writeunlock(struct zone *zone) {}
|
|
|
|
/* Hotplug-off stub: empty body, nothing is initialised. */
static inline void zone_seqlock_init(struct zone *zone) {}
|
2005-10-30 02:16:54 +01:00
|
|
|
|
|
|
|
/*
 * Helper for the !CONFIG_MEMORY_HOTPLUG build: print a KERN_WARNING that
 * names 'func', call dump_stack(), and return -ENOSYS.
 */
static inline int mhp_notimplemented(const char *func)
|
|
|
|
{
|
|
|
|
printk(KERN_WARNING "%s() called, with CONFIG_MEMORY_HOTPLUG disabled\n", func);
|
|
|
|
dump_stack();
|
|
|
|
return -ENOSYS;
|
|
|
|
}
|
|
|
|
|
memory hotplug: register section/node id to free
This patch set is to free pages which is allocated by bootmem for
memory-hotremove. Some structures of memory management are allocated by
bootmem. ex) memmap, etc.
To remove memory physically, some of them must be freed according to
circumstance. This patch set makes basis to free those pages, and free
memmaps.
Basic my idea is using remain members of struct page to remember information
of users of bootmem (section number or node id). When the section is
removing, kernel can confirm it. By this information, some issues can be
solved.
1) When the memmap of removing section is allocated on other
section by bootmem, it should/can be free.
2) When the memmap of removing section is allocated on the
same section, it shouldn't be freed. Because the section has to be
logical memory offlined already and all pages must be isolated against
page allocator. If it is freed, page allocator may use it which will
be removed physically soon.
3) When removing section has other section's memmap,
kernel will be able to show easily which section should be removed
before it for user. (Not implemented yet)
4) When the above case 2), the page isolation will be able to check and skip
memmap's page when logical memory offline (offline_pages()).
Current page isolation code fails in this case because this page is
just reserved page and it can't distinguish this pages can be
removed or not. But, it will be able to do by this patch.
(Not implemented yet.)
5) The node information like pgdat has similar issues. But, this
will be able to be solved too by this.
(Not implemented yet, but, remembering node id in the pages.)
Fortunately, current bootmem allocator just keeps PageReserved flags,
and doesn't use any other members of page struct. The users of
bootmem doesn't use them too.
This patch:
This is to register information which is node or section's id. Kernel can
distinguish which node/section uses the pages allocated by bootmem. This is
basis for hot-remove sections or nodes.
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-28 11:13:31 +02:00
|
|
|
/* !CONFIG_MEMORY_HOTPLUG stub: empty body, does nothing. */
static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2005-10-30 02:16:53 +01:00
|
|
|
#endif /* ! CONFIG_MEMORY_HOTPLUG */
|
2006-04-07 19:49:15 +02:00
|
|
|
|
2008-06-04 00:30:54 +02:00
|
|
|
/*
|
|
|
|
* Walk through all memory which is registered as resource.
|
|
|
|
* arg is (start_pfn, nr_pages, private_arg_pointer)
|
|
|
|
*/
|
|
|
|
extern int walk_memory_resource(unsigned long start_pfn,
|
|
|
|
unsigned long nr_pages, void *arg,
|
|
|
|
int (*func)(unsigned long, unsigned long, void *));
|
|
|
|
|
2006-06-27 11:53:30 +02:00
|
|
|
extern int add_memory(int nid, u64 start, u64 size);
|
|
|
|
extern int arch_add_memory(int nid, u64 start, u64 size);
|
2006-04-07 19:49:15 +02:00
|
|
|
extern int remove_memory(u64 start, u64 size);
|
2006-10-01 08:27:04 +02:00
|
|
|
extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
|
|
|
|
int nr_pages);
|
2008-04-28 11:12:01 +02:00
|
|
|
extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms);
|
memory hotplug: register section/node id to free
This patch set is to free pages which is allocated by bootmem for
memory-hotremove. Some structures of memory management are allocated by
bootmem. ex) memmap, etc.
To remove memory physically, some of them must be freed according to
circumstance. This patch set makes basis to free those pages, and free
memmaps.
Basic my idea is using remain members of struct page to remember information
of users of bootmem (section number or node id). When the section is
removing, kernel can confirm it. By this information, some issues can be
solved.
1) When the memmap of removing section is allocated on other
section by bootmem, it should/can be free.
2) When the memmap of removing section is allocated on the
same section, it shouldn't be freed. Because the section has to be
logical memory offlined already and all pages must be isolated against
page allocator. If it is freed, page allocator may use it which will
be removed physically soon.
3) When removing section has other section's memmap,
kernel will be able to show easily which section should be removed
before it for user. (Not implemented yet)
4) When the above case 2), the page isolation will be able to check and skip
memmap's page when logical memory offline (offline_pages()).
Current page isolation code fails in this case because this page is
just reserved page and it can't distinguish this pages can be
removed or not. But, it will be able to do by this patch.
(Not implemented yet.)
5) The node information like pgdat has similar issues. But, this
will be able to be solved too by this.
(Not implemented yet, but, remembering node id in the pages.)
Fortunately, current bootmem allocator just keeps PageReserved flags,
and doesn't use any other members of page struct. The users of
bootmem doesn't use them too.
This patch:
This is to register information which is node or section's id. Kernel can
distinguish which node/section uses the pages allocated by bootmem. This is
basis for hot-remove sections or nodes.
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-04-28 11:13:31 +02:00
|
|
|
extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
|
|
|
|
unsigned long pnum);
|
2006-04-07 19:49:15 +02:00
|
|
|
|
2005-10-30 02:16:52 +01:00
|
|
|
#endif /* __LINUX_MEMORY_HOTPLUG_H */
|