summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorYasunori Goto <y-goto@jp.fujitsu.com>2008-04-28 02:13:31 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2008-04-28 08:58:25 -0700
commit04753278769f3b6c3b79a080edb52f21d83bf6e2 (patch)
tree0dff4088b44016b6d04930b2fc09419412821aa2 /mm
parent7f2e9525ba55b1c42ad6c4a5a59d7eb7bdd9be72 (diff)
memory hotplug: register section/node id to free
This patch set is to free pages which is allocated by bootmem for memory-hotremove. Some structures of memory management are allocated by bootmem. ex) memmap, etc. To remove memory physically, some of them must be freed according to circumstance. This patch set makes basis to free those pages, and free memmaps. Basic my idea is using remain members of struct page to remember information of users of bootmem (section number or node id). When the section is removing, kernel can confirm it. By this information, some issues can be solved. 1) When the memmap of removing section is allocated on other section by bootmem, it should/can be free. 2) When the memmap of removing section is allocated on the same section, it shouldn't be freed. Because the section has to be logical memory offlined already and all pages must be isolated against page allocater. If it is freed, page allocator may use it which will be removed physically soon. 3) When removing section has other section's memmap, kernel will be able to show easily which section should be removed before it for user. (Not implemented yet) 4) When the above case 2), the page isolation will be able to check and skip memmap's page when logical memory offline (offline_pages()). Current page isolation code fails in this case because this page is just reserved page and it can't distinguish this pages can be removed or not. But, it will be able to do by this patch. (Not implemented yet.) 5) The node information like pgdat has similar issues. But, this will be able to be solved too by this. (Not implemented yet, but, remembering node id in the pages.) Fortunately, current bootmem allocator just keeps PageReserved flags, and doesn't use any other members of page struct. The users of bootmem doesn't use them too. This patch: This is to register information which is node or section's id. Kernel can distinguish which node/section uses the pages allcated by bootmem. This is basis for hot-remove sections or nodes. Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com> Cc: Badari Pulavarty <pbadari@us.ibm.com> Cc: Yinghai Lu <yhlu.kernel@gmail.com> Cc: Yasunori Goto <y-goto@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/bootmem.c1
-rw-r--r--mm/memory_hotplug.c99
-rw-r--r--mm/sparse.c3
3 files changed, 100 insertions, 3 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c
index b6791646143e..369624d2789c 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -461,6 +461,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
{
+ register_page_bootmem_info_node(pgdat);
return free_all_bootmem_core(pgdat);
}
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c8b3ca79de2d..cba36ef0d506 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -58,8 +58,105 @@ static void release_memory_resource(struct resource *res)
return;
}
-
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+#ifndef CONFIG_SPARSEMEM_VMEMMAP
+static void get_page_bootmem(unsigned long info, struct page *page, int magic)
+{
+ atomic_set(&page->_mapcount, magic);
+ SetPagePrivate(page);
+ set_page_private(page, info);
+ atomic_inc(&page->_count);
+}
+
+void put_page_bootmem(struct page *page)
+{
+ int magic;
+
+ magic = atomic_read(&page->_mapcount);
+ BUG_ON(magic >= -1);
+
+ if (atomic_dec_return(&page->_count) == 1) {
+ ClearPagePrivate(page);
+ set_page_private(page, 0);
+ reset_page_mapcount(page);
+ __free_pages_bootmem(page, 0);
+ }
+
+}
+
+void register_page_bootmem_info_section(unsigned long start_pfn)
+{
+ unsigned long *usemap, mapsize, section_nr, i;
+ struct mem_section *ms;
+ struct page *page, *memmap;
+
+ if (!pfn_valid(start_pfn))
+ return;
+
+ section_nr = pfn_to_section_nr(start_pfn);
+ ms = __nr_to_section(section_nr);
+
+ /* Get section's memmap address */
+ memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
+
+ /*
+ * Get page for the memmap's phys address
+ * XXX: need more consideration for sparse_vmemmap...
+ */
+ page = virt_to_page(memmap);
+ mapsize = sizeof(struct page) * PAGES_PER_SECTION;
+ mapsize = PAGE_ALIGN(mapsize) >> PAGE_SHIFT;
+
+ /* remember memmap's page */
+ for (i = 0; i < mapsize; i++, page++)
+ get_page_bootmem(section_nr, page, SECTION_INFO);
+
+ usemap = __nr_to_section(section_nr)->pageblock_flags;
+ page = virt_to_page(usemap);
+
+ mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
+
+ for (i = 0; i < mapsize; i++, page++)
+ get_page_bootmem(section_nr, page, MIX_INFO);
+
+}
+
+void register_page_bootmem_info_node(struct pglist_data *pgdat)
+{
+ unsigned long i, pfn, end_pfn, nr_pages;
+ int node = pgdat->node_id;
+ struct page *page;
+ struct zone *zone;
+
+ nr_pages = PAGE_ALIGN(sizeof(struct pglist_data)) >> PAGE_SHIFT;
+ page = virt_to_page(pgdat);
+
+ for (i = 0; i < nr_pages; i++, page++)
+ get_page_bootmem(node, page, NODE_INFO);
+
+ zone = &pgdat->node_zones[0];
+ for (; zone < pgdat->node_zones + MAX_NR_ZONES - 1; zone++) {
+ if (zone->wait_table) {
+ nr_pages = zone->wait_table_hash_nr_entries
+ * sizeof(wait_queue_head_t);
+ nr_pages = PAGE_ALIGN(nr_pages) >> PAGE_SHIFT;
+ page = virt_to_page(zone->wait_table);
+
+ for (i = 0; i < nr_pages; i++, page++)
+ get_page_bootmem(node, page, NODE_INFO);
+ }
+ }
+
+ pfn = pgdat->node_start_pfn;
+ end_pfn = pfn + pgdat->node_spanned_pages;
+
+ /* register_section info */
+ for (; pfn < end_pfn; pfn += PAGES_PER_SECTION)
+ register_page_bootmem_info_section(pfn);
+
+}
+#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
+
static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
{
struct pglist_data *pgdat = zone->zone_pgdat;
diff --git a/mm/sparse.c b/mm/sparse.c
index 186a85bf7912..8903c484389a 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -210,7 +210,6 @@ static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long p
/*
* Decode mem_map from the coded memmap
*/
-static
struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
{
/* mask off the extra low bits of information */
@@ -233,7 +232,7 @@ static int __meminit sparse_init_one_section(struct mem_section *ms,
return 1;
}
-static unsigned long usemap_size(void)
+unsigned long usemap_size(void)
{
unsigned long size_bytes;
size_bytes = roundup(SECTION_BLOCKFLAGS_BITS, 8) / 8;