From c3d16e16522fe3fe8759735850a0676da18f4b1d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 16 Oct 2013 13:46:51 -0700 Subject: mm: migration: do not lose soft dirty bit if page is in migration state If page migration is turned on in config and the page is migrating, we may lose the soft dirty bit. If fork and mprotect are called on migrating pages (once migration is complete) pages do not obtain the soft dirty bit in the correspond pte entries. Fix it adding an appropriate test on swap entries. Signed-off-by: Cyrill Gorcunov Cc: Pavel Emelyanov Cc: Andy Lutomirski Cc: Matt Mackall Cc: Xiao Guangrong Cc: Marcelo Tosatti Cc: KOSAKI Motohiro Cc: Stephen Rothwell Cc: Peter Zijlstra Cc: "Aneesh Kumar K.V" Cc: Naoya Horiguchi Cc: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index ca0003947115..f7b7692c05ed 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -837,6 +837,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, */ make_migration_entry_read(&entry); pte = swp_entry_to_pte(entry); + if (pte_swp_soft_dirty(*src_pte)) + pte = pte_swp_mksoft_dirty(pte); set_pte_at(src_mm, addr, src_pte, pte); } } -- cgit v1.2.3 From 4942642080ea82d99ab5b653abb9a12b7ba31f4a Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 16 Oct 2013 13:46:59 -0700 Subject: mm: memcg: handle non-error OOM situations more gracefully Commit 3812c8c8f395 ("mm: memcg: do not trap chargers with full callstack on OOM") assumed that only a few places that can trigger a memcg OOM situation do not return VM_FAULT_OOM, like optional page cache readahead. But there are many more and it's impractical to annotate them all. First of all, we don't want to invoke the OOM killer when the failed allocation is gracefully handled, so defer the actual kill to the end of the fault handling as well. This simplifies the code quite a bit for added bonus. Second, since a failed allocation might not be the abrupt end of the fault, the memcg OOM handler needs to be re-entrant until the fault finishes for subsequent allocation attempts. If an allocation is attempted after the task already OOMed, allow it to bypass the limit so that it can quickly finish the fault and invoke the OOM killer. Reported-by: azurIt Signed-off-by: Johannes Weiner Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index f7b7692c05ed..1311f26497e6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3865,15 +3865,21 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, * space. Kernel faults are handled more gracefully. */ if (flags & FAULT_FLAG_USER) - mem_cgroup_enable_oom(); + mem_cgroup_oom_enable(); ret = __handle_mm_fault(mm, vma, address, flags); - if (flags & FAULT_FLAG_USER) - mem_cgroup_disable_oom(); - - if (WARN_ON(task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM))) - mem_cgroup_oom_synchronize(); + if (flags & FAULT_FLAG_USER) { + mem_cgroup_oom_disable(); + /* + * The task may have entered a memcg OOM situation but + * if the allocation error was handled gracefully (no + * VM_FAULT_OOM), there is no need to kill anything. + * Just clean up the OOM state peacefully. + */ + if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)) + mem_cgroup_oom_synchronize(false); + } return ret; } -- cgit v1.2.3 From c61109e34f60f6e85bb43c5a1cd51c0e3db40847 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 7 Oct 2013 11:28:45 +0100 Subject: mm: numa: Sanitize task_numa_fault() callsites There are three callers of task_numa_fault(): - do_huge_pmd_numa_page(): Accounts against the current node, not the node where the page resides, unless we migrated, in which case it accounts against the node we migrated to. - do_numa_page(): Accounts against the current node, not the node where the page resides, unless we migrated, in which case it accounts against the node we migrated to. - do_pmd_numa_page(): Accounts not at all when the page isn't migrated, otherwise accounts against the node we migrated towards. This seems wrong to me; all three sites should have the same sementaics, furthermore we should accounts against where the page really is, we already know where the task is. So modify all three sites to always account; we did after all receive the fault; and always account to where the page is after migration, regardless of success. They all still differ on when they clear the PTE/PMD; ideally that would get sorted too. Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel Cc: Andrea Arcangeli Cc: Johannes Weiner Cc: Srikar Dronamraju Cc: Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1381141781-10992-8-git-send-email-mgorman@suse.de Signed-off-by: Ingo Molnar --- mm/memory.c | 53 +++++++++++++++++++++-------------------------------- 1 file changed, 21 insertions(+), 32 deletions(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index 1311f26497e6..d176154c243f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3521,12 +3521,12 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, } int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, - unsigned long addr, int current_nid) + unsigned long addr, int page_nid) { get_page(page); count_vm_numa_event(NUMA_HINT_FAULTS); - if (current_nid == numa_node_id()) + if (page_nid == numa_node_id()) count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); return mpol_misplaced(page, vma, addr); @@ -3537,7 +3537,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, { struct page *page = NULL; spinlock_t *ptl; - int current_nid = -1; + int page_nid = -1; int target_nid; bool migrated = false; @@ -3567,15 +3567,10 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, return 0; } - current_nid = page_to_nid(page); - target_nid = numa_migrate_prep(page, vma, addr, current_nid); + page_nid = page_to_nid(page); + target_nid = numa_migrate_prep(page, vma, addr, page_nid); pte_unmap_unlock(ptep, ptl); if (target_nid == -1) { - /* - * Account for the fault against the current node if it not - * being replaced regardless of where the page is located. - */ - current_nid = numa_node_id(); put_page(page); goto out; } @@ -3583,11 +3578,11 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, /* Migrate to the requested node */ migrated = migrate_misplaced_page(page, target_nid); if (migrated) - current_nid = target_nid; + page_nid = target_nid; out: - if (current_nid != -1) - task_numa_fault(current_nid, 1, migrated); + if (page_nid != -1) + task_numa_fault(page_nid, 1, migrated); return 0; } @@ -3602,7 +3597,6 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long offset; spinlock_t *ptl; bool numa = false; - int local_nid = numa_node_id(); spin_lock(&mm->page_table_lock); pmd = *pmdp; @@ -3625,9 +3619,10 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) { pte_t pteval = *pte; struct page *page; - int curr_nid = local_nid; + int page_nid = -1; int target_nid; - bool migrated; + bool migrated = false; + if (!pte_present(pteval)) continue; if (!pte_numa(pteval)) @@ -3649,25 +3644,19 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(page_mapcount(page) != 1)) continue; - /* - * Note that the NUMA fault is later accounted to either - * the node that is currently running or where the page is - * migrated to. - */ - curr_nid = local_nid; - target_nid = numa_migrate_prep(page, vma, addr, - page_to_nid(page)); - if (target_nid == -1) { + page_nid = page_to_nid(page); + target_nid = numa_migrate_prep(page, vma, addr, page_nid); + pte_unmap_unlock(pte, ptl); + if (target_nid != -1) { + migrated = migrate_misplaced_page(page, target_nid); + if (migrated) + page_nid = target_nid; + } else { put_page(page); - continue; } - /* Migrate to the requested node */ - pte_unmap_unlock(pte, ptl); - migrated = migrate_misplaced_page(page, target_nid); - if (migrated) - curr_nid = target_nid; - task_numa_fault(curr_nid, 1, migrated); + if (page_nid != -1) + task_numa_fault(page_nid, 1, migrated); pte = pte_offset_map_lock(mm, pmdp, addr, &ptl); } -- cgit v1.2.3