gfp: add __GFP_NOACCOUNT

commit 8f4fc071b1926d0b20336e2b3f8ab85c94c734c5 upstream. Not all kmem allocations should be accounted to memcg. The following patch gives an example when accounting of a certain type of allocations to memcg can effectively result in a memory leak. This patch adds the __GFP_NOACCOUNT flag which if passed to kmalloc and friends will force the allocation to go through the root cgroup. It will be used by the next patch. Note, since in case of kmemleak enabled each kmalloc implies yet another allocation from the kmemleak_object cache, we add __GFP_NOACCOUNT to gfp_kmemleak_mask. Alternatively, we could introduce a per kmem cache flag disabling accounting for all allocations of a particular kind, but (a) we would not be able to bypass accounting for kmalloc then and (b) a kmem cache with this flag set could not be merged with a kmem cache without this flag, which would increase the number of global caches and therefore fragmentation even if the memory cgroup controller is not used. Despite its generic name, currently __GFP_NOACCOUNT disables accounting only for kmem allocations while user page allocations are always charged. To catch abusing of this flag, a warning is issued on an attempt of passing it to mem_cgroup_try_charge. Signed-off-by: Vladimir Davydov <vdavydov@parallels.com> Cc: Tejun Heo <tj@kernel.org> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Michal Hocko <mhocko@suse.cz> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Greg Thelen <gthelen@google.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
author: Vladimir Davydov <vdavydov@parallels.com> 2015-05-14 15:16:55 -0700
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2015-06-06 08:21:05 -0700
commit: 4d039741b4155da5b25bc484b32439794672645f (patch)
tree: 584d4e1db0c172bd07bd9218f7b580f78712f1a3
parent: a9996d15757ea207baf4e74b640fcc164863f4d7 (diff)
3 files changed, 8 insertions, 1 deletions
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 51bd1e72a917..eb6fafe66bec 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -30,6 +30,7 @@ struct vm_area_struct;
 #define ___GFP_HARDWALL		0x20000u
 #define ___GFP_THISNODE		0x40000u
 #define ___GFP_RECLAIMABLE	0x80000u
+#define ___GFP_NOACCOUNT	0x100000u
 #define ___GFP_NOTRACK		0x200000u
 #define ___GFP_NO_KSWAPD	0x400000u
 #define ___GFP_OTHER_NODE	0x800000u
@@ -85,6 +86,7 @@ struct vm_area_struct;
 #define __GFP_HARDWALL   ((__force gfp_t)___GFP_HARDWALL) /* Enforce hardwall cpuset memory allocs */
 #define __GFP_THISNODE	((__force gfp_t)___GFP_THISNODE)/* No fallback, no policies */
 #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */
+#define __GFP_NOACCOUNT	((__force gfp_t)___GFP_NOACCOUNT) /* Don't account to kmemcg */
 #define __GFP_NOTRACK	((__force gfp_t)___GFP_NOTRACK)  /* Don't track with kmemcheck */
 
 #define __GFP_NO_KSWAPD	((__force gfp_t)___GFP_NO_KSWAPD)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 72dff5fb0d0c..6c8918114804 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -463,6 +463,8 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
 	if (!memcg_kmem_enabled())
 		return true;
 
+	if (gfp & __GFP_NOACCOUNT)
+		return true;
 	/*
 	 * __GFP_NOFAIL allocations will move on even if charging is not
 	 * possible. Therefore we don't even try, and have this allocation
@@ -522,6 +524,8 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
 {
 	if (!memcg_kmem_enabled())
 		return cachep;
+	if (gfp & __GFP_NOACCOUNT)
+		return cachep;
 	if (gfp & __GFP_NOFAIL)
 		return cachep;
 	if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 5405aff5a590..f0fe4f2c1fa7 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -115,7 +115,8 @@
 #define BYTES_PER_POINTER	sizeof(void *)
 
 /* GFP bitmask for kmemleak internal allocations */
-#define gfp_kmemleak_mask(gfp)	(((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \
+#define gfp_kmemleak_mask(gfp)	(((gfp) & (GFP_KERNEL | GFP_ATOMIC | \
+					   __GFP_NOACCOUNT)) | \
 				 __GFP_NORETRY | __GFP_NOMEMALLOC | \
 				 __GFP_NOWARN)
author	Vladimir Davydov <vdavydov@parallels.com>	2015-05-14 15:16:55 -0700
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2015-06-06 08:21:05 -0700
commit	4d039741b4155da5b25bc484b32439794672645f (patch)
tree	584d4e1db0c172bd07bd9218f7b580f78712f1a3
parent	a9996d15757ea207baf4e74b640fcc164863f4d7 (diff)