From a7c3e901a46ff54c016d040847eda598a9e3e653 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 8 May 2017 15:57:09 -0700 Subject: mm: introduce kv[mz]alloc helpers Patch series "kvmalloc", v5. There are many open coded kmalloc with vmalloc fallback instances in the tree. Most of them are not careful enough or simply do not care about the underlying semantic of the kmalloc/page allocator which means that a) some vmalloc fallbacks are basically unreachable because the kmalloc part will keep retrying until it succeeds b) the page allocator can invoke a really disruptive steps like the OOM killer to move forward which doesn't sound appropriate when we consider that the vmalloc fallback is available. As it can be seen implementing kvmalloc requires quite an intimate knowledge if the page allocator and the memory reclaim internals which strongly suggests that a helper should be implemented in the memory subsystem proper. Most callers, I could find, have been converted to use the helper instead. This is patch 6. There are some more relying on __GFP_REPEAT in the networking stack which I have converted as well and Eric Dumazet was not opposed [2] to convert them as well. [1] http://lkml.kernel.org/r/20170130094940.13546-1-mhocko@kernel.org [2] http://lkml.kernel.org/r/1485273626.16328.301.camel@edumazet-glaptop3.roam.corp.google.com This patch (of 9): Using kmalloc with the vmalloc fallback for larger allocations is a common pattern in the kernel code. Yet we do not have any common helper for that and so users have invented their own helpers. Some of them are really creative when doing so. Let's just add kv[mz]alloc and make sure it is implemented properly. This implementation makes sure to not make a large memory pressure for > PAGE_SZE requests (__GFP_NORETRY) and also to not warn about allocation failures. This also rules out the OOM killer as the vmalloc is a more approapriate fallback than a disruptive user visible action. This patch also changes some existing users and removes helpers which are specific for them. In some cases this is not possible (e.g. ext4_kvmalloc, libcfs_kvzalloc) because those seems to be broken and require GFP_NO{FS,IO} context which is not vmalloc compatible in general (note that the page table allocation is GFP_KERNEL). Those need to be fixed separately. While we are at it, document that __vmalloc{_node} about unsupported gfp mask because there seems to be a lot of confusion out there. kvmalloc_node will warn about GFP_KERNEL incompatible (which are not superset) flags to catch new abusers. Existing ones would have to die slowly. [sfr@canb.auug.org.au: f2fs fixup] Link: http://lkml.kernel.org/r/20170320163735.332e64b7@canb.auug.org.au Link: http://lkml.kernel.org/r/20170306103032.2540-2-mhocko@kernel.org Signed-off-by: Michal Hocko Signed-off-by: Stephen Rothwell Reviewed-by: Andreas Dilger [ext4 part] Acked-by: Vlastimil Babka Cc: John Hubbard Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/vmalloc.h') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index d68edffbf142..46991ad3ddd5 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -80,6 +80,7 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, const void *caller); +extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags); extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); -- cgit v1.2.3-71-gd317 From 1f5307b1e094bfffa83c65c40ac6e3415c108780 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 8 May 2017 15:57:12 -0700 Subject: mm, vmalloc: properly track vmalloc users __vmalloc_node_flags used to be static inline but this has changed by "mm: introduce kv[mz]alloc helpers" because kvmalloc_node needs to use it as well and the code is outside of the vmalloc proper. I haven't realized that changing this will lead to a subtle bug though. The function is responsible to track the caller as well. This caller is then printed by /proc/vmallocinfo. If __vmalloc_node_flags is not inline then we would get only direct users of __vmalloc_node_flags as callers (e.g. v[mz]alloc) which reduces usefulness of this debugging feature considerably. It simply doesn't help to see that the given range belongs to vmalloc as a caller: 0xffffc90002c79000-0xffffc90002c7d000 16384 vmalloc+0x16/0x18 pages=3 vmalloc N0=3 0xffffc90002c81000-0xffffc90002c85000 16384 vmalloc+0x16/0x18 pages=3 vmalloc N1=3 0xffffc90002c8d000-0xffffc90002c91000 16384 vmalloc+0x16/0x18 pages=3 vmalloc N1=3 0xffffc90002c95000-0xffffc90002c99000 16384 vmalloc+0x16/0x18 pages=3 vmalloc N1=3 We really want to catch the _caller_ of the vmalloc function. Fix this issue by making __vmalloc_node_flags static inline again. Link: http://lkml.kernel.org/r/20170502134657.12381-1-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 19 +++++++++++++++++++ mm/vmalloc.c | 12 +----------- 2 files changed, 20 insertions(+), 11 deletions(-) (limited to 'include/linux/vmalloc.h') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 46991ad3ddd5..0328ce003992 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -6,6 +6,7 @@ #include #include #include /* pgprot_t */ +#include /* PAGE_KERNEL */ #include struct vm_area_struct; /* vma defining user mapping in mm_types.h */ @@ -80,7 +81,25 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, const void *caller); +#ifndef CONFIG_MMU extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags); +#else +extern void *__vmalloc_node(unsigned long size, unsigned long align, + gfp_t gfp_mask, pgprot_t prot, + int node, const void *caller); + +/* + * We really want to have this inlined due to caller tracking. This + * function is used by the highlevel vmalloc apis and so we want to track + * their callers and inlining will achieve that. + */ +static inline void *__vmalloc_node_flags(unsigned long size, + int node, gfp_t flags) +{ + return __vmalloc_node(size, 1, flags, PAGE_KERNEL, + node, __builtin_return_address(0)); +} +#endif extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 33603239560e..717b1e8b942c 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1649,9 +1649,6 @@ void *vmap(struct page **pages, unsigned int count, } EXPORT_SYMBOL(vmap); -static void *__vmalloc_node(unsigned long size, unsigned long align, - gfp_t gfp_mask, pgprot_t prot, - int node, const void *caller); static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot, int node) { @@ -1794,7 +1791,7 @@ fail: * with mm people. * */ -static void *__vmalloc_node(unsigned long size, unsigned long align, +void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, pgprot_t prot, int node, const void *caller) { @@ -1809,13 +1806,6 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) } EXPORT_SYMBOL(__vmalloc); -void *__vmalloc_node_flags(unsigned long size, - int node, gfp_t flags) -{ - return __vmalloc_node(size, 1, flags, PAGE_KERNEL, - node, __builtin_return_address(0)); -} - /** * vmalloc - allocate virtually contiguous memory * @size: allocation size -- cgit v1.2.3-71-gd317 From 8594a21cf7a8318baedbedc3fcd2967a17ddeec0 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 12 May 2017 15:46:41 -0700 Subject: mm, vmalloc: fix vmalloc users tracking properly Commit 1f5307b1e094 ("mm, vmalloc: properly track vmalloc users") has pulled asm/pgtable.h include dependency to linux/vmalloc.h and that turned out to be a bad idea for some architectures. E.g. m68k fails with In file included from arch/m68k/include/asm/pgtable_mm.h:145:0, from arch/m68k/include/asm/pgtable.h:4, from include/linux/vmalloc.h:9, from arch/m68k/kernel/module.c:9: arch/m68k/include/asm/mcf_pgtable.h: In function 'nocache_page': >> arch/m68k/include/asm/mcf_pgtable.h:339:43: error: 'init_mm' undeclared (first use in this function) #define pgd_offset_k(address) pgd_offset(&init_mm, address) as spotted by kernel build bot. nios2 fails for other reason In file included from include/asm-generic/io.h:767:0, from arch/nios2/include/asm/io.h:61, from include/linux/io.h:25, from arch/nios2/include/asm/pgtable.h:18, from include/linux/mm.h:70, from include/linux/pid_namespace.h:6, from include/linux/ptrace.h:9, from arch/nios2/include/uapi/asm/elf.h:23, from arch/nios2/include/asm/elf.h:22, from include/linux/elf.h:4, from include/linux/module.h:15, from init/main.c:16: include/linux/vmalloc.h: In function '__vmalloc_node_flags': include/linux/vmalloc.h:99:40: error: 'PAGE_KERNEL' undeclared (first use in this function); did you mean 'GFP_KERNEL'? which is due to the newly added #include , which on nios2 includes and thus and which again includes . Tweaking that around just turns out a bigger headache than necessary. This patch reverts 1f5307b1e094 and reimplements the original fix in a different way. __vmalloc_node_flags can stay static inline which will cover vmalloc* functions. We only have one external user (kvmalloc_node) and we can export __vmalloc_node_flags_caller and provide the caller directly. This is much simpler and it doesn't really need any games with header files. [akpm@linux-foundation.org: coding-style fixes] [mhocko@kernel.org: revert old comment] Link: http://lkml.kernel.org/r/20170509211054.GB16325@dhcp22.suse.cz Fixes: 1f5307b1e094 ("mm, vmalloc: properly track vmalloc users") Link: http://lkml.kernel.org/r/20170509153702.GR6481@dhcp22.suse.cz Signed-off-by: Michal Hocko Cc: Tobias Klauser Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 21 ++++++--------------- mm/util.c | 3 ++- mm/vmalloc.c | 19 ++++++++++++++++++- 3 files changed, 26 insertions(+), 17 deletions(-) (limited to 'include/linux/vmalloc.h') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 0328ce003992..2d92dd002abd 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -6,7 +6,6 @@ #include #include #include /* pgprot_t */ -#include /* PAGE_KERNEL */ #include struct vm_area_struct; /* vma defining user mapping in mm_types.h */ @@ -83,22 +82,14 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align, const void *caller); #ifndef CONFIG_MMU extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags); -#else -extern void *__vmalloc_node(unsigned long size, unsigned long align, - gfp_t gfp_mask, pgprot_t prot, - int node, const void *caller); - -/* - * We really want to have this inlined due to caller tracking. This - * function is used by the highlevel vmalloc apis and so we want to track - * their callers and inlining will achieve that. - */ -static inline void *__vmalloc_node_flags(unsigned long size, - int node, gfp_t flags) +static inline void *__vmalloc_node_flags_caller(unsigned long size, int node, + gfp_t flags, void *caller) { - return __vmalloc_node(size, 1, flags, PAGE_KERNEL, - node, __builtin_return_address(0)); + return __vmalloc_node_flags(size, node, flags); } +#else +extern void *__vmalloc_node_flags_caller(unsigned long size, + int node, gfp_t flags, void *caller); #endif extern void vfree(const void *addr); diff --git a/mm/util.c b/mm/util.c index 718154debc87..464df3489903 100644 --- a/mm/util.c +++ b/mm/util.c @@ -382,7 +382,8 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node) if (ret || size <= PAGE_SIZE) return ret; - return __vmalloc_node_flags(size, node, flags); + return __vmalloc_node_flags_caller(size, node, flags, + __builtin_return_address(0)); } EXPORT_SYMBOL(kvmalloc_node); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 194c22eccb9d..34a1c3e46ed7 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1649,6 +1649,9 @@ void *vmap(struct page **pages, unsigned int count, } EXPORT_SYMBOL(vmap); +static void *__vmalloc_node(unsigned long size, unsigned long align, + gfp_t gfp_mask, pgprot_t prot, + int node, const void *caller); static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot, int node) { @@ -1791,7 +1794,7 @@ fail: * with mm people. * */ -void *__vmalloc_node(unsigned long size, unsigned long align, +static void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask, pgprot_t prot, int node, const void *caller) { @@ -1806,6 +1809,20 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) } EXPORT_SYMBOL(__vmalloc); +static inline void *__vmalloc_node_flags(unsigned long size, + int node, gfp_t flags) +{ + return __vmalloc_node(size, 1, flags, PAGE_KERNEL, + node, __builtin_return_address(0)); +} + + +void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags, + void *caller) +{ + return __vmalloc_node(size, 1, flags, PAGE_KERNEL, node, caller); +} + /** * vmalloc - allocate virtually contiguous memory * @size: allocation size -- cgit v1.2.3-71-gd317