From cdd77d3e193031cc67426cd671d8aa370f7dfee4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 17 Nov 2017 16:23:08 -0800 Subject: nfit, libnvdimm: deprecate the generic SMART ioctl The kernel's ND_IOCTL_SMART_THRESHOLD command is based on a payload definition that has become broken / out-of-sync with recent versions of the NVDIMM_FAMILY_INTEL definition. Deprecate the use of the ND_IOCTL_SMART_THRESHOLD command in favor of the ND_CMD_CALL approach taken by NVDIMM_FAMILY_{HPE,MSFT}, where we can manage the per-vendor variance in userspace. In a couple of years, when the new scheme is widely deployed in userspace packages, the ND_IOCTL_SMART_THRESHOLD support can be removed. For now we prevent new binaries from compiling against the kernel header definitions, but the kernel remains compatible with old binaries. The libndctl.h [1] header is now the authoritative interface definition for NVDIMM SMART. [1]: https://github.com/pmem/ndctl Signed-off-by: Dan Williams --- include/uapi/linux/ndctl.h | 54 ---------------------------------------------- 1 file changed, 54 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 3f03567631cb..30ef1236aafa 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -15,54 +15,6 @@ #include <linux/types.h> -struct nd_cmd_smart { - __u32 status; - __u8 data[128]; -} __packed; - -#define ND_SMART_HEALTH_VALID (1 << 0) -#define ND_SMART_SPARES_VALID (1 << 1) -#define ND_SMART_USED_VALID (1 << 2) -#define ND_SMART_TEMP_VALID (1 << 3) -#define ND_SMART_CTEMP_VALID (1 << 4) -#define ND_SMART_ALARM_VALID (1 << 9) -#define ND_SMART_SHUTDOWN_VALID (1 << 10) -#define ND_SMART_VENDOR_VALID (1 << 11) -#define ND_SMART_SPARE_TRIP (1 << 0) -#define ND_SMART_TEMP_TRIP (1 << 1) -#define ND_SMART_CTEMP_TRIP (1 << 2) -#define ND_SMART_NON_CRITICAL_HEALTH (1 << 0) -#define ND_SMART_CRITICAL_HEALTH (1 << 1) -#define ND_SMART_FATAL_HEALTH (1 << 2) - -struct nd_smart_payload { - __u32 flags; - __u8 reserved0[4]; - __u8 health; - __u8 spares; - __u8 life_used; - __u8 alarm_flags; - __u16 temperature; - __u16 ctrl_temperature; - __u8 reserved1[15]; - __u8 shutdown_state; - __u32 vendor_size; - __u8 vendor_data[92]; -} __packed; - -struct nd_cmd_smart_threshold { - __u32 status; - __u8 data[8]; -} __packed; - -struct nd_smart_threshold_payload { - __u8 alarm_control; - __u8 reserved0; - __u16 temperature; - __u8 spares; - __u8 reserved[3]; -} __packed; - struct nd_cmd_dimm_flags { __u32 status; __u32 flags; @@ -211,12 +163,6 @@ static inline const char *nvdimm_cmd_name(unsigned cmd) #define ND_IOCTL 'N' -#define ND_IOCTL_SMART _IOWR(ND_IOCTL, ND_CMD_SMART,\ struct nd_cmd_smart) - -#define ND_IOCTL_SMART_THRESHOLD _IOWR(ND_IOCTL, ND_CMD_SMART_THRESHOLD,\ struct nd_cmd_smart_threshold) - #define ND_IOCTL_DIMM_FLAGS _IOWR(ND_IOCTL, ND_CMD_DIMM_FLAGS,\ struct nd_cmd_dimm_flags) -- cgit v1.2.3-71-gd317 From 8e37d00a850160bbfadbb3bf4ce49539770c5d2c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:53:50 +0100 Subject: memremap: provide stubs for vmem_altmap_offset and vmem_altmap_free Currently all calls to those functions are eliminated by the compiler when CONFIG_ZONE_DEVICE is not set, but this soon won't be the case.
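To make the pattern concrete, here is a minimal sketch of the kind of generic-code caller the stubs are meant to keep compiling; first_usable_pfn() is a hypothetical name for illustration, not a function added by this patch. With CONFIG_ZONE_DEVICE=n the static inline stub in <linux/memremap.h> returns 0, so the compiler can still discard the altmap branch:

/*
 * Hypothetical caller (illustration only): relies on the
 * CONFIG_ZONE_DEVICE=n stub of vmem_altmap_offset() returning 0,
 * so the branch below folds away when the feature is disabled.
 */
static unsigned long first_usable_pfn(unsigned long start_pfn,
		struct vmem_altmap *altmap)
{
	if (altmap)
		return start_pfn + vmem_altmap_offset(altmap);
	return start_pfn;
}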
Signed-off-by: Christoph Hellwig Signed-off-by: Dan Williams --- include/linux/memremap.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 10d23c367048..d5a6736d9737 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -26,9 +26,6 @@ struct vmem_altmap { unsigned long alloc; }; -unsigned long vmem_altmap_offset(struct vmem_altmap *altmap); -void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns); - #ifdef CONFIG_ZONE_DEVICE struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start); #else @@ -138,6 +135,9 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, struct percpu_ref *ref, struct vmem_altmap *altmap); struct dev_pagemap *find_dev_pagemap(resource_size_t phys); +unsigned long vmem_altmap_offset(struct vmem_altmap *altmap); +void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns); + static inline bool is_zone_device_page(const struct page *page); #else static inline void *devm_memremap_pages(struct device *dev, @@ -157,7 +157,17 @@ static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys) { return NULL; } -#endif + +static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) +{ + return 0; +} + +static inline void vmem_altmap_free(struct vmem_altmap *altmap, + unsigned long nr_pfns) +{ +} +#endif /* CONFIG_ZONE_DEVICE */ #if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC) static inline bool is_device_private_page(const struct page *page) -- cgit v1.2.3-71-gd317 From 24e6d5a59ac7d31adc0322de2d0117dfa370936f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:53:53 +0100 Subject: mm: pass the vmem_altmap to arch_add_memory and __add_pages We can just pass this on instead of having to do a radix tree lookup without proper locking 2 levels into the callchain. 
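The per-architecture hunks below all make the same mechanical change; condensed to its core, the pattern is the following sketch (mirroring the x86-32 hunk; the other architectures only wrap their own setup and error handling around it):

/*
 * Post-patch shape of arch_add_memory(): the caller-supplied altmap
 * is forwarded to __add_pages() instead of being re-derived later
 * via an unlocked to_vmem_altmap() radix tree lookup.
 */
int arch_add_memory(int nid, u64 start, u64 size,
		struct vmem_altmap *altmap, bool want_memblock)
{
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long nr_pages = size >> PAGE_SHIFT;

	return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
}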
Signed-off-by: Christoph Hellwig Signed-off-by: Dan Williams --- arch/ia64/mm/init.c | 5 +++-- arch/powerpc/mm/mem.c | 5 +++-- arch/s390/mm/init.c | 5 +++-- arch/sh/mm/init.c | 5 +++-- arch/x86/mm/init_32.c | 5 +++-- arch/x86/mm/init_64.c | 11 ++++++----- include/linux/memory_hotplug.h | 17 ++++++++++------- kernel/memremap.c | 3 ++- mm/hmm.c | 5 +++-- mm/memory_hotplug.c | 7 +++---- 10 files changed, 39 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 7af4e05bb61e..2e2e4f532204 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -647,13 +647,14 @@ mem_init (void) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); if (ret) printk("%s: Problem encountered in __add_pages() as ret=%d\n", __func__, ret); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 4362b86ef84c..e670cfc2766e 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -127,7 +127,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) return -ENODEV; } -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -144,7 +145,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) return -EFAULT; } - return __add_pages(nid, start_pfn, nr_pages, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); } #ifdef CONFIG_MEMORY_HOTREMOVE diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 671535e64aba..e12c5af50cd7 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -222,7 +222,8 @@ device_initcall(s390_cma_mem_init); #endif /* CONFIG_CMA */ -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = PFN_DOWN(start); unsigned long size_pages = PFN_DOWN(size); @@ -232,7 +233,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) if (rc) return rc; - rc = __add_pages(nid, start_pfn, size_pages, want_memblock); + rc = __add_pages(nid, start_pfn, size_pages, altmap, want_memblock); if (rc) vmem_remove_mapping(start, size); return rc; diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index afc54d593a26..552afbf55bad 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -485,14 +485,15 @@ void free_initrd_mem(unsigned long start, unsigned long end) #endif #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; int ret; /* We only have ZONE_NORMAL, so this is easy.. 
*/ - ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); if (unlikely(ret)) printk("%s: Failed, __add_pages() == %d\n", __func__, ret); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 135c9a7898c7..8a3091511a71 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -829,12 +829,13 @@ void __init mem_init(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - return __add_pages(nid, start_pfn, nr_pages, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); } #ifdef CONFIG_MEMORY_HOTREMOVE diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 8acdc35c2dfa..e80bb4189254 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -772,12 +772,12 @@ static void update_end_of_memory_vars(u64 start, u64 size) } } -int add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, bool want_memblock) +int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, + struct vmem_altmap *altmap, bool want_memblock) { int ret; - ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); WARN_ON_ONCE(ret); /* update max_pfn, max_low_pfn and high_memory */ @@ -787,14 +787,15 @@ int add_pages(int nid, unsigned long start_pfn, return ret; } -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; init_memory_mapping(start, start + size); - return add_pages(nid, start_pfn, nr_pages, want_memblock); + return add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); } #define PAGE_INUSE 0xFD diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 58e110aee7ab..db276afbefcc 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -13,6 +13,7 @@ struct pglist_data; struct mem_section; struct memory_block; struct resource; +struct vmem_altmap; #ifdef CONFIG_MEMORY_HOTPLUG /* @@ -131,18 +132,19 @@ extern int __remove_pages(struct zone *zone, unsigned long start_pfn, #endif /* CONFIG_MEMORY_HOTREMOVE */ /* reasonably generic interface to expand the physical pages */ -extern int __add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, bool want_memblock); +extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, + struct vmem_altmap *altmap, bool want_memblock); #ifndef CONFIG_ARCH_HAS_ADD_PAGES static inline int add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, bool want_memblock) + unsigned long nr_pages, struct vmem_altmap *altmap, + bool want_memblock) { - return __add_pages(nid, start_pfn, nr_pages, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); } #else /* ARCH_HAS_ADD_PAGES */ -int add_pages(int nid, unsigned long start_pfn, - unsigned long nr_pages, bool want_memblock); +int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, + struct vmem_altmap *altmap, bool want_memblock); #endif /* ARCH_HAS_ADD_PAGES */ #ifdef CONFIG_NUMA @@ -318,7 +320,8 @@ 
extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, void *arg, int (*func)(struct memory_block *, void *)); extern int add_memory(int nid, u64 start, u64 size); extern int add_memory_resource(int nid, struct resource *resource, bool online); -extern int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock); +extern int arch_add_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap, bool want_memblock); extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); diff --git a/kernel/memremap.c b/kernel/memremap.c index 403ab9cdb949..8488cdeead16 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -382,6 +382,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (altmap) { memcpy(&page_map->altmap, altmap, sizeof(*altmap)); pgmap->altmap = &page_map->altmap; + altmap = pgmap->altmap; } pgmap->ref = ref; pgmap->res = &page_map->res; @@ -427,7 +428,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, goto err_pfn_remap; mem_hotplug_begin(); - error = arch_add_memory(nid, align_start, align_size, false); + error = arch_add_memory(nid, align_start, align_size, altmap, false); if (!error) move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], align_start >> PAGE_SHIFT, diff --git a/mm/hmm.c b/mm/hmm.c index ea19742a5d60..231aaacd1997 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -931,10 +931,11 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem) * want the linear mapping and thus use arch_add_memory(). */ if (devmem->pagemap.type == MEMORY_DEVICE_PUBLIC) - ret = arch_add_memory(nid, align_start, align_size, false); + ret = arch_add_memory(nid, align_start, align_size, NULL, + false); else ret = add_pages(nid, align_start >> PAGE_SHIFT, - align_size >> PAGE_SHIFT, false); + align_size >> PAGE_SHIFT, NULL, false); if (ret) { mem_hotplug_done(); goto error_add_memory; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 5c6f96e6b334..fc0485dcece1 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -292,18 +292,17 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, * add the new pages. */ int __ref __add_pages(int nid, unsigned long phys_start_pfn, - unsigned long nr_pages, bool want_memblock) + unsigned long nr_pages, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long i; int err = 0; int start_sec, end_sec; - struct vmem_altmap *altmap; /* during initialize mem_map, align hot-added range to section */ start_sec = pfn_to_section_nr(phys_start_pfn); end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1); - altmap = to_vmem_altmap((unsigned long) pfn_to_page(phys_start_pfn)); if (altmap) { /* * Validate altmap is within bounds of the total request @@ -1148,7 +1147,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online) } /* call arch's memory hotadd */ - ret = arch_add_memory(nid, start, size, true); + ret = arch_add_memory(nid, start, size, NULL, true); if (ret < 0) goto error; -- cgit v1.2.3-71-gd317 From 7b73d978a5d0d2a3637bdd57191cb6ffbad3feca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:53:54 +0100 Subject: mm: pass the vmem_altmap to vmemmap_populate We can just pass this on instead of having to do a radix tree lookup without proper locking a few levels into the callchain. 
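The key plumbing step is in mm/sparse-vmemmap.c: sparse_mem_map_populate() now hands its altmap argument straight to vmemmap_populate(). Reassembled from the hunk context below (the map initialization line is reconstructed from the surrounding code and may differ cosmetically from the tree), the function ends up roughly as:

/* Sketch reassembled from the mm/sparse-vmemmap.c hunk below. */
struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid,
		struct vmem_altmap *altmap)
{
	unsigned long start, end;
	struct page *map;

	map = pfn_to_page(pnum * PAGES_PER_SECTION);
	start = (unsigned long)map;
	end = (unsigned long)(map + PAGES_PER_SECTION);

	if (vmemmap_populate(start, end, nid, altmap))
		return NULL;
	return map;
}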
Signed-off-by: Christoph Hellwig Signed-off-by: Dan Williams --- arch/arm64/mm/mmu.c | 6 ++++-- arch/ia64/mm/discontig.c | 3 ++- arch/powerpc/mm/init_64.c | 7 ++----- arch/s390/mm/vmem.c | 3 ++- arch/sparc/mm/init_64.c | 2 +- arch/x86/mm/init_64.c | 4 ++-- include/linux/memory_hotplug.h | 3 ++- include/linux/mm.h | 6 ++++-- mm/memory_hotplug.c | 7 ++++--- mm/sparse-vmemmap.c | 7 ++++--- mm/sparse.c | 20 ++++++++++++-------- 11 files changed, 39 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 267d2b79d52d..ec8952ff13be 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -654,12 +654,14 @@ int kern_addr_valid(unsigned long addr) } #ifdef CONFIG_SPARSEMEM_VMEMMAP #if !ARM64_SWAPPER_USES_SECTION_MAPS -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { return vmemmap_populate_basepages(start, end, node); } #else /* !ARM64_SWAPPER_USES_SECTION_MAPS */ -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { unsigned long addr = start; unsigned long next; diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 9b2d994cddf6..1555aecaaf85 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -754,7 +754,8 @@ void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat) #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { return vmemmap_populate_basepages(start, end, node); } diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index a07722531b32..779b74a96b8f 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -183,7 +183,8 @@ static __meminit void vmemmap_list_populate(unsigned long phys, vmemmap_list = vmem_back; } -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; @@ -193,16 +194,12 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node); for (; start < end; start += page_size) { - struct vmem_altmap *altmap; void *p; int rc; if (vmemmap_populated(start, page_size)) continue; - /* altmap lookups only work at section boundaries */ - altmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start)); - p = __vmemmap_alloc_block_buf(page_size, node, altmap); if (!p) return -ENOMEM; diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 3316d463fc29..c44ef0e7c466 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -211,7 +211,8 @@ static void vmem_remove_range(unsigned long start, unsigned long size) /* * Add a backed mem_map array to the virtual mem_map array. 
*/ -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { unsigned long pgt_prot, sgt_prot; unsigned long address = start; diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 55ba62957e64..42d27a1a042a 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2628,7 +2628,7 @@ EXPORT_SYMBOL(_PAGE_CACHE); #ifdef CONFIG_SPARSEMEM_VMEMMAP int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, - int node) + int node, struct vmem_altmap *altmap) { unsigned long pte_base; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index e80bb4189254..594902ef56ef 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1411,9 +1411,9 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, return 0; } -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { - struct vmem_altmap *altmap = to_vmem_altmap(start); int err; if (boot_cpu_has(X86_FEATURE_PSE)) diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index db276afbefcc..cbdd6d52e877 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -327,7 +327,8 @@ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); extern bool is_memblock_offlined(struct memory_block *mem); extern void remove_memory(int nid, u64 start, u64 size); -extern int sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn); +extern int sparse_add_one_section(struct pglist_data *pgdat, + unsigned long start_pfn, struct vmem_altmap *altmap); extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, unsigned long map_offset); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, diff --git a/include/linux/mm.h b/include/linux/mm.h index ea818ff739cd..2f3a7ebecbe2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2538,7 +2538,8 @@ void sparse_mem_maps_populate_node(struct page **map_map, unsigned long map_count, int nodeid); -struct page *sparse_mem_map_populate(unsigned long pnum, int nid); +struct page *sparse_mem_map_populate(unsigned long pnum, int nid, + struct vmem_altmap *altmap); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node); pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node); @@ -2556,7 +2557,8 @@ static inline void *vmemmap_alloc_block_buf(unsigned long size, int node) void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); int vmemmap_populate_basepages(unsigned long start, unsigned long end, int node); -int vmemmap_populate(unsigned long start, unsigned long end, int node); +int vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap); void vmemmap_populate_print_last(void); #ifdef CONFIG_MEMORY_HOTPLUG void vmemmap_free(unsigned long start, unsigned long end); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index fc0485dcece1..b36f1822c432 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -250,7 +250,7 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat) #endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */ static int 
__meminit __add_section(int nid, unsigned long phys_start_pfn, - bool want_memblock) + struct vmem_altmap *altmap, bool want_memblock) { int ret; int i; @@ -258,7 +258,7 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, if (pfn_valid(phys_start_pfn)) return -EEXIST; - ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn); + ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn, altmap); if (ret < 0) return ret; @@ -317,7 +317,8 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn, } for (i = start_sec; i <= end_sec; i++) { - err = __add_section(nid, section_nr_to_pfn(i), want_memblock); + err = __add_section(nid, section_nr_to_pfn(i), altmap, + want_memblock); /* * EEXIST is finally dealt with by ioresource collision diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 17acf01791fa..376dcf05a39c 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -278,7 +278,8 @@ int __meminit vmemmap_populate_basepages(unsigned long start, return 0; } -struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid) +struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid, + struct vmem_altmap *altmap) { unsigned long start; unsigned long end; @@ -288,7 +289,7 @@ struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid) start = (unsigned long)map; end = (unsigned long)(map + PAGES_PER_SECTION); - if (vmemmap_populate(start, end, nid)) + if (vmemmap_populate(start, end, nid, altmap)) return NULL; return map; @@ -318,7 +319,7 @@ void __init sparse_mem_maps_populate_node(struct page **map_map, if (!present_section_nr(pnum)) continue; - map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); + map_map[pnum] = sparse_mem_map_populate(pnum, nodeid, NULL); if (map_map[pnum]) continue; ms = __nr_to_section(pnum); diff --git a/mm/sparse.c b/mm/sparse.c index 7a5dacaa06e3..5f4a0dac7836 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -417,7 +417,8 @@ static void __init sparse_early_usemaps_alloc_node(void *data, } #ifndef CONFIG_SPARSEMEM_VMEMMAP -struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid) +struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid, + struct vmem_altmap *altmap) { struct page *map; unsigned long size; @@ -472,7 +473,7 @@ void __init sparse_mem_maps_populate_node(struct page **map_map, if (!present_section_nr(pnum)) continue; - map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); + map_map[pnum] = sparse_mem_map_populate(pnum, nodeid, NULL); if (map_map[pnum]) continue; ms = __nr_to_section(pnum); @@ -500,7 +501,7 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) struct mem_section *ms = __nr_to_section(pnum); int nid = sparse_early_nid(ms); - map = sparse_mem_map_populate(pnum, nid); + map = sparse_mem_map_populate(pnum, nid, NULL); if (map) return map; @@ -678,10 +679,11 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn) #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP -static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid) +static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, + struct vmem_altmap *altmap) { /* This will make the necessary allocations eventually. 
*/ - return sparse_mem_map_populate(pnum, nid); + return sparse_mem_map_populate(pnum, nid, altmap); } static void __kfree_section_memmap(struct page *memmap) { @@ -721,7 +723,8 @@ got_map_ptr: return ret; } -static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid) +static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, + struct vmem_altmap *altmap) { return __kmalloc_section_memmap(); } @@ -773,7 +776,8 @@ static void free_map_bootmem(struct page *memmap) * set. If this is <=0, then that means that the passed-in * map was not consumed and must be freed. */ -int __meminit sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn) +int __meminit sparse_add_one_section(struct pglist_data *pgdat, + unsigned long start_pfn, struct vmem_altmap *altmap) { unsigned long section_nr = pfn_to_section_nr(start_pfn); struct mem_section *ms; @@ -789,7 +793,7 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat, unsigned long st ret = sparse_index_init(section_nr, pgdat->node_id); if (ret < 0 && ret != -EEXIST) return ret; - memmap = kmalloc_section_memmap(section_nr, pgdat->node_id); + memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, altmap); if (!memmap) return -ENOMEM; usemap = __kmalloc_section_usemap(); -- cgit v1.2.3-71-gd317 From da024512a1fa5c979257e442130ee1d468285057 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:53:55 +0100 Subject: mm: pass the vmem_altmap to arch_remove_memory and __remove_pages We can just pass this on instead of having to do a radix tree lookup without proper locking 2 levels into the callchain. Signed-off-by: Christoph Hellwig Signed-off-by: Dan Williams --- arch/ia64/mm/init.c | 4 ++-- arch/powerpc/mm/mem.c | 6 ++---- arch/s390/mm/init.c | 2 +- arch/sh/mm/init.c | 4 ++-- arch/x86/mm/init_32.c | 4 ++-- arch/x86/mm/init_64.c | 6 ++---- include/linux/memory_hotplug.h | 5 +++-- kernel/memremap.c | 2 +- mm/hmm.c | 4 ++-- mm/memory_hotplug.c | 8 ++------ 10 files changed, 19 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 2e2e4f532204..6a8ce9e1536e 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -663,7 +663,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -671,7 +671,7 @@ int arch_remove_memory(u64 start, u64 size) int ret; zone = page_zone(pfn_to_page(start_pfn)); - ret = __remove_pages(zone, start_pfn, nr_pages); + ret = __remove_pages(zone, start_pfn, nr_pages, altmap); if (ret) pr_warn("%s: Problem encountered in __remove_pages() as" " ret=%d\n", __func__, ret); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index e670cfc2766e..22aa528b78a2 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -149,11 +149,10 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct vmem_altmap *altmap; struct page *page; int ret; @@ -162,11 +161,10 @@ int arch_remove_memory(u64 start, u64 size) * when querying the zone. 
*/ page = pfn_to_page(start_pfn); - altmap = to_vmem_altmap((unsigned long) page); if (altmap) page += vmem_altmap_offset(altmap); - ret = __remove_pages(page_zone(page), start_pfn, nr_pages); + ret = __remove_pages(page_zone(page), start_pfn, nr_pages, altmap); if (ret) return ret; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index e12c5af50cd7..3fa3e5323612 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -240,7 +240,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { /* * There is no hardware or firmware interface which could trigger a diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 552afbf55bad..ce0bbaa7e404 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -510,7 +510,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; @@ -518,7 +518,7 @@ int arch_remove_memory(u64 start, u64 size) int ret; zone = page_zone(pfn_to_page(start_pfn)); - ret = __remove_pages(zone, start_pfn, nr_pages); + ret = __remove_pages(zone, start_pfn, nr_pages, altmap); if (unlikely(ret)) pr_warn("%s: Failed, __remove_pages() == %d\n", __func__, ret); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 8a3091511a71..79cb066f40c0 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -839,14 +839,14 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size) +int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct zone *zone; zone = page_zone(pfn_to_page(start_pfn)); - return __remove_pages(zone, start_pfn, nr_pages); + return __remove_pages(zone, start_pfn, nr_pages, altmap); } #endif #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 594902ef56ef..3c046618cc7e 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1132,21 +1132,19 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end) remove_pagetable(start, end, true); } -int __ref arch_remove_memory(u64 start, u64 size) +int __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct page *page = pfn_to_page(start_pfn); - struct vmem_altmap *altmap; struct zone *zone; int ret; /* With altmap the first mapped page is offset from @start */ - altmap = to_vmem_altmap((unsigned long) page); if (altmap) page += vmem_altmap_offset(altmap); zone = page_zone(page); - ret = __remove_pages(zone, start_pfn, nr_pages); + ret = __remove_pages(zone, start_pfn, nr_pages, altmap); WARN_ON_ONCE(ret); kernel_physical_mapping_remove(start, start + size); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index cbdd6d52e877..e71927d0d46b 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -126,9 +126,10 @@ static inline bool movable_node_is_enabled(void) #ifdef CONFIG_MEMORY_HOTREMOVE extern bool is_pageblock_removable_nolock(struct page *page); -extern int 
arch_remove_memory(u64 start, u64 size); +extern int arch_remove_memory(u64 start, u64 size, + struct vmem_altmap *altmap); extern int __remove_pages(struct zone *zone, unsigned long start_pfn, - unsigned long nr_pages); + unsigned long nr_pages, struct vmem_altmap *altmap); #endif /* CONFIG_MEMORY_HOTREMOVE */ /* reasonably generic interface to expand the physical pages */ diff --git a/kernel/memremap.c b/kernel/memremap.c index 8488cdeead16..380fca1c4a02 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -304,7 +304,7 @@ static void devm_memremap_pages_release(struct device *dev, void *data) align_size = ALIGN(resource_size(res), SECTION_SIZE); mem_hotplug_begin(); - arch_remove_memory(align_start, align_size); + arch_remove_memory(align_start, align_size, pgmap->altmap); mem_hotplug_done(); untrack_pfn(NULL, PHYS_PFN(align_start), align_size); diff --git a/mm/hmm.c b/mm/hmm.c index 231aaacd1997..5d17ba89062f 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -838,10 +838,10 @@ static void hmm_devmem_release(struct device *dev, void *data) mem_hotplug_begin(); if (resource->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) - __remove_pages(zone, start_pfn, npages); + __remove_pages(zone, start_pfn, npages, NULL); else arch_remove_memory(start_pfn << PAGE_SHIFT, - npages << PAGE_SHIFT); + npages << PAGE_SHIFT, NULL); mem_hotplug_done(); hmm_devmem_radix_release(resource); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index b36f1822c432..eae6bf47caf7 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -569,7 +569,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms, * calling offline_pages(). */ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, - unsigned long nr_pages) + unsigned long nr_pages, struct vmem_altmap *altmap) { unsigned long i; unsigned long map_offset = 0; @@ -577,10 +577,6 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, /* In the ZONE_DEVICE case device driver owns the memory region */ if (is_dev_zone(zone)) { - struct page *page = pfn_to_page(phys_start_pfn); - struct vmem_altmap *altmap; - - altmap = to_vmem_altmap((unsigned long) page); if (altmap) map_offset = vmem_altmap_offset(altmap); } else { @@ -1890,7 +1886,7 @@ void __ref remove_memory(int nid, u64 start, u64 size) memblock_free(start, size); memblock_remove(start, size); - arch_remove_memory(start, size); + arch_remove_memory(start, size, NULL); try_offline_node(nid); -- cgit v1.2.3-71-gd317 From 24b6d4164348370c6b6a58b4248babd85ff9e982 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:53:56 +0100 Subject: mm: pass the vmem_altmap to vmemmap_free We can just pass this on instead of having to do a radix tree lookup without proper locking a few levels into the callchain. 
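On the teardown side the x86 hunks below push the altmap all the way into free_pagetable(); condensed (the real function also handles bootmem-allocated pages via the page magic, omitted here), the core of the change is:

/*
 * Condensed sketch of x86's free_pagetable() after this patch:
 * struct page storage that came from a device-provided altmap is
 * returned to the altmap pool rather than to the page allocator.
 */
static void __meminit free_pagetable(struct page *page, int order,
		struct vmem_altmap *altmap)
{
	unsigned int nr_pages = 1 << order;

	if (altmap) {
		vmem_altmap_free(altmap, nr_pages);
		return;
	}
	free_pages((unsigned long)page_address(page), order);
}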
Signed-off-by: Christoph Hellwig Signed-off-by: Dan Williams --- arch/arm64/mm/mmu.c | 3 +- arch/ia64/mm/discontig.c | 3 +- arch/powerpc/mm/init_64.c | 5 ++-- arch/s390/mm/vmem.c | 3 +- arch/sparc/mm/init_64.c | 3 +- arch/x86/mm/init_64.c | 67 ++++++++++++++++++++++++------------------ include/linux/memory_hotplug.h | 2 +- include/linux/mm.h | 3 +- mm/memory_hotplug.c | 7 +++-- mm/sparse.c | 23 ++++++++------- 10 files changed, 68 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index ec8952ff13be..0b1f13e0b4b3 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -696,7 +696,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, return 0; } #endif /* CONFIG_ARM64_64K_PAGES */ -void vmemmap_free(unsigned long start, unsigned long end) +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 1555aecaaf85..5ea0d8d0968b 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -760,7 +760,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, return vmemmap_populate_basepages(start, end, node); } -void vmemmap_free(unsigned long start, unsigned long end) +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { } #endif diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 779b74a96b8f..db7d4e092157 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -254,7 +254,8 @@ static unsigned long vmemmap_list_free(unsigned long start) return vmem_back->phys; } -void __ref vmemmap_free(unsigned long start, unsigned long end) +void __ref vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; unsigned long page_order = get_order(page_size); @@ -265,7 +266,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end) for (; start < end; start += page_size) { unsigned long nr_pages, addr; - struct vmem_altmap *altmap; struct page *section_base; struct page *page; @@ -285,7 +285,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end) section_base = pfn_to_page(vmemmap_section_start(start)); nr_pages = 1 << page_order; - altmap = to_vmem_altmap((unsigned long) section_base); if (altmap) { vmem_altmap_free(altmap, nr_pages); } else if (PageReserved(page)) { diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index c44ef0e7c466..db55561c5981 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -297,7 +297,8 @@ out: return ret; } -void vmemmap_free(unsigned long start, unsigned long end) +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { } diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 42d27a1a042a..995f9490334d 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2671,7 +2671,8 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, return 0; } -void vmemmap_free(unsigned long start, unsigned long end) +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3c046618cc7e..0cab4b5b59ba 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -800,11 +800,11 @@ int 
arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, #define PAGE_INUSE 0xFD -static void __meminit free_pagetable(struct page *page, int order) +static void __meminit free_pagetable(struct page *page, int order, + struct vmem_altmap *altmap) { unsigned long magic; unsigned int nr_pages = 1 << order; - struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page); if (altmap) { vmem_altmap_free(altmap, nr_pages); @@ -826,7 +826,8 @@ static void __meminit free_pagetable(struct page *page, int order) free_pages((unsigned long)page_address(page), order); } -static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) +static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd, + struct vmem_altmap *altmap) { pte_t *pte; int i; @@ -838,13 +839,14 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) } /* free a pte talbe */ - free_pagetable(pmd_page(*pmd), 0); + free_pagetable(pmd_page(*pmd), 0, altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); spin_unlock(&init_mm.page_table_lock); } -static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) +static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, + struct vmem_altmap *altmap) { pmd_t *pmd; int i; @@ -856,13 +858,14 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) } /* free a pmd talbe */ - free_pagetable(pud_page(*pud), 0); + free_pagetable(pud_page(*pud), 0, altmap); spin_lock(&init_mm.page_table_lock); pud_clear(pud); spin_unlock(&init_mm.page_table_lock); } -static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) +static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d, + struct vmem_altmap *altmap) { pud_t *pud; int i; @@ -874,7 +877,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) } /* free a pud talbe */ - free_pagetable(p4d_page(*p4d), 0); + free_pagetable(p4d_page(*p4d), 0, altmap); spin_lock(&init_mm.page_table_lock); p4d_clear(p4d); spin_unlock(&init_mm.page_table_lock); @@ -882,7 +885,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) static void __meminit remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, - bool direct) + struct vmem_altmap *altmap, bool direct) { unsigned long next, pages = 0; pte_t *pte; @@ -913,7 +916,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, * freed when offlining, or simplely not in use. 
*/ if (!direct) - free_pagetable(pte_page(*pte), 0); + free_pagetable(pte_page(*pte), 0, altmap); spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); @@ -936,7 +939,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, page_addr = page_address(pte_page(*pte)); if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) { - free_pagetable(pte_page(*pte), 0); + free_pagetable(pte_page(*pte), 0, altmap); spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); @@ -953,7 +956,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, - bool direct) + bool direct, struct vmem_altmap *altmap) { unsigned long next, pages = 0; pte_t *pte_base; @@ -972,7 +975,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, IS_ALIGNED(next, PMD_SIZE)) { if (!direct) free_pagetable(pmd_page(*pmd), - get_order(PMD_SIZE)); + get_order(PMD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); @@ -986,7 +990,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, if (!memchr_inv(page_addr, PAGE_INUSE, PMD_SIZE)) { free_pagetable(pmd_page(*pmd), - get_order(PMD_SIZE)); + get_order(PMD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); @@ -998,8 +1003,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, } pte_base = (pte_t *)pmd_page_vaddr(*pmd); - remove_pte_table(pte_base, addr, next, direct); - free_pte_table(pte_base, pmd); + remove_pte_table(pte_base, addr, next, altmap, direct); + free_pte_table(pte_base, pmd, altmap); } /* Call free_pmd_table() in remove_pud_table(). */ @@ -1009,7 +1014,7 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, - bool direct) + struct vmem_altmap *altmap, bool direct) { unsigned long next, pages = 0; pmd_t *pmd_base; @@ -1028,7 +1033,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, IS_ALIGNED(next, PUD_SIZE)) { if (!direct) free_pagetable(pud_page(*pud), - get_order(PUD_SIZE)); + get_order(PUD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pud_clear(pud); @@ -1042,7 +1048,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, if (!memchr_inv(page_addr, PAGE_INUSE, PUD_SIZE)) { free_pagetable(pud_page(*pud), - get_order(PUD_SIZE)); + get_order(PUD_SIZE), + altmap); spin_lock(&init_mm.page_table_lock); pud_clear(pud); @@ -1054,8 +1061,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, } pmd_base = pmd_offset(pud, 0); - remove_pmd_table(pmd_base, addr, next, direct); - free_pmd_table(pmd_base, pud); + remove_pmd_table(pmd_base, addr, next, direct, altmap); + free_pmd_table(pmd_base, pud, altmap); } if (direct) @@ -1064,7 +1071,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, static void __meminit remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, - bool direct) + struct vmem_altmap *altmap, bool direct) { unsigned long next, pages = 0; pud_t *pud_base; @@ -1080,14 +1087,14 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, BUILD_BUG_ON(p4d_large(*p4d)); pud_base = pud_offset(p4d, 0); - remove_pud_table(pud_base, addr, next, direct); + remove_pud_table(pud_base, addr, next, altmap, direct); /* * For 4-level page tables we do not want 
to free PUDs, but in the * 5-level case we should free them. This code will have to change * to adapt for boot-time switching between 4 and 5 level page tables. */ if (CONFIG_PGTABLE_LEVELS == 5) - free_pud_table(pud_base, p4d); + free_pud_table(pud_base, p4d, altmap); } if (direct) @@ -1096,7 +1103,8 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, /* start and end are both virtual address. */ static void __meminit -remove_pagetable(unsigned long start, unsigned long end, bool direct) +remove_pagetable(unsigned long start, unsigned long end, bool direct, + struct vmem_altmap *altmap) { unsigned long next; unsigned long addr; @@ -1111,15 +1119,16 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct) continue; p4d = p4d_offset(pgd, 0); - remove_p4d_table(p4d, addr, next, direct); + remove_p4d_table(p4d, addr, next, altmap, direct); } flush_tlb_all(); } -void __ref vmemmap_free(unsigned long start, unsigned long end) +void __ref vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) { - remove_pagetable(start, end, false); + remove_pagetable(start, end, false, altmap); } #ifdef CONFIG_MEMORY_HOTREMOVE @@ -1129,7 +1138,7 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end) start = (unsigned long)__va(start); end = (unsigned long)__va(end); - remove_pagetable(start, end, true); + remove_pagetable(start, end, true, NULL); } int __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index e71927d0d46b..20dd98ad44a0 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -331,7 +331,7 @@ extern void remove_memory(int nid, u64 start, u64 size); extern int sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn, struct vmem_altmap *altmap); extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, - unsigned long map_offset); + unsigned long map_offset, struct vmem_altmap *altmap); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages, diff --git a/include/linux/mm.h b/include/linux/mm.h index 2f3a7ebecbe2..9d4cd4c1dc6d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2561,7 +2561,8 @@ int vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap); void vmemmap_populate_print_last(void); #ifdef CONFIG_MEMORY_HOTPLUG -void vmemmap_free(unsigned long start, unsigned long end); +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap); #endif void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, unsigned long nr_pages); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index eae6bf47caf7..a8dde9734120 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -536,7 +536,7 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn) } static int __remove_section(struct zone *zone, struct mem_section *ms, - unsigned long map_offset) + unsigned long map_offset, struct vmem_altmap *altmap) { unsigned long start_pfn; int scn_nr; @@ -553,7 +553,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms, start_pfn = section_nr_to_pfn((unsigned long)scn_nr); __remove_zone(zone, start_pfn); - sparse_remove_one_section(zone, ms, map_offset); + sparse_remove_one_section(zone, ms, map_offset, 
altmap); return 0; } @@ -607,7 +607,8 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, for (i = 0; i < sections_to_remove; i++) { unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION; - ret = __remove_section(zone, __pfn_to_section(pfn), map_offset); + ret = __remove_section(zone, __pfn_to_section(pfn), map_offset, + altmap); map_offset = 0; if (ret) break; diff --git a/mm/sparse.c b/mm/sparse.c index 5f4a0dac7836..06130c13dc99 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -685,12 +685,13 @@ static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, /* This will make the necessary allocations eventually. */ return sparse_mem_map_populate(pnum, nid, altmap); } -static void __kfree_section_memmap(struct page *memmap) +static void __kfree_section_memmap(struct page *memmap, + struct vmem_altmap *altmap) { unsigned long start = (unsigned long)memmap; unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION); - vmemmap_free(start, end); + vmemmap_free(start, end, altmap); } #ifdef CONFIG_MEMORY_HOTREMOVE static void free_map_bootmem(struct page *memmap) @@ -698,7 +699,7 @@ static void free_map_bootmem(struct page *memmap) unsigned long start = (unsigned long)memmap; unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION); - vmemmap_free(start, end); + vmemmap_free(start, end, NULL); } #endif /* CONFIG_MEMORY_HOTREMOVE */ #else @@ -729,7 +730,8 @@ static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid, return __kmalloc_section_memmap(); } -static void __kfree_section_memmap(struct page *memmap) +static void __kfree_section_memmap(struct page *memmap, + struct vmem_altmap *altmap) { if (is_vmalloc_addr(memmap)) vfree(memmap); @@ -798,7 +800,7 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat, return -ENOMEM; usemap = __kmalloc_section_usemap(); if (!usemap) { - __kfree_section_memmap(memmap); + __kfree_section_memmap(memmap, altmap); return -ENOMEM; } @@ -820,7 +822,7 @@ out: pgdat_resize_unlock(pgdat, &flags); if (ret <= 0) { kfree(usemap); - __kfree_section_memmap(memmap); + __kfree_section_memmap(memmap, altmap); } return ret; } @@ -847,7 +849,8 @@ static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) } #endif -static void free_section_usemap(struct page *memmap, unsigned long *usemap) +static void free_section_usemap(struct page *memmap, unsigned long *usemap, + struct vmem_altmap *altmap) { struct page *usemap_page; @@ -861,7 +864,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap) if (PageSlab(usemap_page) || PageCompound(usemap_page)) { kfree(usemap); if (memmap) - __kfree_section_memmap(memmap); + __kfree_section_memmap(memmap, altmap); return; } @@ -875,7 +878,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap) } void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, - unsigned long map_offset) + unsigned long map_offset, struct vmem_altmap *altmap) { struct page *memmap = NULL; unsigned long *usemap = NULL, flags; @@ -893,7 +896,7 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, clear_hwpoisoned_pages(memmap + map_offset, PAGES_PER_SECTION - map_offset); - free_section_usemap(memmap, usemap); + free_section_usemap(memmap, usemap, altmap); } #endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_MEMORY_HOTPLUG */ -- cgit v1.2.3-71-gd317 From a99583e780c751003ac9c0105eec9a3b23ec3bc4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:53:57 
+0100 Subject: mm: pass the vmem_altmap to memmap_init_zone Pass the vmem_altmap two levels down instead of needing a lookup. Signed-off-by: Christoph Hellwig Signed-off-by: Dan Williams --- arch/ia64/mm/init.c | 9 +++++---- include/linux/memory_hotplug.h | 2 +- include/linux/mm.h | 4 ++-- kernel/memremap.c | 2 +- mm/hmm.c | 2 +- mm/memory_hotplug.c | 9 +++++---- mm/page_alloc.c | 6 +++--- 7 files changed, 18 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 6a8ce9e1536e..18278b448530 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -501,7 +501,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg) if (map_start < map_end) memmap_init_zone((unsigned long)(map_end - map_start), args->nid, args->zone, page_to_pfn(map_start), - MEMMAP_EARLY); + MEMMAP_EARLY, NULL); return 0; } @@ -509,9 +509,10 @@ void __meminit memmap_init (unsigned long size, int nid, unsigned long zone, unsigned long start_pfn) { - if (!vmem_map) - memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY); - else { + if (!vmem_map) { + memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY, + NULL); + } else { struct page *start; struct memmap_init_callback_data args; diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 20dd98ad44a0..aba5f86eb038 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -324,7 +324,7 @@ extern int add_memory_resource(int nid, struct resource *resource, bool online); extern int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, bool want_memblock); extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, - unsigned long nr_pages); + unsigned long nr_pages, struct vmem_altmap *altmap); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); extern bool is_memblock_offlined(struct memory_block *mem); extern void remove_memory(int nid, u64 start, u64 size); diff --git a/include/linux/mm.h b/include/linux/mm.h index 9d4cd4c1dc6d..fd01135324b6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2069,8 +2069,8 @@ static inline void zero_resv_unavail(void) {} #endif extern void set_dma_reserve(unsigned long new_dma_reserve); -extern void memmap_init_zone(unsigned long, int, unsigned long, - unsigned long, enum memmap_context); +extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long, + enum memmap_context, struct vmem_altmap *); extern void setup_per_zone_wmarks(void); extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); diff --git a/kernel/memremap.c b/kernel/memremap.c index 380fca1c4a02..64b12c806cc5 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -432,7 +432,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (!error) move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], align_start >> PAGE_SHIFT, - align_size >> PAGE_SHIFT); + align_size >> PAGE_SHIFT, altmap); mem_hotplug_done(); if (error) goto err_add_memory; diff --git a/mm/hmm.c b/mm/hmm.c index 5d17ba89062f..2f2e13c61040 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -942,7 +942,7 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem) } move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], align_start >> PAGE_SHIFT, - align_size >> PAGE_SHIFT); + align_size >> PAGE_SHIFT, NULL); mem_hotplug_done(); for (pfn = devmem->pfn_first; pfn < devmem->pfn_last; pfn++) { diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 
a8dde9734120..12df8a5fadcc 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -798,8 +798,8 @@ static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned lon pgdat->node_spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - pgdat->node_start_pfn; } -void __ref move_pfn_range_to_zone(struct zone *zone, - unsigned long start_pfn, unsigned long nr_pages) +void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, + unsigned long nr_pages, struct vmem_altmap *altmap) { struct pglist_data *pgdat = zone->zone_pgdat; int nid = pgdat->node_id; @@ -824,7 +824,8 @@ void __ref move_pfn_range_to_zone(struct zone *zone, * expects the zone spans the pfn range. All the pages in the range * are reserved so nobody should be touching them so we should be safe */ - memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, MEMMAP_HOTPLUG); + memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, + MEMMAP_HOTPLUG, altmap); set_zone_contiguous(zone); } @@ -896,7 +897,7 @@ static struct zone * __meminit move_pfn_range(int online_type, int nid, struct zone *zone; zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages); - move_pfn_range_to_zone(zone, start_pfn, nr_pages); + move_pfn_range_to_zone(zone, start_pfn, nr_pages, NULL); return zone; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7e5e775e97f4..1748dd4a4b1b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5314,9 +5314,9 @@ void __ref build_all_zonelists(pg_data_t *pgdat) * done. Non-atomic initialization, single-pass. */ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, - unsigned long start_pfn, enum memmap_context context) + unsigned long start_pfn, enum memmap_context context, + struct vmem_altmap *altmap) { - struct vmem_altmap *altmap = to_vmem_altmap(__pfn_to_phys(start_pfn)); unsigned long end_pfn = start_pfn + size; pg_data_t *pgdat = NODE_DATA(nid); unsigned long pfn; @@ -5417,7 +5417,7 @@ static void __meminit zone_init_free_lists(struct zone *zone) #ifndef __HAVE_ARCH_MEMMAP_INIT #define memmap_init(size, nid, zone, start_pfn) \ - memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY) + memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY, NULL) #endif static int zone_batchsize(struct zone *zone) -- cgit v1.2.3-71-gd317 From a8fc357b2875da8732c91eb085862a0648d82767 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:53:58 +0100 Subject: mm: split altmap memory map allocation from normal case No functional changes, just untangling the call chain and documenting why the altmap is passed around the hotplug code.
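Schematically, the two converted call sites (powerpc and x86, below) now both read as in the following sketch; the wrapper name is hypothetical, since upstream the branch is open-coded at each call site:

/*
 * Hypothetical helper (illustration only): the split replaces
 * __vmemmap_alloc_block_buf() with an explicit choice between the
 * altmap-backed allocator and the regular node allocator.
 */
static void * __meminit alloc_memmap_buf(unsigned long size, int node,
		struct vmem_altmap *altmap)
{
	if (altmap)
		return altmap_alloc_block_buf(size, altmap);
	return vmemmap_alloc_block_buf(size, node);
}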
Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Signed-off-by: Dan Williams --- arch/powerpc/mm/init_64.c | 5 ++++- arch/x86/mm/init_64.c | 5 ++++- include/linux/mm.h | 9 ++------- mm/sparse-vmemmap.c | 15 +++------------ 4 files changed, 13 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index db7d4e092157..7a2251d99ed3 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -200,7 +200,10 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, if (vmemmap_populated(start, page_size)) continue; - p = __vmemmap_alloc_block_buf(page_size, node, altmap); + if (altmap) + p = altmap_alloc_block_buf(page_size, altmap); + else + p = vmemmap_alloc_block_buf(page_size, node); if (!p) return -ENOMEM; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 0cab4b5b59ba..1ab42c852069 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1385,7 +1385,10 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, if (pmd_none(*pmd)) { void *p; - p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); + if (altmap) + p = altmap_alloc_block_buf(PMD_SIZE, altmap); + else + p = vmemmap_alloc_block_buf(PMD_SIZE, node); if (p) { pte_t entry; diff --git a/include/linux/mm.h b/include/linux/mm.h index fd01135324b6..09637c353de0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2547,13 +2547,8 @@ pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); void *vmemmap_alloc_block(unsigned long size, int node); struct vmem_altmap; -void *__vmemmap_alloc_block_buf(unsigned long size, int node, - struct vmem_altmap *altmap); -static inline void *vmemmap_alloc_block_buf(unsigned long size, int node) -{ - return __vmemmap_alloc_block_buf(size, node, NULL); -} - +void *vmemmap_alloc_block_buf(unsigned long size, int node); +void *altmap_alloc_block_buf(unsigned long size, struct vmem_altmap *altmap); void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); int vmemmap_populate_basepages(unsigned long start, unsigned long end, int node); diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 376dcf05a39c..d012c9e2811b 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -74,7 +74,7 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node) } /* need to make sure size is all the same during early stage */ -static void * __meminit alloc_block_buf(unsigned long size, int node) +void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node) { void *ptr; @@ -129,7 +129,7 @@ static unsigned long __meminit vmem_altmap_alloc(struct vmem_altmap *altmap, return pfn + nr_align; } -static void * __meminit altmap_alloc_block_buf(unsigned long size, +void * __meminit altmap_alloc_block_buf(unsigned long size, struct vmem_altmap *altmap) { unsigned long pfn, nr_pfns; @@ -153,15 +153,6 @@ static void * __meminit altmap_alloc_block_buf(unsigned long size, return ptr; } -/* need to make sure size is all the same during early stage */ -void * __meminit __vmemmap_alloc_block_buf(unsigned long size, int node, - struct vmem_altmap *altmap) -{ - if (altmap) - return altmap_alloc_block_buf(size, altmap); - return alloc_block_buf(size, node); -} - void __meminit vmemmap_verify(pte_t *pte, int node, unsigned long start, unsigned long end) { @@ -178,7 +169,7 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node) pte_t 
*pte = pte_offset_kernel(pmd, addr); if (pte_none(*pte)) { pte_t entry; - void *p = alloc_block_buf(PAGE_SIZE, node); + void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node); if (!p) return NULL; entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); -- cgit v1.2.3-71-gd317 From 0822acb86cf340cd45b3af6436cec7e3bb24ebd2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:54:00 +0100 Subject: mm: move get_dev_pagemap out of line MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a pretty big function, which should be out of line in general, and a no-op stub if CONFIG_ZONE_DEVICE is not set. Signed-off-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Signed-off-by: Dan Williams --- include/linux/memremap.h | 39 ++++----------------------------------- kernel/memremap.c | 36 ++++++++++++++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index d5a6736d9737..26e8aaba27d5 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -133,7 +133,8 @@ struct dev_pagemap { #ifdef CONFIG_ZONE_DEVICE void *devm_memremap_pages(struct device *dev, struct resource *res, struct percpu_ref *ref, struct vmem_altmap *altmap); -struct dev_pagemap *find_dev_pagemap(resource_size_t phys); +struct dev_pagemap *get_dev_pagemap(unsigned long pfn, + struct dev_pagemap *pgmap); unsigned long vmem_altmap_offset(struct vmem_altmap *altmap); void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns); @@ -153,7 +154,8 @@ static inline void *devm_memremap_pages(struct device *dev, return ERR_PTR(-ENXIO); } -static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys) +static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn, + struct dev_pagemap *pgmap) { return NULL; } @@ -183,39 +185,6 @@ static inline bool is_device_public_page(const struct page *page) } #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ -/** - * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn - * @pfn: page frame number to lookup page_map - * @pgmap: optional known pgmap that already has a reference - * - * @pgmap allows the overhead of a lookup to be bypassed when @pfn lands in the - * same mapping. - */ -static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn, - struct dev_pagemap *pgmap) -{ - const struct resource *res = pgmap ? 
pgmap->res : NULL; - resource_size_t phys = PFN_PHYS(pfn); - - /* - * In the cached case we're already holding a live reference so - * we can simply do a blind increment - */ - if (res && phys >= res->start && phys <= res->end) { - percpu_ref_get(pgmap->ref); - return pgmap; - } - - /* fall back to slow path lookup */ - rcu_read_lock(); - pgmap = find_dev_pagemap(phys); - if (pgmap && !percpu_ref_tryget_live(pgmap->ref)) - pgmap = NULL; - rcu_read_unlock(); - - return pgmap; -} - static inline void put_dev_pagemap(struct dev_pagemap *pgmap) { if (pgmap) diff --git a/kernel/memremap.c b/kernel/memremap.c index 64b12c806cc5..3df6cd4ffb40 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -314,7 +314,7 @@ static void devm_memremap_pages_release(struct device *dev, void *data) } /* assumes rcu_read_lock() held at entry */ -struct dev_pagemap *find_dev_pagemap(resource_size_t phys) +static struct dev_pagemap *find_dev_pagemap(resource_size_t phys) { struct page_map *page_map; @@ -501,8 +501,40 @@ struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) return pgmap ? pgmap->altmap : NULL; } -#endif /* CONFIG_ZONE_DEVICE */ +/** + * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn + * @pfn: page frame number to lookup page_map + * @pgmap: optional known pgmap that already has a reference + * + * @pgmap allows the overhead of a lookup to be bypassed when @pfn lands in the + * same mapping. + */ +struct dev_pagemap *get_dev_pagemap(unsigned long pfn, + struct dev_pagemap *pgmap) +{ + const struct resource *res = pgmap ? pgmap->res : NULL; + resource_size_t phys = PFN_PHYS(pfn); + + /* + * In the cached case we're already holding a live reference so + * we can simply do a blind increment + */ + if (res && phys >= res->start && phys <= res->end) { + percpu_ref_get(pgmap->ref); + return pgmap; + } + + /* fall back to slow path lookup */ + rcu_read_lock(); + pgmap = find_dev_pagemap(phys); + if (pgmap && !percpu_ref_tryget_live(pgmap->ref)) + pgmap = NULL; + rcu_read_unlock(); + + return pgmap; +} +#endif /* CONFIG_ZONE_DEVICE */ #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC) void put_zone_device_private_or_public_page(struct page *page) -- cgit v1.2.3-71-gd317 From 0628b8c650718f4dfedfcdc9ed136bf7e394aae7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:54:02 +0100 Subject: memremap: remove to_vmem_altmap All callers are gone now. Signed-off-by: Christoph Hellwig Signed-off-by: Dan Williams --- include/linux/memremap.h | 9 --------- kernel/memremap.c | 26 -------------------------- 2 files changed, 35 deletions(-) (limited to 'include') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 26e8aaba27d5..3fddcfe57bb0 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -26,15 +26,6 @@ struct vmem_altmap { unsigned long alloc; }; -#ifdef CONFIG_ZONE_DEVICE -struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start); -#else -static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) -{ - return NULL; -} -#endif - /* * Specialize ZONE_DEVICE memory into multiple types each having differents * usage. 
diff --git a/kernel/memremap.c b/kernel/memremap.c index 891c77487a6a..b09517439dec 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -476,32 +476,6 @@ void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns) altmap->alloc -= nr_pfns; } -struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) -{ - /* - * 'memmap_start' is the virtual address for the first "struct - * page" in this range of the vmemmap array. In the case of - * CONFIG_SPARSEMEM_VMEMMAP a page_to_pfn conversion is simple - * pointer arithmetic, so we can perform this to_vmem_altmap() - * conversion without concern for the initialization state of - * the struct page fields. - */ - struct page *page = (struct page *) memmap_start; - struct dev_pagemap *pgmap; - - /* - * Unconditionally retrieve a dev_pagemap associated with the - * given physical address, this is only for use in the - * arch_{add|remove}_memory() for setting up and tearing down - * the memmap. - */ - rcu_read_lock(); - pgmap = find_dev_pagemap(__pfn_to_phys(page_to_pfn(page))); - rcu_read_unlock(); - - return pgmap ? pgmap->altmap : NULL; -} - /** * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn * @pfn: page frame number to lookup page_map -- cgit v1.2.3-71-gd317 From e7744aa25cffe26d3767c9ffcf4e130cca1dff00 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 29 Dec 2017 08:54:04 +0100 Subject: memremap: drop private struct page_map 'struct page_map' is a private structure of 'struct dev_pagemap' but the latter replicates all the same fields as the former so there isn't much value in it. Thus drop it in favour of a completely public struct. This is a cleanup in preparation for a more generally useful 'devm_memremap_pages' interface. Signed-off-by: Logan Gunthorpe Signed-off-by: Christoph Hellwig Signed-off-by: Dan Williams --- include/linux/memremap.h | 5 ++-- kernel/memremap.c | 68 ++++++++++++++++++------------------------ mm/hmm.c | 2 +- 3 files changed, 30 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 3fddcfe57bb0..1cb5f39d25c1 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -113,8 +113,9 @@ typedef void (*dev_page_free_t)(struct page *page, void *data); struct dev_pagemap { dev_page_fault_t page_fault; dev_page_free_t page_free; - struct vmem_altmap *altmap; - const struct resource *res; + struct vmem_altmap altmap; + bool altmap_valid; + struct resource res; struct percpu_ref *ref; struct device *dev; void *data; diff --git a/kernel/memremap.c b/kernel/memremap.c index 12e78528fea4..9207c44cce20 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -188,13 +188,6 @@ static RADIX_TREE(pgmap_radix, GFP_KERNEL); #define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1) #define SECTION_SIZE (1UL << PA_SECTION_SHIFT) -struct page_map { - struct resource res; - struct percpu_ref *ref; - struct dev_pagemap pgmap; - struct vmem_altmap altmap; -}; - static unsigned long order_at(struct resource *res, unsigned long pgoff) { unsigned long phys_pgoff = PHYS_PFN(res->start) + pgoff; @@ -260,22 +253,21 @@ static void pgmap_radix_release(struct resource *res) synchronize_rcu(); } -static unsigned long pfn_first(struct page_map *page_map) +static unsigned long pfn_first(struct dev_pagemap *pgmap) { - struct dev_pagemap *pgmap = &page_map->pgmap; - const struct resource *res = &page_map->res; - struct vmem_altmap *altmap = pgmap->altmap; + const struct resource *res = &pgmap->res; + struct 
vmem_altmap *altmap = &pgmap->altmap; unsigned long pfn; pfn = res->start >> PAGE_SHIFT; - if (altmap) + if (pgmap->altmap_valid) pfn += vmem_altmap_offset(altmap); return pfn; } -static unsigned long pfn_end(struct page_map *page_map) +static unsigned long pfn_end(struct dev_pagemap *pgmap) { - const struct resource *res = &page_map->res; + const struct resource *res = &pgmap->res; return (res->start + resource_size(res)) >> PAGE_SHIFT; } @@ -285,13 +277,12 @@ static unsigned long pfn_end(struct page_map *page_map) static void devm_memremap_pages_release(struct device *dev, void *data) { - struct page_map *page_map = data; - struct resource *res = &page_map->res; + struct dev_pagemap *pgmap = data; + struct resource *res = &pgmap->res; resource_size_t align_start, align_size; - struct dev_pagemap *pgmap = &page_map->pgmap; unsigned long pfn; - for_each_device_pfn(pfn, page_map) + for_each_device_pfn(pfn, pgmap) put_page(pfn_to_page(pfn)); if (percpu_ref_tryget_live(pgmap->ref)) { @@ -304,24 +295,22 @@ static void devm_memremap_pages_release(struct device *dev, void *data) align_size = ALIGN(resource_size(res), SECTION_SIZE); mem_hotplug_begin(); - arch_remove_memory(align_start, align_size, pgmap->altmap); + arch_remove_memory(align_start, align_size, pgmap->altmap_valid ? + &pgmap->altmap : NULL); mem_hotplug_done(); untrack_pfn(NULL, PHYS_PFN(align_start), align_size); pgmap_radix_release(res); - dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc, - "%s: failed to free all reserved pages\n", __func__); + dev_WARN_ONCE(dev, pgmap->altmap.alloc, + "%s: failed to free all reserved pages\n", __func__); } /* assumes rcu_read_lock() held at entry */ static struct dev_pagemap *find_dev_pagemap(resource_size_t phys) { - struct page_map *page_map; - WARN_ON_ONCE(!rcu_read_lock_held()); - page_map = radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys)); - return page_map ? 
&page_map->pgmap : NULL; + return radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys)); } /** @@ -349,7 +338,6 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, unsigned long pfn, pgoff, order; pgprot_t pgprot = PAGE_KERNEL; struct dev_pagemap *pgmap; - struct page_map *page_map; int error, nid, is_ram, i = 0; align_start = res->start & ~(SECTION_SIZE - 1); @@ -370,22 +358,20 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (!ref) return ERR_PTR(-EINVAL); - page_map = devres_alloc_node(devm_memremap_pages_release, - sizeof(*page_map), GFP_KERNEL, dev_to_node(dev)); - if (!page_map) + pgmap = devres_alloc_node(devm_memremap_pages_release, + sizeof(*pgmap), GFP_KERNEL, dev_to_node(dev)); + if (!pgmap) return ERR_PTR(-ENOMEM); - pgmap = &page_map->pgmap; - memcpy(&page_map->res, res, sizeof(*res)); + memcpy(&pgmap->res, res, sizeof(*res)); pgmap->dev = dev; if (altmap) { - memcpy(&page_map->altmap, altmap, sizeof(*altmap)); - pgmap->altmap = &page_map->altmap; - altmap = pgmap->altmap; + memcpy(&pgmap->altmap, altmap, sizeof(*altmap)); + pgmap->altmap_valid = true; + altmap = &pgmap->altmap; } pgmap->ref = ref; - pgmap->res = &page_map->res; pgmap->type = MEMORY_DEVICE_HOST; pgmap->page_fault = NULL; pgmap->page_free = NULL; @@ -397,7 +383,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, foreach_order_pgoff(res, order, pgoff) { error = __radix_tree_insert(&pgmap_radix, - PHYS_PFN(res->start) + pgoff, order, page_map); + PHYS_PFN(res->start) + pgoff, order, pgmap); if (error) { dev_err(dev, "%s: failed: %d\n", __func__, error); break; @@ -426,7 +412,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (error) goto err_add_memory; - for_each_device_pfn(pfn, page_map) { + for_each_device_pfn(pfn, pgmap) { struct page *page = pfn_to_page(pfn); /* @@ -441,7 +427,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (!(++i % 1024)) cond_resched(); } - devres_add(dev, page_map); + devres_add(dev, pgmap); return __va(res->start); err_add_memory: @@ -449,7 +435,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, err_pfn_remap: err_radix: pgmap_radix_release(res); - devres_free(page_map); + devres_free(pgmap); return ERR_PTR(error); } EXPORT_SYMBOL(devm_memremap_pages); @@ -482,9 +468,7 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn, * In the cached case we're already holding a live reference. */ if (pgmap) { - const struct resource *res = pgmap ? pgmap->res : NULL; - - if (res && phys >= res->start && phys <= res->end) + if (phys >= pgmap->res.start && phys <= pgmap->res.end) return pgmap; put_dev_pagemap(pgmap); } diff --git a/mm/hmm.c b/mm/hmm.c index 2f2e13c61040..320fdc87f064 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -882,7 +882,7 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem) else devmem->pagemap.type = MEMORY_DEVICE_PRIVATE; - devmem->pagemap.res = devmem->resource; + devmem->pagemap.res = *devmem->resource; devmem->pagemap.page_fault = hmm_devmem_fault; devmem->pagemap.page_free = hmm_devmem_free; devmem->pagemap.dev = devmem->device; -- cgit v1.2.3-71-gd317 From e8d5134833006a46fcbefc5f4a84d0b62bd520e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Dec 2017 08:54:05 +0100 Subject: memremap: change devm_memremap_pages interface to use struct dev_pagemap This new interface is similar to how struct device (and many others) work. The caller initializes a 'struct dev_pagemap' as required and calls 'devm_memremap_pages'. 
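As a rough sketch of the new convention (hypothetical driver; per the updated kernel-doc further below, the res, ref and type members must be initialized by the caller, and altmap/altmap_valid only when an altmap is in use):

	/* hypothetical driver state embedding the pagemap */
	struct my_dev {
		struct percpu_ref ref;	/* must be live on entry */
		struct dev_pagemap pgmap;
	};

	static void *my_dev_map_pages(struct device *dev, struct my_dev *md,
			struct resource *res)
	{
		memcpy(&md->pgmap.res, res, sizeof(*res));
		md->pgmap.ref = &md->ref;
		md->pgmap.type = MEMORY_DEVICE_HOST;
		md->pgmap.altmap_valid = false;	/* no altmap in this sketch */
		return devm_memremap_pages(dev, &md->pgmap);
	}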
This allows the pagemap structure to be embedded in another structure and thus container_of can be used. In this way application-specific members can be stored in a containing struct. This will be used by the P2P infrastructure and HMM could probably be cleaned up to use it as well (instead of having its own, similar 'hmm_devmem_pages_create' function). Signed-off-by: Logan Gunthorpe Signed-off-by: Christoph Hellwig Signed-off-by: Dan Williams --- drivers/dax/pmem.c | 20 ++++++++------- drivers/nvdimm/nd.h | 9 +++---- drivers/nvdimm/pfn_devs.c | 27 ++++++++++++--------- drivers/nvdimm/pmem.c | 37 +++++++++++++++------------- drivers/nvdimm/pmem.h | 1 + include/linux/memremap.h | 6 ++--- kernel/memremap.c | 51 ++++++++++++++++----------------------- tools/testing/nvdimm/test/iomap.c | 7 +++--- 8 files changed, 77 insertions(+), 81 deletions(-) (limited to 'include') diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c index 8d8c852ba8f2..31b6ecce4c64 100644 --- a/drivers/dax/pmem.c +++ b/drivers/dax/pmem.c @@ -21,6 +21,7 @@ struct dax_pmem { struct device *dev; struct percpu_ref ref; + struct dev_pagemap pgmap; struct completion cmp; }; @@ -69,20 +70,23 @@ static int dax_pmem_probe(struct device *dev) struct nd_namespace_common *ndns; struct nd_dax *nd_dax = to_nd_dax(dev); struct nd_pfn *nd_pfn = &nd_dax->nd_pfn; - struct vmem_altmap __altmap, *altmap = NULL; ndns = nvdimm_namespace_common_probe(dev); if (IS_ERR(ndns)) return PTR_ERR(ndns); nsio = to_nd_namespace_io(&ndns->dev); + dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL); + if (!dax_pmem) + return -ENOMEM; + /* parse the 'pfn' info block via ->rw_bytes */ rc = devm_nsio_enable(dev, nsio); if (rc) return rc; - altmap = nvdimm_setup_pfn(nd_pfn, &res, &__altmap); - if (IS_ERR(altmap)) - return PTR_ERR(altmap); + rc = nvdimm_setup_pfn(nd_pfn, &dax_pmem->pgmap); + if (rc) + return rc; devm_nsio_disable(dev, nsio); pfn_sb = nd_pfn->pfn_sb; @@ -94,10 +98,6 @@ static int dax_pmem_probe(struct device *dev) return -EBUSY; } - dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL); - if (!dax_pmem) - return -ENOMEM; - dax_pmem->dev = dev; init_completion(&dax_pmem->cmp); rc = percpu_ref_init(&dax_pmem->ref, dax_pmem_percpu_release, 0, @@ -110,7 +110,8 @@ static int dax_pmem_probe(struct device *dev) if (rc) return rc; - addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap); + dax_pmem->pgmap.ref = &dax_pmem->ref; + addr = devm_memremap_pages(dev, &dax_pmem->pgmap); if (IS_ERR(addr)) return PTR_ERR(addr); @@ -120,6 +121,7 @@ static int dax_pmem_probe(struct device *dev) return rc; /* adjust the dax_region resource to the start of data */ + memcpy(&res, &dax_pmem->pgmap.res, sizeof(res)); res.start += le64_to_cpu(pfn_sb->dataoff); rc = sscanf(dev_name(&ndns->dev), "namespace%d.%d", &region_id, &id); diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index e958f3724c41..8d6375ee0fda 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -368,15 +368,14 @@ unsigned int pmem_sector_size(struct nd_namespace_common *ndns); void nvdimm_badblocks_populate(struct nd_region *nd_region, struct badblocks *bb, const struct resource *res); #if IS_ENABLED(CONFIG_ND_CLAIM) -struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn, - struct resource *res, struct vmem_altmap *altmap); +int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap); int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio); void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio); #else -static inline struct 
vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn, - struct resource *res, struct vmem_altmap *altmap) +static inline int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, + struct dev_pagemap *pgmap) { - return ERR_PTR(-ENXIO); + return -ENXIO; } static inline int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 2adada1a5855..f5c4e8c6e29d 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -542,9 +542,10 @@ static unsigned long init_altmap_reserve(resource_size_t base) return reserve; } -static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn, - struct resource *res, struct vmem_altmap *altmap) +static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) { + struct resource *res = &pgmap->res; + struct vmem_altmap *altmap = &pgmap->altmap; struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; u64 offset = le64_to_cpu(pfn_sb->dataoff); u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); @@ -561,11 +562,13 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn, res->start += start_pad; res->end -= end_trunc; + pgmap->type = MEMORY_DEVICE_HOST; + if (nd_pfn->mode == PFN_MODE_RAM) { if (offset < SZ_8K) - return ERR_PTR(-EINVAL); + return -EINVAL; nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); - altmap = NULL; + pgmap->altmap_valid = false; } else if (nd_pfn->mode == PFN_MODE_PMEM) { nd_pfn->npfns = PFN_SECTION_ALIGN_UP((resource_size(res) - offset) / PAGE_SIZE); @@ -577,10 +580,11 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn, memcpy(altmap, &__altmap, sizeof(*altmap)); altmap->free = PHYS_PFN(offset - SZ_8K); altmap->alloc = 0; + pgmap->altmap_valid = true; } else - return ERR_PTR(-ENXIO); + return -ENXIO; - return altmap; + return 0; } static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys) @@ -708,19 +712,18 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) * Determine the effective resource range and vmem_altmap from an nd_pfn * instance. 
*/ -struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn, - struct resource *res, struct vmem_altmap *altmap) +int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) { int rc; if (!nd_pfn->uuid || !nd_pfn->ndns) - return ERR_PTR(-ENODEV); + return -ENODEV; rc = nd_pfn_init(nd_pfn); if (rc) - return ERR_PTR(rc); + return rc; - /* we need a valid pfn_sb before we can init a vmem_altmap */ - return __nvdimm_setup_pfn(nd_pfn, res, altmap); + /* we need a valid pfn_sb before we can init a dev_pagemap */ + return __nvdimm_setup_pfn(nd_pfn, pgmap); } EXPORT_SYMBOL_GPL(nvdimm_setup_pfn); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 7fbc5c5dc8e1..cf074b1ce219 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -298,34 +298,34 @@ static int pmem_attach_disk(struct device *dev, { struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); struct nd_region *nd_region = to_nd_region(dev->parent); - struct vmem_altmap __altmap, *altmap = NULL; int nid = dev_to_node(dev), fua, wbc; struct resource *res = &nsio->res; + struct resource bb_res; struct nd_pfn *nd_pfn = NULL; struct dax_device *dax_dev; struct nd_pfn_sb *pfn_sb; struct pmem_device *pmem; - struct resource pfn_res; struct request_queue *q; struct device *gendev; struct gendisk *disk; void *addr; + int rc; + + pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL); + if (!pmem) + return -ENOMEM; /* while nsio_rw_bytes is active, parse a pfn info block if present */ if (is_nd_pfn(dev)) { nd_pfn = to_nd_pfn(dev); - altmap = nvdimm_setup_pfn(nd_pfn, &pfn_res, &__altmap); - if (IS_ERR(altmap)) - return PTR_ERR(altmap); + rc = nvdimm_setup_pfn(nd_pfn, &pmem->pgmap); + if (rc) + return rc; } /* we're attaching a block device, disable raw namespace access */ devm_nsio_disable(dev, nsio); - pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL); - if (!pmem) - return -ENOMEM; - dev_set_drvdata(dev, pmem); pmem->phys_addr = res->start; pmem->size = resource_size(res); @@ -350,19 +350,22 @@ static int pmem_attach_disk(struct device *dev, return -ENOMEM; pmem->pfn_flags = PFN_DEV; + pmem->pgmap.ref = &q->q_usage_counter; if (is_nd_pfn(dev)) { - addr = devm_memremap_pages(dev, &pfn_res, &q->q_usage_counter, - altmap); + addr = devm_memremap_pages(dev, &pmem->pgmap); pfn_sb = nd_pfn->pfn_sb; pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); - pmem->pfn_pad = resource_size(res) - resource_size(&pfn_res); + pmem->pfn_pad = resource_size(res) - + resource_size(&pmem->pgmap.res); pmem->pfn_flags |= PFN_MAP; - res = &pfn_res; /* for badblocks populate */ - res->start += pmem->data_offset; + memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res)); + bb_res.start += pmem->data_offset; } else if (pmem_should_map_pages(dev)) { - addr = devm_memremap_pages(dev, &nsio->res, - &q->q_usage_counter, NULL); + memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res)); + pmem->pgmap.altmap_valid = false; + addr = devm_memremap_pages(dev, &pmem->pgmap); pmem->pfn_flags |= PFN_MAP; + memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res)); } else addr = devm_memremap(dev, pmem->phys_addr, pmem->size, ARCH_MEMREMAP_PMEM); @@ -401,7 +404,7 @@ static int pmem_attach_disk(struct device *dev, / 512); if (devm_init_badblocks(dev, &pmem->bb)) return -ENOMEM; - nvdimm_badblocks_populate(nd_region, &pmem->bb, res); + nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_res); disk->bb = &pmem->bb; dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops); diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h index 
6a3cd2a10db6..a64ebc78b5df 100644 --- a/drivers/nvdimm/pmem.h +++ b/drivers/nvdimm/pmem.h @@ -22,6 +22,7 @@ struct pmem_device { struct badblocks bb; struct dax_device *dax_dev; struct gendisk *disk; + struct dev_pagemap pgmap; }; long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 1cb5f39d25c1..7b4899c06f49 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -123,8 +123,7 @@ struct dev_pagemap { }; #ifdef CONFIG_ZONE_DEVICE -void *devm_memremap_pages(struct device *dev, struct resource *res, - struct percpu_ref *ref, struct vmem_altmap *altmap); +void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); struct dev_pagemap *get_dev_pagemap(unsigned long pfn, struct dev_pagemap *pgmap); @@ -134,8 +133,7 @@ void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns); static inline bool is_zone_device_page(const struct page *page); #else static inline void *devm_memremap_pages(struct device *dev, - struct resource *res, struct percpu_ref *ref, - struct vmem_altmap *altmap) + struct dev_pagemap *pgmap) { /* * Fail attempts to call devm_memremap_pages() without diff --git a/kernel/memremap.c b/kernel/memremap.c index 9207c44cce20..a9a948cd3d7f 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -275,9 +275,10 @@ static unsigned long pfn_end(struct dev_pagemap *pgmap) #define for_each_device_pfn(pfn, map) \ for (pfn = pfn_first(map); pfn < pfn_end(map); pfn++) -static void devm_memremap_pages_release(struct device *dev, void *data) +static void devm_memremap_pages_release(void *data) { struct dev_pagemap *pgmap = data; + struct device *dev = pgmap->dev; struct resource *res = &pgmap->res; resource_size_t align_start, align_size; unsigned long pfn; @@ -316,29 +317,34 @@ static struct dev_pagemap *find_dev_pagemap(resource_size_t phys) /** * devm_memremap_pages - remap and provide memmap backing for the given resource * @dev: hosting device for @res - * @res: "host memory" address range - * @ref: a live per-cpu reference count - * @altmap: optional descriptor for allocating the memmap from @res + * @pgmap: pointer to a struct dev_pgmap * * Notes: - * 1/ @ref must be 'live' on entry and 'dead' before devm_memunmap_pages() time - * (or devm release event). The expected order of events is that @ref has + * 1/ At a minimum the res, ref and type members of @pgmap must be initialized + * by the caller before passing it to this function + * + * 2/ The altmap field may optionally be initialized, in which case altmap_valid + * must be set to true + * + * 3/ pgmap.ref must be 'live' on entry and 'dead' before devm_memunmap_pages() + * time (or devm release event). The expected order of events is that ref has * been through percpu_ref_kill() before devm_memremap_pages_release(). The * wait for the completion of all references being dropped and * percpu_ref_exit() must occur after devm_memremap_pages_release(). * - * 2/ @res is expected to be a host memory range that could feasibly be + * 4/ res is expected to be a host memory range that could feasibly be * treated as a "System RAM" range, i.e. not a device mmio range, but * this is not enforced. */ -void *devm_memremap_pages(struct device *dev, struct resource *res, - struct percpu_ref *ref, struct vmem_altmap *altmap) +void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) { resource_size_t align_start, align_size, align_end; + struct vmem_altmap *altmap = pgmap->altmap_valid ? 
+ &pgmap->altmap : NULL; unsigned long pfn, pgoff, order; pgprot_t pgprot = PAGE_KERNEL; - struct dev_pagemap *pgmap; int error, nid, is_ram, i = 0; + struct resource *res = &pgmap->res; align_start = res->start & ~(SECTION_SIZE - 1); align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) @@ -355,27 +361,10 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (is_ram == REGION_INTERSECTS) return __va(res->start); - if (!ref) + if (!pgmap->ref) return ERR_PTR(-EINVAL); - pgmap = devres_alloc_node(devm_memremap_pages_release, - sizeof(*pgmap), GFP_KERNEL, dev_to_node(dev)); - if (!pgmap) - return ERR_PTR(-ENOMEM); - - memcpy(&pgmap->res, res, sizeof(*res)); - pgmap->dev = dev; - if (altmap) { - memcpy(&pgmap->altmap, altmap, sizeof(*altmap)); - pgmap->altmap_valid = true; - altmap = &pgmap->altmap; - } - pgmap->ref = ref; - pgmap->type = MEMORY_DEVICE_HOST; - pgmap->page_fault = NULL; - pgmap->page_free = NULL; - pgmap->data = NULL; mutex_lock(&pgmap_lock); error = 0; @@ -423,11 +412,13 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, */ list_del(&page->lru); page->pgmap = pgmap; - percpu_ref_get(ref); + percpu_ref_get(pgmap->ref); if (!(++i % 1024)) cond_resched(); } - devres_add(dev, pgmap); + + devm_add_action(dev, devm_memremap_pages_release, pgmap); + return __va(res->start); err_add_memory: diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index e1f75a1914a1..ff9d3a5825e1 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -104,15 +104,14 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset, } EXPORT_SYMBOL(__wrap_devm_memremap); -void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res, - struct percpu_ref *ref, struct vmem_altmap *altmap) +void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) { - resource_size_t offset = res->start; + resource_size_t offset = pgmap->res.start; struct nfit_test_resource *nfit_res = get_nfit_res(offset); if (nfit_res) return nfit_res->buf + offset - nfit_res->res.start; - return devm_memremap_pages(dev, res, ref, altmap); + return devm_memremap_pages(dev, pgmap); } EXPORT_SYMBOL(__wrap_devm_memremap_pages); -- cgit v1.2.3-71-gd317 From 785a3fab4adbf91b2189c928a59ae219c54ba95e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 23 Oct 2017 07:20:00 -0700 Subject: mm, dax: introduce pfn_t_special() In support of removing the VM_MIXEDMAP indication from DAX VMAs, introduce pfn_t_special() for drivers to indicate that _PAGE_SPECIAL should be used for DAX ptes. This also helps identify drivers like dcssblk that only want to use DAX in a read-only fashion without get_user_pages() support. Ideally we could delete axonram and dcssblk DAX support, but if we need to keep it, better to make it explicit that axonram and dcssblk only support a subset of DAX due to missing _PAGE_DEVMAP support. 
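A minimal sketch of the intended use in a read-only driver's ->direct_access() path (hypothetical helper, modeled on the dcssblk and axonram conversions below):

	static long ro_dax_direct_access(phys_addr_t base, resource_size_t size,
			pgoff_t pgoff, void **kaddr, pfn_t *pfn)
	{
		resource_size_t offset = pgoff * PAGE_SIZE;

		*kaddr = __va(base) + offset;
		/* PFN_SPECIAL: ptes are inserted as _PAGE_SPECIAL, so no
		 * VM_MIXEDMAP (and no get_user_pages() support) is needed */
		*pfn = phys_to_pfn_t(base + offset, PFN_DEV | PFN_SPECIAL);
		return (size - offset) / PAGE_SIZE;
	}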
Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Dan Williams --- arch/powerpc/sysdev/axonram.c | 2 +- drivers/s390/block/dcssblk.c | 3 ++- include/linux/pfn_t.h | 13 +++++++++++++ mm/memory.c | 16 +++++++++++++++- 4 files changed, 31 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index 1b307c80b401..cdbb0e59b3d3 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -151,7 +151,7 @@ __axon_ram_direct_access(struct axon_ram_bank *bank, pgoff_t pgoff, long nr_page resource_size_t offset = pgoff * PAGE_SIZE; *kaddr = (void *) bank->io_addr + offset; - *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV); + *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV|PFN_SPECIAL); return (bank->size - offset) / PAGE_SIZE; } diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 6aaefb780436..9cae08b36b80 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -916,7 +916,8 @@ __dcssblk_direct_access(struct dcssblk_dev_info *dev_info, pgoff_t pgoff, dev_sz = dev_info->end - dev_info->start + 1; *kaddr = (void *) dev_info->start + offset; - *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV); + *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), + PFN_DEV|PFN_SPECIAL); return (dev_sz - offset) / PAGE_SIZE; } diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h index 43b1d7648e82..a03c2642a87c 100644 --- a/include/linux/pfn_t.h +++ b/include/linux/pfn_t.h @@ -15,8 +15,10 @@ #define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2)) #define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3)) #define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4)) +#define PFN_SPECIAL (1ULL << (BITS_PER_LONG_LONG - 5)) #define PFN_FLAGS_TRACE \ + { PFN_SPECIAL, "SPECIAL" }, \ { PFN_SG_CHAIN, "SG_CHAIN" }, \ { PFN_SG_LAST, "SG_LAST" }, \ { PFN_DEV, "DEV" }, \ @@ -120,4 +122,15 @@ pud_t pud_mkdevmap(pud_t pud); #endif #endif /* __HAVE_ARCH_PTE_DEVMAP */ +#ifdef __HAVE_ARCH_PTE_SPECIAL +static inline bool pfn_t_special(pfn_t pfn) +{ + return (pfn.val & PFN_SPECIAL) == PFN_SPECIAL; +} +#else +static inline bool pfn_t_special(pfn_t pfn) +{ + return false; +} +#endif /* __HAVE_ARCH_PTE_SPECIAL */ #endif /* _LINUX_PFN_T_H_ */ diff --git a/mm/memory.c b/mm/memory.c index ca5674cbaff2..46b6c33b7f04 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1897,12 +1897,26 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, } EXPORT_SYMBOL(vm_insert_pfn_prot); +static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn) +{ + /* these checks mirror the abort conditions in vm_normal_page */ + if (vma->vm_flags & VM_MIXEDMAP) + return true; + if (pfn_t_devmap(pfn)) + return true; + if (pfn_t_special(pfn)) + return true; + if (is_zero_pfn(pfn_t_to_pfn(pfn))) + return true; + return false; +} + static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn, bool mkwrite) { pgprot_t pgprot = vma->vm_page_prot; - BUG_ON(!(vma->vm_flags & VM_MIXEDMAP)); + BUG_ON(!vm_mixed_ok(vma, pfn)); if (addr < vma->vm_start || addr >= vma->vm_end) return -EFAULT; -- cgit v1.2.3-71-gd317 From 06e8ccdab15f46dfd31292e2b75d744bc5fc2a7c Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 31 Jan 2018 12:45:38 -0700 Subject: acpi: nfit: Add support for detecting platform CPU cache flush on power loss In ACPI 6.2a the platform capability structure has been added to the NFIT tables. 
That provides software the ability to determine whether a system supports the auto flushing of CPU caches on power loss. If the capability is supported, we do not need to do dax_flush(). Plumb the path to set the property per region from the NFIT tables. This patch depends on the ACPI NFIT 6.2a platform capabilities support code in include/acpi/actbl1.h. Signed-off-by: Dave Jiang Reviewed-by: Ross Zwisler Signed-off-by: Ross Zwisler --- drivers/acpi/nfit/core.c | 20 ++++++++++++++++++++ drivers/acpi/nfit/nfit.h | 1 + drivers/nvdimm/pmem.c | 4 +++- include/linux/libnvdimm.h | 5 +++++ 4 files changed, 29 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index abeb4df4f22e..dc775823aea0 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -838,6 +838,18 @@ static bool add_flush(struct acpi_nfit_desc *acpi_desc, return true; } +static bool add_platform_cap(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_capabilities *pcap) +{ + struct device *dev = acpi_desc->dev; + u32 mask; + + mask = (1 << (pcap->highest_capability + 1)) - 1; + acpi_desc->platform_cap = pcap->capabilities & mask; + dev_dbg(dev, "%s: cap: %#x\n", __func__, acpi_desc->platform_cap); + return true; +} + static void *add_table(struct acpi_nfit_desc *acpi_desc, struct nfit_table_prev *prev, void *table, const void *end) { @@ -883,6 +895,10 @@ static void *add_table(struct acpi_nfit_desc *acpi_desc, case ACPI_NFIT_TYPE_SMBIOS: dev_dbg(dev, "%s: smbios\n", __func__); break; + case ACPI_NFIT_TYPE_CAPABILITIES: + if (!add_platform_cap(acpi_desc, table)) + return err; + break; default: dev_err(dev, "unknown table '%d' parsing nfit\n", hdr->type); break; @@ -2656,6 +2672,9 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, else ndr_desc->numa_node = NUMA_NO_NODE; + if(acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_CACHE_FLUSH) + set_bit(ND_REGION_PERSIST_CACHE, &ndr_desc->flags); + list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev; struct nd_mapping_desc *mapping; @@ -3464,6 +3483,7 @@ static __init int nfit_init(void) BUILD_BUG_ON(sizeof(struct acpi_nfit_smbios) != 9); BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80); BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40); + BUILD_BUG_ON(sizeof(struct acpi_nfit_capabilities) != 16); guid_parse(UUID_VOLATILE_MEMORY, &nfit_uuid[NFIT_SPA_VOLATILE]); guid_parse(UUID_PERSISTENT_MEMORY, &nfit_uuid[NFIT_SPA_PM]); diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index f0cf18b2da8b..50d36e166d70 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -202,6 +202,7 @@ struct acpi_nfit_desc { unsigned long dimm_cmd_force_en; unsigned long bus_cmd_force_en; unsigned long bus_nfit_cmd_force_en; + unsigned int platform_cap; int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, void *iobuf, u64 len, int rw); }; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 7fbc5c5dc8e1..8aa542398db4 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -35,6 +35,7 @@ #include "pmem.h" #include "pfn.h" #include "nd.h" +#include "nd-core.h" static struct device *to_dev(struct pmem_device *pmem) { @@ -334,7 +335,8 @@ static int pmem_attach_disk(struct device *dev, dev_warn(dev, "unable to guarantee persistence of writes\n"); fua = 0; } - wbc = nvdimm_has_cache(nd_region); + wbc = nvdimm_has_cache(nd_region) && 
!test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags); if (!devm_request_mem_region(dev, res->start, resource_size(res), dev_name(&ndns->dev))) { diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index f8109ddb5ef1..f2fc0da4da04 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -47,6 +47,11 @@ enum { /* region flag indicating to direct-map persistent memory by default */ ND_REGION_PAGEMAP = 0, + /* + * Platform ensures entire CPU store data path is flushed to pmem on + * system power loss. + */ + ND_REGION_PERSIST_CACHE = 1, /* mark newly adjusted resources as requiring a label update */ DPA_RESOURCE_ADJUSTED = 1 << 0, -- cgit v1.2.3-71-gd317 From 30e6d7bf29daa79d80711d35211c9b60894dbc44 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 31 Jan 2018 12:45:43 -0700 Subject: acpi: nfit: add persistent memory control flag for nd_region Propagate the ADR attribute flag from the NFIT platform capabilities sub-table to nd_region. Signed-off-by: Dave Jiang Reviewed-by: Ross Zwisler Signed-off-by: Ross Zwisler --- drivers/acpi/nfit/core.c | 3 +++ include/linux/libnvdimm.h | 6 ++++++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index dc775823aea0..aa9d00db763a 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -2675,6 +2675,9 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, if(acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_CACHE_FLUSH) set_bit(ND_REGION_PERSIST_CACHE, &ndr_desc->flags); + if (acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_MEM_FLUSH) + set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc->flags); + list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev; struct nd_mapping_desc *mapping; diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index f2fc0da4da04..ff855ed965fb 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -52,6 +52,12 @@ enum { * system power loss. */ ND_REGION_PERSIST_CACHE = 1, + /* + * Platform provides mechanisms to automatically flush outstanding + * write data from memory controler to pmem on system power loss. + * (ADR) + */ + ND_REGION_PERSIST_MEMCTRL = 2, /* mark newly adjusted resources as requiring a label update */ DPA_RESOURCE_ADJUSTED = 1 << 0, -- cgit v1.2.3-71-gd317 From f2ba5a5baecf795c2150826bd0c95fc3f7f3d226 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 1 Feb 2018 21:27:22 -0800 Subject: libnvdimm, namespace: make min namespace size 4K The arbitrary 4MB minimum namespace size turns out to be too large for some environments. Quoting Cheng-mean Liu: In the case of emulated NVDIMM devices in the VM environment, there are scenarios that NVDIMM device with much smaller sizes are desired, for example, we might use a single enumerated NVDIMM DAX device for representing each container layer, which in some cases could be just a few KBs size. PAGE_SIZE is the minimum where we can still support DAX of at least a single page. 
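For illustration, assuming 4K pages the floor drops from 4 MiB (0x00400000) to 4 KiB; a hypothetical provisioning check would now accept single-page namespaces:

	/* hypothetical validation a provisioning path might apply */
	static int validate_namespace_size(resource_size_t size)
	{
		if (size < ND_MIN_NAMESPACE_SIZE)	/* now PAGE_SIZE */
			return -ENOSPC;
		return 0;
	}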
Cc: Matthew Wilcox Reported-by: Cheng-mean Liu Signed-off-by: Dan Williams --- include/uapi/linux/ndctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 30ef1236aafa..7e27070b9440 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -209,7 +209,7 @@ enum nd_driver_flags { }; enum { - ND_MIN_NAMESPACE_SIZE = 0x00400000, + ND_MIN_NAMESPACE_SIZE = PAGE_SIZE, }; enum ars_masks { -- cgit v1.2.3-71-gd317