From 0aed55af88345b5d673240f90e671d79662fb01e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 12:22:50 -0700 Subject: x86, uaccess: introduce copy_from_iter_flushcache for pmem / cache-bypass operations The pmem driver has a need to transfer data with a persistent memory destination and be able to rely on the fact that the destination writes are not cached. It is sufficient for the writes to be flushed to a cpu-store-buffer (non-temporal / "movnt" in x86 terms), as we expect userspace to call fsync() to ensure data-writes have reached a power-fail-safe zone in the platform. The fsync() triggers a REQ_FUA or REQ_FLUSH to the pmem driver which will turn around and fence previous writes with an "sfence". Implement a __copy_from_user_inatomic_flushcache, memcpy_page_flushcache, and memcpy_flushcache, that guarantee that the destination buffer is not dirty in the cpu cache on completion. The new copy_from_iter_flushcache and sub-routines will be used to replace the "pmem api" (include/linux/pmem.h + arch/x86/include/asm/pmem.h). The availability of copy_from_iter_flushcache() and memcpy_flushcache() are gated by the CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE config symbol, and fallback to copy_from_iter_nocache() and plain memcpy() otherwise. This is meant to satisfy the concern from Linus that if a driver wants to do something beyond the normal nocache semantics it should be something private to that driver [1], and Al's concern that anything uaccess related belongs with the rest of the uaccess code [2]. The first consumer of this interface is a new 'copy_from_iter' dax operation so that pmem can inject cache maintenance operations without imposing this overhead on other dax-capable drivers. [1]: https://lists.01.org/pipermail/linux-nvdimm/2017-January/008364.html [2]: https://lists.01.org/pipermail/linux-nvdimm/2017-April/009942.html Cc: Cc: Jan Kara Cc: Jeff Moyer Cc: Ingo Molnar Cc: Christoph Hellwig Cc: Toshi Kani Cc: "H. Peter Anvin" Cc: Al Viro Cc: Thomas Gleixner Cc: Matthew Wilcox Reviewed-by: Ross Zwisler Signed-off-by: Dan Williams --- arch/x86/Kconfig | 1 + arch/x86/include/asm/string_64.h | 5 ++ arch/x86/include/asm/uaccess_64.h | 11 ++++ arch/x86/lib/usercopy_64.c | 128 ++++++++++++++++++++++++++++++++++++++ drivers/acpi/nfit/core.c | 3 +- drivers/nvdimm/claim.c | 2 +- drivers/nvdimm/pmem.c | 13 +++- drivers/nvdimm/region_devs.c | 4 +- include/linux/dax.h | 3 + include/linux/string.h | 6 ++ include/linux/uio.h | 15 +++++ lib/Kconfig | 3 + lib/iov_iter.c | 22 +++++++ 13 files changed, 209 insertions(+), 7 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4ccfacc7232a..bb273b2f50b5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -54,6 +54,7 @@ config X86 select ARCH_HAS_KCOV if X86_64 select ARCH_HAS_MMIO_FLUSH select ARCH_HAS_PMEM_API if X86_64 + select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 select ARCH_HAS_SET_MEMORY select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 733bae07fb29..1f22bc277c45 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -109,6 +109,11 @@ memcpy_mcsafe(void *dst, const void *src, size_t cnt) return 0; } +#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE +#define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1 +void memcpy_flushcache(void *dst, const void *src, size_t cnt); +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_X86_STRING_64_H */ diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index c5504b9a472e..b16f6a1d8b26 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -171,6 +171,10 @@ unsigned long raw_copy_in_user(void __user *dst, const void __user *src, unsigne extern long __copy_user_nocache(void *dst, const void __user *src, unsigned size, int zerorest); +extern long __copy_user_flushcache(void *dst, const void __user *src, unsigned size); +extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset, + size_t len); + static inline int __copy_from_user_inatomic_nocache(void *dst, const void __user *src, unsigned size) @@ -179,6 +183,13 @@ __copy_from_user_inatomic_nocache(void *dst, const void __user *src, return __copy_user_nocache(dst, src, size, 0); } +static inline int +__copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) +{ + kasan_check_write(dst, size); + return __copy_user_flushcache(dst, src, size); +} + unsigned long copy_user_handle_tail(char *to, char *from, unsigned len); diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 3b7c40a2e3e1..f42d2fd86ca3 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -7,6 +7,7 @@ */ #include #include +#include /* * Zero Userspace @@ -73,3 +74,130 @@ copy_user_handle_tail(char *to, char *from, unsigned len) clac(); return len; } + +#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE +/** + * clean_cache_range - write back a cache range with CLWB + * @vaddr: virtual start address + * @size: number of bytes to write back + * + * Write back a cache range using the CLWB (cache line write back) + * instruction. Note that @size is internally rounded up to be cache + * line size aligned. + */ +static void clean_cache_range(void *addr, size_t size) +{ + u16 x86_clflush_size = boot_cpu_data.x86_clflush_size; + unsigned long clflush_mask = x86_clflush_size - 1; + void *vend = addr + size; + void *p; + + for (p = (void *)((unsigned long)addr & ~clflush_mask); + p < vend; p += x86_clflush_size) + clwb(p); +} + +long __copy_user_flushcache(void *dst, const void __user *src, unsigned size) +{ + unsigned long flushed, dest = (unsigned long) dst; + long rc = __copy_user_nocache(dst, src, size, 0); + + /* + * __copy_user_nocache() uses non-temporal stores for the bulk + * of the transfer, but we need to manually flush if the + * transfer is unaligned. A cached memory copy is used when + * destination or size is not naturally aligned. That is: + * - Require 8-byte alignment when size is 8 bytes or larger. + * - Require 4-byte alignment when size is 4 bytes. + */ + if (size < 8) { + if (!IS_ALIGNED(dest, 4) || size != 4) + clean_cache_range(dst, 1); + } else { + if (!IS_ALIGNED(dest, 8)) { + dest = ALIGN(dest, boot_cpu_data.x86_clflush_size); + clean_cache_range(dst, 1); + } + + flushed = dest - (unsigned long) dst; + if (size > flushed && !IS_ALIGNED(size - flushed, 8)) + clean_cache_range(dst + size - 1, 1); + } + + return rc; +} + +void memcpy_flushcache(void *_dst, const void *_src, size_t size) +{ + unsigned long dest = (unsigned long) _dst; + unsigned long source = (unsigned long) _src; + + /* cache copy and flush to align dest */ + if (!IS_ALIGNED(dest, 8)) { + unsigned len = min_t(unsigned, size, ALIGN(dest, 8) - dest); + + memcpy((void *) dest, (void *) source, len); + clean_cache_range((void *) dest, len); + dest += len; + source += len; + size -= len; + if (!size) + return; + } + + /* 4x8 movnti loop */ + while (size >= 32) { + asm("movq (%0), %%r8\n" + "movq 8(%0), %%r9\n" + "movq 16(%0), %%r10\n" + "movq 24(%0), %%r11\n" + "movnti %%r8, (%1)\n" + "movnti %%r9, 8(%1)\n" + "movnti %%r10, 16(%1)\n" + "movnti %%r11, 24(%1)\n" + :: "r" (source), "r" (dest) + : "memory", "r8", "r9", "r10", "r11"); + dest += 32; + source += 32; + size -= 32; + } + + /* 1x8 movnti loop */ + while (size >= 8) { + asm("movq (%0), %%r8\n" + "movnti %%r8, (%1)\n" + :: "r" (source), "r" (dest) + : "memory", "r8"); + dest += 8; + source += 8; + size -= 8; + } + + /* 1x4 movnti loop */ + while (size >= 4) { + asm("movl (%0), %%r8d\n" + "movnti %%r8d, (%1)\n" + :: "r" (source), "r" (dest) + : "memory", "r8"); + dest += 4; + source += 4; + size -= 4; + } + + /* cache copy for remaining bytes */ + if (size) { + memcpy((void *) dest, (void *) source, size); + clean_cache_range((void *) dest, size); + } +} +EXPORT_SYMBOL_GPL(memcpy_flushcache); + +void memcpy_page_flushcache(char *to, struct page *page, size_t offset, + size_t len) +{ + char *from = kmap_atomic(page); + + memcpy_flushcache(to, from + offset, len); + kunmap_atomic(from); +} +#endif diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 656acb5d7166..cbd5596e7562 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1842,8 +1842,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, } if (rw) - memcpy_to_pmem(mmio->addr.aperture + offset, - iobuf + copied, c); + memcpy_flushcache(mmio->addr.aperture + offset, iobuf + copied, c); else { if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH) mmio_flush_range((void __force *) diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index 7ceb5fa4f2a1..b8b9c8ca7862 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -277,7 +277,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, rc = -EIO; } - memcpy_to_pmem(nsio->addr + offset, buf, size); + memcpy_flushcache(nsio->addr + offset, buf, size); nvdimm_flush(to_nd_region(ndns->dev.parent)); return rc; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index c544d466ea51..2f3aefe565c6 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include "pmem.h" @@ -80,7 +81,7 @@ static void write_pmem(void *pmem_addr, struct page *page, { void *mem = kmap_atomic(page); - memcpy_to_pmem(pmem_addr, mem + off, len); + memcpy_flushcache(pmem_addr, mem + off, len); kunmap_atomic(mem); } @@ -235,8 +236,15 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev, return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn); } +static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + return copy_from_iter_flushcache(addr, bytes, i); +} + static const struct dax_operations pmem_dax_ops = { .direct_access = pmem_dax_direct_access, + .copy_from_iter = pmem_copy_from_iter, }; static void pmem_release_queue(void *q) @@ -294,7 +302,8 @@ static int pmem_attach_disk(struct device *dev, dev_set_drvdata(dev, pmem); pmem->phys_addr = res->start; pmem->size = resource_size(res); - if (nvdimm_has_flush(nd_region) < 0) + if (!IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) + || nvdimm_has_flush(nd_region) < 0) dev_warn(dev, "unable to guarantee persistence of writes\n"); if (!devm_request_mem_region(dev, res->start, resource_size(res), diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index b550edf2571f..985b0e11bd73 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -1015,8 +1015,8 @@ void nvdimm_flush(struct nd_region *nd_region) * The first wmb() is needed to 'sfence' all previous writes * such that they are architecturally visible for the platform * buffer flush. Note that we've already arranged for pmem - * writes to avoid the cache via arch_memcpy_to_pmem(). The - * final wmb() ensures ordering for the NVDIMM flush write. + * writes to avoid the cache via memcpy_flushcache(). The final + * wmb() ensures ordering for the NVDIMM flush write. */ wmb(); for (i = 0; i < nd_region->ndr_mappings; i++) diff --git a/include/linux/dax.h b/include/linux/dax.h index 5ec1f6c47716..bbe79ed90e2b 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -16,6 +16,9 @@ struct dax_operations { */ long (*direct_access)(struct dax_device *, pgoff_t, long, void **, pfn_t *); + /* copy_from_iter: dax-driver override for default copy_from_iter */ + size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, + struct iov_iter *); }; #if IS_ENABLED(CONFIG_DAX) diff --git a/include/linux/string.h b/include/linux/string.h index 537918f8a98e..7439d83eaa33 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -122,6 +122,12 @@ static inline __must_check int memcpy_mcsafe(void *dst, const void *src, return 0; } #endif +#ifndef __HAVE_ARCH_MEMCPY_FLUSHCACHE +static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt) +{ + memcpy(dst, src, cnt); +} +#endif void *memchr_inv(const void *s, int c, size_t n); char *strreplace(char *s, char old, char new); diff --git a/include/linux/uio.h b/include/linux/uio.h index f2d36a3d3005..55cd54a0e941 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -95,6 +95,21 @@ size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i); size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i); bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i); size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i); +#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE +/* + * Note, users like pmem that depend on the stricter semantics of + * copy_from_iter_flushcache() than copy_from_iter_nocache() must check for + * IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) before assuming that the + * destination is flushed from the cache on return. + */ +size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i); +#else +static inline size_t copy_from_iter_flushcache(void *addr, size_t bytes, + struct iov_iter *i) +{ + return copy_from_iter_nocache(addr, bytes, i); +} +#endif bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i); size_t iov_iter_zero(size_t bytes, struct iov_iter *); unsigned long iov_iter_alignment(const struct iov_iter *i); diff --git a/lib/Kconfig b/lib/Kconfig index 0c8b78a9ae2e..2d1c4b3a085c 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -548,6 +548,9 @@ config ARCH_HAS_SG_CHAIN config ARCH_HAS_PMEM_API bool +config ARCH_HAS_UACCESS_FLUSHCACHE + bool + config ARCH_HAS_MMIO_FLUSH bool diff --git a/lib/iov_iter.c b/lib/iov_iter.c index f835964c9485..c9a69064462f 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -615,6 +615,28 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) } EXPORT_SYMBOL(copy_from_iter_nocache); +#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE +size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) +{ + char *to = addr; + if (unlikely(i->type & ITER_PIPE)) { + WARN_ON(1); + return 0; + } + iterate_and_advance(i, bytes, v, + __copy_from_user_flushcache((to += v.iov_len) - v.iov_len, + v.iov_base, v.iov_len), + memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page, + v.bv_offset, v.bv_len), + memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base, + v.iov_len) + ) + + return bytes; +} +EXPORT_SYMBOL_GPL(copy_from_iter_flushcache); +#endif + bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; -- cgit v1.2.3 From 7e026c8c0a4200da86bc51edeaad79dcdccf78ca Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 12:57:56 -0700 Subject: dm: add ->copy_from_iter() dax operation support Allow device-mapper to route copy_from_iter operations to the per-target implementation. In order for the device stacking to work we need a dax_dev and a pgoff relative to that device. This gives each layer of the stack the information it needs to look up the operation pointer for the next level. This conceptually allows for an array of mixed device drivers with varying copy_from_iter implementations. Reviewed-by: Toshi Kani Reviewed-by: Mike Snitzer Signed-off-by: Dan Williams --- drivers/dax/super.c | 13 +++++++++++++ drivers/md/dm-linear.c | 15 +++++++++++++++ drivers/md/dm-stripe.c | 20 ++++++++++++++++++++ drivers/md/dm.c | 26 ++++++++++++++++++++++++++ include/linux/dax.h | 2 ++ include/linux/device-mapper.h | 3 +++ 6 files changed, 79 insertions(+) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 6ed32aac8bbe..dd299e55f65d 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -172,6 +173,18 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, } EXPORT_SYMBOL_GPL(dax_direct_access); +size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, + size_t bytes, struct iov_iter *i) +{ + if (!dax_alive(dax_dev)) + return 0; + + if (!dax_dev->ops->copy_from_iter) + return copy_from_iter(addr, bytes, i); + return dax_dev->ops->copy_from_iter(dax_dev, pgoff, addr, bytes, i); +} +EXPORT_SYMBOL_GPL(dax_copy_from_iter); + bool dax_alive(struct dax_device *dax_dev) { lockdep_assert_held(&dax_srcu); diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 7d42a9d9f406..0841ec1bfbad 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -159,6 +159,20 @@ static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); } +static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + struct linear_c *lc = ti->private; + struct block_device *bdev = lc->dev->bdev; + struct dax_device *dax_dev = lc->dev->dax_dev; + sector_t dev_sector, sector = pgoff * PAGE_SECTORS; + + dev_sector = linear_map_sector(ti, sector); + if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff)) + return 0; + return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); +} + static struct target_type linear_target = { .name = "linear", .version = {1, 3, 0}, @@ -171,6 +185,7 @@ static struct target_type linear_target = { .prepare_ioctl = linear_prepare_ioctl, .iterate_devices = linear_iterate_devices, .direct_access = linear_dax_direct_access, + .dax_copy_from_iter = linear_dax_copy_from_iter, }; int __init dm_linear_init(void) diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 75152482f3ad..1ef914f9ca72 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -332,6 +332,25 @@ static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); } +static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + sector_t dev_sector, sector = pgoff * PAGE_SECTORS; + struct stripe_c *sc = ti->private; + struct dax_device *dax_dev; + struct block_device *bdev; + uint32_t stripe; + + stripe_map_sector(sc, sector, &stripe, &dev_sector); + dev_sector += sc->stripe[stripe].physical_start; + dax_dev = sc->stripe[stripe].dev->dax_dev; + bdev = sc->stripe[stripe].dev->bdev; + + if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff)) + return 0; + return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); +} + /* * Stripe status: * @@ -451,6 +470,7 @@ static struct target_type stripe_target = { .iterate_devices = stripe_iterate_devices, .io_hints = stripe_io_hints, .direct_access = stripe_dax_direct_access, + .dax_copy_from_iter = stripe_dax_copy_from_iter, }; int __init dm_stripe_init(void) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 37ccd73c79ec..7faaceb52819 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -969,6 +970,30 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, return ret; } +static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + struct mapped_device *md = dax_get_private(dax_dev); + sector_t sector = pgoff * PAGE_SECTORS; + struct dm_target *ti; + long ret = 0; + int srcu_idx; + + ti = dm_dax_get_live_target(md, sector, &srcu_idx); + + if (!ti) + goto out; + if (!ti->type->dax_copy_from_iter) { + ret = copy_from_iter(addr, bytes, i); + goto out; + } + ret = ti->type->dax_copy_from_iter(ti, pgoff, addr, bytes, i); + out: + dm_put_live_table(md, srcu_idx); + + return ret; +} + /* * A target may call dm_accept_partial_bio only from the map routine. It is * allowed for all bio types except REQ_PREFLUSH. @@ -2859,6 +2884,7 @@ static const struct block_device_operations dm_blk_dops = { static const struct dax_operations dm_dax_ops = { .direct_access = dm_dax_direct_access, + .copy_from_iter = dm_dax_copy_from_iter, }; /* diff --git a/include/linux/dax.h b/include/linux/dax.h index bbe79ed90e2b..28e398f8c59e 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -78,6 +78,8 @@ void kill_dax(struct dax_device *dax_dev); void *dax_get_private(struct dax_device *dax_dev); long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); +size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, + size_t bytes, struct iov_iter *i); /* * We use lowest available bit in exceptional entry for locking, one bit for diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index f4c639c0c362..11c8a0a92f9c 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -132,6 +132,8 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); */ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); +typedef size_t (*dm_dax_copy_from_iter_fn)(struct dm_target *ti, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i); #define PAGE_SECTORS (PAGE_SIZE / 512) void dm_error(const char *message); @@ -181,6 +183,7 @@ struct target_type { dm_iterate_devices_fn iterate_devices; dm_io_hints_fn io_hints; dm_dax_direct_access_fn direct_access; + dm_dax_copy_from_iter_fn dax_copy_from_iter; /* For internal device-mapper use. */ struct list_head list; -- cgit v1.2.3 From 564e871aa66f548a947b23808d3140f326381f0c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 3 Jun 2017 18:30:43 +0900 Subject: libnvdimm, label: add v1.2 nvdimm label definitions In support of improved interoperability between operating systems and pre-boot environments the Intel proposed NVDIMM Namespace Specification [1], has been adopted and modified to the the UEFI 2.7 NVDIMM Label Protocol [2]. Update the definitions of the namespace label data structures so that the new format can be supported alongside the existing label format. The new specification changes the default label size to 256 bytes, so everywhere that relied on sizeof(struct nd_namespace_label) must now use the sizeof_namespace_label() helper. There should be no functional differences from these changes as the default is still the v1.1 128-byte format. Future patches will move the default to the v1.2 definition. [1]: http://pmem.io/documents/NVDIMM_Namespace_Spec.pdf [2]: http://www.uefi.org/sites/default/files/resources/UEFI_Spec_2_7.pdf Signed-off-by: Dan Williams --- drivers/nvdimm/label.c | 95 ++++++++++++++++++++++++++++++++++++++++---------- drivers/nvdimm/label.h | 15 ++++++-- drivers/nvdimm/nd.h | 8 ++++- 3 files changed, 97 insertions(+), 21 deletions(-) diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index dd615345699f..d6233d220bfd 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -34,6 +34,11 @@ static u32 best_seq(u32 a, u32 b) return a; } +unsigned sizeof_namespace_label(struct nvdimm_drvdata *ndd) +{ + return ndd->nslabel_size; +} + size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd) { u32 index_span; @@ -49,7 +54,7 @@ size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd) * starts to waste space at larger config_sizes, but it's * unlikely we'll ever see anything but 128K. */ - index_span = ndd->nsarea.config_size / 129; + index_span = ndd->nsarea.config_size / (sizeof_namespace_label(ndd) + 1); index_span /= NSINDEX_ALIGN * 2; ndd->nsindex_size = index_span * NSINDEX_ALIGN; @@ -58,10 +63,10 @@ size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd) int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd) { - return ndd->nsarea.config_size / 129; + return ndd->nsarea.config_size / (sizeof_namespace_label(ndd) + 1); } -int nd_label_validate(struct nvdimm_drvdata *ndd) +static int __nd_label_validate(struct nvdimm_drvdata *ndd) { /* * On media label format consists of two index blocks followed @@ -104,6 +109,7 @@ int nd_label_validate(struct nvdimm_drvdata *ndd) u32 nslot; u8 sig[NSINDEX_SIG_LEN]; u64 sum_save, sum, size; + unsigned int version, labelsize; memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN); if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) { @@ -111,6 +117,21 @@ int nd_label_validate(struct nvdimm_drvdata *ndd) __func__, i); continue; } + + /* label sizes larger than 128 arrived with v1.2 */ + version = __le16_to_cpu(nsindex[i]->major) * 100 + + __le16_to_cpu(nsindex[i]->minor); + if (version >= 102) + labelsize = 1 << (7 + nsindex[i]->labelsize); + else + labelsize = 128; + + if (labelsize != sizeof_namespace_label(ndd)) { + dev_dbg(dev, "%s: nsindex%d labelsize %d invalid\n", + __func__, i, nsindex[i]->labelsize); + continue; + } + sum_save = __le64_to_cpu(nsindex[i]->checksum); nsindex[i]->checksum = __cpu_to_le64(0); sum = nd_fletcher64(nsindex[i], sizeof_namespace_index(ndd), 1); @@ -153,7 +174,7 @@ int nd_label_validate(struct nvdimm_drvdata *ndd) } nslot = __le32_to_cpu(nsindex[i]->nslot); - if (nslot * sizeof(struct nd_namespace_label) + if (nslot * sizeof_namespace_label(ndd) + 2 * sizeof_namespace_index(ndd) > ndd->nsarea.config_size) { dev_dbg(dev, "%s: nsindex%d nslot: %u invalid, config_size: %#x\n", @@ -189,6 +210,28 @@ int nd_label_validate(struct nvdimm_drvdata *ndd) return -1; } +int nd_label_validate(struct nvdimm_drvdata *ndd) +{ + /* + * In order to probe for and validate namespace index blocks we + * need to know the size of the labels, and we can't trust the + * size of the labels until we validate the index blocks. + * Resolve this dependency loop by probing for known label + * sizes. + */ + int label_size[] = { 256, 128 }; + int i, rc; + + for (i = 0; i < ARRAY_SIZE(label_size); i++) { + ndd->nslabel_size = label_size[i]; + rc = __nd_label_validate(ndd); + if (rc >= 0) + return rc; + } + + return -1; +} + void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst, struct nd_namespace_index *src) { @@ -210,7 +253,22 @@ static struct nd_namespace_label *nd_label_base(struct nvdimm_drvdata *ndd) static int to_slot(struct nvdimm_drvdata *ndd, struct nd_namespace_label *nd_label) { - return nd_label - nd_label_base(ndd); + unsigned long label, base; + + label = (unsigned long) nd_label; + base = (unsigned long) nd_label_base(ndd); + + return (label - base) / sizeof_namespace_label(ndd); +} + +static struct nd_namespace_label *to_label(struct nvdimm_drvdata *ndd, int slot) +{ + unsigned long label, base; + + base = (unsigned long) nd_label_base(ndd); + label = base + sizeof_namespace_label(ndd) * slot; + + return (struct nd_namespace_label *) label; } #define for_each_clear_bit_le(bit, addr, size) \ @@ -299,7 +357,7 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd) struct resource *res; u32 flags; - nd_label = nd_label_base(ndd) + slot; + nd_label = to_label(ndd, slot); if (!slot_valid(nd_label, slot)) continue; @@ -331,7 +389,7 @@ int nd_label_active_count(struct nvdimm_drvdata *ndd) for_each_clear_bit_le(slot, free, nslot) { struct nd_namespace_label *nd_label; - nd_label = nd_label_base(ndd) + slot; + nd_label = to_label(ndd, slot); if (!slot_valid(nd_label, slot)) { u32 label_slot = __le32_to_cpu(nd_label->slot); @@ -360,12 +418,12 @@ struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n) for_each_clear_bit_le(slot, free, nslot) { struct nd_namespace_label *nd_label; - nd_label = nd_label_base(ndd) + slot; + nd_label = to_label(ndd, slot); if (!slot_valid(nd_label, slot)) continue; if (n-- == 0) - return nd_label_base(ndd) + slot; + return to_label(ndd, slot); } return NULL; @@ -437,7 +495,8 @@ static int nd_label_write_index(struct nvdimm_drvdata *ndd, int index, u32 seq, nslot = __le32_to_cpu(nsindex->nslot); memcpy(nsindex->sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN); - nsindex->flags = __cpu_to_le32(0); + memset(&nsindex->flags, 0, 3); + nsindex->labelsize = sizeof_namespace_label(ndd) >> 8; nsindex->seq = __cpu_to_le32(seq); offset = (unsigned long) nsindex - (unsigned long) to_namespace_index(ndd, 0); @@ -525,8 +584,8 @@ static int __pmem_label_update(struct nd_region *nd_region, return -ENXIO; dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot); - nd_label = nd_label_base(ndd) + slot; - memset(nd_label, 0, sizeof(struct nd_namespace_label)); + nd_label = to_label(ndd, slot); + memset(nd_label, 0, sizeof_namespace_label(ndd)); memcpy(nd_label->uuid, nspm->uuid, NSLABEL_UUID_LEN); if (nspm->alt_name) memcpy(nd_label->name, nspm->alt_name, NSLABEL_NAME_LEN); @@ -542,7 +601,7 @@ static int __pmem_label_update(struct nd_region *nd_region, /* update label */ offset = nd_label_offset(ndd, nd_label); rc = nvdimm_set_config_data(ndd, offset, nd_label, - sizeof(struct nd_namespace_label)); + sizeof_namespace_label(ndd)); if (rc < 0) return rc; @@ -668,7 +727,7 @@ static int __blk_label_update(struct nd_region *nd_region, /* mark unused labels for garbage collection */ for_each_clear_bit_le(slot, free, nslot) { - nd_label = nd_label_base(ndd) + slot; + nd_label = to_label(ndd, slot); memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN); if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0) continue; @@ -714,8 +773,8 @@ static int __blk_label_update(struct nd_region *nd_region, goto abort; dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot); - nd_label = nd_label_base(ndd) + slot; - memset(nd_label, 0, sizeof(struct nd_namespace_label)); + nd_label = to_label(ndd, slot); + memset(nd_label, 0, sizeof_namespace_label(ndd)); memcpy(nd_label->uuid, nsblk->uuid, NSLABEL_UUID_LEN); if (nsblk->alt_name) memcpy(nd_label->name, nsblk->alt_name, @@ -732,7 +791,7 @@ static int __blk_label_update(struct nd_region *nd_region, /* update label */ offset = nd_label_offset(ndd, nd_label); rc = nvdimm_set_config_data(ndd, offset, nd_label, - sizeof(struct nd_namespace_label)); + sizeof_namespace_label(ndd)); if (rc < 0) goto abort; } @@ -790,7 +849,7 @@ static int __blk_label_update(struct nd_region *nd_region, goto out; } for_each_clear_bit_le(slot, free, nslot) { - nd_label = nd_label_base(ndd) + slot; + nd_label = to_label(ndd, slot); memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN); if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0) continue; diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h index a59ef6eef2a3..f39bfb31f72f 100644 --- a/drivers/nvdimm/label.h +++ b/drivers/nvdimm/label.h @@ -15,6 +15,7 @@ #include #include +#include #include enum { @@ -60,7 +61,8 @@ static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0"; */ struct nd_namespace_index { u8 sig[NSINDEX_SIG_LEN]; - __le32 flags; + u8 flags[3]; + u8 labelsize; __le32 seq; __le64 myoff; __le64 mysize; @@ -98,7 +100,16 @@ struct nd_namespace_label { __le64 dpa; __le64 rawsize; __le32 slot; - __le32 unused; + /* + * Accessing fields past this point should be gated by a + * namespace_label_has() check. + */ + u8 align; + u8 reserved[3]; + guid_t type_guid; + guid_t abstraction_guid; + u8 reserved2[88]; + __le64 checksum; }; /** diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 03852d738eec..28d9f4481547 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -42,7 +42,7 @@ struct nd_poison { struct nvdimm_drvdata { struct device *dev; - int nsindex_size; + int nsindex_size, nslabel_size; struct nd_cmd_get_config_size nsarea; void *data; int ns_current, ns_next; @@ -96,6 +96,12 @@ static inline struct nd_namespace_index *to_next_namespace_index( return to_namespace_index(ndd, ndd->ns_next); } +unsigned sizeof_namespace_label(struct nvdimm_drvdata *ndd); + +#define namespace_label_has(ndd, field) \ + (offsetof(struct nd_namespace_label, field) \ + < sizeof_namespace_label(ndd)) + #define nd_dbg_dpa(r, d, res, fmt, arg...) \ dev_dbg((r) ? &(r)->dev : (d)->dev, "%s: %.13s: %#llx @ %#llx " fmt, \ (r) ? dev_name((d)->dev) : "", res ? res->name : "null", \ -- cgit v1.2.3 From c12c48ce869d72029d70666f615cbd8f67fc14e9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 4 Jun 2017 10:59:15 +0900 Subject: libnvdimm, label: add v1.2 interleave-set-cookie algorithm The interleave-set-cookie algorithm is extended to incorporate all the same components that are used to generate an nvdimm unique-id. For backwards compatibility we still maintain the old v1.1 definition. Reported-by: Nicholas Moulin Reported-by: Kaushik Kanetkar Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 53 +++++++++++++++++++++++++++++++++++++++-- drivers/nvdimm/label.c | 3 ++- drivers/nvdimm/namespace_devs.c | 9 +++++-- drivers/nvdimm/nd.h | 3 ++- drivers/nvdimm/region_devs.c | 43 +++++++++++++++++++++++++++++---- include/linux/libnvdimm.h | 5 +++- 6 files changed, 104 insertions(+), 12 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 097eff0b963d..e744ab38eaf9 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1663,12 +1663,29 @@ struct nfit_set_info { } mapping[0]; }; +struct nfit_set_info2 { + struct nfit_set_info_map2 { + u64 region_offset; + u32 serial_number; + u16 vendor_id; + u16 manufacturing_date; + u8 manufacturing_location; + u8 reserved[31]; + } mapping[0]; +}; + static size_t sizeof_nfit_set_info(int num_mappings) { return sizeof(struct nfit_set_info) + num_mappings * sizeof(struct nfit_set_info_map); } +static size_t sizeof_nfit_set_info2(int num_mappings) +{ + return sizeof(struct nfit_set_info2) + + num_mappings * sizeof(struct nfit_set_info_map2); +} + static int cmp_map_compat(const void *m0, const void *m1) { const struct nfit_set_info_map *map0 = m0; @@ -1690,6 +1707,18 @@ static int cmp_map(const void *m0, const void *m1) return 0; } +static int cmp_map2(const void *m0, const void *m1) +{ + const struct nfit_set_info_map2 *map0 = m0; + const struct nfit_set_info_map2 *map1 = m1; + + if (map0->region_offset < map1->region_offset) + return -1; + else if (map0->region_offset > map1->region_offset) + return 1; + return 0; +} + /* Retrieve the nth entry referencing this spa */ static struct acpi_nfit_memory_map *memdev_from_spa( struct acpi_nfit_desc *acpi_desc, u16 range_index, int n) @@ -1711,6 +1740,7 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, struct device *dev = acpi_desc->dev; struct nd_interleave_set *nd_set; u16 nr = ndr_desc->num_mappings; + struct nfit_set_info2 *info2; struct nfit_set_info *info; if (spa_type == NFIT_SPA_PM || spa_type == NFIT_SPA_VOLATILE) @@ -1725,9 +1755,15 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL); if (!info) return -ENOMEM; + + info2 = devm_kzalloc(dev, sizeof_nfit_set_info2(nr), GFP_KERNEL); + if (!info2) + return -ENOMEM; + for (i = 0; i < nr; i++) { struct nd_mapping_desc *mapping = &ndr_desc->mapping[i]; struct nfit_set_info_map *map = &info->mapping[i]; + struct nfit_set_info_map2 *map2 = &info2->mapping[i]; struct nvdimm *nvdimm = mapping->nvdimm; struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc, @@ -1740,19 +1776,32 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, map->region_offset = memdev->region_offset; map->serial_number = nfit_mem->dcr->serial_number; + + map2->region_offset = memdev->region_offset; + map2->serial_number = nfit_mem->dcr->serial_number; + map2->vendor_id = nfit_mem->dcr->vendor_id; + map2->manufacturing_date = nfit_mem->dcr->manufacturing_date; + map2->manufacturing_location = nfit_mem->dcr->manufacturing_location; } + /* v1.1 namespaces */ sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map), cmp_map, NULL); - nd_set->cookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0); + nd_set->cookie1 = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0); + + /* v1.2 namespaces */ + sort(&info2->mapping[0], nr, sizeof(struct nfit_set_info_map2), + cmp_map2, NULL); + nd_set->cookie2 = nd_fletcher64(info2, sizeof_nfit_set_info2(nr), 0); - /* support namespaces created with the wrong sort order */ + /* support v1.1 namespaces created with the wrong sort order */ sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map), cmp_map_compat, NULL); nd_set->altcookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0); ndr_desc->nd_set = nd_set; devm_kfree(dev, info); + devm_kfree(dev, info2); return 0; } diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index d6233d220bfd..1aacd4866c76 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -553,7 +553,6 @@ static int __pmem_label_update(struct nd_region *nd_region, struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm, int pos) { - u64 cookie = nd_region_interleave_set_cookie(nd_region); struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); struct nd_label_ent *label_ent, *victim = NULL; struct nd_namespace_label *nd_label; @@ -563,11 +562,13 @@ static int __pmem_label_update(struct nd_region *nd_region, unsigned long *free; u32 nslot, slot; size_t offset; + u64 cookie; int rc; if (!preamble_next(ndd, &nsindex, &free, &nslot)) return -ENXIO; + cookie = nd_region_interleave_set_cookie(nd_region, nsindex); nd_label_gen_id(&label_id, nspm->uuid, 0); for_each_dpa_resource(ndd, res) if (strcmp(res->name, label_id.id) == 0) diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 2f9dfbd2dbec..51f304fe8a52 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -1698,10 +1698,11 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id) * @nd_label: target pmem namespace label to evaluate */ struct device *create_namespace_pmem(struct nd_region *nd_region, + struct nd_namespace_index *nsindex, struct nd_namespace_label *nd_label) { + u64 cookie = nd_region_interleave_set_cookie(nd_region, nsindex); u64 altcookie = nd_region_interleave_set_altcookie(nd_region); - u64 cookie = nd_region_interleave_set_cookie(nd_region); struct nd_label_ent *label_ent; struct nd_namespace_pmem *nspm; struct nd_mapping *nd_mapping; @@ -2108,7 +2109,11 @@ static struct device **scan_labels(struct nd_region *nd_region) goto err; devs[count++] = dev; } else { - dev = create_namespace_pmem(nd_region, nd_label); + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nd_namespace_index *nsindex; + + nsindex = to_namespace_index(ndd, ndd->ns_current); + dev = create_namespace_pmem(nd_region, nsindex, nd_label); if (IS_ERR(dev)) { switch (PTR_ERR(dev)) { case -EAGAIN: diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 28d9f4481547..ad4e518940c9 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -336,7 +336,8 @@ static inline struct device *nd_dax_create(struct nd_region *nd_region) struct nd_region *to_nd_region(struct device *dev); int nd_region_to_nstype(struct nd_region *nd_region); int nd_region_register_namespaces(struct nd_region *nd_region, int *err); -u64 nd_region_interleave_set_cookie(struct nd_region *nd_region); +u64 nd_region_interleave_set_cookie(struct nd_region *nd_region, + struct nd_namespace_index *nsindex); u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region); void nvdimm_bus_lock(struct device *dev); void nvdimm_bus_unlock(struct device *dev); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index b550edf2571f..282b8991ea83 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -307,13 +307,41 @@ static ssize_t set_cookie_show(struct device *dev, { struct nd_region *nd_region = to_nd_region(dev); struct nd_interleave_set *nd_set = nd_region->nd_set; + ssize_t rc = 0; if (is_nd_pmem(dev) && nd_set) /* pass, should be precluded by region_visible */; else return -ENXIO; - return sprintf(buf, "%#llx\n", nd_set->cookie); + /* + * The cookie to show depends on which specification of the + * labels we are using. If there are not labels then default to + * the v1.1 namespace label cookie definition. To read all this + * data we need to wait for probing to settle. + */ + device_lock(dev); + nvdimm_bus_lock(dev); + wait_nvdimm_bus_probe_idle(dev); + if (nd_region->ndr_mappings) { + struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + + if (ndd) { + struct nd_namespace_index *nsindex; + + nsindex = to_namespace_index(ndd, ndd->ns_current); + rc = sprintf(buf, "%#llx\n", + nd_region_interleave_set_cookie(nd_region, + nsindex)); + } + } + nvdimm_bus_unlock(dev); + device_unlock(dev); + + if (rc) + return rc; + return sprintf(buf, "%#llx\n", nd_set->cookie1); } static DEVICE_ATTR_RO(set_cookie); @@ -564,13 +592,18 @@ struct attribute_group nd_region_attribute_group = { }; EXPORT_SYMBOL_GPL(nd_region_attribute_group); -u64 nd_region_interleave_set_cookie(struct nd_region *nd_region) +u64 nd_region_interleave_set_cookie(struct nd_region *nd_region, + struct nd_namespace_index *nsindex) { struct nd_interleave_set *nd_set = nd_region->nd_set; - if (nd_set) - return nd_set->cookie; - return 0; + if (!nd_set) + return 0; + + if (nsindex && __le16_to_cpu(nsindex->major) == 1 + && __le16_to_cpu(nsindex->minor) == 1) + return nd_set->cookie1; + return nd_set->cookie2; } u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region) diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 6c807017128d..722cdf21429f 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -71,7 +71,10 @@ struct nd_cmd_desc { }; struct nd_interleave_set { - u64 cookie; + /* v1.1 definition of the interleave-set-cookie algorithm */ + u64 cookie1; + /* v1.2 definition of the interleave-set-cookie algorithm */ + u64 cookie2; /* compatibility with initial buggy Linux implementation */ u64 altcookie; }; -- cgit v1.2.3 From f979b13c3cc51584882bffa32965f34e5afa3b9b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 4 Jun 2017 12:12:07 +0900 Subject: libnvdimm, label: honor the lba size specified in v1.2 labels Previously we only honored the lba size for blk-aperture mode namespaces. For pmem namespaces the lba size was just assumed to be 512. With the new v1.2 label definition and compatibility with other operating environments, the ->lbasize property is now respected for pmem namespaces. Cc: Ross Zwisler Signed-off-by: Dan Williams --- drivers/nvdimm/namespace_devs.c | 65 +++++++++++++++++++++++++++++++++-------- drivers/nvdimm/nd.h | 1 + drivers/nvdimm/pmem.c | 1 + include/linux/nd.h | 2 ++ 4 files changed, 57 insertions(+), 12 deletions(-) diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 51f304fe8a52..e034b003a5e2 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -163,6 +163,29 @@ bool pmem_should_map_pages(struct device *dev) } EXPORT_SYMBOL(pmem_should_map_pages); +unsigned int pmem_sector_size(struct nd_namespace_common *ndns) +{ + if (is_namespace_pmem(&ndns->dev)) { + struct nd_namespace_pmem *nspm; + + nspm = to_nd_namespace_pmem(&ndns->dev); + if (nspm->lbasize == 0 || nspm->lbasize == 512) + /* default */; + else if (nspm->lbasize == 4096) + return 4096; + else + dev_WARN(&ndns->dev, "unsupported sector size: %ld\n", + nspm->lbasize); + } + + /* + * There is no namespace label (is_namespace_io()), or the label + * indicates the default sector size. + */ + return 512; +} +EXPORT_SYMBOL(pmem_sector_size); + const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, char *name) { @@ -1283,28 +1306,49 @@ static ssize_t resource_show(struct device *dev, } static DEVICE_ATTR_RO(resource); -static const unsigned long ns_lbasize_supported[] = { 512, 520, 528, +static const unsigned long blk_lbasize_supported[] = { 512, 520, 528, 4096, 4104, 4160, 4224, 0 }; +static const unsigned long pmem_lbasize_supported[] = { 512, 4096, 0 }; + static ssize_t sector_size_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); - if (!is_namespace_blk(dev)) - return -ENXIO; + return nd_sector_size_show(nsblk->lbasize, + blk_lbasize_supported, buf); + } - return nd_sector_size_show(nsblk->lbasize, ns_lbasize_supported, buf); + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + return nd_sector_size_show(nspm->lbasize, + pmem_lbasize_supported, buf); + } + return -ENXIO; } static ssize_t sector_size_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); struct nd_region *nd_region = to_nd_region(dev->parent); + const unsigned long *supported; + unsigned long *lbasize; ssize_t rc = 0; - if (!is_namespace_blk(dev)) + if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + lbasize = &nsblk->lbasize; + supported = blk_lbasize_supported; + } else if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + lbasize = &nspm->lbasize; + supported = pmem_lbasize_supported; + } else return -ENXIO; device_lock(dev); @@ -1312,8 +1356,7 @@ static ssize_t sector_size_store(struct device *dev, if (to_ndns(dev)->claim) rc = -EBUSY; if (rc >= 0) - rc = nd_sector_size_store(dev, buf, &nsblk->lbasize, - ns_lbasize_supported); + rc = nd_sector_size_store(dev, buf, lbasize, supported); if (rc >= 0) rc = nd_namespace_label_update(nd_region, dev); dev_dbg(dev, "%s: result: %zd %s: %s%s", __func__, @@ -1458,9 +1501,6 @@ static umode_t namespace_visible(struct kobject *kobj, if (a == &dev_attr_size.attr) return 0644; - if (is_namespace_pmem(dev) && a == &dev_attr_sector_size.attr) - return 0; - return a->mode; } @@ -1795,6 +1835,7 @@ struct device *create_namespace_pmem(struct nd_region *nd_region, NSLABEL_NAME_LEN, GFP_KERNEL); nspm->uuid = kmemdup((void __force *) label0->uuid, NSLABEL_UUID_LEN, GFP_KERNEL); + nspm->lbasize = __le64_to_cpu(label0->lbasize); } if (!nspm->alt_name || !nspm->uuid) { diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index ad4e518940c9..17cecb38dfc9 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -356,6 +356,7 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns); int nvdimm_namespace_detach_btt(struct nd_btt *nd_btt); const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, char *name); +unsigned int pmem_sector_size(struct nd_namespace_common *ndns); void nvdimm_badblocks_populate(struct nd_region *nd_region, struct badblocks *bb, const struct resource *res); #if IS_ENABLED(CONFIG_ND_CLAIM) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index c544d466ea51..5c45e178bd4a 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -342,6 +342,7 @@ static int pmem_attach_disk(struct device *dev, blk_queue_write_cache(q, true, true); blk_queue_make_request(q, pmem_make_request); blk_queue_physical_block_size(q, PAGE_SIZE); + blk_queue_logical_block_size(q, pmem_sector_size(ndns)); blk_queue_max_hw_sectors(q, UINT_MAX); blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); diff --git a/include/linux/nd.h b/include/linux/nd.h index 194b8e002ea7..d8f5023b49ae 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -75,12 +75,14 @@ struct nd_namespace_io { /** * struct nd_namespace_pmem - namespace device for dimm-backed interleaved memory * @nsio: device and system physical address range to drive + * @lbasize: logical sector size for the namespace in block-device-mode * @alt_name: namespace name supplied in the dimm label * @uuid: namespace name supplied in the dimm label * @id: ida allocated id */ struct nd_namespace_pmem { struct nd_namespace_io nsio; + unsigned long lbasize; char *alt_name; u8 *uuid; int id; -- cgit v1.2.3 From faec6f8a1cd2c44e439de35ab3328c5cf7bf52d8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jun 2017 11:10:51 -0700 Subject: libnvdimm, label: populate the type_guid property for v1.2 namespaces The type_guid refers to the "Address Range Type GUID" for the region backing a namespace as defined the ACPI NFIT (NVDIMM Firmware Interface Table). This 'type' identifier specifies an access mechanism for the given namespace. This capability replaces the confusing usage of the 'NSLABEL_FLAG_LOCAL' flag to indicate a block-aperture-mode namespace. Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 15 +++++++---- drivers/nvdimm/label.c | 6 +++++ drivers/nvdimm/namespace_devs.c | 57 +++++++++++++++++++++++++++-------------- include/linux/libnvdimm.h | 3 +++ 4 files changed, 57 insertions(+), 24 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index e744ab38eaf9..436cfdd1215b 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1743,15 +1743,17 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, struct nfit_set_info2 *info2; struct nfit_set_info *info; + nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL); + if (!nd_set) + return -ENOMEM; + ndr_desc->nd_set = nd_set; + guid_copy(&nd_set->type_guid, (guid_t *) spa->range_guid); + if (spa_type == NFIT_SPA_PM || spa_type == NFIT_SPA_VOLATILE) /* pass */; else return 0; - nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL); - if (!nd_set) - return -ENOMEM; - info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL); if (!info) return -ENOMEM; @@ -2228,7 +2230,7 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, struct acpi_nfit_system_address *spa = nfit_spa->spa; struct nd_blk_region_desc *ndbr_desc; struct nfit_mem *nfit_mem; - int blk_valid = 0; + int blk_valid = 0, rc; if (!nvdimm) { dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n", @@ -2260,6 +2262,9 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, ndbr_desc = to_blk_region_desc(ndr_desc); ndbr_desc->enable = acpi_nfit_blk_region_enable; ndbr_desc->do_io = acpi_desc->blk_do_io; + rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa); + if (rc) + return rc; nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus, ndr_desc); if (!nfit_spa->nd_region) diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 1aacd4866c76..d8b87d3a0ebe 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -553,6 +553,7 @@ static int __pmem_label_update(struct nd_region *nd_region, struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm, int pos) { + struct nd_interleave_set *nd_set = nd_region->nd_set; struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); struct nd_label_ent *label_ent, *victim = NULL; struct nd_namespace_label *nd_label; @@ -597,6 +598,8 @@ static int __pmem_label_update(struct nd_region *nd_region, nd_label->rawsize = __cpu_to_le64(resource_size(res)); nd_label->dpa = __cpu_to_le64(res->start); nd_label->slot = __cpu_to_le32(slot); + if (namespace_label_has(ndd, type_guid)) + guid_copy(&nd_label->type_guid, &nd_set->type_guid); nd_dbg_dpa(nd_region, ndd, res, "%s\n", __func__); /* update label */ @@ -684,6 +687,7 @@ static int __blk_label_update(struct nd_region *nd_region, int num_labels) { int i, alloc, victims, nfree, old_num_resources, nlabel, rc = -ENXIO; + struct nd_interleave_set *nd_set = nd_region->nd_set; struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); struct nd_namespace_label *nd_label; struct nd_label_ent *label_ent, *e; @@ -788,6 +792,8 @@ static int __blk_label_update(struct nd_region *nd_region, nd_label->rawsize = __cpu_to_le64(resource_size(res)); nd_label->lbasize = __cpu_to_le64(nsblk->lbasize); nd_label->slot = __cpu_to_le32(slot); + if (namespace_label_has(ndd, type_guid)) + guid_copy(&nd_label->type_guid, &nd_set->type_guid); /* update label */ offset = nd_label_offset(ndd, nd_label); diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index e034b003a5e2..e101aec186c7 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -1639,6 +1639,8 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid, for (i = 0; i < nd_region->ndr_mappings; i++) { struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nd_interleave_set *nd_set = nd_region->nd_set; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); struct nd_label_ent *label_ent; bool found_uuid = false; @@ -1659,8 +1661,17 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid, if (memcmp(nd_label->uuid, uuid, NSLABEL_UUID_LEN) != 0) continue; + if (namespace_label_has(ndd, type_guid) + && !guid_equal(&nd_set->type_guid, + &nd_label->type_guid)) { + dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n", + nd_set->type_guid.b, + nd_label->type_guid.b); + continue; + } + if (found_uuid) { - dev_dbg(to_ndd(nd_mapping)->dev, + dev_dbg(ndd->dev, "%s duplicate entry for uuid\n", __func__); return false; @@ -2047,12 +2058,21 @@ struct device *create_namespace_blk(struct nd_region *nd_region, { struct nd_mapping *nd_mapping = &nd_region->mapping[0]; + struct nd_interleave_set *nd_set = nd_region->nd_set; struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); struct nd_namespace_blk *nsblk; char name[NSLABEL_NAME_LEN]; struct device *dev = NULL; struct resource *res; + if (namespace_label_has(ndd, type_guid) + && !guid_equal(&nd_set->type_guid, + &nd_label->type_guid)) { + dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n", + nd_set->type_guid.b, nd_label->type_guid.b); + return ERR_PTR(-EAGAIN); + } + nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL); if (!nsblk) return ERR_PTR(-ENOMEM); @@ -2144,31 +2164,30 @@ static struct device **scan_labels(struct nd_region *nd_region) kfree(devs); devs = __devs; - if (is_nd_blk(&nd_region->dev)) { + if (is_nd_blk(&nd_region->dev)) dev = create_namespace_blk(nd_region, nd_label, count); - if (IS_ERR(dev)) - goto err; - devs[count++] = dev; - } else { + else { struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); struct nd_namespace_index *nsindex; nsindex = to_namespace_index(ndd, ndd->ns_current); dev = create_namespace_pmem(nd_region, nsindex, nd_label); - if (IS_ERR(dev)) { - switch (PTR_ERR(dev)) { - case -EAGAIN: - /* skip invalid labels */ - continue; - case -ENODEV: - /* fallthrough to seed creation */ - break; - default: - goto err; - } - } else - devs[count++] = dev; } + + if (IS_ERR(dev)) { + switch (PTR_ERR(dev)) { + case -EAGAIN: + /* skip invalid labels */ + continue; + case -ENODEV: + /* fallthrough to seed creation */ + break; + default: + goto err; + } + } else + devs[count++] = dev; + } dev_dbg(&nd_region->dev, "%s: discovered %d %s namespace%s\n", diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 722cdf21429f..4b9f178c82e6 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -17,6 +17,7 @@ #include #include #include +#include enum { /* when a dimm supports both PMEM and BLK access a label is required */ @@ -77,6 +78,8 @@ struct nd_interleave_set { u64 cookie2; /* compatibility with initial buggy Linux implementation */ u64 altcookie; + + guid_t type_guid; }; struct nd_mapping_desc { -- cgit v1.2.3 From 8f2bc2430e4ec53ea961997d760c3b35f729e444 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jun 2017 11:39:30 -0700 Subject: libnvdimm, label: populate 'isetcookie' for blk-aperture namespaces Starting with the v1.2 definition of namespace labels, the isetcookie field is populated and validated for blk-aperture namespaces. This adds some safety against inadvertent copying of namespace labels from one DIMM-device to another. Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 7 +------ drivers/nvdimm/label.c | 12 +++++++++++- drivers/nvdimm/namespace_devs.c | 20 ++++++++++++++------ 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 436cfdd1215b..b930d12f636b 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1736,12 +1736,12 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, struct nd_region_desc *ndr_desc, struct acpi_nfit_system_address *spa) { - int i, spa_type = nfit_spa_type(spa); struct device *dev = acpi_desc->dev; struct nd_interleave_set *nd_set; u16 nr = ndr_desc->num_mappings; struct nfit_set_info2 *info2; struct nfit_set_info *info; + int i; nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL); if (!nd_set) @@ -1749,11 +1749,6 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc, ndr_desc->nd_set = nd_set; guid_copy(&nd_set->type_guid, (guid_t *) spa->range_guid); - if (spa_type == NFIT_SPA_PM || spa_type == NFIT_SPA_VOLATILE) - /* pass */; - else - return 0; - info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL); if (!info) return -ENOMEM; diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index d8b87d3a0ebe..ba0582fb0e21 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -787,7 +787,17 @@ static int __blk_label_update(struct nd_region *nd_region, nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_LOCAL); nd_label->nlabel = __cpu_to_le16(0); /* N/A */ nd_label->position = __cpu_to_le16(0); /* N/A */ - nd_label->isetcookie = __cpu_to_le64(0); /* N/A */ + + /* + * Use the presence of the type_guid as a flag to + * determine isetcookie usage for blk-aperture + * namespaces. + */ + if (namespace_label_has(ndd, type_guid)) + nd_label->isetcookie = __cpu_to_le64(nd_set->cookie2); + else + nd_label->isetcookie = __cpu_to_le64(0); /* N/A */ + nd_label->dpa = __cpu_to_le64(res->start); nd_label->rawsize = __cpu_to_le64(resource_size(res)); nd_label->lbasize = __cpu_to_le64(nsblk->lbasize); diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index e101aec186c7..7aba9a569c8e 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -2065,12 +2065,20 @@ struct device *create_namespace_blk(struct nd_region *nd_region, struct device *dev = NULL; struct resource *res; - if (namespace_label_has(ndd, type_guid) - && !guid_equal(&nd_set->type_guid, - &nd_label->type_guid)) { - dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n", - nd_set->type_guid.b, nd_label->type_guid.b); - return ERR_PTR(-EAGAIN); + if (namespace_label_has(ndd, type_guid)) { + if (!guid_equal(&nd_set->type_guid, &nd_label->type_guid)) { + dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n", + nd_set->type_guid.b, + nd_label->type_guid.b); + return ERR_PTR(-EAGAIN); + } + + if (nd_label->isetcookie != __cpu_to_le64(nd_set->cookie2)) { + dev_dbg(ndd->dev, "expect cookie %#llx got %#llx\n", + nd_set->cookie2, + __le64_to_cpu(nd_label->isetcookie)); + return ERR_PTR(-EAGAIN); + } } nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL); -- cgit v1.2.3 From 3934d8410cb837c5f6bff54e66574a4bbcef340a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jun 2017 14:59:04 -0700 Subject: libnvdimm, label: update 'nlabel' and 'position' handling for local namespaces The v1.2 namespace label specification requires 'nlabel' and 'position' to be valid for the first ("lowest dpa") label in the set. It also requires all non-first labels to set those fields to 0xff. Linux does not much care if these values are correct, because we can just trust the count of labels with the matching uuid like the v1.1 case. However, we set them correctly in case other environments care. Signed-off-by: Dan Williams --- drivers/nvdimm/label.c | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index ba0582fb0e21..d7f9916c6ed5 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -696,6 +696,7 @@ static int __blk_label_update(struct nd_region *nd_region, struct resource *res, **old_res_list; struct nd_label_id label_id; u8 uuid[NSLABEL_UUID_LEN]; + int min_dpa_idx = 0; LIST_HEAD(list); u32 nslot, slot; @@ -767,6 +768,18 @@ static int __blk_label_update(struct nd_region *nd_region, } } + /* + * Find the resource associated with the first label in the set + * per the v1.2 namespace specification. + */ + for (i = 0; i < nsblk->num_resources; i++) { + struct resource *min = nsblk->res[min_dpa_idx]; + + res = nsblk->res[i]; + if (res->start < min->start) + min_dpa_idx = i; + } + for (i = 0; i < nsblk->num_resources; i++) { size_t offset; @@ -785,18 +798,26 @@ static int __blk_label_update(struct nd_region *nd_region, memcpy(nd_label->name, nsblk->alt_name, NSLABEL_NAME_LEN); nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_LOCAL); - nd_label->nlabel = __cpu_to_le16(0); /* N/A */ - nd_label->position = __cpu_to_le16(0); /* N/A */ /* * Use the presence of the type_guid as a flag to - * determine isetcookie usage for blk-aperture - * namespaces. + * determine isetcookie usage and nlabel + position + * policy for blk-aperture namespaces. */ - if (namespace_label_has(ndd, type_guid)) + if (namespace_label_has(ndd, type_guid)) { + if (i == min_dpa_idx) { + nd_label->nlabel = __cpu_to_le16(nsblk->num_resources); + nd_label->position = __cpu_to_le16(0); + } else { + nd_label->nlabel = __cpu_to_le16(0xffff); + nd_label->position = __cpu_to_le16(0xffff); + } nd_label->isetcookie = __cpu_to_le64(nd_set->cookie2); - else + } else { + nd_label->nlabel = __cpu_to_le16(0); /* N/A */ + nd_label->position = __cpu_to_le16(0); /* N/A */ nd_label->isetcookie = __cpu_to_le64(0); /* N/A */ + } nd_label->dpa = __cpu_to_le64(res->start); nd_label->rawsize = __cpu_to_le64(resource_size(res)); -- cgit v1.2.3 From 355d838878e1baec494c228458238d078dc3ca51 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 6 Jun 2017 14:56:43 -0700 Subject: libnvdimm, label: add v1.2 label checksum support The v1.2 namespace label specification adds a fletcher checksum to each label instance. Add generation and validation support for the new field. Signed-off-by: Dan Williams --- drivers/nvdimm/label.c | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index d7f9916c6ed5..c503362a03c7 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -326,7 +326,8 @@ static bool preamble_next(struct nvdimm_drvdata *ndd, free, nslot); } -static bool slot_valid(struct nd_namespace_label *nd_label, u32 slot) +static bool slot_valid(struct nvdimm_drvdata *ndd, + struct nd_namespace_label *nd_label, u32 slot) { /* check that we are written where we expect to be written */ if (slot != __le32_to_cpu(nd_label->slot)) @@ -337,6 +338,21 @@ static bool slot_valid(struct nd_namespace_label *nd_label, u32 slot) | __le64_to_cpu(nd_label->rawsize)) % SZ_4K) return false; + /* check checksum */ + if (namespace_label_has(ndd, checksum)) { + u64 sum, sum_save; + + sum_save = __le64_to_cpu(nd_label->checksum); + nd_label->checksum = __cpu_to_le64(0); + sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1); + nd_label->checksum = __cpu_to_le64(sum_save); + if (sum != sum_save) { + dev_dbg(ndd->dev, "%s fail checksum. slot: %d expect: %#llx\n", + __func__, slot, sum); + return false; + } + } + return true; } @@ -359,7 +375,7 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd) nd_label = to_label(ndd, slot); - if (!slot_valid(nd_label, slot)) + if (!slot_valid(ndd, nd_label, slot)) continue; memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN); @@ -391,7 +407,7 @@ int nd_label_active_count(struct nvdimm_drvdata *ndd) nd_label = to_label(ndd, slot); - if (!slot_valid(nd_label, slot)) { + if (!slot_valid(ndd, nd_label, slot)) { u32 label_slot = __le32_to_cpu(nd_label->slot); u64 size = __le64_to_cpu(nd_label->rawsize); u64 dpa = __le64_to_cpu(nd_label->dpa); @@ -419,7 +435,7 @@ struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n) struct nd_namespace_label *nd_label; nd_label = to_label(ndd, slot); - if (!slot_valid(nd_label, slot)) + if (!slot_valid(ndd, nd_label, slot)) continue; if (n-- == 0) @@ -600,6 +616,13 @@ static int __pmem_label_update(struct nd_region *nd_region, nd_label->slot = __cpu_to_le32(slot); if (namespace_label_has(ndd, type_guid)) guid_copy(&nd_label->type_guid, &nd_set->type_guid); + if (namespace_label_has(ndd, checksum)) { + u64 sum; + + nd_label->checksum = __cpu_to_le64(0); + sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1); + nd_label->checksum = __cpu_to_le64(sum); + } nd_dbg_dpa(nd_region, ndd, res, "%s\n", __func__); /* update label */ @@ -825,6 +848,14 @@ static int __blk_label_update(struct nd_region *nd_region, nd_label->slot = __cpu_to_le32(slot); if (namespace_label_has(ndd, type_guid)) guid_copy(&nd_label->type_guid, &nd_set->type_guid); + if (namespace_label_has(ndd, checksum)) { + u64 sum; + + nd_label->checksum = __cpu_to_le64(0); + sum = nd_fletcher64(nd_label, + sizeof_namespace_label(ndd), 1); + nd_label->checksum = __cpu_to_le64(sum); + } /* update label */ offset = nd_label_offset(ndd, nd_label); -- cgit v1.2.3 From b3fde74ea195d2f9f49830a29f971a0aab4cd67a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 4 Jun 2017 10:18:39 +0900 Subject: libnvdimm, label: add address abstraction identifiers Starting with v1.2 labels, 'address abstractions' can be hinted via an address abstraction id that implies an info-block format. The standard address abstraction in the specification is the v2 format of the Block-Translation-Table (BTT). Support for that is saved for a later patch, for now we add support for the Linux supported address abstractions BTT (v1), PFN, and DAX. The new 'holder_class' attribute for namespace devices is added for tooling to specify the 'abstraction_guid' to store in the namespace label. For v1.1 labels this field is undefined and any setting of 'holder_class' away from the default 'none' value will only have effect until the driver is unloaded. Setting 'holder_class' requires that whatever device tries to claim the namespace must be of the specified class. Cc: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/btt_devs.c | 8 +++++ drivers/nvdimm/claim.c | 28 ++++++++++++++++ drivers/nvdimm/core.c | 3 ++ drivers/nvdimm/dax_devs.c | 8 +++++ drivers/nvdimm/label.c | 58 ++++++++++++++++++++++++++++++++ drivers/nvdimm/label.h | 5 +++ drivers/nvdimm/namespace_devs.c | 74 +++++++++++++++++++++++++++++++++++++++++ drivers/nvdimm/nd.h | 1 + drivers/nvdimm/pfn_devs.c | 8 +++++ include/linux/nd.h | 10 ++++++ 10 files changed, 203 insertions(+) diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 4c989bb9a8a0..31d875a91569 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -295,6 +295,14 @@ int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns) if (ndns->force_raw) return -ENODEV; + switch (ndns->claim_class) { + case NVDIMM_CCLASS_NONE: + case NVDIMM_CCLASS_BTT: + break; + default: + return -ENODEV; + } + nvdimm_bus_lock(&ndns->dev); btt_dev = __nd_btt_create(nd_region, 0, NULL, ndns); nvdimm_bus_unlock(&ndns->dev); diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index 7ceb5fa4f2a1..de9b1cce242e 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -184,6 +184,34 @@ ssize_t nd_namespace_store(struct device *dev, } ndns = to_ndns(found); + + switch (ndns->claim_class) { + case NVDIMM_CCLASS_NONE: + break; + case NVDIMM_CCLASS_BTT: + if (!is_nd_btt(dev)) { + len = -EBUSY; + goto out_attach; + } + break; + case NVDIMM_CCLASS_PFN: + if (!is_nd_pfn(dev)) { + len = -EBUSY; + goto out_attach; + } + break; + case NVDIMM_CCLASS_DAX: + if (!is_nd_dax(dev)) { + len = -EBUSY; + goto out_attach; + } + break; + default: + len = -EBUSY; + goto out_attach; + break; + } + if (__nvdimm_namespace_capacity(ndns) < SZ_16M) { dev_dbg(dev, "%s too small to host\n", name); len = -ENXIO; diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 2dee908e4bae..ed0bf174d128 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -699,6 +699,9 @@ static __init int libnvdimm_init(void) rc = nd_region_init(); if (rc) goto err_region; + + nd_label_init(); + return 0; err_region: nvdimm_exit(); diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c index c1b6556aea6e..59f676381ae5 100644 --- a/drivers/nvdimm/dax_devs.c +++ b/drivers/nvdimm/dax_devs.c @@ -111,6 +111,14 @@ int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns) if (ndns->force_raw) return -ENODEV; + switch (ndns->claim_class) { + case NVDIMM_CCLASS_NONE: + case NVDIMM_CCLASS_DAX: + break; + default: + return -ENODEV; + } + nvdimm_bus_lock(&ndns->dev); nd_dax = nd_dax_alloc(nd_region); nd_pfn = &nd_dax->nd_pfn; diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index c503362a03c7..837bf21c8555 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -12,6 +12,7 @@ */ #include #include +#include #include #include #include @@ -19,6 +20,10 @@ #include "label.h" #include "nd.h" +static guid_t nvdimm_btt_guid; +static guid_t nvdimm_pfn_guid; +static guid_t nvdimm_dax_guid; + static u32 best_seq(u32 a, u32 b) { a &= NSINDEX_SEQ_MASK; @@ -565,10 +570,44 @@ static unsigned long nd_label_offset(struct nvdimm_drvdata *ndd, - (unsigned long) to_namespace_index(ndd, 0); } +enum nvdimm_claim_class to_nvdimm_cclass(guid_t *guid) +{ + if (guid_equal(guid, &nvdimm_btt_guid)) + return NVDIMM_CCLASS_BTT; + else if (guid_equal(guid, &nvdimm_pfn_guid)) + return NVDIMM_CCLASS_PFN; + else if (guid_equal(guid, &nvdimm_dax_guid)) + return NVDIMM_CCLASS_DAX; + else if (guid_equal(guid, &guid_null)) + return NVDIMM_CCLASS_NONE; + + return NVDIMM_CCLASS_UNKNOWN; +} + +static const guid_t *to_abstraction_guid(enum nvdimm_claim_class claim_class, + guid_t *target) +{ + if (claim_class == NVDIMM_CCLASS_BTT) + return &nvdimm_btt_guid; + else if (claim_class == NVDIMM_CCLASS_PFN) + return &nvdimm_pfn_guid; + else if (claim_class == NVDIMM_CCLASS_DAX) + return &nvdimm_dax_guid; + else if (claim_class == NVDIMM_CCLASS_UNKNOWN) { + /* + * If we're modifying a namespace for which we don't + * know the claim_class, don't touch the existing guid. + */ + return target; + } else + return &guid_null; +} + static int __pmem_label_update(struct nd_region *nd_region, struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm, int pos) { + struct nd_namespace_common *ndns = &nspm->nsio.common; struct nd_interleave_set *nd_set = nd_region->nd_set; struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); struct nd_label_ent *label_ent, *victim = NULL; @@ -616,6 +655,10 @@ static int __pmem_label_update(struct nd_region *nd_region, nd_label->slot = __cpu_to_le32(slot); if (namespace_label_has(ndd, type_guid)) guid_copy(&nd_label->type_guid, &nd_set->type_guid); + if (namespace_label_has(ndd, abstraction_guid)) + guid_copy(&nd_label->abstraction_guid, + to_abstraction_guid(ndns->claim_class, + &nd_label->abstraction_guid)); if (namespace_label_has(ndd, checksum)) { u64 sum; @@ -711,6 +754,7 @@ static int __blk_label_update(struct nd_region *nd_region, { int i, alloc, victims, nfree, old_num_resources, nlabel, rc = -ENXIO; struct nd_interleave_set *nd_set = nd_region->nd_set; + struct nd_namespace_common *ndns = &nsblk->common; struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); struct nd_namespace_label *nd_label; struct nd_label_ent *label_ent, *e; @@ -848,6 +892,11 @@ static int __blk_label_update(struct nd_region *nd_region, nd_label->slot = __cpu_to_le32(slot); if (namespace_label_has(ndd, type_guid)) guid_copy(&nd_label->type_guid, &nd_set->type_guid); + if (namespace_label_has(ndd, abstraction_guid)) + guid_copy(&nd_label->abstraction_guid, + to_abstraction_guid(ndns->claim_class, + &nd_label->abstraction_guid)); + if (namespace_label_has(ndd, checksum)) { u64 sum; @@ -1101,3 +1150,12 @@ int nd_blk_namespace_label_update(struct nd_region *nd_region, return __blk_label_update(nd_region, nd_mapping, nsblk, count); } + +int __init nd_label_init(void) +{ + WARN_ON(guid_parse(NVDIMM_BTT_GUID, &nvdimm_btt_guid)); + WARN_ON(guid_parse(NVDIMM_PFN_GUID, &nvdimm_pfn_guid)); + WARN_ON(guid_parse(NVDIMM_DAX_GUID, &nvdimm_dax_guid)); + + return 0; +} diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h index f39bfb31f72f..7c8e2cc9e73e 100644 --- a/drivers/nvdimm/label.h +++ b/drivers/nvdimm/label.h @@ -112,6 +112,10 @@ struct nd_namespace_label { __le64 checksum; }; +#define NVDIMM_BTT_GUID "8aed63a2-29a2-4c66-8b12-f05d15d3922a" +#define NVDIMM_PFN_GUID "266400ba-fb9f-4677-bcb0-968f11d0d225" +#define NVDIMM_DAX_GUID "97a86d9c-3cdd-4eda-986f-5068b4f80088" + /** * struct nd_label_id - identifier string for dpa allocation * @id: "{blk|pmem}-" @@ -142,6 +146,7 @@ struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n); u32 nd_label_alloc_slot(struct nvdimm_drvdata *ndd); bool nd_label_free_slot(struct nvdimm_drvdata *ndd, u32 slot); u32 nd_label_nfree(struct nvdimm_drvdata *ndd); +enum nvdimm_claim_class to_nvdimm_cclass(guid_t *guid); struct nd_region; struct nd_namespace_pmem; struct nd_namespace_blk; diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 7aba9a569c8e..f05d9b0672bf 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -1425,6 +1425,69 @@ static ssize_t holder_show(struct device *dev, } static DEVICE_ATTR_RO(holder); +static ssize_t __holder_class_store(struct device *dev, const char *buf) +{ + struct nd_namespace_common *ndns = to_ndns(dev); + + if (dev->driver || ndns->claim) + return -EBUSY; + + if (strcmp(buf, "btt") == 0 || strcmp(buf, "btt\n") == 0) + ndns->claim_class = NVDIMM_CCLASS_BTT; + else if (strcmp(buf, "pfn") == 0 || strcmp(buf, "pfn\n") == 0) + ndns->claim_class = NVDIMM_CCLASS_PFN; + else if (strcmp(buf, "dax") == 0 || strcmp(buf, "dax\n") == 0) + ndns->claim_class = NVDIMM_CCLASS_DAX; + else if (strcmp(buf, "") == 0 || strcmp(buf, "\n") == 0) + ndns->claim_class = NVDIMM_CCLASS_NONE; + else + return -EINVAL; + + return 0; +} + +static ssize_t holder_class_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + ssize_t rc; + + device_lock(dev); + nvdimm_bus_lock(dev); + wait_nvdimm_bus_probe_idle(dev); + rc = __holder_class_store(dev, buf); + if (rc >= 0) + rc = nd_namespace_label_update(nd_region, dev); + dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc); + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc < 0 ? rc : len; +} + +static ssize_t holder_class_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_namespace_common *ndns = to_ndns(dev); + ssize_t rc; + + device_lock(dev); + if (ndns->claim_class == NVDIMM_CCLASS_NONE) + rc = sprintf(buf, "\n"); + else if (ndns->claim_class == NVDIMM_CCLASS_BTT) + rc = sprintf(buf, "btt\n"); + else if (ndns->claim_class == NVDIMM_CCLASS_PFN) + rc = sprintf(buf, "pfn\n"); + else if (ndns->claim_class == NVDIMM_CCLASS_DAX) + rc = sprintf(buf, "dax\n"); + else + rc = sprintf(buf, "\n"); + device_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RW(holder_class); + static ssize_t mode_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1483,6 +1546,7 @@ static struct attribute *nd_namespace_attributes[] = { &dev_attr_force_raw.attr, &dev_attr_sector_size.attr, &dev_attr_dpa_extents.attr, + &dev_attr_holder_class.attr, NULL, }; @@ -1506,6 +1570,7 @@ static umode_t namespace_visible(struct kobject *kobj, if (a == &dev_attr_nstype.attr || a == &dev_attr_size.attr || a == &dev_attr_holder.attr + || a == &dev_attr_holder_class.attr || a == &dev_attr_force_raw.attr || a == &dev_attr_mode.attr) return a->mode; @@ -1827,6 +1892,7 @@ struct device *create_namespace_pmem(struct nd_region *nd_region, /* Calculate total size and populate namespace properties from label0 */ for (i = 0; i < nd_region->ndr_mappings; i++) { struct nd_namespace_label *label0; + struct nvdimm_drvdata *ndd; nd_mapping = &nd_region->mapping[i]; label_ent = list_first_entry_or_null(&nd_mapping->labels, @@ -1847,6 +1913,11 @@ struct device *create_namespace_pmem(struct nd_region *nd_region, nspm->uuid = kmemdup((void __force *) label0->uuid, NSLABEL_UUID_LEN, GFP_KERNEL); nspm->lbasize = __le64_to_cpu(label0->lbasize); + ndd = to_ndd(nd_mapping); + if (namespace_label_has(ndd, abstraction_guid)) + nspm->nsio.common.claim_class + = to_nvdimm_cclass(&label0->abstraction_guid); + } if (!nspm->alt_name || !nspm->uuid) { @@ -2091,6 +2162,9 @@ struct device *create_namespace_blk(struct nd_region *nd_region, nsblk->lbasize = __le64_to_cpu(nd_label->lbasize); nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN, GFP_KERNEL); + if (namespace_label_has(ndd, abstraction_guid)) + nsblk->common.claim_class + = to_nvdimm_cclass(&nd_label->abstraction_guid); if (!nsblk->uuid) goto blk_err; memcpy(name, nd_label->name, NSLABEL_NAME_LEN); diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 17cecb38dfc9..8cabd836df0e 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -235,6 +235,7 @@ ssize_t nd_sector_size_store(struct device *dev, const char *buf, unsigned long *current_lbasize, const unsigned long *supported); int __init nvdimm_init(void); int __init nd_region_init(void); +int __init nd_label_init(void); void nvdimm_exit(void); void nd_region_exit(void); struct nvdimm; diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index a6c403600d19..5e4041276d6f 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -471,6 +471,14 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) if (ndns->force_raw) return -ENODEV; + switch (ndns->claim_class) { + case NVDIMM_CCLASS_NONE: + case NVDIMM_CCLASS_PFN: + break; + default: + return -ENODEV; + } + nvdimm_bus_lock(&ndns->dev); nd_pfn = nd_pfn_alloc(nd_region); pfn_dev = nd_pfn_devinit(nd_pfn, ndns); diff --git a/include/linux/nd.h b/include/linux/nd.h index d8f5023b49ae..96069c543890 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -21,6 +21,14 @@ enum nvdimm_event { NVDIMM_REVALIDATE_POISON, }; +enum nvdimm_claim_class { + NVDIMM_CCLASS_NONE, + NVDIMM_CCLASS_BTT, + NVDIMM_CCLASS_PFN, + NVDIMM_CCLASS_DAX, + NVDIMM_CCLASS_UNKNOWN, +}; + struct nd_device_driver { struct device_driver drv; unsigned long type; @@ -41,12 +49,14 @@ static inline struct nd_device_driver *to_nd_device_driver( * @force_raw: ignore other personalities for the namespace (e.g. btt) * @dev: device model node * @claim: when set a another personality has taken ownership of the namespace + * @claim_class: restrict claim type to a given class * @rw_bytes: access the raw namespace capacity with byte-aligned transfers */ struct nd_namespace_common { int force_raw; struct device dev; struct device *claim; + enum nvdimm_claim_class claim_class; int (*rw_bytes)(struct nd_namespace_common *, resource_size_t offset, void *buf, size_t size, int rw, unsigned long flags); }; -- cgit v1.2.3 From 8990cdf10cf50dc68aaf5a8479b04304d05f1581 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 7 Jun 2017 10:19:46 -0700 Subject: libnvdimm, label: switch to using v1.2 labels by default The rules for which version of the label specification are in effect at any given point in time are as follows: 1/ If a DIMM has an existing / valid index block then the version specified is used regardless if it is a previous version. 2/ By default when the kernel is initializing new index blocks the latest specification version (v1.2 at time of writing) is used. 3/ An environment that wants to force create v1.1 label-sets must arrange for userspace to disable all active regions / namespaces / dimms and write a valid set of v1.1 index blocks to the dimms. Signed-off-by: Dan Williams --- drivers/nvdimm/label.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 837bf21c8555..235f2089fab2 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -222,9 +222,10 @@ int nd_label_validate(struct nvdimm_drvdata *ndd) * need to know the size of the labels, and we can't trust the * size of the labels until we validate the index blocks. * Resolve this dependency loop by probing for known label - * sizes. + * sizes, but default to v1.2 256-byte namespace labels if + * discovery fails. */ - int label_size[] = { 256, 128 }; + int label_size[] = { 128, 256 }; int i, rc; for (i = 0; i < ARRAY_SIZE(label_size); i++) { @@ -532,7 +533,10 @@ static int nd_label_write_index(struct nvdimm_drvdata *ndd, int index, u32 seq, nsindex->labeloff = __cpu_to_le64(offset); nsindex->nslot = __cpu_to_le32(nslot); nsindex->major = __cpu_to_le16(1); - nsindex->minor = __cpu_to_le16(1); + if (sizeof_namespace_label(ndd) < 256) + nsindex->minor = __cpu_to_le16(1); + else + nsindex->minor = __cpu_to_le16(2); nsindex->checksum = __cpu_to_le64(0); if (flags & ND_NSINDEX_INIT) { unsigned long *free = (unsigned long *) nsindex->free; -- cgit v1.2.3 From a117699c6c4a4b1b4e90ed51e393590986567cb4 Mon Sep 17 00:00:00 2001 From: Yasunori Goto Date: Thu, 15 Jun 2017 14:04:16 +0900 Subject: tools/testing/nvdimm: fix nfit_test buffer overflow The root cause of panic is the num_pm of nfit_test1 is wrong. Though 1 is specified for num_pm at nfit_test_init(), it must be 2, because nfit_test1->spa_set[] array has 2 elements. Since the array is smaller than expected, the driver breaks other area. (it is often the link list of devres). As a result, panic occurs like the following example. CPU: 4 PID: 2233 Comm: lt-libndctl Tainted: G O 4.12.0-rc1+ #12 RIP: 0010:__list_del_entry_valid+0x6c/0xa0 Call Trace: release_nodes+0x76/0x260 devres_release_all+0x3c/0x50 device_release_driver_internal+0x159/0x200 device_release_driver+0x12/0x20 bus_remove_device+0xfd/0x170 device_del+0x1e8/0x330 platform_device_del+0x28/0x90 platform_device_unregister+0x12/0x30 nfit_test_exit+0x2a/0x93b [nfit_test] Cc: Signed-off-by: Yasunori Goto Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 28859da78edf..4c2fa98ef39d 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -1943,7 +1943,7 @@ static __init int nfit_test_init(void) nfit_test->setup = nfit_test0_setup; break; case 1: - nfit_test->num_pm = 1; + nfit_test->num_pm = 2; nfit_test->dcr_idx = NUM_DCR; nfit_test->num_dcr = 2; nfit_test->alloc = nfit_test1_alloc; -- cgit v1.2.3 From 975750a98c26769fe54785579f4b26c961a7a6f4 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Mon, 12 Jun 2017 16:25:11 -0600 Subject: libnvdimm, pmem: Add sysfs notifications to badblocks Sysfs "badblocks" information may be updated during run-time that: - MCE, SCI, and sysfs "scrub" may add new bad blocks - Writes and ioctl() may clear bad blocks Add support to send sysfs notifications to sysfs "badblocks" file under region and pmem directories when their badblocks information is re-evaluated (but is not necessarily changed) during run-time. Signed-off-by: Toshi Kani Cc: Vishal Verma Cc: Linda Knippers Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 3 +++ drivers/nvdimm/nd.h | 1 + drivers/nvdimm/pmem.c | 14 ++++++++++++++ drivers/nvdimm/pmem.h | 1 + drivers/nvdimm/region.c | 12 ++++++++++-- 5 files changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index e9361bffe5ee..63ce50d9c1c5 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -198,6 +198,9 @@ static int nvdimm_clear_badblocks_region(struct device *dev, void *data) sector = (ctx->phys - nd_region->ndr_start) / 512; badblocks_clear(&nd_region->bb, sector, ctx->cleared / 512); + if (nd_region->bb_state) + sysfs_notify_dirent(nd_region->bb_state); + return 0; } diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 8cabd836df0e..e802c877d783 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -161,6 +161,7 @@ struct nd_region { u64 ndr_start; int id, num_lanes, ro, numa_node; void *provider_data; + struct kernfs_node *bb_state; struct badblocks bb; struct nd_interleave_set *nd_set; struct nd_percpu_lane __percpu *lane; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 5c45e178bd4a..34189a145ac6 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -68,6 +68,8 @@ static int pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, (unsigned long long) sector, cleared, cleared > 1 ? "s" : ""); badblocks_clear(&pmem->bb, sector, cleared); + if (pmem->bb_state) + sysfs_notify_dirent(pmem->bb_state); } invalidate_pmem(pmem->virt_addr + offset, len); @@ -378,6 +380,13 @@ static int pmem_attach_disk(struct device *dev, revalidate_disk(disk); + pmem->bb_state = sysfs_get_dirent(disk_to_dev(disk)->kobj.sd, + "badblocks"); + if (pmem->bb_state) + sysfs_put(pmem->bb_state); + else + dev_warn(dev, "sysfs_get_dirent 'badblocks' failed\n"); + return 0; } @@ -429,6 +438,7 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) struct nd_namespace_io *nsio; struct resource res; struct badblocks *bb; + struct kernfs_node *bb_state; if (event != NVDIMM_REVALIDATE_POISON) return; @@ -440,11 +450,13 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) nd_region = to_nd_region(ndns->dev.parent); nsio = to_nd_namespace_io(&ndns->dev); bb = &nsio->bb; + bb_state = NULL; } else { struct pmem_device *pmem = dev_get_drvdata(dev); nd_region = to_region(pmem); bb = &pmem->bb; + bb_state = pmem->bb_state; if (is_nd_pfn(dev)) { struct nd_pfn *nd_pfn = to_nd_pfn(dev); @@ -464,6 +476,8 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) res.start = nsio->res.start + offset; res.end = nsio->res.end - end_trunc; nvdimm_badblocks_populate(nd_region, bb, &res); + if (bb_state) + sysfs_notify_dirent(bb_state); } MODULE_ALIAS("pmem"); diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h index 7f4dbd72a90a..c5917f040fa7 100644 --- a/drivers/nvdimm/pmem.h +++ b/drivers/nvdimm/pmem.h @@ -17,6 +17,7 @@ struct pmem_device { size_t size; /* trim size when namespace capacity has been section aligned */ u32 pfn_pad; + struct kernfs_node *bb_state; struct badblocks bb; struct dax_device *dax_dev; struct gendisk *disk; diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index 869a886c292e..ca94029d20b3 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -58,10 +58,16 @@ static int nd_region_probe(struct device *dev) if (devm_init_badblocks(dev, &nd_region->bb)) return -ENODEV; + nd_region->bb_state = sysfs_get_dirent(nd_region->dev.kobj.sd, + "badblocks"); + if (nd_region->bb_state) + sysfs_put(nd_region->bb_state); + else + dev_warn(&nd_region->dev, + "sysfs_get_dirent 'badblocks' failed\n"); ndr_res.start = nd_region->ndr_start; ndr_res.end = nd_region->ndr_start + nd_region->ndr_size - 1; - nvdimm_badblocks_populate(nd_region, - &nd_region->bb, &ndr_res); + nvdimm_badblocks_populate(nd_region, &nd_region->bb, &ndr_res); } nd_region->btt_seed = nd_btt_create(nd_region); @@ -126,6 +132,8 @@ static void nd_region_notify(struct device *dev, enum nvdimm_event event) nd_region->ndr_size - 1; nvdimm_badblocks_populate(nd_region, &nd_region->bb, &res); + if (nd_region->bb_state) + sysfs_notify_dirent(nd_region->bb_state); } } device_for_each_child(dev, &event, child_notify); -- cgit v1.2.3 From fec53774fd043038e57ac737d90e8d58975d6e92 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 21:56:49 -0700 Subject: filesystem-dax: convert to dax_copy_from_iter() Now that all possible providers of the dax_operations copy_from_iter method are implemented, switch filesytem-dax to call the driver rather than copy_to_iter_pmem. Reviewed-by: Jan Kara Signed-off-by: Dan Williams --- arch/x86/include/asm/pmem.h | 50 --------------------------------------------- fs/dax.c | 3 ++- include/linux/pmem.h | 24 ---------------------- 3 files changed, 2 insertions(+), 75 deletions(-) diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h index 0ff8fe71b255..60e8edbe0205 100644 --- a/arch/x86/include/asm/pmem.h +++ b/arch/x86/include/asm/pmem.h @@ -65,56 +65,6 @@ static inline void arch_wb_cache_pmem(void *addr, size_t size) clwb(p); } -/** - * arch_copy_from_iter_pmem - copy data from an iterator to PMEM - * @addr: PMEM destination address - * @bytes: number of bytes to copy - * @i: iterator with source data - * - * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'. - */ -static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes, - struct iov_iter *i) -{ - size_t len; - - /* TODO: skip the write-back by always using non-temporal stores */ - len = copy_from_iter_nocache(addr, bytes, i); - - /* - * In the iovec case on x86_64 copy_from_iter_nocache() uses - * non-temporal stores for the bulk of the transfer, but we need - * to manually flush if the transfer is unaligned. A cached - * memory copy is used when destination or size is not naturally - * aligned. That is: - * - Require 8-byte alignment when size is 8 bytes or larger. - * - Require 4-byte alignment when size is 4 bytes. - * - * In the non-iovec case the entire destination needs to be - * flushed. - */ - if (iter_is_iovec(i)) { - unsigned long flushed, dest = (unsigned long) addr; - - if (bytes < 8) { - if (!IS_ALIGNED(dest, 4) || (bytes != 4)) - arch_wb_cache_pmem(addr, bytes); - } else { - if (!IS_ALIGNED(dest, 8)) { - dest = ALIGN(dest, boot_cpu_data.x86_clflush_size); - arch_wb_cache_pmem(addr, 1); - } - - flushed = dest - (unsigned long) addr; - if (bytes > flushed && !IS_ALIGNED(bytes - flushed, 8)) - arch_wb_cache_pmem(addr + bytes - 1, 1); - } - } else - arch_wb_cache_pmem(addr, bytes); - - return len; -} - /** * arch_clear_pmem - zero a PMEM memory range * @addr: virtual start address diff --git a/fs/dax.c b/fs/dax.c index 2a6889b3585f..b459948de427 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1054,7 +1054,8 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, map_len = end - pos; if (iov_iter_rw(iter) == WRITE) - map_len = copy_from_iter_pmem(kaddr, map_len, iter); + map_len = dax_copy_from_iter(dax_dev, pgoff, kaddr, + map_len, iter); else map_len = copy_to_iter(kaddr, map_len, iter); if (map_len <= 0) { diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 71ecf3d46aac..9d542a5600e4 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -31,13 +31,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) BUG(); } -static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes, - struct iov_iter *i) -{ - BUG(); - return 0; -} - static inline void arch_clear_pmem(void *addr, size_t size) { BUG(); @@ -79,23 +72,6 @@ static inline void memcpy_to_pmem(void *dst, const void *src, size_t n) memcpy(dst, src, n); } -/** - * copy_from_iter_pmem - copy data from an iterator to PMEM - * @addr: PMEM destination address - * @bytes: number of bytes to copy - * @i: iterator with source data - * - * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'. - * See blkdev_issue_flush() note for memcpy_to_pmem(). - */ -static inline size_t copy_from_iter_pmem(void *addr, size_t bytes, - struct iov_iter *i) -{ - if (arch_has_pmem_api()) - return arch_copy_from_iter_pmem(addr, bytes, i); - return copy_from_iter_nocache(addr, bytes, i); -} - /** * clear_pmem - zero a PMEM memory range * @addr: virtual start address -- cgit v1.2.3 From 3c1cebff23cdca01c421411e953a9e239f2b9ef9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 12:58:19 -0700 Subject: dax, pmem: introduce an optional 'flush' dax_operation Filesystem-DAX flushes caches whenever it writes to the address returned through dax_direct_access() and when writing back dirty radix entries. That flushing is only required in the pmem case, so add a dax operation to allow pmem to take this extra action, but skip it for other dax capable devices that do not provide a flush routine. An example for this differentiation might be a volatile ram disk where there is no expectation of persistence. In fact the pmem driver itself might front such an address range specified by the NFIT. So, this "no flush" property might be something passed down by the bus / libnvdimm. Cc: Christoph Hellwig Cc: Matthew Wilcox Cc: Ross Zwisler Reviewed-by: Jan Kara Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 7 +++++++ include/linux/dax.h | 2 ++ 2 files changed, 9 insertions(+) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 2f3aefe565c6..823b07774244 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -242,9 +242,16 @@ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, return copy_from_iter_flushcache(addr, bytes, i); } +static void pmem_dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, + void *addr, size_t size) +{ + wb_cache_pmem(addr, size); +} + static const struct dax_operations pmem_dax_ops = { .direct_access = pmem_dax_direct_access, .copy_from_iter = pmem_copy_from_iter, + .flush = pmem_dax_flush, }; static void pmem_release_queue(void *q) diff --git a/include/linux/dax.h b/include/linux/dax.h index 28e398f8c59e..407dd3ff6e54 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -19,6 +19,8 @@ struct dax_operations { /* copy_from_iter: dax-driver override for default copy_from_iter */ size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, struct iov_iter *); + /* flush: optional driver-specific cache management after writes */ + void (*flush)(struct dax_device *, pgoff_t, void *, size_t); }; #if IS_ENABLED(CONFIG_DAX) -- cgit v1.2.3 From abebfbe2f7315dd3ec9a0c69596a76e32beb5749 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 13:02:52 -0700 Subject: dm: add ->flush() dax operation support Allow device-mapper to route flush operations to the per-target implementation. In order for the device stacking to work we need a dax_dev and a pgoff relative to that device. This gives each layer of the stack the information it needs to look up the operation pointer for the next level. This conceptually allows for an array of mixed device drivers with varying flush implementations. Reviewed-by: Toshi Kani Reviewed-by: Mike Snitzer Signed-off-by: Dan Williams --- drivers/dax/super.c | 11 +++++++++++ drivers/md/dm-linear.c | 15 +++++++++++++++ drivers/md/dm-stripe.c | 20 ++++++++++++++++++++ drivers/md/dm.c | 19 +++++++++++++++++++ include/linux/dax.h | 2 ++ include/linux/device-mapper.h | 3 +++ 6 files changed, 70 insertions(+) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index dd299e55f65d..b7729e4d351a 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -185,6 +185,17 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, } EXPORT_SYMBOL_GPL(dax_copy_from_iter); +void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, + size_t size) +{ + if (!dax_alive(dax_dev)) + return; + + if (dax_dev->ops->flush) + dax_dev->ops->flush(dax_dev, pgoff, addr, size); +} +EXPORT_SYMBOL_GPL(dax_flush); + bool dax_alive(struct dax_device *dax_dev) { lockdep_assert_held(&dax_srcu); diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 0841ec1bfbad..25e661974319 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -173,6 +173,20 @@ static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } +static void linear_dax_flush(struct dm_target *ti, pgoff_t pgoff, void *addr, + size_t size) +{ + struct linear_c *lc = ti->private; + struct block_device *bdev = lc->dev->bdev; + struct dax_device *dax_dev = lc->dev->dax_dev; + sector_t dev_sector, sector = pgoff * PAGE_SECTORS; + + dev_sector = linear_map_sector(ti, sector); + if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(size, PAGE_SIZE), &pgoff)) + return; + dax_flush(dax_dev, pgoff, addr, size); +} + static struct target_type linear_target = { .name = "linear", .version = {1, 3, 0}, @@ -186,6 +200,7 @@ static struct target_type linear_target = { .iterate_devices = linear_iterate_devices, .direct_access = linear_dax_direct_access, .dax_copy_from_iter = linear_dax_copy_from_iter, + .dax_flush = linear_dax_flush, }; int __init dm_linear_init(void) diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 1ef914f9ca72..8e73517967b6 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -351,6 +351,25 @@ static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } +static void stripe_dax_flush(struct dm_target *ti, pgoff_t pgoff, void *addr, + size_t size) +{ + sector_t dev_sector, sector = pgoff * PAGE_SECTORS; + struct stripe_c *sc = ti->private; + struct dax_device *dax_dev; + struct block_device *bdev; + uint32_t stripe; + + stripe_map_sector(sc, sector, &stripe, &dev_sector); + dev_sector += sc->stripe[stripe].physical_start; + dax_dev = sc->stripe[stripe].dev->dax_dev; + bdev = sc->stripe[stripe].dev->bdev; + + if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(size, PAGE_SIZE), &pgoff)) + return; + dax_flush(dax_dev, pgoff, addr, size); +} + /* * Stripe status: * @@ -471,6 +490,7 @@ static struct target_type stripe_target = { .io_hints = stripe_io_hints, .direct_access = stripe_dax_direct_access, .dax_copy_from_iter = stripe_dax_copy_from_iter, + .dax_flush = stripe_dax_flush, }; int __init dm_stripe_init(void) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 7faaceb52819..09b3efdc8abf 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -994,6 +994,24 @@ static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, return ret; } +static void dm_dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, + size_t size) +{ + struct mapped_device *md = dax_get_private(dax_dev); + sector_t sector = pgoff * PAGE_SECTORS; + struct dm_target *ti; + int srcu_idx; + + ti = dm_dax_get_live_target(md, sector, &srcu_idx); + + if (!ti) + goto out; + if (ti->type->dax_flush) + ti->type->dax_flush(ti, pgoff, addr, size); + out: + dm_put_live_table(md, srcu_idx); +} + /* * A target may call dm_accept_partial_bio only from the map routine. It is * allowed for all bio types except REQ_PREFLUSH. @@ -2885,6 +2903,7 @@ static const struct block_device_operations dm_blk_dops = { static const struct dax_operations dm_dax_ops = { .direct_access = dm_dax_direct_access, .copy_from_iter = dm_dax_copy_from_iter, + .flush = dm_dax_flush, }; /* diff --git a/include/linux/dax.h b/include/linux/dax.h index 407dd3ff6e54..1f6b6072af64 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -82,6 +82,8 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); +void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, + size_t size); /* * We use lowest available bit in exceptional entry for locking, one bit for diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 11c8a0a92f9c..67bfe8ddcb32 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -134,6 +134,8 @@ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); typedef size_t (*dm_dax_copy_from_iter_fn)(struct dm_target *ti, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); +typedef void (*dm_dax_flush_fn)(struct dm_target *ti, pgoff_t pgoff, void *addr, + size_t size); #define PAGE_SECTORS (PAGE_SIZE / 512) void dm_error(const char *message); @@ -184,6 +186,7 @@ struct target_type { dm_io_hints_fn io_hints; dm_dax_direct_access_fn direct_access; dm_dax_copy_from_iter_fn dax_copy_from_iter; + dm_dax_flush_fn dax_flush; /* For internal device-mapper use. */ struct list_head list; -- cgit v1.2.3 From 6318770a7d43a4166b6a8e3cef62e4a019d3c95e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 13:07:46 -0700 Subject: filesystem-dax: convert to dax_flush() Filesystem-DAX flushes caches whenever it writes to the address returned through dax_direct_access() and when writing back dirty radix entries. That flushing is only required in the pmem case, so the dax_flush() helper skips cache management work when the underlying driver does not specify a flush method. We still do all the dirty tracking since the radix entry will already be there for locking purposes. However, the work to clean the entry will be a nop for some dax drivers. Cc: Jeff Moyer Cc: Christoph Hellwig Cc: Matthew Wilcox Cc: Ross Zwisler Reviewed-by: Jan Kara Signed-off-by: Dan Williams --- fs/dax.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dax.c b/fs/dax.c index b459948de427..0933fc460ada 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -784,7 +784,7 @@ static int dax_writeback_one(struct block_device *bdev, } dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(pfn)); - wb_cache_pmem(kaddr, size); + dax_flush(dax_dev, pgoff, kaddr, size); /* * After we have flushed the cache, we can clear the dirty tag. There * cannot be new dirty data in the pfn after the flush has completed as -- cgit v1.2.3 From 81f558701ae8d5677635118751b1b4043094c7e9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 13:12:20 -0700 Subject: x86, dax: replace clear_pmem() with open coded memset + dax_ops->flush The clear_pmem() helper simply combines a memset() plus a cache flush. Now that the flush routine is optionally provided by the dax device driver we can avoid unnecessary cache management on dax devices fronting volatile memory. With clear_pmem() gone we can follow on with a patch to make pmem cache management completely defined within the pmem driver. Cc: Cc: Jeff Moyer Cc: Ingo Molnar Cc: Christoph Hellwig Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Matthew Wilcox Cc: Ross Zwisler Reviewed-by: Jan Kara Signed-off-by: Dan Williams --- arch/x86/include/asm/pmem.h | 13 ------------- fs/dax.c | 3 ++- include/linux/pmem.h | 21 --------------------- 3 files changed, 2 insertions(+), 35 deletions(-) diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h index 60e8edbe0205..f4c119d253f3 100644 --- a/arch/x86/include/asm/pmem.h +++ b/arch/x86/include/asm/pmem.h @@ -65,19 +65,6 @@ static inline void arch_wb_cache_pmem(void *addr, size_t size) clwb(p); } -/** - * arch_clear_pmem - zero a PMEM memory range - * @addr: virtual start address - * @size: number of bytes to zero - * - * Write zeros into the memory range starting at 'addr' for 'size' bytes. - */ -static inline void arch_clear_pmem(void *addr, size_t size) -{ - memset(addr, 0, size); - arch_wb_cache_pmem(addr, size); -} - static inline void arch_invalidate_pmem(void *addr, size_t size) { clflush_cache_range(addr, size); diff --git a/fs/dax.c b/fs/dax.c index 0933fc460ada..554b8e7d921c 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -975,7 +975,8 @@ int __dax_zero_page_range(struct block_device *bdev, dax_read_unlock(id); return rc; } - clear_pmem(kaddr + offset, size); + memset(kaddr + offset, 0, size); + dax_flush(dax_dev, pgoff, kaddr + offset, size); dax_read_unlock(id); } return 0; diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 9d542a5600e4..772bd02a5b52 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -31,11 +31,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) BUG(); } -static inline void arch_clear_pmem(void *addr, size_t size) -{ - BUG(); -} - static inline void arch_wb_cache_pmem(void *addr, size_t size) { BUG(); @@ -72,22 +67,6 @@ static inline void memcpy_to_pmem(void *dst, const void *src, size_t n) memcpy(dst, src, n); } -/** - * clear_pmem - zero a PMEM memory range - * @addr: virtual start address - * @size: number of bytes to zero - * - * Write zeros into the memory range starting at 'addr' for 'size' bytes. - * See blkdev_issue_flush() note for memcpy_to_pmem(). - */ -static inline void clear_pmem(void *addr, size_t size) -{ - if (arch_has_pmem_api()) - arch_clear_pmem(addr, size); - else - memset(addr, 0, size); -} - /** * invalidate_pmem - flush a pmem range from the cache hierarchy * @addr: virtual start address -- cgit v1.2.3 From 4e4f00a9b51a1c52ebdd728a1caeb3b9fe48c39d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 22:40:44 -0700 Subject: x86, dax, libnvdimm: remove wb_cache_pmem() indirection With all handling of the CONFIG_ARCH_HAS_PMEM_API case being moved to libnvdimm and the pmem driver directly we do not need to provide global wrappers and fallbacks in the CONFIG_ARCH_HAS_PMEM_API=n case. The pmem driver will simply not link to arch_wb_cache_pmem() in that case. Same as before, pmem flushing is only defined for x86_64, via clean_cache_range(), but it is straightforward to add other archs in the future. arch_wb_cache_pmem() is an exported function since the pmem module needs to find it, but it is privately declared in drivers/nvdimm/pmem.h because there are no consumers outside of the pmem driver. Cc: Cc: Jan Kara Cc: Jeff Moyer Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Oliver O'Halloran Cc: Matthew Wilcox Cc: Ross Zwisler Suggested-by: Christoph Hellwig Signed-off-by: Dan Williams --- arch/x86/include/asm/pmem.h | 21 --------------------- arch/x86/lib/usercopy_64.c | 6 ++++++ drivers/nvdimm/pmem.c | 2 +- drivers/nvdimm/pmem.h | 8 ++++++++ include/linux/pmem.h | 19 ------------------- 5 files changed, 15 insertions(+), 41 deletions(-) diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h index f4c119d253f3..4759a179aa52 100644 --- a/arch/x86/include/asm/pmem.h +++ b/arch/x86/include/asm/pmem.h @@ -44,27 +44,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) BUG(); } -/** - * arch_wb_cache_pmem - write back a cache range with CLWB - * @vaddr: virtual start address - * @size: number of bytes to write back - * - * Write back a cache range using the CLWB (cache line write back) - * instruction. Note that @size is internally rounded up to be cache - * line size aligned. - */ -static inline void arch_wb_cache_pmem(void *addr, size_t size) -{ - u16 x86_clflush_size = boot_cpu_data.x86_clflush_size; - unsigned long clflush_mask = x86_clflush_size - 1; - void *vend = addr + size; - void *p; - - for (p = (void *)((unsigned long)addr & ~clflush_mask); - p < vend; p += x86_clflush_size) - clwb(p); -} - static inline void arch_invalidate_pmem(void *addr, size_t size) { clflush_cache_range(addr, size); diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index f42d2fd86ca3..75d3776123cc 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -97,6 +97,12 @@ static void clean_cache_range(void *addr, size_t size) clwb(p); } +void arch_wb_cache_pmem(void *addr, size_t size) +{ + clean_cache_range(addr, size); +} +EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); + long __copy_user_flushcache(void *dst, const void __user *src, unsigned size) { unsigned long flushed, dest = (unsigned long) dst; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 823b07774244..3b87702d46bb 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -245,7 +245,7 @@ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, static void pmem_dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t size) { - wb_cache_pmem(addr, size); + arch_wb_cache_pmem(addr, size); } static const struct dax_operations pmem_dax_ops = { diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h index 7f4dbd72a90a..c4b3371c7f88 100644 --- a/drivers/nvdimm/pmem.h +++ b/drivers/nvdimm/pmem.h @@ -5,6 +5,14 @@ #include #include +#ifdef CONFIG_ARCH_HAS_PMEM_API +void arch_wb_cache_pmem(void *addr, size_t size); +#else +static inline void arch_wb_cache_pmem(void *addr, size_t size) +{ +} +#endif + /* this definition is in it's own header for tools/testing/nvdimm to consume */ struct pmem_device { /* One contiguous memory region per device */ diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 772bd02a5b52..33ae761f010a 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -31,11 +31,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) BUG(); } -static inline void arch_wb_cache_pmem(void *addr, size_t size) -{ - BUG(); -} - static inline void arch_invalidate_pmem(void *addr, size_t size) { BUG(); @@ -80,18 +75,4 @@ static inline void invalidate_pmem(void *addr, size_t size) if (arch_has_pmem_api()) arch_invalidate_pmem(addr, size); } - -/** - * wb_cache_pmem - write back processor cache for PMEM memory range - * @addr: virtual start address - * @size: number of bytes to write back - * - * Write back the processor cache range starting at 'addr' for 'size' bytes. - * See blkdev_issue_flush() note for memcpy_to_pmem(). - */ -static inline void wb_cache_pmem(void *addr, size_t size) -{ - if (arch_has_pmem_api()) - arch_wb_cache_pmem(addr, size); -} #endif /* __PMEM_H__ */ -- cgit v1.2.3 From 56b47fe6579234e3102f1f28e26fa91fb6c38b3d Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 8 Jun 2017 12:36:57 -0600 Subject: acpi/nfit: Add support of NVDIMM memory error notification in ACPI 6.2 ACPI 6.2 defines a new ACPI notification value to NVDIMM Root Device in Table 5-169. 0x81 Unconsumed Uncorrectable Memory Error Detected Used to pro-actively notify OSPM of uncorrectable memory errors detected (for example a memory scrubbing engine that continuously scans the NVDIMMs memory). This is an optional notification. Only locations that were mapped in to SPA by the platform will generate a notification. Add support of this notification value by initiating an ARS scan. This will find new error locations and add their badblocks information. Link: http://www.uefi.org/sites/default/files/resources/ACPI_6_2.pdf Signed-off-by: Toshi Kani Cc: Rafael J. Wysocki Cc: Vishal Verma Cc: Linda Knippers Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 28 ++++++++++++++++++++++------ drivers/acpi/nfit/nfit.h | 1 + 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index b930d12f636b..facbc6107165 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -3016,7 +3016,7 @@ static int acpi_nfit_remove(struct acpi_device *adev) return 0; } -void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event) +static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle) { struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev); struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL }; @@ -3024,11 +3024,6 @@ void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event) acpi_status status; int ret; - dev_dbg(dev, "%s: event: %d\n", __func__, event); - - if (event != NFIT_NOTIFY_UPDATE) - return; - if (!dev->driver) { /* dev->driver may be null if we're being removed */ dev_dbg(dev, "%s: no driver found for dev\n", __func__); @@ -3065,6 +3060,27 @@ void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event) dev_err(dev, "Invalid _FIT\n"); kfree(buf.pointer); } + +static void acpi_nfit_uc_error_notify(struct device *dev, acpi_handle handle) +{ + struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev); + + acpi_nfit_ars_rescan(acpi_desc); +} + +void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event) +{ + dev_dbg(dev, "%s: event: 0x%x\n", __func__, event); + + switch (event) { + case NFIT_NOTIFY_UPDATE: + return acpi_nfit_update_notify(dev, handle); + case NFIT_NOTIFY_UC_MEMORY_ERROR: + return acpi_nfit_uc_error_notify(dev, handle); + default: + return; + } +} EXPORT_SYMBOL_GPL(__acpi_nfit_notify); static void acpi_nfit_notify(struct acpi_device *adev, u32 event) diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index 29bdd959517f..e3da60b2d686 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -79,6 +79,7 @@ enum { enum nfit_root_notifiers { NFIT_NOTIFY_UPDATE = 0x80, + NFIT_NOTIFY_UC_MEMORY_ERROR = 0x81, }; enum nfit_dimm_notifiers { -- cgit v1.2.3 From f2b612578e163b49661ece2fe01dfafb0e78f545 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 23:00:34 -0700 Subject: x86, libnvdimm, pmem: move arch_invalidate_pmem() to libnvdimm Kill this globally defined wrapper and move to libnvdimm so that we can ultimately remove include/linux/pmem.h and asm/pmem.h. Cc: Cc: Jeff Moyer Cc: Ingo Molnar Cc: Christoph Hellwig Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Matthew Wilcox Cc: Ross Zwisler Reviewed-by: Jan Kara Signed-off-by: Dan Williams --- arch/x86/include/asm/pmem.h | 5 ----- arch/x86/mm/pageattr.c | 6 ++++++ drivers/nvdimm/claim.c | 3 ++- drivers/nvdimm/pmem.c | 2 +- drivers/nvdimm/pmem.h | 4 ++++ include/linux/pmem.h | 19 ------------------- 6 files changed, 13 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h index 4759a179aa52..b61a25a895a7 100644 --- a/arch/x86/include/asm/pmem.h +++ b/arch/x86/include/asm/pmem.h @@ -43,10 +43,5 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) __func__, dst, src, rem)) BUG(); } - -static inline void arch_invalidate_pmem(void *addr, size_t size) -{ - clflush_cache_range(addr, size); -} #endif /* CONFIG_ARCH_HAS_PMEM_API */ #endif /* __ASM_X86_PMEM_H__ */ diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index c8520b2c62d2..757b0bcdf712 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -150,6 +150,12 @@ void clflush_cache_range(void *vaddr, unsigned int size) } EXPORT_SYMBOL_GPL(clflush_cache_range); +void arch_invalidate_pmem(void *addr, size_t size) +{ + clflush_cache_range(addr, size); +} +EXPORT_SYMBOL_GPL(arch_invalidate_pmem); + static void __cpa_flush_all(void *arg) { unsigned long cache = (unsigned long)arg; diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index b8b9c8ca7862..d2e16c0401df 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -14,6 +14,7 @@ #include #include #include "nd-core.h" +#include "pmem.h" #include "pfn.h" #include "btt.h" #include "nd.h" @@ -272,7 +273,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, cleared /= 512; badblocks_clear(&nsio->bb, sector, cleared); } - invalidate_pmem(nsio->addr + offset, size); + arch_invalidate_pmem(nsio->addr + offset, size); } else rc = -EIO; } diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 3b87702d46bb..68737bc68a07 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -71,7 +71,7 @@ static int pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, badblocks_clear(&pmem->bb, sector, cleared); } - invalidate_pmem(pmem->virt_addr + offset, len); + arch_invalidate_pmem(pmem->virt_addr + offset, len); return rc; } diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h index c4b3371c7f88..00005900c1b7 100644 --- a/drivers/nvdimm/pmem.h +++ b/drivers/nvdimm/pmem.h @@ -7,10 +7,14 @@ #ifdef CONFIG_ARCH_HAS_PMEM_API void arch_wb_cache_pmem(void *addr, size_t size); +void arch_invalidate_pmem(void *addr, size_t size); #else static inline void arch_wb_cache_pmem(void *addr, size_t size) { } +static inline void arch_invalidate_pmem(void *addr, size_t size) +{ +} #endif /* this definition is in it's own header for tools/testing/nvdimm to consume */ diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 33ae761f010a..559c00848583 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -30,11 +30,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) { BUG(); } - -static inline void arch_invalidate_pmem(void *addr, size_t size) -{ - BUG(); -} #endif static inline bool arch_has_pmem_api(void) @@ -61,18 +56,4 @@ static inline void memcpy_to_pmem(void *dst, const void *src, size_t n) else memcpy(dst, src, n); } - -/** - * invalidate_pmem - flush a pmem range from the cache hierarchy - * @addr: virtual start address - * @size: bytes to invalidate (internally aligned to cache line size) - * - * For platforms that support clearing poison this flushes any poisoned - * ranges out of the cache - */ -static inline void invalidate_pmem(void *addr, size_t size) -{ - if (arch_has_pmem_api()) - arch_invalidate_pmem(addr, size); -} #endif /* __PMEM_H__ */ -- cgit v1.2.3 From ca6a4657e5420dec727256717e905ebc3c751352 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 13 Jan 2017 20:36:58 -0800 Subject: x86, libnvdimm, pmem: remove global pmem api Now that all callers of the pmem api have been converted to dax helpers that call back to the pmem driver, we can remove include/linux/pmem.h and asm/pmem.h. Cc: Cc: Jeff Moyer Cc: Ingo Molnar Cc: Christoph Hellwig Cc: Toshi Kani Cc: Oliver O'Halloran Cc: Ross Zwisler Reviewed-by: Jan Kara Signed-off-by: Dan Williams --- MAINTAINERS | 4 +-- arch/x86/include/asm/pmem.h | 47 -------------------------------- drivers/acpi/nfit/core.c | 3 +-- drivers/nvdimm/claim.c | 1 - drivers/nvdimm/dimm_devs.c | 8 ++++++ drivers/nvdimm/namespace_devs.c | 6 +---- drivers/nvdimm/pmem.c | 1 - drivers/nvdimm/pmem.h | 2 ++ drivers/nvdimm/region_devs.c | 1 - fs/dax.c | 1 - include/linux/libnvdimm.h | 1 + include/linux/pmem.h | 59 ----------------------------------------- 12 files changed, 14 insertions(+), 120 deletions(-) delete mode 100644 arch/x86/include/asm/pmem.h delete mode 100644 include/linux/pmem.h diff --git a/MAINTAINERS b/MAINTAINERS index 7a28acd7f525..1636ce420251 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7592,9 +7592,7 @@ M: Ross Zwisler L: linux-nvdimm@lists.01.org Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ S: Supported -F: drivers/nvdimm/pmem.c -F: include/linux/pmem.h -F: arch/*/include/asm/pmem.h +F: drivers/nvdimm/pmem* LIGHTNVM PLATFORM SUPPORT M: Matias Bjorling diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h deleted file mode 100644 index b61a25a895a7..000000000000 --- a/arch/x86/include/asm/pmem.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright(c) 2015 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - */ -#ifndef __ASM_X86_PMEM_H__ -#define __ASM_X86_PMEM_H__ - -#include -#include -#include -#include - -#ifdef CONFIG_ARCH_HAS_PMEM_API -/** - * arch_memcpy_to_pmem - copy data to persistent memory - * @dst: destination buffer for the copy - * @src: source buffer for the copy - * @n: length of the copy in bytes - * - * Copy data to persistent memory media via non-temporal stores so that - * a subsequent pmem driver flush operation will drain posted write queues. - */ -static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) -{ - int rem; - - /* - * We are copying between two kernel buffers, if - * __copy_from_user_inatomic_nocache() returns an error (page - * fault) we would have already reported a general protection fault - * before the WARN+BUG. - */ - rem = __copy_from_user_inatomic_nocache(dst, (void __user *) src, n); - if (WARN(rem, "%s: fault copying %p <- %p unwritten: %d\n", - __func__, dst, src, rem)) - BUG(); -} -#endif /* CONFIG_ARCH_HAS_PMEM_API */ -#endif /* __ASM_X86_PMEM_H__ */ diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index cbd5596e7562..ac2436538b7e 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -1956,7 +1955,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, nfit_blk->bdw_offset = nfit_mem->bdw->offset; mmio = &nfit_blk->mmio[BDW]; mmio->addr.base = devm_nvdimm_memremap(dev, nfit_mem->spa_bdw->address, - nfit_mem->spa_bdw->length, ARCH_MEMREMAP_PMEM); + nfit_mem->spa_bdw->length, nd_blk_memremap_flags(ndbr)); if (!mmio->addr.base) { dev_dbg(dev, "%s: %s failed to map bdw\n", __func__, nvdimm_name(nvdimm)); diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index d2e16c0401df..3beedf173902 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -12,7 +12,6 @@ */ #include #include -#include #include "nd-core.h" #include "pmem.h" #include "pfn.h" diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 9852a3355509..6a1e7a3c0c17 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -20,6 +20,7 @@ #include #include "nd-core.h" #include "label.h" +#include "pmem.h" #include "nd.h" static DEFINE_IDA(dimm_ida); @@ -235,6 +236,13 @@ struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr) } EXPORT_SYMBOL_GPL(nd_blk_region_to_dimm); +unsigned long nd_blk_memremap_flags(struct nd_blk_region *ndbr) +{ + /* pmem mapping properties are private to libnvdimm */ + return ARCH_MEMREMAP_PMEM; +} +EXPORT_SYMBOL_GPL(nd_blk_memremap_flags); + struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping) { struct nvdimm *nvdimm = nd_mapping->nvdimm; diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 2f9dfbd2dbec..4e9261ef8a95 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -14,10 +14,10 @@ #include #include #include -#include #include #include #include "nd-core.h" +#include "pmem.h" #include "nd.h" static void namespace_io_release(struct device *dev) @@ -155,11 +155,7 @@ bool pmem_should_map_pages(struct device *dev) IORES_DESC_NONE) == REGION_MIXED) return false; -#ifdef ARCH_MEMREMAP_PMEM return ARCH_MEMREMAP_PMEM == MEMREMAP_WB; -#else - return false; -#endif } EXPORT_SYMBOL(pmem_should_map_pages); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 68737bc68a07..06f6c27ec1e9 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h index 00005900c1b7..fce248a1fc87 100644 --- a/drivers/nvdimm/pmem.h +++ b/drivers/nvdimm/pmem.h @@ -6,9 +6,11 @@ #include #ifdef CONFIG_ARCH_HAS_PMEM_API +#define ARCH_MEMREMAP_PMEM MEMREMAP_WB void arch_wb_cache_pmem(void *addr, size_t size); void arch_invalidate_pmem(void *addr, size_t size); #else +#define ARCH_MEMREMAP_PMEM MEMREMAP_WT static inline void arch_wb_cache_pmem(void *addr, size_t size) { } diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 985b0e11bd73..3c06a6ea6958 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/dax.c b/fs/dax.c index 554b8e7d921c..6d8699feae2e 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 6c807017128d..b2f659bd661d 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -159,6 +159,7 @@ void *nd_region_provider_data(struct nd_region *nd_region); void *nd_blk_region_provider_data(struct nd_blk_region *ndbr); void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data); struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr); +unsigned long nd_blk_memremap_flags(struct nd_blk_region *ndbr); unsigned int nd_region_acquire_lane(struct nd_region *nd_region); void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane); u64 nd_fletcher64(void *addr, size_t len, bool le); diff --git a/include/linux/pmem.h b/include/linux/pmem.h deleted file mode 100644 index 559c00848583..000000000000 --- a/include/linux/pmem.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright(c) 2015 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - */ -#ifndef __PMEM_H__ -#define __PMEM_H__ - -#include -#include - -#ifdef CONFIG_ARCH_HAS_PMEM_API -#define ARCH_MEMREMAP_PMEM MEMREMAP_WB -#include -#else -#define ARCH_MEMREMAP_PMEM MEMREMAP_WT -/* - * These are simply here to enable compilation, all call sites gate - * calling these symbols with arch_has_pmem_api() and redirect to the - * implementation in asm/pmem.h. - */ -static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) -{ - BUG(); -} -#endif - -static inline bool arch_has_pmem_api(void) -{ - return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); -} - -/** - * memcpy_to_pmem - copy data to persistent memory - * @dst: destination buffer for the copy - * @src: source buffer for the copy - * @n: length of the copy in bytes - * - * Perform a memory copy that results in the destination of the copy - * being effectively evicted from, or never written to, the processor - * cache hierarchy after the copy completes. After memcpy_to_pmem() - * data may still reside in cpu or platform buffers, so this operation - * must be followed by a blkdev_issue_flush() on the pmem block device. - */ -static inline void memcpy_to_pmem(void *dst, const void *src, size_t n) -{ - if (arch_has_pmem_api()) - arch_memcpy_to_pmem(dst, src, n); - else - memcpy(dst, src, n); -} -#endif /* __PMEM_H__ */ -- cgit v1.2.3 From c00b396ef782cb2296200d868a0013d8ca1d615e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 23:11:57 -0700 Subject: libnvdimm, pmem: fix persistence warning The pmem driver assumes if platform firmware describes the memory devices associated with a persistent memory range and CONFIG_ARCH_HAS_PMEM_API=y that it has all the mechanism necessary to flush data to a power-fail safe zone. We warn if the firmware does not describe memory devices, but we also need to warn if the architecture does not claim pmem support. Cc: Jeff Moyer Cc: Christoph Hellwig Cc: Matthew Wilcox Cc: Ross Zwisler Reviewed-by: Jan Kara Signed-off-by: Dan Williams --- drivers/nvdimm/region_devs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 3c06a6ea6958..41b4cdf5dea8 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -1037,8 +1037,9 @@ int nvdimm_has_flush(struct nd_region *nd_region) { int i; - /* no nvdimm == flushing capability unknown */ - if (nd_region->ndr_mappings == 0) + /* no nvdimm or pmem api == flushing capability unknown */ + if (nd_region->ndr_mappings == 0 + || !IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API)) return -ENXIO; for (i = 0; i < nd_region->ndr_mappings; i++) { -- cgit v1.2.3 From c9e582aa689f5418ca30e1e7a975039772c3a757 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 29 May 2017 23:12:19 -0700 Subject: libnvdimm, nfit: enable support for volatile ranges Allow volatile nfit ranges to participate in all the same infrastructure provided for persistent memory regions. A resulting resulting namespace device will still be called "pmem", but the parent region type will be "nd_volatile". This is in preparation for disabling the dax ->flush() operation in the pmem driver when it is hosted on a volatile range. Cc: Jan Kara Cc: Jeff Moyer Cc: Christoph Hellwig Cc: Matthew Wilcox Cc: Ross Zwisler Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 9 ++++++++- drivers/nvdimm/bus.c | 8 ++++---- drivers/nvdimm/core.c | 2 +- drivers/nvdimm/dax_devs.c | 2 +- drivers/nvdimm/dimm_devs.c | 2 +- drivers/nvdimm/namespace_devs.c | 8 ++++---- drivers/nvdimm/nd-core.h | 9 +++++++++ drivers/nvdimm/pfn_devs.c | 4 ++-- drivers/nvdimm/region_devs.c | 27 ++++++++++++++------------- 9 files changed, 44 insertions(+), 27 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index ac2436538b7e..60d1ca149cc1 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -2227,6 +2227,13 @@ static bool nfit_spa_is_virtual(struct acpi_nfit_system_address *spa) nfit_spa_type(spa) == NFIT_SPA_PCD); } +static bool nfit_spa_is_volatile(struct acpi_nfit_system_address *spa) +{ + return (nfit_spa_type(spa) == NFIT_SPA_VDISK || + nfit_spa_type(spa) == NFIT_SPA_VCD || + nfit_spa_type(spa) == NFIT_SPA_VOLATILE); +} + static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa) { @@ -2301,7 +2308,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, ndr_desc); if (!nfit_spa->nd_region) rc = -ENOMEM; - } else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) { + } else if (nfit_spa_is_volatile(spa)) { nfit_spa->nd_region = nvdimm_volatile_region_create(nvdimm_bus, ndr_desc); if (!nfit_spa->nd_region) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index e9361bffe5ee..4cfba534814b 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -38,13 +38,13 @@ static int to_nd_device_type(struct device *dev) { if (is_nvdimm(dev)) return ND_DEVICE_DIMM; - else if (is_nd_pmem(dev)) + else if (is_memory(dev)) return ND_DEVICE_REGION_PMEM; else if (is_nd_blk(dev)) return ND_DEVICE_REGION_BLK; else if (is_nd_dax(dev)) return ND_DEVICE_DAX_PMEM; - else if (is_nd_pmem(dev->parent) || is_nd_blk(dev->parent)) + else if (is_nd_region(dev->parent)) return nd_region_to_nstype(to_nd_region(dev->parent)); return 0; @@ -56,7 +56,7 @@ static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env) * Ensure that region devices always have their numa node set as * early as possible. */ - if (is_nd_pmem(dev) || is_nd_blk(dev)) + if (is_nd_region(dev)) set_dev_node(dev, to_nd_region(dev)->numa_node); return add_uevent_var(env, "MODALIAS=" ND_DEVICE_MODALIAS_FMT, to_nd_device_type(dev)); @@ -65,7 +65,7 @@ static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env) static struct module *to_bus_provider(struct device *dev) { /* pin bus providers while regions are enabled */ - if (is_nd_pmem(dev) || is_nd_blk(dev)) { + if (is_nd_region(dev)) { struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); return nvdimm_bus->nd_desc->module; diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 2dee908e4bae..22e3ef463401 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -504,7 +504,7 @@ void nvdimm_badblocks_populate(struct nd_region *nd_region, struct nvdimm_bus *nvdimm_bus; struct list_head *poison_list; - if (!is_nd_pmem(&nd_region->dev)) { + if (!is_memory(&nd_region->dev)) { dev_WARN_ONCE(&nd_region->dev, 1, "%s only valid for pmem regions\n", __func__); return; diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c index c1b6556aea6e..a304983ac417 100644 --- a/drivers/nvdimm/dax_devs.c +++ b/drivers/nvdimm/dax_devs.c @@ -89,7 +89,7 @@ struct device *nd_dax_create(struct nd_region *nd_region) struct device *dev = NULL; struct nd_dax *nd_dax; - if (!is_nd_pmem(&nd_region->dev)) + if (!is_memory(&nd_region->dev)) return NULL; nd_dax = nd_dax_alloc(nd_region); diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 6a1e7a3c0c17..f0d1b7e5de01 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -419,7 +419,7 @@ int alias_dpa_busy(struct device *dev, void *data) struct resource *res; int i; - if (!is_nd_pmem(dev)) + if (!is_memory(dev)) return 0; nd_region = to_nd_region(dev); diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 4e9261ef8a95..57724da484d0 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -112,7 +112,7 @@ static int is_uuid_busy(struct device *dev, void *data) static int is_namespace_uuid_busy(struct device *dev, void *data) { - if (is_nd_pmem(dev) || is_nd_blk(dev)) + if (is_nd_region(dev)) return device_for_each_child(dev, data, is_uuid_busy); return 0; } @@ -783,7 +783,7 @@ static int __reserve_free_pmem(struct device *dev, void *data) struct nd_label_id label_id; int i; - if (!is_nd_pmem(dev)) + if (!is_memory(dev)) return 0; nd_region = to_nd_region(dev); @@ -1872,7 +1872,7 @@ static struct device *nd_namespace_pmem_create(struct nd_region *nd_region) struct resource *res; struct device *dev; - if (!is_nd_pmem(&nd_region->dev)) + if (!is_memory(&nd_region->dev)) return NULL; nspm = kzalloc(sizeof(*nspm), GFP_KERNEL); @@ -2152,7 +2152,7 @@ static struct device **scan_labels(struct nd_region *nd_region) } dev->parent = &nd_region->dev; devs[count++] = dev; - } else if (is_nd_pmem(&nd_region->dev)) { + } else if (is_memory(&nd_region->dev)) { /* clean unselected labels */ for (i = 0; i < nd_region->ndr_mappings; i++) { struct list_head *l, *e; diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 4c4bd209e725..86bc19ae30da 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -64,7 +64,16 @@ struct blk_alloc_info { bool is_nvdimm(struct device *dev); bool is_nd_pmem(struct device *dev); +bool is_nd_volatile(struct device *dev); bool is_nd_blk(struct device *dev); +static inline bool is_nd_region(struct device *dev) +{ + return is_nd_pmem(dev) || is_nd_blk(dev) || is_nd_volatile(dev); +} +static inline bool is_memory(struct device *dev) +{ + return is_nd_pmem(dev) || is_nd_volatile(dev); +} struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev); int __init nvdimm_bus_init(void); void nvdimm_bus_exit(void); diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index a6c403600d19..5929eb65cee3 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -331,7 +331,7 @@ struct device *nd_pfn_create(struct nd_region *nd_region) struct nd_pfn *nd_pfn; struct device *dev; - if (!is_nd_pmem(&nd_region->dev)) + if (!is_memory(&nd_region->dev)) return NULL; nd_pfn = nd_pfn_alloc(nd_region); @@ -354,7 +354,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) if (!pfn_sb || !ndns) return -ENODEV; - if (!is_nd_pmem(nd_pfn->dev.parent)) + if (!is_memory(nd_pfn->dev.parent)) return -ENODEV; if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0)) diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 41b4cdf5dea8..53a64a16aba4 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -168,6 +168,11 @@ bool is_nd_blk(struct device *dev) return dev ? dev->type == &nd_blk_device_type : false; } +bool is_nd_volatile(struct device *dev) +{ + return dev ? dev->type == &nd_volatile_device_type : false; +} + struct nd_region *to_nd_region(struct device *dev) { struct nd_region *nd_region = container_of(dev, struct nd_region, dev); @@ -214,7 +219,7 @@ EXPORT_SYMBOL_GPL(nd_blk_region_set_provider_data); */ int nd_region_to_nstype(struct nd_region *nd_region) { - if (is_nd_pmem(&nd_region->dev)) { + if (is_memory(&nd_region->dev)) { u16 i, alias; for (i = 0, alias = 0; i < nd_region->ndr_mappings; i++) { @@ -242,7 +247,7 @@ static ssize_t size_show(struct device *dev, struct nd_region *nd_region = to_nd_region(dev); unsigned long long size = 0; - if (is_nd_pmem(dev)) { + if (is_memory(dev)) { size = nd_region->ndr_size; } else if (nd_region->ndr_mappings == 1) { struct nd_mapping *nd_mapping = &nd_region->mapping[0]; @@ -307,7 +312,7 @@ static ssize_t set_cookie_show(struct device *dev, struct nd_region *nd_region = to_nd_region(dev); struct nd_interleave_set *nd_set = nd_region->nd_set; - if (is_nd_pmem(dev) && nd_set) + if (is_memory(dev) && nd_set) /* pass, should be precluded by region_visible */; else return -ENXIO; @@ -334,7 +339,7 @@ resource_size_t nd_region_available_dpa(struct nd_region *nd_region) if (!ndd) return 0; - if (is_nd_pmem(&nd_region->dev)) { + if (is_memory(&nd_region->dev)) { available += nd_pmem_available_dpa(nd_region, nd_mapping, &overlap); if (overlap > blk_max_overlap) { @@ -520,10 +525,10 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n) struct nd_interleave_set *nd_set = nd_region->nd_set; int type = nd_region_to_nstype(nd_region); - if (!is_nd_pmem(dev) && a == &dev_attr_pfn_seed.attr) + if (!is_memory(dev) && a == &dev_attr_pfn_seed.attr) return 0; - if (!is_nd_pmem(dev) && a == &dev_attr_dax_seed.attr) + if (!is_memory(dev) && a == &dev_attr_dax_seed.attr) return 0; if (!is_nd_pmem(dev) && a == &dev_attr_badblocks.attr) @@ -551,7 +556,7 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n) || type == ND_DEVICE_NAMESPACE_BLK) && a == &dev_attr_available_size.attr) return a->mode; - else if (is_nd_pmem(dev) && nd_set) + else if (is_memory(dev) && nd_set) return a->mode; return 0; @@ -603,7 +608,7 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus, { struct nd_region *nd_region; - if (!probe && (is_nd_pmem(dev) || is_nd_blk(dev))) { + if (!probe && is_nd_region(dev)) { int i; nd_region = to_nd_region(dev); @@ -621,12 +626,8 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus, if (ndd) atomic_dec(&nvdimm->busy); } - - if (is_nd_pmem(dev)) - return; } - if (dev->parent && (is_nd_blk(dev->parent) || is_nd_pmem(dev->parent)) - && probe) { + if (dev->parent && is_nd_region(dev->parent) && probe) { nd_region = to_nd_region(dev->parent); nvdimm_bus_lock(dev); if (nd_region->ns_seed == dev) -- cgit v1.2.3 From 5d61e43b3975c0582003329d9de9d5e85abf5d33 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 27 Jun 2017 13:06:22 -0700 Subject: dax: remove default copy_from_iter fallback Require all dax-drivers to register a ->copy_from_iter() operation so that it is clear which dax_operations are optional and which must be implemented for filesystem-dax to operate. Cc: Gerald Schaefer Suggested-by: Christoph Hellwig Signed-off-by: Dan Williams --- arch/powerpc/sysdev/axonram.c | 8 ++++++++ drivers/block/brd.c | 8 ++++++++ drivers/dax/super.c | 2 -- drivers/s390/block/dcssblk.c | 8 ++++++++ include/linux/dax.h | 2 +- 5 files changed, 25 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index a7fe5fee744f..2799706106c6 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -163,8 +164,15 @@ axon_ram_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pa return __axon_ram_direct_access(bank, pgoff, nr_pages, kaddr, pfn); } +static size_t axon_ram_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + return copy_from_iter(addr, bytes, i); +} + static const struct dax_operations axon_ram_dax_ops = { .direct_access = axon_ram_dax_direct_access, + .copy_from_iter = axon_ram_copy_from_iter, }; /** diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 57b574f2f66a..f2a7ac350f6a 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -22,6 +22,7 @@ #ifdef CONFIG_BLK_DEV_RAM_DAX #include #include +#include #endif #include @@ -354,8 +355,15 @@ static long brd_dax_direct_access(struct dax_device *dax_dev, return __brd_direct_access(brd, pgoff, nr_pages, kaddr, pfn); } +static size_t brd_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + return copy_from_iter(addr, bytes, i); +} + static const struct dax_operations brd_dax_ops = { .direct_access = brd_dax_direct_access, + .copy_from_iter = brd_dax_copy_from_iter, }; #endif diff --git a/drivers/dax/super.c b/drivers/dax/super.c index b7729e4d351a..9e0160b950d7 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -179,8 +179,6 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, if (!dax_alive(dax_dev)) return 0; - if (!dax_dev->ops->copy_from_iter) - return copy_from_iter(addr, bytes, i); return dax_dev->ops->copy_from_iter(dax_dev, pgoff, addr, bytes, i); } EXPORT_SYMBOL_GPL(dax_copy_from_iter); diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 36e5280af3e4..88fa7b3f7a9d 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -43,8 +44,15 @@ static const struct block_device_operations dcssblk_devops = { .release = dcssblk_release, }; +static size_t dcssblk_dax_copy_from_iter(struct dax_device *dax_dev, + pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) +{ + return copy_from_iter(addr, bytes, i); +} + static const struct dax_operations dcssblk_dax_ops = { .direct_access = dcssblk_dax_direct_access, + .copy_from_iter = dcssblk_dax_copy_from_iter, }; struct dcssblk_dev_info { diff --git a/include/linux/dax.h b/include/linux/dax.h index 1f6b6072af64..73fca1bebaf3 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -16,7 +16,7 @@ struct dax_operations { */ long (*direct_access)(struct dax_device *, pgoff_t, long, void **, pfn_t *); - /* copy_from_iter: dax-driver override for default copy_from_iter */ + /* copy_from_iter: required operation for fs-dax direct-i/o */ size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, struct iov_iter *); /* flush: optional driver-specific cache management after writes */ -- cgit v1.2.3 From 9a60c3ef577beb0376704808949f2c1f8fb0672c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 27 Jun 2017 17:59:28 -0700 Subject: dax: convert to bitmask for flags In preparation for adding more flags, convert the existing flag to a bit-flag. Signed-off-by: Dan Williams --- drivers/dax/super.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 9e0160b950d7..8bf71195921b 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -116,13 +116,18 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize) EXPORT_SYMBOL_GPL(__bdev_dax_supported); #endif +enum dax_device_flags { + /* !alive + rcu grace period == no new operations / mappings */ + DAXDEV_ALIVE, +}; + /** * struct dax_device - anchor object for dax services * @inode: core vfs * @cdev: optional character interface for "device dax" * @host: optional name for lookups where the device path is not available * @private: dax driver private data - * @alive: !alive + rcu grace period == no new operations / mappings + * @flags: state and boolean properties */ struct dax_device { struct hlist_node list; @@ -130,7 +135,7 @@ struct dax_device { struct cdev cdev; const char *host; void *private; - bool alive; + unsigned long flags; const struct dax_operations *ops; }; @@ -197,7 +202,7 @@ EXPORT_SYMBOL_GPL(dax_flush); bool dax_alive(struct dax_device *dax_dev) { lockdep_assert_held(&dax_srcu); - return dax_dev->alive; + return test_bit(DAXDEV_ALIVE, &dax_dev->flags); } EXPORT_SYMBOL_GPL(dax_alive); @@ -217,7 +222,7 @@ void kill_dax(struct dax_device *dax_dev) if (!dax_dev) return; - dax_dev->alive = false; + clear_bit(DAXDEV_ALIVE, &dax_dev->flags); synchronize_srcu(&dax_srcu); @@ -257,7 +262,7 @@ static void dax_destroy_inode(struct inode *inode) { struct dax_device *dax_dev = to_dax_dev(inode); - WARN_ONCE(dax_dev->alive, + WARN_ONCE(test_bit(DAXDEV_ALIVE, &dax_dev->flags), "kill_dax() must be called before final iput()\n"); call_rcu(&inode->i_rcu, dax_i_callback); } @@ -309,7 +314,7 @@ static struct dax_device *dax_dev_get(dev_t devt) dax_dev = to_dax_dev(inode); if (inode->i_state & I_NEW) { - dax_dev->alive = true; + set_bit(DAXDEV_ALIVE, &dax_dev->flags); inode->i_cdev = &dax_dev->cdev; inode->i_mode = S_IFCHR; inode->i_flags = S_DAX; -- cgit v1.2.3 From 6e0c90d691cd5d90569f5918ab03eb76c81f9c6e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 26 Jun 2017 21:28:41 -0700 Subject: libnvdimm, pmem, dax: export a cache control attribute The dax_flush() operation can be turned into a nop on platforms where firmware arranges for cpu caches to be flushed on a power-fail event. The ACPI 6.2 specification defines a mechanism for the platform to indicate this capability so the kernel can select the proper default. However, for other platforms, the administrator must toggle this setting manually. Given this flush setting is a dax-specific mechanism we advertise it through a 'dax' attribute group hanging off a host device. For example, a 'pmem0' block-device gets a 'dax' sysfs-subdirectory with a 'write_cache' attribute to control response to dax cache flush requests. This is similar to the 'queue/write_cache' attribute that appears under block devices. Cc: Jan Kara Cc: Jeff Moyer Cc: Matthew Wilcox Cc: Ross Zwisler Suggested-by: Christoph Hellwig Signed-off-by: Dan Williams --- drivers/dax/super.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++ drivers/nvdimm/pmem.c | 10 +++++++ include/linux/dax.h | 3 ++ 3 files changed, 92 insertions(+) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 8bf71195921b..4827251782a1 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -119,6 +119,8 @@ EXPORT_SYMBOL_GPL(__bdev_dax_supported); enum dax_device_flags { /* !alive + rcu grace period == no new operations / mappings */ DAXDEV_ALIVE, + /* gate whether dax_flush() calls the low level flush routine */ + DAXDEV_WRITE_CACHE, }; /** @@ -139,6 +141,71 @@ struct dax_device { const struct dax_operations *ops; }; +static ssize_t write_cache_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dax_device *dax_dev = dax_get_by_host(dev_name(dev)); + ssize_t rc; + + WARN_ON_ONCE(!dax_dev); + if (!dax_dev) + return -ENXIO; + + rc = sprintf(buf, "%d\n", !!test_bit(DAXDEV_WRITE_CACHE, + &dax_dev->flags)); + put_dax(dax_dev); + return rc; +} + +static ssize_t write_cache_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + bool write_cache; + int rc = strtobool(buf, &write_cache); + struct dax_device *dax_dev = dax_get_by_host(dev_name(dev)); + + WARN_ON_ONCE(!dax_dev); + if (!dax_dev) + return -ENXIO; + + if (rc) + len = rc; + else if (write_cache) + set_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags); + else + clear_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags); + + put_dax(dax_dev); + return len; +} +static DEVICE_ATTR_RW(write_cache); + +static umode_t dax_visible(struct kobject *kobj, struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, typeof(*dev), kobj); + struct dax_device *dax_dev = dax_get_by_host(dev_name(dev)); + + WARN_ON_ONCE(!dax_dev); + if (!dax_dev) + return 0; + + if (a == &dev_attr_write_cache.attr && !dax_dev->ops->flush) + return 0; + return a->mode; +} + +static struct attribute *dax_attributes[] = { + &dev_attr_write_cache.attr, + NULL, +}; + +struct attribute_group dax_attribute_group = { + .name = "dax", + .attrs = dax_attributes, + .is_visible = dax_visible, +}; +EXPORT_SYMBOL_GPL(dax_attribute_group); + /** * dax_direct_access() - translate a device pgoff to an absolute pfn * @dax_dev: a dax_device instance representing the logical memory range @@ -194,11 +261,23 @@ void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, if (!dax_alive(dax_dev)) return; + if (!test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags)) + return; + if (dax_dev->ops->flush) dax_dev->ops->flush(dax_dev, pgoff, addr, size); } EXPORT_SYMBOL_GPL(dax_flush); +void dax_write_cache(struct dax_device *dax_dev, bool wc) +{ + if (wc) + set_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags); + else + clear_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags); +} +EXPORT_SYMBOL_GPL(dax_write_cache); + bool dax_alive(struct dax_device *dax_dev) { lockdep_assert_held(&dax_srcu); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 06f6c27ec1e9..7339d184070e 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -253,6 +253,11 @@ static const struct dax_operations pmem_dax_ops = { .flush = pmem_dax_flush, }; +static const struct attribute_group *pmem_attribute_groups[] = { + &dax_attribute_group, + NULL, +}; + static void pmem_release_queue(void *q) { blk_cleanup_queue(q); @@ -287,6 +292,7 @@ static int pmem_attach_disk(struct device *dev, struct pmem_device *pmem; struct resource pfn_res; struct request_queue *q; + struct device *gendev; struct gendisk *disk; void *addr; @@ -384,8 +390,12 @@ static int pmem_attach_disk(struct device *dev, put_disk(disk); return -ENOMEM; } + dax_write_cache(dax_dev, true); pmem->dax_dev = dax_dev; + gendev = disk_to_dev(disk); + gendev->groups = pmem_attribute_groups; + device_add_disk(dev, disk); if (devm_add_action_or_reset(dev, pmem_release_disk, pmem)) return -ENOMEM; diff --git a/include/linux/dax.h b/include/linux/dax.h index 73fca1bebaf3..8f39db7439c3 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -23,6 +23,8 @@ struct dax_operations { void (*flush)(struct dax_device *, pgoff_t, void *, size_t); }; +extern struct attribute_group dax_attribute_group; + #if IS_ENABLED(CONFIG_DAX) struct dax_device *dax_get_by_host(const char *host); void put_dax(struct dax_device *dax_dev); @@ -84,6 +86,7 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t size); +void dax_write_cache(struct dax_device *dax_dev, bool wc); /* * We use lowest available bit in exceptional entry for locking, one bit for -- cgit v1.2.3 From 0b277961f4484fb3f142caaa1dd1748cb0b2cbee Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 9 Jun 2017 09:46:50 -0700 Subject: libnvdimm, pmem: disable dax flushing when pmem is fronting a volatile region The pmem driver attaches to both persistent and volatile memory ranges advertised by the ACPI NFIT. When the region is volatile it is redundant to spend cycles flushing caches at fsync(). Check if the hosting region is volatile and do not set dax_write_cache() if it is. Cc: Jan Kara Cc: Jeff Moyer Cc: Christoph Hellwig Cc: Matthew Wilcox Cc: Ross Zwisler Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 13 ++++++++----- drivers/nvdimm/region_devs.c | 6 ++++++ include/linux/libnvdimm.h | 1 + 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 7339d184070e..e7a40f77f729 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -284,10 +284,10 @@ static int pmem_attach_disk(struct device *dev, struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); struct nd_region *nd_region = to_nd_region(dev->parent); struct vmem_altmap __altmap, *altmap = NULL; + int nid = dev_to_node(dev), fua, wbc; struct resource *res = &nsio->res; struct nd_pfn *nd_pfn = NULL; struct dax_device *dax_dev; - int nid = dev_to_node(dev); struct nd_pfn_sb *pfn_sb; struct pmem_device *pmem; struct resource pfn_res; @@ -314,9 +314,12 @@ static int pmem_attach_disk(struct device *dev, dev_set_drvdata(dev, pmem); pmem->phys_addr = res->start; pmem->size = resource_size(res); - if (!IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) - || nvdimm_has_flush(nd_region) < 0) + fua = nvdimm_has_flush(nd_region); + if (!IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) || fua < 0) { dev_warn(dev, "unable to guarantee persistence of writes\n"); + fua = 0; + } + wbc = nvdimm_has_cache(nd_region); if (!devm_request_mem_region(dev, res->start, resource_size(res), dev_name(&ndns->dev))) { @@ -360,7 +363,7 @@ static int pmem_attach_disk(struct device *dev, return PTR_ERR(addr); pmem->virt_addr = addr; - blk_queue_write_cache(q, true, true); + blk_queue_write_cache(q, wbc, fua); blk_queue_make_request(q, pmem_make_request); blk_queue_physical_block_size(q, PAGE_SIZE); blk_queue_max_hw_sectors(q, UINT_MAX); @@ -390,7 +393,7 @@ static int pmem_attach_disk(struct device *dev, put_disk(disk); return -ENOMEM; } - dax_write_cache(dax_dev, true); + dax_write_cache(dax_dev, wbc); pmem->dax_dev = dax_dev; gendev = disk_to_dev(disk); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 53a64a16aba4..0c3b089b280a 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -1060,6 +1060,12 @@ int nvdimm_has_flush(struct nd_region *nd_region) } EXPORT_SYMBOL_GPL(nvdimm_has_flush); +int nvdimm_has_cache(struct nd_region *nd_region) +{ + return is_nd_pmem(&nd_region->dev); +} +EXPORT_SYMBOL_GPL(nvdimm_has_cache); + void __exit nd_region_devs_exit(void) { ida_destroy(®ion_ida); diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index b2f659bd661d..a8ee1d0afd70 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -165,4 +165,5 @@ void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane); u64 nd_fletcher64(void *addr, size_t len, bool le); void nvdimm_flush(struct nd_region *nd_region); int nvdimm_has_flush(struct nd_region *nd_region); +int nvdimm_has_cache(struct nd_region *nd_region); #endif /* __LIBNVDIMM_H__ */ -- cgit v1.2.3 From 5e93746f065c49445cf8115007fc887789438ec0 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Thu, 22 Jun 2017 15:44:41 +0530 Subject: acpi, nfit: constify *_attribute_group File size before: text data bss dec hex filename 20792 1580 994 23366 5b46 drivers/acpi/nfit/core.o File size After adding 'const': text data bss dec hex filename 20968 1388 994 23350 5b36 drivers/acpi/nfit/core.o Signed-off-by: Arvind Yadav Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index facbc6107165..957b13152d1b 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1066,7 +1066,7 @@ static struct attribute *acpi_nfit_attributes[] = { NULL, }; -static struct attribute_group acpi_nfit_attribute_group = { +static const struct attribute_group acpi_nfit_attribute_group = { .name = "nfit", .attrs = acpi_nfit_attributes, .is_visible = nfit_visible, @@ -1346,7 +1346,7 @@ static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj, return a->mode; } -static struct attribute_group acpi_nfit_dimm_attribute_group = { +static const struct attribute_group acpi_nfit_dimm_attribute_group = { .name = "nfit", .attrs = acpi_nfit_dimm_attributes, .is_visible = acpi_nfit_dimm_attr_visible, @@ -1640,7 +1640,7 @@ static struct attribute *acpi_nfit_region_attributes[] = { NULL, }; -static struct attribute_group acpi_nfit_region_attribute_group = { +static const struct attribute_group acpi_nfit_region_attribute_group = { .name = "nfit", .attrs = acpi_nfit_region_attributes, }; -- cgit v1.2.3 From 14e494542636b7a685c5bf27e695e3bb9ec3fe7d Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 28 Jun 2017 14:25:00 -0600 Subject: libnvdimm, btt: BTT updates for UEFI 2.7 format The UEFI 2.7 specification defines an updated BTT metadata format, bumping the revision to 2.0. Add support for the new format, while retaining compatibility for the old 1.1 format. Cc: Toshi Kani Cc: Linda Knippers Cc: Dan Williams Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/btt.c | 28 +++++++++++++------ drivers/nvdimm/btt.h | 2 ++ drivers/nvdimm/btt_devs.c | 46 +++++++++++++++++++++++++++---- drivers/nvdimm/claim.c | 1 + drivers/nvdimm/label.c | 6 ++++ drivers/nvdimm/label.h | 1 + drivers/nvdimm/namespace_devs.c | 61 +++++++++++++++++++++++++++++++++++++++-- drivers/nvdimm/nd.h | 3 ++ include/linux/nd.h | 1 + 9 files changed, 134 insertions(+), 15 deletions(-) diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 983718b8fd9b..7ca11df80ae8 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -37,8 +37,8 @@ static int arena_read_bytes(struct arena_info *arena, resource_size_t offset, struct nd_btt *nd_btt = arena->nd_btt; struct nd_namespace_common *ndns = nd_btt->ndns; - /* arena offsets are 4K from the base of the device */ - offset += SZ_4K; + /* arena offsets may be shifted from the base of the device */ + offset += arena->nd_btt->initial_offset; return nvdimm_read_bytes(ndns, offset, buf, n, flags); } @@ -48,8 +48,8 @@ static int arena_write_bytes(struct arena_info *arena, resource_size_t offset, struct nd_btt *nd_btt = arena->nd_btt; struct nd_namespace_common *ndns = nd_btt->ndns; - /* arena offsets are 4K from the base of the device */ - offset += SZ_4K; + /* arena offsets may be shifted from the base of the device */ + offset += arena->nd_btt->initial_offset; return nvdimm_write_bytes(ndns, offset, buf, n, flags); } @@ -576,8 +576,8 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size, arena->internal_lbasize = roundup(arena->external_lbasize, INT_LBASIZE_ALIGNMENT); arena->nfree = BTT_DEFAULT_NFREE; - arena->version_major = 1; - arena->version_minor = 1; + arena->version_major = btt->nd_btt->version_major; + arena->version_minor = btt->nd_btt->version_minor; if (available % BTT_PG_SIZE) available -= (available % BTT_PG_SIZE); @@ -1425,6 +1425,7 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns) { struct nd_btt *nd_btt = to_nd_btt(ndns->claim); struct nd_region *nd_region; + struct btt_sb *btt_sb; struct btt *btt; size_t rawsize; @@ -1433,10 +1434,21 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns) return -ENODEV; } - rawsize = nvdimm_namespace_capacity(ndns) - SZ_4K; + btt_sb = devm_kzalloc(&nd_btt->dev, sizeof(*btt_sb), GFP_KERNEL); + + /* + * If this returns < 0, that is ok as it just means there wasn't + * an existing BTT, and we're creating a new one. We still need to + * call this as we need the version dependent fields in nd_btt to be + * set correctly based on the holder class + */ + nd_btt_version(nd_btt, ndns, btt_sb); + + rawsize = nvdimm_namespace_capacity(ndns) - nd_btt->initial_offset; if (rawsize < ARENA_MIN_SIZE) { dev_dbg(&nd_btt->dev, "%s must be at least %ld bytes\n", - dev_name(&ndns->dev), ARENA_MIN_SIZE + SZ_4K); + dev_name(&ndns->dev), + ARENA_MIN_SIZE + nd_btt->initial_offset); return -ENXIO; } nd_region = to_nd_region(nd_btt->dev.parent); diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h index b2f8651e5395..888e862907a0 100644 --- a/drivers/nvdimm/btt.h +++ b/drivers/nvdimm/btt.h @@ -184,5 +184,7 @@ struct btt { }; bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super); +int nd_btt_version(struct nd_btt *nd_btt, struct nd_namespace_common *ndns, + struct btt_sb *btt_sb); #endif diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 31d875a91569..3e359d282f8e 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -260,20 +260,55 @@ bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super) } EXPORT_SYMBOL(nd_btt_arena_is_valid); +int nd_btt_version(struct nd_btt *nd_btt, struct nd_namespace_common *ndns, + struct btt_sb *btt_sb) +{ + if (ndns->claim_class == NVDIMM_CCLASS_BTT2) { + /* Probe/setup for BTT v2.0 */ + nd_btt->initial_offset = 0; + nd_btt->version_major = 2; + nd_btt->version_minor = 0; + if (nvdimm_read_bytes(ndns, 0, btt_sb, sizeof(*btt_sb), 0)) + return -ENXIO; + if (!nd_btt_arena_is_valid(nd_btt, btt_sb)) + return -ENODEV; + if ((le16_to_cpu(btt_sb->version_major) != 2) || + (le16_to_cpu(btt_sb->version_minor) != 0)) + return -ENODEV; + } else { + /* + * Probe/setup for BTT v1.1 (NVDIMM_CCLASS_NONE or + * NVDIMM_CCLASS_BTT) + */ + nd_btt->initial_offset = SZ_4K; + nd_btt->version_major = 1; + nd_btt->version_minor = 1; + if (nvdimm_read_bytes(ndns, SZ_4K, btt_sb, sizeof(*btt_sb), 0)) + return -ENXIO; + if (!nd_btt_arena_is_valid(nd_btt, btt_sb)) + return -ENODEV; + if ((le16_to_cpu(btt_sb->version_major) != 1) || + (le16_to_cpu(btt_sb->version_minor) != 1)) + return -ENODEV; + } + return 0; +} +EXPORT_SYMBOL(nd_btt_version); + static int __nd_btt_probe(struct nd_btt *nd_btt, struct nd_namespace_common *ndns, struct btt_sb *btt_sb) { + int rc; + if (!btt_sb || !ndns || !nd_btt) return -ENODEV; - if (nvdimm_read_bytes(ndns, SZ_4K, btt_sb, sizeof(*btt_sb), 0)) - return -ENXIO; - if (nvdimm_namespace_capacity(ndns) < SZ_16M) return -ENXIO; - if (!nd_btt_arena_is_valid(nd_btt, btt_sb)) - return -ENODEV; + rc = nd_btt_version(nd_btt, ndns, btt_sb); + if (rc < 0) + return rc; nd_btt->lbasize = le32_to_cpu(btt_sb->external_lbasize); nd_btt->uuid = kmemdup(btt_sb->uuid, 16, GFP_KERNEL); @@ -298,6 +333,7 @@ int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns) switch (ndns->claim_class) { case NVDIMM_CCLASS_NONE: case NVDIMM_CCLASS_BTT: + case NVDIMM_CCLASS_BTT2: break; default: return -ENODEV; diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index de9b1cce242e..8d23f68737d9 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -189,6 +189,7 @@ ssize_t nd_namespace_store(struct device *dev, case NVDIMM_CCLASS_NONE: break; case NVDIMM_CCLASS_BTT: + case NVDIMM_CCLASS_BTT2: if (!is_nd_btt(dev)) { len = -EBUSY; goto out_attach; diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 235f2089fab2..922b68718a1a 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -21,6 +21,7 @@ #include "nd.h" static guid_t nvdimm_btt_guid; +static guid_t nvdimm_btt2_guid; static guid_t nvdimm_pfn_guid; static guid_t nvdimm_dax_guid; @@ -578,6 +579,8 @@ enum nvdimm_claim_class to_nvdimm_cclass(guid_t *guid) { if (guid_equal(guid, &nvdimm_btt_guid)) return NVDIMM_CCLASS_BTT; + else if (guid_equal(guid, &nvdimm_btt2_guid)) + return NVDIMM_CCLASS_BTT2; else if (guid_equal(guid, &nvdimm_pfn_guid)) return NVDIMM_CCLASS_PFN; else if (guid_equal(guid, &nvdimm_dax_guid)) @@ -593,6 +596,8 @@ static const guid_t *to_abstraction_guid(enum nvdimm_claim_class claim_class, { if (claim_class == NVDIMM_CCLASS_BTT) return &nvdimm_btt_guid; + else if (claim_class == NVDIMM_CCLASS_BTT2) + return &nvdimm_btt2_guid; else if (claim_class == NVDIMM_CCLASS_PFN) return &nvdimm_pfn_guid; else if (claim_class == NVDIMM_CCLASS_DAX) @@ -1158,6 +1163,7 @@ int nd_blk_namespace_label_update(struct nd_region *nd_region, int __init nd_label_init(void) { WARN_ON(guid_parse(NVDIMM_BTT_GUID, &nvdimm_btt_guid)); + WARN_ON(guid_parse(NVDIMM_BTT2_GUID, &nvdimm_btt2_guid)); WARN_ON(guid_parse(NVDIMM_PFN_GUID, &nvdimm_pfn_guid)); WARN_ON(guid_parse(NVDIMM_DAX_GUID, &nvdimm_dax_guid)); diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h index 7c8e2cc9e73e..1ebf4d3d01ba 100644 --- a/drivers/nvdimm/label.h +++ b/drivers/nvdimm/label.h @@ -113,6 +113,7 @@ struct nd_namespace_label { }; #define NVDIMM_BTT_GUID "8aed63a2-29a2-4c66-8b12-f05d15d3922a" +#define NVDIMM_BTT2_GUID "18633bfc-1735-4217-8ac9-17239282d3f8" #define NVDIMM_PFN_GUID "266400ba-fb9f-4677-bcb0-968f11d0d225" #define NVDIMM_DAX_GUID "97a86d9c-3cdd-4eda-986f-5068b4f80088" diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index f05d9b0672bf..c96e31330213 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -1411,6 +1411,58 @@ static ssize_t dpa_extents_show(struct device *dev, } static DEVICE_ATTR_RO(dpa_extents); +static int btt_claim_class(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + int i, loop_bitmask = 0; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); + struct nd_namespace_index *nsindex; + + nsindex = to_namespace_index(ndd, ndd->ns_current); + if (nsindex == NULL) + loop_bitmask |= 1; + else { + /* check whether existing labels are v1.1 or v1.2 */ + if (__le16_to_cpu(nsindex->major) == 1 + && __le16_to_cpu(nsindex->minor) == 1) + loop_bitmask |= 2; + else + loop_bitmask |= 4; + } + } + /* + * If nsindex is null loop_bitmask's bit 0 will be set, and if an index + * block is found, a v1.1 label for any mapping will set bit 1, and a + * v1.2 label will set bit 2. + * + * At the end of the loop, at most one of the three bits must be set. + * If multiple bits were set, it means the different mappings disagree + * about their labels, and this must be cleaned up first. + * + * If all the label index blocks are found to agree, nsindex of NULL + * implies labels haven't been initialized yet, and when they will, + * they will be of the 1.2 format, so we can assume BTT2.0 + * + * If 1.1 labels are found, we enforce BTT1.1, and if 1.2 labels are + * found, we enforce BTT2.0 + * + * If the loop was never entered, default to BTT1.1 (legacy namespaces) + */ + switch (loop_bitmask) { + case 0: + case 2: + return NVDIMM_CCLASS_BTT; + case 1: + case 4: + return NVDIMM_CCLASS_BTT2; + default: + return -ENXIO; + } +} + static ssize_t holder_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1433,7 +1485,7 @@ static ssize_t __holder_class_store(struct device *dev, const char *buf) return -EBUSY; if (strcmp(buf, "btt") == 0 || strcmp(buf, "btt\n") == 0) - ndns->claim_class = NVDIMM_CCLASS_BTT; + ndns->claim_class = btt_claim_class(dev); else if (strcmp(buf, "pfn") == 0 || strcmp(buf, "pfn\n") == 0) ndns->claim_class = NVDIMM_CCLASS_PFN; else if (strcmp(buf, "dax") == 0 || strcmp(buf, "dax\n") == 0) @@ -1443,6 +1495,10 @@ static ssize_t __holder_class_store(struct device *dev, const char *buf) else return -EINVAL; + /* btt_claim_class() could've returned an error */ + if (ndns->claim_class < 0) + return ndns->claim_class; + return 0; } @@ -1474,7 +1530,8 @@ static ssize_t holder_class_show(struct device *dev, device_lock(dev); if (ndns->claim_class == NVDIMM_CCLASS_NONE) rc = sprintf(buf, "\n"); - else if (ndns->claim_class == NVDIMM_CCLASS_BTT) + else if ((ndns->claim_class == NVDIMM_CCLASS_BTT) || + (ndns->claim_class == NVDIMM_CCLASS_BTT2)) rc = sprintf(buf, "btt\n"); else if (ndns->claim_class == NVDIMM_CCLASS_PFN) rc = sprintf(buf, "pfn\n"); diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index e802c877d783..e1b5715bd91f 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -195,6 +195,9 @@ struct nd_btt { u64 size; u8 *uuid; int id; + int initial_offset; + u16 version_major; + u16 version_minor; }; enum nd_pfn_mode { diff --git a/include/linux/nd.h b/include/linux/nd.h index 96069c543890..5dc6b695437d 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -24,6 +24,7 @@ enum nvdimm_event { enum nvdimm_claim_class { NVDIMM_CCLASS_NONE, NVDIMM_CCLASS_BTT, + NVDIMM_CCLASS_BTT2, NVDIMM_CCLASS_PFN, NVDIMM_CCLASS_DAX, NVDIMM_CCLASS_UNKNOWN, -- cgit v1.2.3 From d5d51fece79eafcdbc69d1836c28a3c9c7c8e862 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 29 Jun 2017 09:02:10 -0700 Subject: acpi, nfit: quiet invalid block-aperture-region warnings This state is already visible by userspace since the BLK region will not be enabled, and it is otherwise benign as it usually indicates that the DIMM is not configured. Signed-off-by: Dan Williams --- drivers/nvdimm/region_devs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 282b8991ea83..ab141f8b5140 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -833,7 +833,7 @@ int nd_blk_region_init(struct nd_region *nd_region) return 0; if (nd_region->ndr_mappings < 1) { - dev_err(dev, "invalid BLK region\n"); + dev_dbg(dev, "invalid BLK region\n"); return -ENXIO; } -- cgit v1.2.3 From c13c43d54f2c6a3be1c675766778ac1ad8dfbfcc Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Thu, 29 Jun 2017 16:59:11 -0600 Subject: libnvdimm, btt: fix btt_rw_page not returning errors btt_rw_page was not propagating errors frm btt_do_bvec, resulting in any IO errors via the rw_page path going unnoticed. the pmem driver recently fixed this in e10624f pmem: fail io-requests to known bad blocks but same problem in BTT went neglected. Fixes: 5212e11fde4d ("nd_btt: atomic sector updates") Cc: Cc: Toshi Kani Cc: Dan Williams Cc: Jeff Moyer Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/btt.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 7ca11df80ae8..4e56e720288d 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1248,10 +1248,13 @@ static int btt_rw_page(struct block_device *bdev, sector_t sector, struct page *page, bool is_write) { struct btt *btt = bdev->bd_disk->private_data; + int rc; - btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, is_write, sector); - page_endio(page, is_write, 0); - return 0; + rc = btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, is_write, sector); + if (rc == 0) + page_endio(page, is_write, 0); + + return rc; } -- cgit v1.2.3 From 7e5a21dfe5524a85705d3bc7b540c849cc13e9a1 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Fri, 30 Jun 2017 18:32:52 -0600 Subject: libnvdimm: fix the clear-error check in nsio_rw_bytes A leftover from the 'bandaid' fix that disabled BTT error clearing in rw_bytes resulted in an incorrect check. After we converted these checks over to use the NVDIMM_IO_ATOMIC flag, the ndns->claim check was both redundant, and incorrect. Remove it. Fixes: 3ae3d67ba705 ("libnvdimm: add an atomic vs process context flag to rw_bytes") Cc: Cc: Dave Jiang Cc: Dan Williams Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/claim.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index 8d23f68737d9..f8ad92b4dcd2 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -289,8 +289,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, * work around this collision. */ if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512) - && !(flags & NVDIMM_IO_ATOMIC) - && !ndns->claim) { + && !(flags & NVDIMM_IO_ATOMIC)) { long cleared; cleared = nvdimm_clear_poison(&ndns->dev, -- cgit v1.2.3 From 6aa734a2f38e2e17ac4de3561770b8676b27af2e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 30 Jun 2017 18:56:03 -0700 Subject: libnvdimm, region, pmem: fix 'badblocks' sysfs_get_dirent() reference lifetime We need to hold a reference on the 'dirent' until we are sure there are no more notifications that will be sent. As noted in the new comments we take advantage of the fact that the references are taken and dropped under device_lock() and that nd_device_notify() holds device_lock() over new badblocks notifications. The notifications that happen when badblocks are cleared only occur while the device is active. Also take the opportunity to fix up the error messages to report the user visible effect of a sysfs_get_dirent() failure. Fixes: 975750a98c26 ("libnvdimm, pmem: Add sysfs notifications to badblocks") Cc: Toshi Kani Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 16 ++++++++++++---- drivers/nvdimm/region.c | 13 +++++++++---- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 34189a145ac6..4a9cffc14512 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -382,10 +382,8 @@ static int pmem_attach_disk(struct device *dev, pmem->bb_state = sysfs_get_dirent(disk_to_dev(disk)->kobj.sd, "badblocks"); - if (pmem->bb_state) - sysfs_put(pmem->bb_state); - else - dev_warn(dev, "sysfs_get_dirent 'badblocks' failed\n"); + if (!pmem->bb_state) + dev_warn(dev, "'badblocks' notification disabled\n"); return 0; } @@ -418,8 +416,18 @@ static int nd_pmem_probe(struct device *dev) static int nd_pmem_remove(struct device *dev) { + struct pmem_device *pmem = dev_get_drvdata(dev); + if (is_nd_btt(dev)) nvdimm_namespace_detach_btt(to_nd_btt(dev)); + else { + /* + * Note, this assumes device_lock() context to not race + * nd_pmem_notify() + */ + sysfs_put(pmem->bb_state); + pmem->bb_state = NULL; + } nvdimm_flush(to_nd_region(dev->parent)); return 0; diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index ca94029d20b3..034f0a07d627 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -60,11 +60,9 @@ static int nd_region_probe(struct device *dev) return -ENODEV; nd_region->bb_state = sysfs_get_dirent(nd_region->dev.kobj.sd, "badblocks"); - if (nd_region->bb_state) - sysfs_put(nd_region->bb_state); - else + if (!nd_region->bb_state) dev_warn(&nd_region->dev, - "sysfs_get_dirent 'badblocks' failed\n"); + "'badblocks' notification disabled\n"); ndr_res.start = nd_region->ndr_start; ndr_res.end = nd_region->ndr_start + nd_region->ndr_size - 1; nvdimm_badblocks_populate(nd_region, &nd_region->bb, &ndr_res); @@ -111,6 +109,13 @@ static int nd_region_remove(struct device *dev) dev_set_drvdata(dev, NULL); nvdimm_bus_unlock(dev); + /* + * Note, this assumes device_lock() context to not race + * nd_region_notify() + */ + sysfs_put(nd_region->bb_state); + nd_region->bb_state = NULL; + return 0; } -- cgit v1.2.3 From e6be2dcbefdb7c3817889363cee3e933695cba21 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Fri, 30 Jun 2017 18:32:51 -0600 Subject: libnvdimm, btt: convert some info messages to warn/err Some critical messages such as IO errors, metadata failures were printed with dev_info. Make them louder by upgrading them to dev_warn or dev_error. Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/btt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 4e56e720288d..2af329d6a833 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -323,7 +323,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane, old_ent = btt_log_get_old(log); if (old_ent < 0 || old_ent > 1) { - dev_info(to_dev(arena), + dev_err(to_dev(arena), "log corruption (%d): lane %d seq [%d, %d]\n", old_ent, lane, log[0].seq, log[1].seq); /* TODO set error state? */ @@ -684,7 +684,7 @@ static int discover_arenas(struct btt *btt) dev_info(to_dev(arena), "No existing arenas\n"); goto out; } else { - dev_info(to_dev(arena), + dev_err(to_dev(arena), "Found corrupted metadata!\n"); ret = -ENODEV; goto out; @@ -1227,7 +1227,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio) err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset, op_is_write(bio_op(bio)), iter.bi_sector); if (err) { - dev_info(&btt->nd_btt->dev, + dev_err(&btt->nd_btt->dev, "io error in %s sector %lld, len %d,\n", (op_is_write(bio_op(bio))) ? "WRITE" : "READ", @@ -1373,7 +1373,7 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize, } if (btt->init_state != INIT_READY && nd_region->ro) { - dev_info(dev, "%s is read-only, unable to init btt metadata\n", + dev_warn(dev, "%s is read-only, unable to init btt metadata\n", dev_name(&nd_region->dev)); return NULL; } else if (btt->init_state != INIT_READY) { -- cgit v1.2.3 From 53b85a449b15e0e2e6727d8855e4c8b7627577e1 Mon Sep 17 00:00:00 2001 From: Jerry Hoemann Date: Fri, 30 Jun 2017 20:41:22 -0700 Subject: libnvdimm: passthru functions clear to send Have dsm functions called via the pass thru mechanism also be checked against clear to send. Signed-off-by: Jerry Hoemann Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 63ce50d9c1c5..9730db48a01b 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -910,6 +910,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, static char in_env[ND_CMD_MAX_ENVELOPE]; const struct nd_cmd_desc *desc = NULL; unsigned int cmd = _IOC_NR(ioctl_cmd); + unsigned int func = cmd; void __user *p = (void __user *) arg; struct device *dev = &nvdimm_bus->dev; struct nd_cmd_pkg pkg; @@ -975,6 +976,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, } if (cmd == ND_CMD_CALL) { + func = pkg.nd_command; dev_dbg(dev, "%s:%s, idx: %llu, in: %zu, out: %zu, len %zu\n", __func__, dimm_name, pkg.nd_command, in_len, out_len, buf_len); @@ -1023,7 +1025,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, } nvdimm_bus_lock(&nvdimm_bus->dev); - rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, cmd, buf); + rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, func, buf); if (rc) goto out_unlock; -- cgit v1.2.3 From 37d74841b9d42b105cba053e70e9db0e395949da Mon Sep 17 00:00:00 2001 From: Jerry Hoemann Date: Fri, 30 Jun 2017 20:41:24 -0700 Subject: acpi, nfit: Enable DSM pass thru for root functions. Set ND_CMD_CALL in the cmd_mask to enable calling root functions via the pass thru mechanism. Signed-off-by: Jerry Hoemann Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 1 + include/uapi/linux/ndctl.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 957b13152d1b..b16e4ef7a5ca 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1623,6 +1623,7 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++) if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i)) set_bit(i, &nd_desc->cmd_mask); + set_bit(ND_CMD_CALL, &nd_desc->cmd_mask); } static ssize_t range_index_show(struct device *dev, diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 7ad3863cb88b..e23c37fedade 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -179,6 +179,7 @@ static inline const char *nvdimm_bus_cmd_name(unsigned cmd) [ND_CMD_ARS_START] = "ars_start", [ND_CMD_ARS_STATUS] = "ars_status", [ND_CMD_CLEAR_ERROR] = "clear_error", + [ND_CMD_CALL] = "cmd_call", }; if (cmd < ARRAY_SIZE(names) && names[cmd]) -- cgit v1.2.3 From 7db5bb33add5afe6c64e00516b0c928bfc937466 Mon Sep 17 00:00:00 2001 From: Jerry Hoemann Date: Fri, 30 Jun 2017 20:53:24 -0700 Subject: libnvdimm, acpi, nfit: Add bus level dsm mask for pass thru. Add a bus level dsm_mask to nvdimm_bus_descriptor to allow the passthru calling mechanism to specify a different mask from the cmd_mask. Populate bus_dsm_mask and use it to filter dsm calls that user can make through the pass thru interface. Signed-off-by: Jerry Hoemann [djbw: use command number constants instead of a magic mask value] Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 27 +++++++++++++++++++++++++++ include/linux/libnvdimm.h | 1 + 2 files changed, 28 insertions(+) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index b16e4ef7a5ca..b7d7cc4fd2d0 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -253,6 +253,8 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, cmd_name = nvdimm_bus_cmd_name(cmd); cmd_mask = nd_desc->cmd_mask; dsm_mask = cmd_mask; + if (cmd == ND_CMD_CALL) + dsm_mask = nd_desc->bus_dsm_mask; desc = nd_cmd_bus_desc(cmd); guid = to_nfit_uuid(NFIT_DEV_BUS); handle = adev->handle; @@ -1608,11 +1610,23 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) acpi_desc); } +/* + * These constants are private because there are no kernel consumers of + * these commands. + */ +enum nfit_aux_cmds { + NFIT_CMD_TRANSLATE_SPA = 5, + NFIT_CMD_ARS_INJECT_SET = 7, + NFIT_CMD_ARS_INJECT_CLEAR = 8, + NFIT_CMD_ARS_INJECT_GET = 9, +}; + static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) { struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; const guid_t *guid = to_nfit_uuid(NFIT_DEV_BUS); struct acpi_device *adev; + unsigned long dsm_mask; int i; nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en; @@ -1624,6 +1638,19 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i)) set_bit(i, &nd_desc->cmd_mask); set_bit(ND_CMD_CALL, &nd_desc->cmd_mask); + + dsm_mask = + (1 << ND_CMD_ARS_CAP) | + (1 << ND_CMD_ARS_START) | + (1 << ND_CMD_ARS_STATUS) | + (1 << ND_CMD_CLEAR_ERROR) | + (1 << NFIT_CMD_TRANSLATE_SPA) | + (1 << NFIT_CMD_ARS_INJECT_SET) | + (1 << NFIT_CMD_ARS_INJECT_CLEAR) | + (1 << NFIT_CMD_ARS_INJECT_GET); + for_each_set_bit(i, &dsm_mask, BITS_PER_LONG) + if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i)) + set_bit(i, &nd_desc->bus_dsm_mask); } static ssize_t range_index_show(struct device *dev, diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 4b9f178c82e6..6aee1a6e4e63 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -55,6 +55,7 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm_bus_descriptor { const struct attribute_group **attr_groups; + unsigned long bus_dsm_mask; unsigned long cmd_mask; struct module *module; char *provider_name; -- cgit v1.2.3 From 41f95db7b999774fc70b5905c77436d019a910eb Mon Sep 17 00:00:00 2001 From: Jerry Hoemann Date: Fri, 30 Jun 2017 20:41:28 -0700 Subject: acpi, nfit: Show bus_dsm_mask in sysfs Display bus_dsm_mask in sysfs as /sys/bus/nd/devices/ndbusX/nfit/dsm_mask. Signed-off-by: Jerry Hoemann Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index b7d7cc4fd2d0..4f6939b51e90 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -929,6 +929,17 @@ static int nfit_mem_init(struct acpi_nfit_desc *acpi_desc) return 0; } +static ssize_t bus_dsm_mask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); + + return sprintf(buf, "%#lx\n", nd_desc->bus_dsm_mask); +} +static struct device_attribute dev_attr_bus_dsm_mask = + __ATTR(dsm_mask, 0444, bus_dsm_mask_show, NULL); + static ssize_t revision_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1065,6 +1076,7 @@ static struct attribute *acpi_nfit_attributes[] = { &dev_attr_revision.attr, &dev_attr_scrub.attr, &dev_attr_hw_error_scrub.attr, + &dev_attr_bus_dsm_mask.attr, NULL, }; -- cgit v1.2.3 From 759d6a9641d7f52f9311aae4f2d90058adad3ac2 Mon Sep 17 00:00:00 2001 From: Jerry Hoemann Date: Fri, 30 Jun 2017 20:41:29 -0700 Subject: libnvdimm: New ACPI 6.2 DSM functions ACPI 6.2 added new NVDIMM root DSM functions. Define their data structures. Signed-off-by: Jerry Hoemann Signed-off-by: Dan Williams --- include/uapi/linux/ndctl.h | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index e23c37fedade..e15768fb4b99 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -105,7 +105,8 @@ struct nd_cmd_ars_cap { __u32 status; __u32 max_ars_out; __u32 clear_err_unit; - __u32 reserved; + __u16 flags; + __u16 reserved; } __packed; struct nd_cmd_ars_start { @@ -144,6 +145,43 @@ struct nd_cmd_clear_error { __u64 cleared; } __packed; +struct nd_cmd_trans_spa { + __u64 spa; + __u32 status; + __u8 flags; + __u8 _reserved[3]; + __u64 trans_length; + __u32 num_nvdimms; + struct nd_nvdimm_device { + __u32 nfit_device_handle; + __u32 _reserved; + __u64 dpa; + } __packed devices[0]; + +} __packed; + +struct nd_cmd_ars_err_inj { + __u64 err_inj_spa_range_base; + __u64 err_inj_spa_range_length; + __u8 err_inj_options; + __u32 status; +} __packed; + +struct nd_cmd_ars_err_inj_clr { + __u64 err_inj_clr_spa_range_base; + __u64 err_inj_clr_spa_range_length; + __u32 status; +} __packed; + +struct nd_cmd_ars_err_inj_stat { + __u32 status; + __u32 inj_err_rec_count; + struct nd_error_stat_query_record { + __u64 err_inj_stat_spa_range_base; + __u64 err_inj_stat_spa_range_length; + } __packed record[0]; +} __packed; + enum { ND_CMD_IMPLEMENTED = 0, -- cgit v1.2.3 From 807900395efebf9276178eb6157959f2e81fe013 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 29 Jun 2017 20:41:30 -0600 Subject: acpi/nfit: Issue Start ARS to retrieve existing records ACPI 6.2 defines in section 9.20.7.2 that the OSPM may call a Start ARS with Flags Bit [1] set upon receiving the 0x81 notification. Upon receiving the notification, the OSPM may decide to issue a Start ARS with Flags Bit [1] set to prepare for the retrieval of existing records and issue the Query ARS Status function to retrieve the records. Add support to call a Start ARS from acpi_nfit_uc_error_notify() with ND_ARS_RETURN_PREV_DATA set when HW_ERROR_SCRUB_ON is not set. Link: http://www.uefi.org/sites/default/files/resources/ACPI_6_2.pdf Signed-off-by: Toshi Kani Cc: Dan Williams Cc: Rafael J. Wysocki Cc: Vishal Verma Cc: Linda Knippers Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 13 ++++++++++--- drivers/acpi/nfit/mce.c | 2 +- drivers/acpi/nfit/nfit.h | 3 ++- include/uapi/linux/ndctl.h | 1 + 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 4f6939b51e90..1f6bc5065b0d 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1044,7 +1044,7 @@ static ssize_t scrub_store(struct device *dev, if (nd_desc) { struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); - rc = acpi_nfit_ars_rescan(acpi_desc); + rc = acpi_nfit_ars_rescan(acpi_desc, 0); } device_unlock(dev); if (rc) @@ -2137,6 +2137,7 @@ static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa memset(&ars_start, 0, sizeof(ars_start)); ars_start.address = spa->address; ars_start.length = spa->length; + ars_start.flags = acpi_desc->ars_start_flags; if (nfit_spa_type(spa) == NFIT_SPA_PM) ars_start.type = ND_ARS_PERSISTENT; else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) @@ -2163,6 +2164,7 @@ static int ars_continue(struct acpi_nfit_desc *acpi_desc) ars_start.address = ars_status->restart_address; ars_start.length = ars_status->restart_length; ars_start.type = ars_status->type; + ars_start.flags = acpi_desc->ars_start_flags; rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start, sizeof(ars_start), &cmd_rc); if (rc < 0) @@ -2684,6 +2686,7 @@ static void acpi_nfit_scrub(struct work_struct *work) list_for_each_entry(nfit_spa, &acpi_desc->spas, list) acpi_nfit_async_scrub(acpi_desc, nfit_spa); acpi_desc->scrub_count++; + acpi_desc->ars_start_flags = 0; if (acpi_desc->scrub_count_state) sysfs_notify_dirent(acpi_desc->scrub_count_state); mutex_unlock(&acpi_desc->init_mutex); @@ -2702,6 +2705,7 @@ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) return rc; } + acpi_desc->ars_start_flags = 0; if (!acpi_desc->cancel) queue_work(nfit_wq, &acpi_desc->work); return 0; @@ -2906,7 +2910,7 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, return 0; } -int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc) +int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags) { struct device *dev = acpi_desc->dev; struct nfit_spa *nfit_spa; @@ -2928,6 +2932,7 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc) nfit_spa->ars_required = 1; } + acpi_desc->ars_start_flags = flags; queue_work(nfit_wq, &acpi_desc->work); dev_dbg(dev, "%s: ars_scan triggered\n", __func__); mutex_unlock(&acpi_desc->init_mutex); @@ -3104,8 +3109,10 @@ static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle) static void acpi_nfit_uc_error_notify(struct device *dev, acpi_handle handle) { struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev); + u8 flags = (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) ? + 0 : ND_ARS_RETURN_PREV_DATA; - acpi_nfit_ars_rescan(acpi_desc); + acpi_nfit_ars_rescan(acpi_desc, flags); } void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event) diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c index fd86bec98dea..feeb95d574fa 100644 --- a/drivers/acpi/nfit/mce.c +++ b/drivers/acpi/nfit/mce.c @@ -79,7 +79,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, * already in progress, just let that be the last * authoritative one */ - acpi_nfit_ars_rescan(acpi_desc); + acpi_nfit_ars_rescan(acpi_desc, 0); } break; } diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index e3da60b2d686..54292db61262 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -155,6 +155,7 @@ struct acpi_nfit_desc { struct list_head idts; struct nvdimm_bus *nvdimm_bus; struct device *dev; + u8 ars_start_flags; struct nd_cmd_ars_status *ars_status; size_t ars_status_size; struct work_struct work; @@ -207,7 +208,7 @@ struct nfit_blk { extern struct list_head acpi_descs; extern struct mutex acpi_desc_lock; -int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc); +int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags); #ifdef CONFIG_X86_MCE void nfit_mce_register(void); diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index e15768fb4b99..6d3c54264d8e 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -207,6 +207,7 @@ enum { enum { ND_ARS_VOLATILE = 1, ND_ARS_PERSISTENT = 2, + ND_ARS_RETURN_PREV_DATA = 1 << 1, ND_CONFIG_LOCKED = 1, }; -- cgit v1.2.3 From 2de5148ffb12ff6b4088125f44818771e78e6830 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 3 Jul 2017 16:30:44 -0700 Subject: libnvdimm, namespace: record 'lbasize' for pmem namespaces Commit f979b13c3cc5 "libnvdimm, label: honor the lba size specified in v1.2 labels") neglected to update the 'lbasize' in the label when the namespace sector_size attribute was written. We need this value in the label for inter-OS / pre-OS compatibility. Fixes: f979b13c3cc5 ("libnvdimm, label: honor the lba size specified in v1.2 labels") Signed-off-by: Dan Williams --- drivers/nvdimm/label.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 922b68718a1a..87796f840777 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -660,6 +660,7 @@ static int __pmem_label_update(struct nd_region *nd_region, nd_label->position = __cpu_to_le16(pos); nd_label->isetcookie = __cpu_to_le64(cookie); nd_label->rawsize = __cpu_to_le64(resource_size(res)); + nd_label->lbasize = __cpu_to_le64(nspm->lbasize); nd_label->dpa = __cpu_to_le64(res->start); nd_label->slot = __cpu_to_le32(slot); if (namespace_label_has(ndd, type_guid)) -- cgit v1.2.3