pfn_devs.c (22421B)
1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. 4 */ 5#include <linux/memremap.h> 6#include <linux/blkdev.h> 7#include <linux/device.h> 8#include <linux/sizes.h> 9#include <linux/slab.h> 10#include <linux/fs.h> 11#include <linux/mm.h> 12#include "nd-core.h" 13#include "pfn.h" 14#include "nd.h" 15 16static void nd_pfn_release(struct device *dev) 17{ 18 struct nd_region *nd_region = to_nd_region(dev->parent); 19 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 20 21 dev_dbg(dev, "trace\n"); 22 nd_detach_ndns(&nd_pfn->dev, &nd_pfn->ndns); 23 ida_simple_remove(&nd_region->pfn_ida, nd_pfn->id); 24 kfree(nd_pfn->uuid); 25 kfree(nd_pfn); 26} 27 28struct nd_pfn *to_nd_pfn(struct device *dev) 29{ 30 struct nd_pfn *nd_pfn = container_of(dev, struct nd_pfn, dev); 31 32 WARN_ON(!is_nd_pfn(dev)); 33 return nd_pfn; 34} 35EXPORT_SYMBOL(to_nd_pfn); 36 37static ssize_t mode_show(struct device *dev, 38 struct device_attribute *attr, char *buf) 39{ 40 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 41 42 switch (nd_pfn->mode) { 43 case PFN_MODE_RAM: 44 return sprintf(buf, "ram\n"); 45 case PFN_MODE_PMEM: 46 return sprintf(buf, "pmem\n"); 47 default: 48 return sprintf(buf, "none\n"); 49 } 50} 51 52static ssize_t mode_store(struct device *dev, 53 struct device_attribute *attr, const char *buf, size_t len) 54{ 55 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 56 ssize_t rc = 0; 57 58 device_lock(dev); 59 nvdimm_bus_lock(dev); 60 if (dev->driver) 61 rc = -EBUSY; 62 else { 63 size_t n = len - 1; 64 65 if (strncmp(buf, "pmem\n", n) == 0 66 || strncmp(buf, "pmem", n) == 0) { 67 nd_pfn->mode = PFN_MODE_PMEM; 68 } else if (strncmp(buf, "ram\n", n) == 0 69 || strncmp(buf, "ram", n) == 0) 70 nd_pfn->mode = PFN_MODE_RAM; 71 else if (strncmp(buf, "none\n", n) == 0 72 || strncmp(buf, "none", n) == 0) 73 nd_pfn->mode = PFN_MODE_NONE; 74 else 75 rc = -EINVAL; 76 } 77 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, 78 buf[len - 1] == '\n' ? "" : "\n"); 79 nvdimm_bus_unlock(dev); 80 device_unlock(dev); 81 82 return rc ? rc : len; 83} 84static DEVICE_ATTR_RW(mode); 85 86static ssize_t align_show(struct device *dev, 87 struct device_attribute *attr, char *buf) 88{ 89 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 90 91 return sprintf(buf, "%ld\n", nd_pfn->align); 92} 93 94static unsigned long *nd_pfn_supported_alignments(unsigned long *alignments) 95{ 96 97 alignments[0] = PAGE_SIZE; 98 99 if (has_transparent_hugepage()) { 100 alignments[1] = HPAGE_PMD_SIZE; 101 if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) 102 alignments[2] = HPAGE_PUD_SIZE; 103 } 104 105 return alignments; 106} 107 108/* 109 * Use pmd mapping if supported as default alignment 110 */ 111static unsigned long nd_pfn_default_alignment(void) 112{ 113 114 if (has_transparent_hugepage()) 115 return HPAGE_PMD_SIZE; 116 return PAGE_SIZE; 117} 118 119static ssize_t align_store(struct device *dev, 120 struct device_attribute *attr, const char *buf, size_t len) 121{ 122 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 123 unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, }; 124 ssize_t rc; 125 126 device_lock(dev); 127 nvdimm_bus_lock(dev); 128 rc = nd_size_select_store(dev, buf, &nd_pfn->align, 129 nd_pfn_supported_alignments(aligns)); 130 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, 131 buf[len - 1] == '\n' ? "" : "\n"); 132 nvdimm_bus_unlock(dev); 133 device_unlock(dev); 134 135 return rc ? rc : len; 136} 137static DEVICE_ATTR_RW(align); 138 139static ssize_t uuid_show(struct device *dev, 140 struct device_attribute *attr, char *buf) 141{ 142 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 143 144 if (nd_pfn->uuid) 145 return sprintf(buf, "%pUb\n", nd_pfn->uuid); 146 return sprintf(buf, "\n"); 147} 148 149static ssize_t uuid_store(struct device *dev, 150 struct device_attribute *attr, const char *buf, size_t len) 151{ 152 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 153 ssize_t rc; 154 155 device_lock(dev); 156 rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len); 157 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, 158 buf[len - 1] == '\n' ? "" : "\n"); 159 device_unlock(dev); 160 161 return rc ? rc : len; 162} 163static DEVICE_ATTR_RW(uuid); 164 165static ssize_t namespace_show(struct device *dev, 166 struct device_attribute *attr, char *buf) 167{ 168 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 169 ssize_t rc; 170 171 nvdimm_bus_lock(dev); 172 rc = sprintf(buf, "%s\n", nd_pfn->ndns 173 ? dev_name(&nd_pfn->ndns->dev) : ""); 174 nvdimm_bus_unlock(dev); 175 return rc; 176} 177 178static ssize_t namespace_store(struct device *dev, 179 struct device_attribute *attr, const char *buf, size_t len) 180{ 181 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 182 ssize_t rc; 183 184 device_lock(dev); 185 nvdimm_bus_lock(dev); 186 rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len); 187 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, 188 buf[len - 1] == '\n' ? "" : "\n"); 189 nvdimm_bus_unlock(dev); 190 device_unlock(dev); 191 192 return rc; 193} 194static DEVICE_ATTR_RW(namespace); 195 196static ssize_t resource_show(struct device *dev, 197 struct device_attribute *attr, char *buf) 198{ 199 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 200 ssize_t rc; 201 202 device_lock(dev); 203 if (dev->driver) { 204 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 205 u64 offset = __le64_to_cpu(pfn_sb->dataoff); 206 struct nd_namespace_common *ndns = nd_pfn->ndns; 207 u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); 208 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 209 210 rc = sprintf(buf, "%#llx\n", (unsigned long long) nsio->res.start 211 + start_pad + offset); 212 } else { 213 /* no address to convey if the pfn instance is disabled */ 214 rc = -ENXIO; 215 } 216 device_unlock(dev); 217 218 return rc; 219} 220static DEVICE_ATTR_ADMIN_RO(resource); 221 222static ssize_t size_show(struct device *dev, 223 struct device_attribute *attr, char *buf) 224{ 225 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 226 ssize_t rc; 227 228 device_lock(dev); 229 if (dev->driver) { 230 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 231 u64 offset = __le64_to_cpu(pfn_sb->dataoff); 232 struct nd_namespace_common *ndns = nd_pfn->ndns; 233 u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); 234 u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); 235 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 236 237 rc = sprintf(buf, "%llu\n", (unsigned long long) 238 resource_size(&nsio->res) - start_pad 239 - end_trunc - offset); 240 } else { 241 /* no size to convey if the pfn instance is disabled */ 242 rc = -ENXIO; 243 } 244 device_unlock(dev); 245 246 return rc; 247} 248static DEVICE_ATTR_RO(size); 249 250static ssize_t supported_alignments_show(struct device *dev, 251 struct device_attribute *attr, char *buf) 252{ 253 unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, }; 254 255 return nd_size_select_show(0, 256 nd_pfn_supported_alignments(aligns), buf); 257} 258static DEVICE_ATTR_RO(supported_alignments); 259 260static struct attribute *nd_pfn_attributes[] = { 261 &dev_attr_mode.attr, 262 &dev_attr_namespace.attr, 263 &dev_attr_uuid.attr, 264 &dev_attr_align.attr, 265 &dev_attr_resource.attr, 266 &dev_attr_size.attr, 267 &dev_attr_supported_alignments.attr, 268 NULL, 269}; 270 271static struct attribute_group nd_pfn_attribute_group = { 272 .attrs = nd_pfn_attributes, 273}; 274 275const struct attribute_group *nd_pfn_attribute_groups[] = { 276 &nd_pfn_attribute_group, 277 &nd_device_attribute_group, 278 &nd_numa_attribute_group, 279 NULL, 280}; 281 282static const struct device_type nd_pfn_device_type = { 283 .name = "nd_pfn", 284 .release = nd_pfn_release, 285 .groups = nd_pfn_attribute_groups, 286}; 287 288bool is_nd_pfn(struct device *dev) 289{ 290 return dev ? dev->type == &nd_pfn_device_type : false; 291} 292EXPORT_SYMBOL(is_nd_pfn); 293 294static struct lock_class_key nvdimm_pfn_key; 295 296struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, 297 struct nd_namespace_common *ndns) 298{ 299 struct device *dev; 300 301 if (!nd_pfn) 302 return NULL; 303 304 nd_pfn->mode = PFN_MODE_NONE; 305 nd_pfn->align = nd_pfn_default_alignment(); 306 dev = &nd_pfn->dev; 307 device_initialize(&nd_pfn->dev); 308 lockdep_set_class(&nd_pfn->dev.mutex, &nvdimm_pfn_key); 309 if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { 310 dev_dbg(&ndns->dev, "failed, already claimed by %s\n", 311 dev_name(ndns->claim)); 312 put_device(dev); 313 return NULL; 314 } 315 return dev; 316} 317 318static struct nd_pfn *nd_pfn_alloc(struct nd_region *nd_region) 319{ 320 struct nd_pfn *nd_pfn; 321 struct device *dev; 322 323 nd_pfn = kzalloc(sizeof(*nd_pfn), GFP_KERNEL); 324 if (!nd_pfn) 325 return NULL; 326 327 nd_pfn->id = ida_simple_get(&nd_region->pfn_ida, 0, 0, GFP_KERNEL); 328 if (nd_pfn->id < 0) { 329 kfree(nd_pfn); 330 return NULL; 331 } 332 333 dev = &nd_pfn->dev; 334 dev_set_name(dev, "pfn%d.%d", nd_region->id, nd_pfn->id); 335 dev->type = &nd_pfn_device_type; 336 dev->parent = &nd_region->dev; 337 338 return nd_pfn; 339} 340 341struct device *nd_pfn_create(struct nd_region *nd_region) 342{ 343 struct nd_pfn *nd_pfn; 344 struct device *dev; 345 346 if (!is_memory(&nd_region->dev)) 347 return NULL; 348 349 nd_pfn = nd_pfn_alloc(nd_region); 350 dev = nd_pfn_devinit(nd_pfn, NULL); 351 352 nd_device_register(dev); 353 return dev; 354} 355 356/* 357 * nd_pfn_clear_memmap_errors() clears any errors in the volatile memmap 358 * space associated with the namespace. If the memmap is set to DRAM, then 359 * this is a no-op. Since the memmap area is freshly initialized during 360 * probe, we have an opportunity to clear any badblocks in this area. 361 */ 362static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn) 363{ 364 struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent); 365 struct nd_namespace_common *ndns = nd_pfn->ndns; 366 void *zero_page = page_address(ZERO_PAGE(0)); 367 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 368 int num_bad, meta_num, rc, bb_present; 369 sector_t first_bad, meta_start; 370 struct nd_namespace_io *nsio; 371 372 if (nd_pfn->mode != PFN_MODE_PMEM) 373 return 0; 374 375 nsio = to_nd_namespace_io(&ndns->dev); 376 meta_start = (SZ_4K + sizeof(*pfn_sb)) >> 9; 377 meta_num = (le64_to_cpu(pfn_sb->dataoff) >> 9) - meta_start; 378 379 /* 380 * re-enable the namespace with correct size so that we can access 381 * the device memmap area. 382 */ 383 devm_namespace_disable(&nd_pfn->dev, ndns); 384 rc = devm_namespace_enable(&nd_pfn->dev, ndns, le64_to_cpu(pfn_sb->dataoff)); 385 if (rc) 386 return rc; 387 388 do { 389 unsigned long zero_len; 390 u64 nsoff; 391 392 bb_present = badblocks_check(&nd_region->bb, meta_start, 393 meta_num, &first_bad, &num_bad); 394 if (bb_present) { 395 dev_dbg(&nd_pfn->dev, "meta: %x badblocks at %llx\n", 396 num_bad, first_bad); 397 nsoff = ALIGN_DOWN((nd_region->ndr_start 398 + (first_bad << 9)) - nsio->res.start, 399 PAGE_SIZE); 400 zero_len = ALIGN(num_bad << 9, PAGE_SIZE); 401 while (zero_len) { 402 unsigned long chunk = min(zero_len, PAGE_SIZE); 403 404 rc = nvdimm_write_bytes(ndns, nsoff, zero_page, 405 chunk, 0); 406 if (rc) 407 break; 408 409 zero_len -= chunk; 410 nsoff += chunk; 411 } 412 if (rc) { 413 dev_err(&nd_pfn->dev, 414 "error clearing %x badblocks at %llx\n", 415 num_bad, first_bad); 416 return rc; 417 } 418 } 419 } while (bb_present); 420 421 return 0; 422} 423 424static bool nd_supported_alignment(unsigned long align) 425{ 426 int i; 427 unsigned long supported[MAX_NVDIMM_ALIGN] = { [0] = 0, }; 428 429 if (align == 0) 430 return false; 431 432 nd_pfn_supported_alignments(supported); 433 for (i = 0; supported[i]; i++) 434 if (align == supported[i]) 435 return true; 436 return false; 437} 438 439/** 440 * nd_pfn_validate - read and validate info-block 441 * @nd_pfn: fsdax namespace runtime state / properties 442 * @sig: 'devdax' or 'fsdax' signature 443 * 444 * Upon return the info-block buffer contents (->pfn_sb) are 445 * indeterminate when validation fails, and a coherent info-block 446 * otherwise. 447 */ 448int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) 449{ 450 u64 checksum, offset; 451 struct resource *res; 452 enum nd_pfn_mode mode; 453 struct nd_namespace_io *nsio; 454 unsigned long align, start_pad; 455 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 456 struct nd_namespace_common *ndns = nd_pfn->ndns; 457 const uuid_t *parent_uuid = nd_dev_to_uuid(&ndns->dev); 458 459 if (!pfn_sb || !ndns) 460 return -ENODEV; 461 462 if (!is_memory(nd_pfn->dev.parent)) 463 return -ENODEV; 464 465 if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0)) 466 return -ENXIO; 467 468 if (memcmp(pfn_sb->signature, sig, PFN_SIG_LEN) != 0) 469 return -ENODEV; 470 471 checksum = le64_to_cpu(pfn_sb->checksum); 472 pfn_sb->checksum = 0; 473 if (checksum != nd_sb_checksum((struct nd_gen_sb *) pfn_sb)) 474 return -ENODEV; 475 pfn_sb->checksum = cpu_to_le64(checksum); 476 477 if (memcmp(pfn_sb->parent_uuid, parent_uuid, 16) != 0) 478 return -ENODEV; 479 480 if (__le16_to_cpu(pfn_sb->version_minor) < 1) { 481 pfn_sb->start_pad = 0; 482 pfn_sb->end_trunc = 0; 483 } 484 485 if (__le16_to_cpu(pfn_sb->version_minor) < 2) 486 pfn_sb->align = 0; 487 488 if (__le16_to_cpu(pfn_sb->version_minor) < 4) { 489 pfn_sb->page_struct_size = cpu_to_le16(64); 490 pfn_sb->page_size = cpu_to_le32(PAGE_SIZE); 491 } 492 493 switch (le32_to_cpu(pfn_sb->mode)) { 494 case PFN_MODE_RAM: 495 case PFN_MODE_PMEM: 496 break; 497 default: 498 return -ENXIO; 499 } 500 501 align = le32_to_cpu(pfn_sb->align); 502 offset = le64_to_cpu(pfn_sb->dataoff); 503 start_pad = le32_to_cpu(pfn_sb->start_pad); 504 if (align == 0) 505 align = 1UL << ilog2(offset); 506 mode = le32_to_cpu(pfn_sb->mode); 507 508 if ((le32_to_cpu(pfn_sb->page_size) > PAGE_SIZE) && 509 (mode == PFN_MODE_PMEM)) { 510 dev_err(&nd_pfn->dev, 511 "init failed, page size mismatch %d\n", 512 le32_to_cpu(pfn_sb->page_size)); 513 return -EOPNOTSUPP; 514 } 515 516 if ((le16_to_cpu(pfn_sb->page_struct_size) < sizeof(struct page)) && 517 (mode == PFN_MODE_PMEM)) { 518 dev_err(&nd_pfn->dev, 519 "init failed, struct page size mismatch %d\n", 520 le16_to_cpu(pfn_sb->page_struct_size)); 521 return -EOPNOTSUPP; 522 } 523 524 /* 525 * Check whether the we support the alignment. For Dax if the 526 * superblock alignment is not matching, we won't initialize 527 * the device. 528 */ 529 if (!nd_supported_alignment(align) && 530 !memcmp(pfn_sb->signature, DAX_SIG, PFN_SIG_LEN)) { 531 dev_err(&nd_pfn->dev, "init failed, alignment mismatch: " 532 "%ld:%ld\n", nd_pfn->align, align); 533 return -EOPNOTSUPP; 534 } 535 536 if (!nd_pfn->uuid) { 537 /* 538 * When probing a namepace via nd_pfn_probe() the uuid 539 * is NULL (see: nd_pfn_devinit()) we init settings from 540 * pfn_sb 541 */ 542 nd_pfn->uuid = kmemdup(pfn_sb->uuid, 16, GFP_KERNEL); 543 if (!nd_pfn->uuid) 544 return -ENOMEM; 545 nd_pfn->align = align; 546 nd_pfn->mode = mode; 547 } else { 548 /* 549 * When probing a pfn / dax instance we validate the 550 * live settings against the pfn_sb 551 */ 552 if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0) 553 return -ENODEV; 554 555 /* 556 * If the uuid validates, but other settings mismatch 557 * return EINVAL because userspace has managed to change 558 * the configuration without specifying new 559 * identification. 560 */ 561 if (nd_pfn->align != align || nd_pfn->mode != mode) { 562 dev_err(&nd_pfn->dev, 563 "init failed, settings mismatch\n"); 564 dev_dbg(&nd_pfn->dev, "align: %lx:%lx mode: %d:%d\n", 565 nd_pfn->align, align, nd_pfn->mode, 566 mode); 567 return -EOPNOTSUPP; 568 } 569 } 570 571 if (align > nvdimm_namespace_capacity(ndns)) { 572 dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n", 573 align, nvdimm_namespace_capacity(ndns)); 574 return -EOPNOTSUPP; 575 } 576 577 /* 578 * These warnings are verbose because they can only trigger in 579 * the case where the physical address alignment of the 580 * namespace has changed since the pfn superblock was 581 * established. 582 */ 583 nsio = to_nd_namespace_io(&ndns->dev); 584 res = &nsio->res; 585 if (offset >= resource_size(res)) { 586 dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n", 587 dev_name(&ndns->dev)); 588 return -EOPNOTSUPP; 589 } 590 591 if ((align && !IS_ALIGNED(res->start + offset + start_pad, align)) 592 || !IS_ALIGNED(offset, PAGE_SIZE)) { 593 dev_err(&nd_pfn->dev, 594 "bad offset: %#llx dax disabled align: %#lx\n", 595 offset, align); 596 return -EOPNOTSUPP; 597 } 598 599 if (!IS_ALIGNED(res->start + le32_to_cpu(pfn_sb->start_pad), 600 memremap_compat_align())) { 601 dev_err(&nd_pfn->dev, "resource start misaligned\n"); 602 return -EOPNOTSUPP; 603 } 604 605 if (!IS_ALIGNED(res->end + 1 - le32_to_cpu(pfn_sb->end_trunc), 606 memremap_compat_align())) { 607 dev_err(&nd_pfn->dev, "resource end misaligned\n"); 608 return -EOPNOTSUPP; 609 } 610 611 return 0; 612} 613EXPORT_SYMBOL(nd_pfn_validate); 614 615int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) 616{ 617 int rc; 618 struct nd_pfn *nd_pfn; 619 struct device *pfn_dev; 620 struct nd_pfn_sb *pfn_sb; 621 struct nd_region *nd_region = to_nd_region(ndns->dev.parent); 622 623 if (ndns->force_raw) 624 return -ENODEV; 625 626 switch (ndns->claim_class) { 627 case NVDIMM_CCLASS_NONE: 628 case NVDIMM_CCLASS_PFN: 629 break; 630 default: 631 return -ENODEV; 632 } 633 634 nvdimm_bus_lock(&ndns->dev); 635 nd_pfn = nd_pfn_alloc(nd_region); 636 pfn_dev = nd_pfn_devinit(nd_pfn, ndns); 637 nvdimm_bus_unlock(&ndns->dev); 638 if (!pfn_dev) 639 return -ENOMEM; 640 pfn_sb = devm_kmalloc(dev, sizeof(*pfn_sb), GFP_KERNEL); 641 nd_pfn = to_nd_pfn(pfn_dev); 642 nd_pfn->pfn_sb = pfn_sb; 643 rc = nd_pfn_validate(nd_pfn, PFN_SIG); 644 dev_dbg(dev, "pfn: %s\n", rc == 0 ? dev_name(pfn_dev) : "<none>"); 645 if (rc < 0) { 646 nd_detach_ndns(pfn_dev, &nd_pfn->ndns); 647 put_device(pfn_dev); 648 } else 649 nd_device_register(pfn_dev); 650 651 return rc; 652} 653EXPORT_SYMBOL(nd_pfn_probe); 654 655/* 656 * We hotplug memory at sub-section granularity, pad the reserved area 657 * from the previous section base to the namespace base address. 658 */ 659static unsigned long init_altmap_base(resource_size_t base) 660{ 661 unsigned long base_pfn = PHYS_PFN(base); 662 663 return SUBSECTION_ALIGN_DOWN(base_pfn); 664} 665 666static unsigned long init_altmap_reserve(resource_size_t base) 667{ 668 unsigned long reserve = nd_info_block_reserve() >> PAGE_SHIFT; 669 unsigned long base_pfn = PHYS_PFN(base); 670 671 reserve += base_pfn - SUBSECTION_ALIGN_DOWN(base_pfn); 672 return reserve; 673} 674 675static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) 676{ 677 struct range *range = &pgmap->range; 678 struct vmem_altmap *altmap = &pgmap->altmap; 679 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 680 u64 offset = le64_to_cpu(pfn_sb->dataoff); 681 u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); 682 u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); 683 u32 reserve = nd_info_block_reserve(); 684 struct nd_namespace_common *ndns = nd_pfn->ndns; 685 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 686 resource_size_t base = nsio->res.start + start_pad; 687 resource_size_t end = nsio->res.end - end_trunc; 688 struct vmem_altmap __altmap = { 689 .base_pfn = init_altmap_base(base), 690 .reserve = init_altmap_reserve(base), 691 .end_pfn = PHYS_PFN(end), 692 }; 693 694 *range = (struct range) { 695 .start = nsio->res.start + start_pad, 696 .end = nsio->res.end - end_trunc, 697 }; 698 pgmap->nr_range = 1; 699 if (nd_pfn->mode == PFN_MODE_RAM) { 700 if (offset < reserve) 701 return -EINVAL; 702 nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); 703 } else if (nd_pfn->mode == PFN_MODE_PMEM) { 704 nd_pfn->npfns = PHYS_PFN((range_len(range) - offset)); 705 if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns) 706 dev_info(&nd_pfn->dev, 707 "number of pfns truncated from %lld to %ld\n", 708 le64_to_cpu(nd_pfn->pfn_sb->npfns), 709 nd_pfn->npfns); 710 memcpy(altmap, &__altmap, sizeof(*altmap)); 711 altmap->free = PHYS_PFN(offset - reserve); 712 altmap->alloc = 0; 713 pgmap->flags |= PGMAP_ALTMAP_VALID; 714 } else 715 return -ENXIO; 716 717 return 0; 718} 719 720static int nd_pfn_init(struct nd_pfn *nd_pfn) 721{ 722 struct nd_namespace_common *ndns = nd_pfn->ndns; 723 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 724 resource_size_t start, size; 725 struct nd_region *nd_region; 726 unsigned long npfns, align; 727 u32 end_trunc; 728 struct nd_pfn_sb *pfn_sb; 729 phys_addr_t offset; 730 const char *sig; 731 u64 checksum; 732 int rc; 733 734 pfn_sb = devm_kmalloc(&nd_pfn->dev, sizeof(*pfn_sb), GFP_KERNEL); 735 if (!pfn_sb) 736 return -ENOMEM; 737 738 nd_pfn->pfn_sb = pfn_sb; 739 if (is_nd_dax(&nd_pfn->dev)) 740 sig = DAX_SIG; 741 else 742 sig = PFN_SIG; 743 744 rc = nd_pfn_validate(nd_pfn, sig); 745 if (rc == 0) 746 return nd_pfn_clear_memmap_errors(nd_pfn); 747 if (rc != -ENODEV) 748 return rc; 749 750 /* no info block, do init */; 751 memset(pfn_sb, 0, sizeof(*pfn_sb)); 752 753 nd_region = to_nd_region(nd_pfn->dev.parent); 754 if (nd_region->ro) { 755 dev_info(&nd_pfn->dev, 756 "%s is read-only, unable to init metadata\n", 757 dev_name(&nd_region->dev)); 758 return -ENXIO; 759 } 760 761 /* 762 * Note, we use 64 here for the standard size of struct page, 763 * debugging options may cause it to be larger in which case the 764 * implementation will limit the pfns advertised through 765 * ->direct_access() to those that are included in the memmap. 766 */ 767 start = nsio->res.start; 768 size = resource_size(&nsio->res); 769 npfns = PHYS_PFN(size - SZ_8K); 770 align = max(nd_pfn->align, memremap_compat_align()); 771 772 /* 773 * When @start is misaligned fail namespace creation. See 774 * the 'struct nd_pfn_sb' commentary on why ->start_pad is not 775 * an option. 776 */ 777 if (!IS_ALIGNED(start, memremap_compat_align())) { 778 dev_err(&nd_pfn->dev, "%s: start %pa misaligned to %#lx\n", 779 dev_name(&ndns->dev), &start, 780 memremap_compat_align()); 781 return -EINVAL; 782 } 783 end_trunc = start + size - ALIGN_DOWN(start + size, align); 784 if (nd_pfn->mode == PFN_MODE_PMEM) { 785 /* 786 * The altmap should be padded out to the block size used 787 * when populating the vmemmap. This *should* be equal to 788 * PMD_SIZE for most architectures. 789 * 790 * Also make sure size of struct page is less than 64. We 791 * want to make sure we use large enough size here so that 792 * we don't have a dynamic reserve space depending on 793 * struct page size. But we also want to make sure we notice 794 * when we end up adding new elements to struct page. 795 */ 796 BUILD_BUG_ON(sizeof(struct page) > MAX_STRUCT_PAGE_SIZE); 797 offset = ALIGN(start + SZ_8K + MAX_STRUCT_PAGE_SIZE * npfns, align) 798 - start; 799 } else if (nd_pfn->mode == PFN_MODE_RAM) 800 offset = ALIGN(start + SZ_8K, align) - start; 801 else 802 return -ENXIO; 803 804 if (offset >= size) { 805 dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n", 806 dev_name(&ndns->dev)); 807 return -ENXIO; 808 } 809 810 npfns = PHYS_PFN(size - offset - end_trunc); 811 pfn_sb->mode = cpu_to_le32(nd_pfn->mode); 812 pfn_sb->dataoff = cpu_to_le64(offset); 813 pfn_sb->npfns = cpu_to_le64(npfns); 814 memcpy(pfn_sb->signature, sig, PFN_SIG_LEN); 815 memcpy(pfn_sb->uuid, nd_pfn->uuid, 16); 816 memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16); 817 pfn_sb->version_major = cpu_to_le16(1); 818 pfn_sb->version_minor = cpu_to_le16(4); 819 pfn_sb->end_trunc = cpu_to_le32(end_trunc); 820 pfn_sb->align = cpu_to_le32(nd_pfn->align); 821 pfn_sb->page_struct_size = cpu_to_le16(MAX_STRUCT_PAGE_SIZE); 822 pfn_sb->page_size = cpu_to_le32(PAGE_SIZE); 823 checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb); 824 pfn_sb->checksum = cpu_to_le64(checksum); 825 826 rc = nd_pfn_clear_memmap_errors(nd_pfn); 827 if (rc) 828 return rc; 829 830 return nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0); 831} 832 833/* 834 * Determine the effective resource range and vmem_altmap from an nd_pfn 835 * instance. 836 */ 837int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) 838{ 839 int rc; 840 841 if (!nd_pfn->uuid || !nd_pfn->ndns) 842 return -ENODEV; 843 844 rc = nd_pfn_init(nd_pfn); 845 if (rc) 846 return rc; 847 848 /* we need a valid pfn_sb before we can init a dev_pagemap */ 849 return __nvdimm_setup_pfn(nd_pfn, pgmap); 850} 851EXPORT_SYMBOL_GPL(nvdimm_setup_pfn);