papr_scm.c (42809B)
// SPDX-License-Identifier: GPL-2.0

#define pr_fmt(fmt)	"papr-scm: " fmt

#include <linux/of.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/ioport.h>
#include <linux/slab.h>
#include <linux/ndctl.h>
#include <linux/sched.h>
#include <linux/libnvdimm.h>
#include <linux/platform_device.h>
#include <linux/delay.h>
#include <linux/seq_buf.h>
#include <linux/nd.h>

#include <asm/plpar_wrappers.h>
#include <asm/papr_pdsm.h>
#include <asm/mce.h>
#include <asm/unaligned.h>
#include <linux/perf_event.h>

/* Target address passed to H_SCM_BIND_MEM by drc_pmem_bind() */
#define BIND_ANY_ADDR (~0ul)

/* Bitmask of the ND_CMD_* dimm commands accepted by is_cmd_valid() */
#define PAPR_SCM_DIMM_CMD_MASK \
	((1ul << ND_CMD_GET_CONFIG_SIZE) | \
	 (1ul << ND_CMD_GET_CONFIG_DATA) | \
	 (1ul << ND_CMD_SET_CONFIG_DATA) | \
	 (1ul << ND_CMD_CALL))

/* DIMM health bitmap indicators */
/* SCM device is unable to persist memory contents */
#define PAPR_PMEM_UNARMED                   (1ULL << (63 - 0))
/* SCM device failed to persist memory contents */
#define PAPR_PMEM_SHUTDOWN_DIRTY            (1ULL << (63 - 1))
/* SCM device contents are persisted from previous IPL */
#define PAPR_PMEM_SHUTDOWN_CLEAN            (1ULL << (63 - 2))
/* SCM device contents are not persisted from previous IPL */
#define PAPR_PMEM_EMPTY                     (1ULL << (63 - 3))
/* SCM device memory life remaining is critically low */
#define PAPR_PMEM_HEALTH_CRITICAL           (1ULL << (63 - 4))
/* SCM device will be garded off next IPL due to failure */
#define PAPR_PMEM_HEALTH_FATAL              (1ULL << (63 - 5))
/* SCM contents cannot persist due to current platform health status */
#define PAPR_PMEM_HEALTH_UNHEALTHY          (1ULL << (63 - 6))
/* SCM device is unable to persist memory contents in certain conditions */
#define PAPR_PMEM_HEALTH_NON_CRITICAL       (1ULL << (63 - 7))
/* SCM device is encrypted */
#define PAPR_PMEM_ENCRYPTED                 (1ULL << (63 - 8))
/* SCM device has been scrubbed and locked */
#define PAPR_PMEM_SCRUBBED_AND_LOCKED       (1ULL << (63 - 9))

/* Bits status indicators for health bitmap indicating unarmed dimm */
#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED |		\
				PAPR_PMEM_HEALTH_UNHEALTHY)

/* Bits status indicators for health bitmap indicating unflushed dimm */
#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY)

/* Bits status indicators for health bitmap indicating unrestored dimm */
#define PAPR_PMEM_BAD_RESTORE_MASK  (PAPR_PMEM_EMPTY)

/* Bit status indicators for smart event notification */
#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \
				    PAPR_PMEM_HEALTH_FATAL |	\
				    PAPR_PMEM_HEALTH_UNHEALTHY)

/* Eye-catcher and version written into the header of a stats request */
#define PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS)
#define PAPR_SCM_PERF_STATS_VERSION 0x1

/* Struct holding a single performance metric */
struct papr_scm_perf_stat {
	u8 stat_id[8];
	__be64 stat_val;
} __packed;

/* Struct exchanged between kernel and PHYP for fetching drc perf stats */
struct papr_scm_perf_stats {
	u8 eye_catcher[8];
	/* Should be PAPR_SCM_PERF_STATS_VERSION */
	__be32 stats_version;
	/* Number of stats following */
	__be32 num_statistics;
	/* zero or more performance metrics */
	struct papr_scm_perf_stat scm_statistic[];
} __packed;

/* private struct associated with each region */
struct papr_scm_priv {
	struct platform_device *pdev;
	struct device_node *dn;
	uint32_t drc_index;
	uint64_t blocks;
	uint64_t block_size;
	int metadata_size;
	bool is_volatile;
	bool hcall_flush_required;

	/* Address the drc is bound to; set by the bind/query helpers below */
	uint64_t bound_addr;

	struct nvdimm_bus_descriptor bus_desc;
	struct nvdimm_bus *bus;
	struct nvdimm *nvdimm;
	struct resource res;
	struct nd_region *region;
	struct nd_interleave_set nd_set;
	struct list_head region_list;

	/* Protect dimm health data from concurrent read/writes */
	struct mutex health_mutex;

	/* Last time the health information of the dimm was updated */
	unsigned long lasthealth_jiffies;

	/* Health information for the dimm */
	u64 health_bitmap;

	/* Holds the last known dirty shutdown counter value */
	u64 dirty_shutdown_counter;

	/* length of the stat buffer as expected by phyp */
	size_t stat_buffer_len;

	/* The bits which needs to be overridden */
	u64 health_bitmap_inject_mask;

	/* array to have event_code and stat_id mappings */
	u8 *nvdimm_events_map;
};

/*
 * Flush callback for a region: issue H_SCM_FLUSH for the region's drc and
 * keep re-issuing it, feeding back the continuation token returned in
 * ret_buf[0], for as long as the hypervisor reports busy.
 *
 * Returns 0 when the hcall finally reports success, -EIO for any other
 * final hcall status.
 */
static int papr_scm_pmem_flush(struct nd_region *nd_region,
			       struct bio *bio __maybe_unused)
{
	struct papr_scm_priv *p = nd_region_provider_data(nd_region);
	unsigned long ret_buf[PLPAR_HCALL_BUFSIZE], token = 0;
	long rc;

	dev_dbg(&p->pdev->dev, "flush drc 0x%x", p->drc_index);

	do {
		rc = plpar_hcall(H_SCM_FLUSH, ret_buf, p->drc_index, token);
		token = ret_buf[0];

		/* Check if we are stalled for some time */
		if (H_IS_LONG_BUSY(rc)) {
			/* Sleep for the hinted interval, then retry as H_BUSY */
			msleep(get_longbusy_msecs(rc));
			rc = H_BUSY;
		} else if (rc == H_BUSY) {
			cond_resched();
		}
	} while (rc == H_BUSY);

	if (rc) {
		dev_err(&p->pdev->dev, "flush error: %ld", rc);
		rc = -EIO;
	} else {
		dev_dbg(&p->pdev->dev, "flush drc 0x%x complete", p->drc_index);
	}

	return rc;
}

static LIST_HEAD(papr_nd_regions);
static DEFINE_MUTEX(papr_ndr_lock);

/*
 * Bind all p->blocks blocks of the drc via H_SCM_BIND_MEM and record the
 * resulting address in p->bound_addr.
 *
 * Returns the final hcall status unchanged (0 on success); note that on
 * failure this is a raw hcall code, not an errno.
 */
static int drc_pmem_bind(struct papr_scm_priv *p)
{
	unsigned long ret[PLPAR_HCALL_BUFSIZE];
	uint64_t saved = 0;
	uint64_t token;
	int64_t rc;

	/*
	 * When the hypervisor cannot map all the requested memory in a single
	 * hcall it returns H_BUSY and we call again with the token until
	 * we get H_SUCCESS. Aborting the retry loop before getting H_SUCCESS
	 * leave the system in an undefined state, so we wait.
	 */
	token = 0;

	do {
		rc = plpar_hcall(H_SCM_BIND_MEM, ret, p->drc_index, 0,
				 p->blocks, BIND_ANY_ADDR, token);
		token = ret[0];
		/* Keep the first non-zero ret[1] seen across the retries */
		if (!saved)
			saved = ret[1];
		cond_resched();
	} while (rc == H_BUSY);

	if (rc)
		return rc;

	p->bound_addr = saved;
	dev_dbg(&p->pdev->dev, "bound drc 0x%x to 0x%lx\n",
		p->drc_index, (unsigned long)saved);
	return rc;
}

/*
 * Unbind every SCM resource associated with the drc via H_SCM_UNBIND_ALL,
 * retrying while the hypervisor reports busy. A failure is only logged;
 * nothing is returned to the caller.
 */
static void drc_pmem_unbind(struct papr_scm_priv *p)
{
	unsigned long ret[PLPAR_HCALL_BUFSIZE];
	uint64_t token = 0;
	int64_t rc;

	dev_dbg(&p->pdev->dev, "unbind drc 0x%x\n", p->drc_index);

	/* NB: unbind has the same retry requirements as drc_pmem_bind() */
	do {

		/* Unbind of all SCM resources associated with drcIndex */
		rc = plpar_hcall(H_SCM_UNBIND_ALL, ret, H_UNBIND_SCOPE_DRC,
				 p->drc_index, token);
		token = ret[0];

		/* Check if we are stalled for some time */
		if (H_IS_LONG_BUSY(rc)) {
			msleep(get_longbusy_msecs(rc));
			rc = H_BUSY;
		} else if (rc == H_BUSY) {
			cond_resched();
		}

	} while (rc == H_BUSY);

	if (rc)
		dev_err(&p->pdev->dev, "unbind error: %lld\n", rc);
	else
		dev_dbg(&p->pdev->dev, "unbind drc 0x%x complete\n",
			p->drc_index);

	return;
}

/*
 * Reuse an existing binding if there is one: query the address of the first
 * and the last block and accept the binding only when the two addresses are
 * exactly (blocks - 1) * block_size apart. If either query fails or the
 * span does not match, fall back to an unbind followed by a fresh bind.
 *
 * Returns 0 when an existing binding was accepted, otherwise whatever
 * drc_pmem_bind() returns.
 */
static int drc_pmem_query_n_bind(struct papr_scm_priv *p)
{
	unsigned long start_addr;
	unsigned long end_addr;
	unsigned long ret[PLPAR_HCALL_BUFSIZE];
	int64_t rc;


	rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret,
			 p->drc_index, 0);
	if (rc)
		goto err_out;
	start_addr = ret[0];

	/* Make sure the full region is bound. */
	rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret,
			 p->drc_index, p->blocks - 1);
	if (rc)
		goto err_out;
	end_addr = ret[0];

	if ((end_addr - start_addr) != ((p->blocks - 1) * p->block_size))
		goto err_out;

	p->bound_addr = start_addr;
	dev_dbg(&p->pdev->dev, "bound drc 0x%x to 0x%lx\n", p->drc_index, start_addr);
	return rc;

err_out:
	dev_info(&p->pdev->dev,
		 "Failed to query, trying an unbind followed by bind");
	drc_pmem_unbind(p);
	return drc_pmem_bind(p);
}

/*
 * Query the Dimm performance stats from PHYP and copy them (if returned) to
 * provided struct papr_scm_perf_stats instance 'stats' that can hold at least
 * (num_stats + header) bytes.
 * - If buff_stats == NULL the return value is the size in bytes of the buffer
 * needed to hold all supported performance-statistics.
 * - If buff_stats != NULL and num_stats == 0 then we copy all known
 * performance-statistics to 'buff_stat' and expect to be large enough to
 * hold them.
 * - if buff_stats != NULL and num_stats > 0 then copy the requested
 * performance-statistics to buff_stats.
281 */ 282static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p, 283 struct papr_scm_perf_stats *buff_stats, 284 unsigned int num_stats) 285{ 286 unsigned long ret[PLPAR_HCALL_BUFSIZE]; 287 size_t size; 288 s64 rc; 289 290 /* Setup the out buffer */ 291 if (buff_stats) { 292 memcpy(buff_stats->eye_catcher, 293 PAPR_SCM_PERF_STATS_EYECATCHER, 8); 294 buff_stats->stats_version = 295 cpu_to_be32(PAPR_SCM_PERF_STATS_VERSION); 296 buff_stats->num_statistics = 297 cpu_to_be32(num_stats); 298 299 /* 300 * Calculate the buffer size based on num-stats provided 301 * or use the prefetched max buffer length 302 */ 303 if (num_stats) 304 /* Calculate size from the num_stats */ 305 size = sizeof(struct papr_scm_perf_stats) + 306 num_stats * sizeof(struct papr_scm_perf_stat); 307 else 308 size = p->stat_buffer_len; 309 } else { 310 /* In case of no out buffer ignore the size */ 311 size = 0; 312 } 313 314 /* Do the HCALL asking PHYP for info */ 315 rc = plpar_hcall(H_SCM_PERFORMANCE_STATS, ret, p->drc_index, 316 buff_stats ? 
virt_to_phys(buff_stats) : 0, 317 size); 318 319 /* Check if the error was due to an unknown stat-id */ 320 if (rc == H_PARTIAL) { 321 dev_err(&p->pdev->dev, 322 "Unknown performance stats, Err:0x%016lX\n", ret[0]); 323 return -ENOENT; 324 } else if (rc == H_AUTHORITY) { 325 dev_info(&p->pdev->dev, 326 "Permission denied while accessing performance stats"); 327 return -EPERM; 328 } else if (rc == H_UNSUPPORTED) { 329 dev_dbg(&p->pdev->dev, "Performance stats unsupported\n"); 330 return -EOPNOTSUPP; 331 } else if (rc != H_SUCCESS) { 332 dev_err(&p->pdev->dev, 333 "Failed to query performance stats, Err:%lld\n", rc); 334 return -EIO; 335 336 } else if (!size) { 337 /* Handle case where stat buffer size was requested */ 338 dev_dbg(&p->pdev->dev, 339 "Performance stats size %ld\n", ret[0]); 340 return ret[0]; 341 } 342 343 /* Successfully fetched the requested stats from phyp */ 344 dev_dbg(&p->pdev->dev, 345 "Performance stats returned %d stats\n", 346 be32_to_cpu(buff_stats->num_statistics)); 347 return 0; 348} 349 350#ifdef CONFIG_PERF_EVENTS 351#define to_nvdimm_pmu(_pmu) container_of(_pmu, struct nvdimm_pmu, pmu) 352 353static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, u64 *count) 354{ 355 struct papr_scm_perf_stat *stat; 356 struct papr_scm_perf_stats *stats; 357 struct papr_scm_priv *p = (struct papr_scm_priv *)dev->driver_data; 358 int rc, size; 359 360 /* Allocate request buffer enough to hold single performance stat */ 361 size = sizeof(struct papr_scm_perf_stats) + 362 sizeof(struct papr_scm_perf_stat); 363 364 if (!p || !p->nvdimm_events_map) 365 return -EINVAL; 366 367 stats = kzalloc(size, GFP_KERNEL); 368 if (!stats) 369 return -ENOMEM; 370 371 stat = &stats->scm_statistic[0]; 372 memcpy(&stat->stat_id, 373 &p->nvdimm_events_map[event->attr.config * sizeof(stat->stat_id)], 374 sizeof(stat->stat_id)); 375 stat->stat_val = 0; 376 377 rc = drc_pmem_query_stats(p, stats, 1); 378 if (rc < 0) { 379 kfree(stats); 380 return rc; 
381 } 382 383 *count = be64_to_cpu(stat->stat_val); 384 kfree(stats); 385 return 0; 386} 387 388static int papr_scm_pmu_event_init(struct perf_event *event) 389{ 390 struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu); 391 struct papr_scm_priv *p; 392 393 if (!nd_pmu) 394 return -EINVAL; 395 396 /* test the event attr type for PMU enumeration */ 397 if (event->attr.type != event->pmu->type) 398 return -ENOENT; 399 400 /* it does not support event sampling mode */ 401 if (is_sampling_event(event)) 402 return -EOPNOTSUPP; 403 404 /* no branch sampling */ 405 if (has_branch_stack(event)) 406 return -EOPNOTSUPP; 407 408 p = (struct papr_scm_priv *)nd_pmu->dev->driver_data; 409 if (!p) 410 return -EINVAL; 411 412 /* Invalid eventcode */ 413 if (event->attr.config == 0 || event->attr.config > 16) 414 return -EINVAL; 415 416 return 0; 417} 418 419static int papr_scm_pmu_add(struct perf_event *event, int flags) 420{ 421 u64 count; 422 int rc; 423 struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu); 424 425 if (!nd_pmu) 426 return -EINVAL; 427 428 if (flags & PERF_EF_START) { 429 rc = papr_scm_pmu_get_value(event, nd_pmu->dev, &count); 430 if (rc) 431 return rc; 432 433 local64_set(&event->hw.prev_count, count); 434 } 435 436 return 0; 437} 438 439static void papr_scm_pmu_read(struct perf_event *event) 440{ 441 u64 prev, now; 442 int rc; 443 struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu); 444 445 if (!nd_pmu) 446 return; 447 448 rc = papr_scm_pmu_get_value(event, nd_pmu->dev, &now); 449 if (rc) 450 return; 451 452 prev = local64_xchg(&event->hw.prev_count, now); 453 local64_add(now - prev, &event->count); 454} 455 456static void papr_scm_pmu_del(struct perf_event *event, int flags) 457{ 458 papr_scm_pmu_read(event); 459} 460 461static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu *nd_pmu) 462{ 463 struct papr_scm_perf_stat *stat; 464 struct papr_scm_perf_stats *stats; 465 u32 available_events; 466 int index, rc = 0; 467 468 if 
(!p->stat_buffer_len) 469 return -ENOENT; 470 471 available_events = (p->stat_buffer_len - sizeof(struct papr_scm_perf_stats)) 472 / sizeof(struct papr_scm_perf_stat); 473 if (available_events == 0) 474 return -EOPNOTSUPP; 475 476 /* Allocate the buffer for phyp where stats are written */ 477 stats = kzalloc(p->stat_buffer_len, GFP_KERNEL); 478 if (!stats) { 479 rc = -ENOMEM; 480 return rc; 481 } 482 483 /* Called to get list of events supported */ 484 rc = drc_pmem_query_stats(p, stats, 0); 485 if (rc) 486 goto out; 487 488 /* 489 * Allocate memory and populate nvdimm_event_map. 490 * Allocate an extra element for NULL entry 491 */ 492 p->nvdimm_events_map = kcalloc(available_events + 1, 493 sizeof(stat->stat_id), 494 GFP_KERNEL); 495 if (!p->nvdimm_events_map) { 496 rc = -ENOMEM; 497 goto out; 498 } 499 500 /* Copy all stat_ids to event map */ 501 for (index = 0, stat = stats->scm_statistic; 502 index < available_events; index++, ++stat) { 503 memcpy(&p->nvdimm_events_map[index * sizeof(stat->stat_id)], 504 &stat->stat_id, sizeof(stat->stat_id)); 505 } 506out: 507 kfree(stats); 508 return rc; 509} 510 511static void papr_scm_pmu_register(struct papr_scm_priv *p) 512{ 513 struct nvdimm_pmu *nd_pmu; 514 int rc, nodeid; 515 516 nd_pmu = kzalloc(sizeof(*nd_pmu), GFP_KERNEL); 517 if (!nd_pmu) { 518 rc = -ENOMEM; 519 goto pmu_err_print; 520 } 521 522 rc = papr_scm_pmu_check_events(p, nd_pmu); 523 if (rc) 524 goto pmu_check_events_err; 525 526 nd_pmu->pmu.task_ctx_nr = perf_invalid_context; 527 nd_pmu->pmu.name = nvdimm_name(p->nvdimm); 528 nd_pmu->pmu.event_init = papr_scm_pmu_event_init; 529 nd_pmu->pmu.read = papr_scm_pmu_read; 530 nd_pmu->pmu.add = papr_scm_pmu_add; 531 nd_pmu->pmu.del = papr_scm_pmu_del; 532 533 nd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_INTERRUPT | 534 PERF_PMU_CAP_NO_EXCLUDE; 535 536 /*updating the cpumask variable */ 537 nodeid = numa_map_to_online_node(dev_to_node(&p->pdev->dev)); 538 nd_pmu->arch_cpumask = *cpumask_of_node(nodeid); 539 540 rc 
= register_nvdimm_pmu(nd_pmu, p->pdev); 541 if (rc) 542 goto pmu_register_err; 543 544 /* 545 * Set archdata.priv value to nvdimm_pmu structure, to handle the 546 * unregistering of pmu device. 547 */ 548 p->pdev->archdata.priv = nd_pmu; 549 return; 550 551pmu_register_err: 552 kfree(p->nvdimm_events_map); 553pmu_check_events_err: 554 kfree(nd_pmu); 555pmu_err_print: 556 dev_info(&p->pdev->dev, "nvdimm pmu didn't register rc=%d\n", rc); 557} 558 559#else 560static void papr_scm_pmu_register(struct papr_scm_priv *p) { } 561#endif 562 563/* 564 * Issue hcall to retrieve dimm health info and populate papr_scm_priv with the 565 * health information. 566 */ 567static int __drc_pmem_query_health(struct papr_scm_priv *p) 568{ 569 unsigned long ret[PLPAR_HCALL_BUFSIZE]; 570 u64 bitmap = 0; 571 long rc; 572 573 /* issue the hcall */ 574 rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index); 575 if (rc == H_SUCCESS) 576 bitmap = ret[0] & ret[1]; 577 else if (rc == H_FUNCTION) 578 dev_info_once(&p->pdev->dev, 579 "Hcall H_SCM_HEALTH not implemented, assuming empty health bitmap"); 580 else { 581 582 dev_err(&p->pdev->dev, 583 "Failed to query health information, Err:%ld\n", rc); 584 return -ENXIO; 585 } 586 587 p->lasthealth_jiffies = jiffies; 588 /* Allow injecting specific health bits via inject mask. */ 589 if (p->health_bitmap_inject_mask) 590 bitmap = (bitmap & ~p->health_bitmap_inject_mask) | 591 p->health_bitmap_inject_mask; 592 WRITE_ONCE(p->health_bitmap, bitmap); 593 dev_dbg(&p->pdev->dev, 594 "Queried dimm health info. 
Bitmap:0x%016lx Mask:0x%016lx\n", 595 ret[0], ret[1]); 596 597 return 0; 598} 599 600/* Min interval in seconds for assuming stable dimm health */ 601#define MIN_HEALTH_QUERY_INTERVAL 60 602 603/* Query cached health info and if needed call drc_pmem_query_health */ 604static int drc_pmem_query_health(struct papr_scm_priv *p) 605{ 606 unsigned long cache_timeout; 607 int rc; 608 609 /* Protect concurrent modifications to papr_scm_priv */ 610 rc = mutex_lock_interruptible(&p->health_mutex); 611 if (rc) 612 return rc; 613 614 /* Jiffies offset for which the health data is assumed to be same */ 615 cache_timeout = p->lasthealth_jiffies + 616 msecs_to_jiffies(MIN_HEALTH_QUERY_INTERVAL * 1000); 617 618 /* Fetch new health info is its older than MIN_HEALTH_QUERY_INTERVAL */ 619 if (time_after(jiffies, cache_timeout)) 620 rc = __drc_pmem_query_health(p); 621 else 622 /* Assume cached health data is valid */ 623 rc = 0; 624 625 mutex_unlock(&p->health_mutex); 626 return rc; 627} 628 629static int papr_scm_meta_get(struct papr_scm_priv *p, 630 struct nd_cmd_get_config_data_hdr *hdr) 631{ 632 unsigned long data[PLPAR_HCALL_BUFSIZE]; 633 unsigned long offset, data_offset; 634 int len, read; 635 int64_t ret; 636 637 if ((hdr->in_offset + hdr->in_length) > p->metadata_size) 638 return -EINVAL; 639 640 for (len = hdr->in_length; len; len -= read) { 641 642 data_offset = hdr->in_length - len; 643 offset = hdr->in_offset + data_offset; 644 645 if (len >= 8) 646 read = 8; 647 else if (len >= 4) 648 read = 4; 649 else if (len >= 2) 650 read = 2; 651 else 652 read = 1; 653 654 ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index, 655 offset, read); 656 657 if (ret == H_PARAMETER) /* bad DRC index */ 658 return -ENODEV; 659 if (ret) 660 return -EINVAL; /* other invalid parameter */ 661 662 switch (read) { 663 case 8: 664 *(uint64_t *)(hdr->out_buf + data_offset) = be64_to_cpu(data[0]); 665 break; 666 case 4: 667 *(uint32_t *)(hdr->out_buf + data_offset) = be32_to_cpu(data[0] & 
0xffffffff); 668 break; 669 670 case 2: 671 *(uint16_t *)(hdr->out_buf + data_offset) = be16_to_cpu(data[0] & 0xffff); 672 break; 673 674 case 1: 675 *(uint8_t *)(hdr->out_buf + data_offset) = (data[0] & 0xff); 676 break; 677 } 678 } 679 return 0; 680} 681 682static int papr_scm_meta_set(struct papr_scm_priv *p, 683 struct nd_cmd_set_config_hdr *hdr) 684{ 685 unsigned long offset, data_offset; 686 int len, wrote; 687 unsigned long data; 688 __be64 data_be; 689 int64_t ret; 690 691 if ((hdr->in_offset + hdr->in_length) > p->metadata_size) 692 return -EINVAL; 693 694 for (len = hdr->in_length; len; len -= wrote) { 695 696 data_offset = hdr->in_length - len; 697 offset = hdr->in_offset + data_offset; 698 699 if (len >= 8) { 700 data = *(uint64_t *)(hdr->in_buf + data_offset); 701 data_be = cpu_to_be64(data); 702 wrote = 8; 703 } else if (len >= 4) { 704 data = *(uint32_t *)(hdr->in_buf + data_offset); 705 data &= 0xffffffff; 706 data_be = cpu_to_be32(data); 707 wrote = 4; 708 } else if (len >= 2) { 709 data = *(uint16_t *)(hdr->in_buf + data_offset); 710 data &= 0xffff; 711 data_be = cpu_to_be16(data); 712 wrote = 2; 713 } else { 714 data_be = *(uint8_t *)(hdr->in_buf + data_offset); 715 data_be &= 0xff; 716 wrote = 1; 717 } 718 719 ret = plpar_hcall_norets(H_SCM_WRITE_METADATA, p->drc_index, 720 offset, data_be, wrote); 721 if (ret == H_PARAMETER) /* bad DRC index */ 722 return -ENODEV; 723 if (ret) 724 return -EINVAL; /* other invalid parameter */ 725 } 726 727 return 0; 728} 729 730/* 731 * Do a sanity checks on the inputs args to dimm-control function and return 732 * '0' if valid. Validation of PDSM payloads happens later in 733 * papr_scm_service_pdsm. 
734 */ 735static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf, 736 unsigned int buf_len) 737{ 738 unsigned long cmd_mask = PAPR_SCM_DIMM_CMD_MASK; 739 struct nd_cmd_pkg *nd_cmd; 740 struct papr_scm_priv *p; 741 enum papr_pdsm pdsm; 742 743 /* Only dimm-specific calls are supported atm */ 744 if (!nvdimm) 745 return -EINVAL; 746 747 /* get the provider data from struct nvdimm */ 748 p = nvdimm_provider_data(nvdimm); 749 750 if (!test_bit(cmd, &cmd_mask)) { 751 dev_dbg(&p->pdev->dev, "Unsupported cmd=%u\n", cmd); 752 return -EINVAL; 753 } 754 755 /* For CMD_CALL verify pdsm request */ 756 if (cmd == ND_CMD_CALL) { 757 /* Verify the envelope and envelop size */ 758 if (!buf || 759 buf_len < (sizeof(struct nd_cmd_pkg) + ND_PDSM_HDR_SIZE)) { 760 dev_dbg(&p->pdev->dev, "Invalid pkg size=%u\n", 761 buf_len); 762 return -EINVAL; 763 } 764 765 /* Verify that the nd_cmd_pkg.nd_family is correct */ 766 nd_cmd = (struct nd_cmd_pkg *)buf; 767 768 if (nd_cmd->nd_family != NVDIMM_FAMILY_PAPR) { 769 dev_dbg(&p->pdev->dev, "Invalid pkg family=0x%llx\n", 770 nd_cmd->nd_family); 771 return -EINVAL; 772 } 773 774 pdsm = (enum papr_pdsm)nd_cmd->nd_command; 775 776 /* Verify if the pdsm command is valid */ 777 if (pdsm <= PAPR_PDSM_MIN || pdsm >= PAPR_PDSM_MAX) { 778 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid PDSM\n", 779 pdsm); 780 return -EINVAL; 781 } 782 783 /* Have enough space to hold returned 'nd_pkg_pdsm' header */ 784 if (nd_cmd->nd_size_out < ND_PDSM_HDR_SIZE) { 785 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid payload\n", 786 pdsm); 787 return -EINVAL; 788 } 789 } 790 791 /* Let the command be further processed */ 792 return 0; 793} 794 795static int papr_pdsm_fuel_gauge(struct papr_scm_priv *p, 796 union nd_pdsm_payload *payload) 797{ 798 int rc, size; 799 u64 statval; 800 struct papr_scm_perf_stat *stat; 801 struct papr_scm_perf_stats *stats; 802 803 /* Silently fail if fetching performance metrics isn't supported */ 804 if (!p->stat_buffer_len) 805 
return 0; 806 807 /* Allocate request buffer enough to hold single performance stat */ 808 size = sizeof(struct papr_scm_perf_stats) + 809 sizeof(struct papr_scm_perf_stat); 810 811 stats = kzalloc(size, GFP_KERNEL); 812 if (!stats) 813 return -ENOMEM; 814 815 stat = &stats->scm_statistic[0]; 816 memcpy(&stat->stat_id, "MemLife ", sizeof(stat->stat_id)); 817 stat->stat_val = 0; 818 819 /* Fetch the fuel gauge and populate it in payload */ 820 rc = drc_pmem_query_stats(p, stats, 1); 821 if (rc < 0) { 822 dev_dbg(&p->pdev->dev, "Err(%d) fetching fuel gauge\n", rc); 823 goto free_stats; 824 } 825 826 statval = be64_to_cpu(stat->stat_val); 827 dev_dbg(&p->pdev->dev, 828 "Fetched fuel-gauge %llu", statval); 829 payload->health.extension_flags |= 830 PDSM_DIMM_HEALTH_RUN_GAUGE_VALID; 831 payload->health.dimm_fuel_gauge = statval; 832 833 rc = sizeof(struct nd_papr_pdsm_health); 834 835free_stats: 836 kfree(stats); 837 return rc; 838} 839 840/* Add the dirty-shutdown-counter value to the pdsm */ 841static int papr_pdsm_dsc(struct papr_scm_priv *p, 842 union nd_pdsm_payload *payload) 843{ 844 payload->health.extension_flags |= PDSM_DIMM_DSC_VALID; 845 payload->health.dimm_dsc = p->dirty_shutdown_counter; 846 847 return sizeof(struct nd_papr_pdsm_health); 848} 849 850/* Fetch the DIMM health info and populate it in provided package. 
*/ 851static int papr_pdsm_health(struct papr_scm_priv *p, 852 union nd_pdsm_payload *payload) 853{ 854 int rc; 855 856 /* Ensure dimm health mutex is taken preventing concurrent access */ 857 rc = mutex_lock_interruptible(&p->health_mutex); 858 if (rc) 859 goto out; 860 861 /* Always fetch upto date dimm health data ignoring cached values */ 862 rc = __drc_pmem_query_health(p); 863 if (rc) { 864 mutex_unlock(&p->health_mutex); 865 goto out; 866 } 867 868 /* update health struct with various flags derived from health bitmap */ 869 payload->health = (struct nd_papr_pdsm_health) { 870 .extension_flags = 0, 871 .dimm_unarmed = !!(p->health_bitmap & PAPR_PMEM_UNARMED_MASK), 872 .dimm_bad_shutdown = !!(p->health_bitmap & PAPR_PMEM_BAD_SHUTDOWN_MASK), 873 .dimm_bad_restore = !!(p->health_bitmap & PAPR_PMEM_BAD_RESTORE_MASK), 874 .dimm_scrubbed = !!(p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED), 875 .dimm_locked = !!(p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED), 876 .dimm_encrypted = !!(p->health_bitmap & PAPR_PMEM_ENCRYPTED), 877 .dimm_health = PAPR_PDSM_DIMM_HEALTHY, 878 }; 879 880 /* Update field dimm_health based on health_bitmap flags */ 881 if (p->health_bitmap & PAPR_PMEM_HEALTH_FATAL) 882 payload->health.dimm_health = PAPR_PDSM_DIMM_FATAL; 883 else if (p->health_bitmap & PAPR_PMEM_HEALTH_CRITICAL) 884 payload->health.dimm_health = PAPR_PDSM_DIMM_CRITICAL; 885 else if (p->health_bitmap & PAPR_PMEM_HEALTH_UNHEALTHY) 886 payload->health.dimm_health = PAPR_PDSM_DIMM_UNHEALTHY; 887 888 /* struct populated hence can release the mutex now */ 889 mutex_unlock(&p->health_mutex); 890 891 /* Populate the fuel gauge meter in the payload */ 892 papr_pdsm_fuel_gauge(p, payload); 893 /* Populate the dirty-shutdown-counter field */ 894 papr_pdsm_dsc(p, payload); 895 896 rc = sizeof(struct nd_papr_pdsm_health); 897 898out: 899 return rc; 900} 901 902/* Inject a smart error Add the dirty-shutdown-counter value to the pdsm */ 903static int papr_pdsm_smart_inject(struct 
papr_scm_priv *p, 904 union nd_pdsm_payload *payload) 905{ 906 int rc; 907 u32 supported_flags = 0; 908 u64 inject_mask = 0, clear_mask = 0; 909 u64 mask; 910 911 /* Check for individual smart error flags and update inject/clear masks */ 912 if (payload->smart_inject.flags & PDSM_SMART_INJECT_HEALTH_FATAL) { 913 supported_flags |= PDSM_SMART_INJECT_HEALTH_FATAL; 914 if (payload->smart_inject.fatal_enable) 915 inject_mask |= PAPR_PMEM_HEALTH_FATAL; 916 else 917 clear_mask |= PAPR_PMEM_HEALTH_FATAL; 918 } 919 920 if (payload->smart_inject.flags & PDSM_SMART_INJECT_BAD_SHUTDOWN) { 921 supported_flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN; 922 if (payload->smart_inject.unsafe_shutdown_enable) 923 inject_mask |= PAPR_PMEM_SHUTDOWN_DIRTY; 924 else 925 clear_mask |= PAPR_PMEM_SHUTDOWN_DIRTY; 926 } 927 928 dev_dbg(&p->pdev->dev, "[Smart-inject] inject_mask=%#llx clear_mask=%#llx\n", 929 inject_mask, clear_mask); 930 931 /* Prevent concurrent access to dimm health bitmap related members */ 932 rc = mutex_lock_interruptible(&p->health_mutex); 933 if (rc) 934 return rc; 935 936 /* Use inject/clear masks to set health_bitmap_inject_mask */ 937 mask = READ_ONCE(p->health_bitmap_inject_mask); 938 mask = (mask & ~clear_mask) | inject_mask; 939 WRITE_ONCE(p->health_bitmap_inject_mask, mask); 940 941 /* Invalidate cached health bitmap */ 942 p->lasthealth_jiffies = 0; 943 944 mutex_unlock(&p->health_mutex); 945 946 /* Return the supported flags back to userspace */ 947 payload->smart_inject.flags = supported_flags; 948 949 return sizeof(struct nd_papr_pdsm_health); 950} 951 952/* 953 * 'struct pdsm_cmd_desc' 954 * Identifies supported PDSMs' expected length of in/out payloads 955 * and pdsm service function. 956 * 957 * size_in : Size of input payload if any in the PDSM request. 958 * size_out : Size of output payload if any in the PDSM request. 959 * service : Service function for the PDSM request. Return semantics: 960 * rc < 0 : Error servicing PDSM and rc indicates the error. 
961 * rc >=0 : Serviced successfully and 'rc' indicate number of 962 * bytes written to payload. 963 */ 964struct pdsm_cmd_desc { 965 u32 size_in; 966 u32 size_out; 967 int (*service)(struct papr_scm_priv *dimm, 968 union nd_pdsm_payload *payload); 969}; 970 971/* Holds all supported PDSMs' command descriptors */ 972static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = { 973 [PAPR_PDSM_MIN] = { 974 .size_in = 0, 975 .size_out = 0, 976 .service = NULL, 977 }, 978 /* New PDSM command descriptors to be added below */ 979 980 [PAPR_PDSM_HEALTH] = { 981 .size_in = 0, 982 .size_out = sizeof(struct nd_papr_pdsm_health), 983 .service = papr_pdsm_health, 984 }, 985 986 [PAPR_PDSM_SMART_INJECT] = { 987 .size_in = sizeof(struct nd_papr_pdsm_smart_inject), 988 .size_out = sizeof(struct nd_papr_pdsm_smart_inject), 989 .service = papr_pdsm_smart_inject, 990 }, 991 /* Empty */ 992 [PAPR_PDSM_MAX] = { 993 .size_in = 0, 994 .size_out = 0, 995 .service = NULL, 996 }, 997}; 998 999/* Given a valid pdsm cmd return its command descriptor else return NULL */ 1000static inline const struct pdsm_cmd_desc *pdsm_cmd_desc(enum papr_pdsm cmd) 1001{ 1002 if (cmd >= 0 || cmd < ARRAY_SIZE(__pdsm_cmd_descriptors)) 1003 return &__pdsm_cmd_descriptors[cmd]; 1004 1005 return NULL; 1006} 1007 1008/* 1009 * For a given pdsm request call an appropriate service function. 1010 * Returns errors if any while handling the pdsm command package. 
1011 */ 1012static int papr_scm_service_pdsm(struct papr_scm_priv *p, 1013 struct nd_cmd_pkg *pkg) 1014{ 1015 /* Get the PDSM header and PDSM command */ 1016 struct nd_pkg_pdsm *pdsm_pkg = (struct nd_pkg_pdsm *)pkg->nd_payload; 1017 enum papr_pdsm pdsm = (enum papr_pdsm)pkg->nd_command; 1018 const struct pdsm_cmd_desc *pdsc; 1019 int rc; 1020 1021 /* Fetch corresponding pdsm descriptor for validation and servicing */ 1022 pdsc = pdsm_cmd_desc(pdsm); 1023 1024 /* Validate pdsm descriptor */ 1025 /* Ensure that reserved fields are 0 */ 1026 if (pdsm_pkg->reserved[0] || pdsm_pkg->reserved[1]) { 1027 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid reserved field\n", 1028 pdsm); 1029 return -EINVAL; 1030 } 1031 1032 /* If pdsm expects some input, then ensure that the size_in matches */ 1033 if (pdsc->size_in && 1034 pkg->nd_size_in != (pdsc->size_in + ND_PDSM_HDR_SIZE)) { 1035 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_in=%d\n", 1036 pdsm, pkg->nd_size_in); 1037 return -EINVAL; 1038 } 1039 1040 /* If pdsm wants to return data, then ensure that size_out matches */ 1041 if (pdsc->size_out && 1042 pkg->nd_size_out != (pdsc->size_out + ND_PDSM_HDR_SIZE)) { 1043 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_out=%d\n", 1044 pdsm, pkg->nd_size_out); 1045 return -EINVAL; 1046 } 1047 1048 /* Service the pdsm */ 1049 if (pdsc->service) { 1050 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Servicing..\n", pdsm); 1051 1052 rc = pdsc->service(p, &pdsm_pkg->payload); 1053 1054 if (rc < 0) { 1055 /* error encountered while servicing pdsm */ 1056 pdsm_pkg->cmd_status = rc; 1057 pkg->nd_fw_size = ND_PDSM_HDR_SIZE; 1058 } else { 1059 /* pdsm serviced and 'rc' bytes written to payload */ 1060 pdsm_pkg->cmd_status = 0; 1061 pkg->nd_fw_size = ND_PDSM_HDR_SIZE + rc; 1062 } 1063 } else { 1064 dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Unsupported PDSM request\n", 1065 pdsm); 1066 pdsm_pkg->cmd_status = -ENOENT; 1067 pkg->nd_fw_size = ND_PDSM_HDR_SIZE; 1068 } 1069 1070 return pdsm_pkg->cmd_status; 
1071} 1072 1073static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, 1074 struct nvdimm *nvdimm, unsigned int cmd, void *buf, 1075 unsigned int buf_len, int *cmd_rc) 1076{ 1077 struct nd_cmd_get_config_size *get_size_hdr; 1078 struct nd_cmd_pkg *call_pkg = NULL; 1079 struct papr_scm_priv *p; 1080 int rc; 1081 1082 rc = is_cmd_valid(nvdimm, cmd, buf, buf_len); 1083 if (rc) { 1084 pr_debug("Invalid cmd=0x%x. Err=%d\n", cmd, rc); 1085 return rc; 1086 } 1087 1088 /* Use a local variable in case cmd_rc pointer is NULL */ 1089 if (!cmd_rc) 1090 cmd_rc = &rc; 1091 1092 p = nvdimm_provider_data(nvdimm); 1093 1094 switch (cmd) { 1095 case ND_CMD_GET_CONFIG_SIZE: 1096 get_size_hdr = buf; 1097 1098 get_size_hdr->status = 0; 1099 get_size_hdr->max_xfer = 8; 1100 get_size_hdr->config_size = p->metadata_size; 1101 *cmd_rc = 0; 1102 break; 1103 1104 case ND_CMD_GET_CONFIG_DATA: 1105 *cmd_rc = papr_scm_meta_get(p, buf); 1106 break; 1107 1108 case ND_CMD_SET_CONFIG_DATA: 1109 *cmd_rc = papr_scm_meta_set(p, buf); 1110 break; 1111 1112 case ND_CMD_CALL: 1113 call_pkg = (struct nd_cmd_pkg *)buf; 1114 *cmd_rc = papr_scm_service_pdsm(p, call_pkg); 1115 break; 1116 1117 default: 1118 dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd); 1119 return -EINVAL; 1120 } 1121 1122 dev_dbg(&p->pdev->dev, "returned with cmd_rc = %d\n", *cmd_rc); 1123 1124 return 0; 1125} 1126 1127static ssize_t health_bitmap_inject_show(struct device *dev, 1128 struct device_attribute *attr, 1129 char *buf) 1130{ 1131 struct nvdimm *dimm = to_nvdimm(dev); 1132 struct papr_scm_priv *p = nvdimm_provider_data(dimm); 1133 1134 return sprintf(buf, "%#llx\n", 1135 READ_ONCE(p->health_bitmap_inject_mask)); 1136} 1137 1138static DEVICE_ATTR_ADMIN_RO(health_bitmap_inject); 1139 1140static ssize_t perf_stats_show(struct device *dev, 1141 struct device_attribute *attr, char *buf) 1142{ 1143 int index; 1144 ssize_t rc; 1145 struct seq_buf s; 1146 struct papr_scm_perf_stat *stat; 1147 struct papr_scm_perf_stats 
*stats; 1148 struct nvdimm *dimm = to_nvdimm(dev); 1149 struct papr_scm_priv *p = nvdimm_provider_data(dimm); 1150 1151 if (!p->stat_buffer_len) 1152 return -ENOENT; 1153 1154 /* Allocate the buffer for phyp where stats are written */ 1155 stats = kzalloc(p->stat_buffer_len, GFP_KERNEL); 1156 if (!stats) 1157 return -ENOMEM; 1158 1159 /* Ask phyp to return all dimm perf stats */ 1160 rc = drc_pmem_query_stats(p, stats, 0); 1161 if (rc) 1162 goto free_stats; 1163 /* 1164 * Go through the returned output buffer and print stats and 1165 * values. Since stat_id is essentially a char string of 1166 * 8 bytes, simply use the string format specifier to print it. 1167 */ 1168 seq_buf_init(&s, buf, PAGE_SIZE); 1169 for (index = 0, stat = stats->scm_statistic; 1170 index < be32_to_cpu(stats->num_statistics); 1171 ++index, ++stat) { 1172 seq_buf_printf(&s, "%.8s = 0x%016llX\n", 1173 stat->stat_id, 1174 be64_to_cpu(stat->stat_val)); 1175 } 1176 1177free_stats: 1178 kfree(stats); 1179 return rc ? rc : (ssize_t)seq_buf_used(&s); 1180} 1181static DEVICE_ATTR_ADMIN_RO(perf_stats); 1182 1183static ssize_t flags_show(struct device *dev, 1184 struct device_attribute *attr, char *buf) 1185{ 1186 struct nvdimm *dimm = to_nvdimm(dev); 1187 struct papr_scm_priv *p = nvdimm_provider_data(dimm); 1188 struct seq_buf s; 1189 u64 health; 1190 int rc; 1191 1192 rc = drc_pmem_query_health(p); 1193 if (rc) 1194 return rc; 1195 1196 /* Copy health_bitmap locally, check masks & update out buffer */ 1197 health = READ_ONCE(p->health_bitmap); 1198 1199 seq_buf_init(&s, buf, PAGE_SIZE); 1200 if (health & PAPR_PMEM_UNARMED_MASK) 1201 seq_buf_printf(&s, "not_armed "); 1202 1203 if (health & PAPR_PMEM_BAD_SHUTDOWN_MASK) 1204 seq_buf_printf(&s, "flush_fail "); 1205 1206 if (health & PAPR_PMEM_BAD_RESTORE_MASK) 1207 seq_buf_printf(&s, "restore_fail "); 1208 1209 if (health & PAPR_PMEM_ENCRYPTED) 1210 seq_buf_printf(&s, "encrypted "); 1211 1212 if (health & PAPR_PMEM_SMART_EVENT_MASK) 1213 
seq_buf_printf(&s, "smart_notify "); 1214 1215 if (health & PAPR_PMEM_SCRUBBED_AND_LOCKED) 1216 seq_buf_printf(&s, "scrubbed locked "); 1217 1218 if (seq_buf_used(&s)) 1219 seq_buf_printf(&s, "\n"); 1220 1221 return seq_buf_used(&s); 1222} 1223DEVICE_ATTR_RO(flags); 1224 1225static ssize_t dirty_shutdown_show(struct device *dev, 1226 struct device_attribute *attr, char *buf) 1227{ 1228 struct nvdimm *dimm = to_nvdimm(dev); 1229 struct papr_scm_priv *p = nvdimm_provider_data(dimm); 1230 1231 return sysfs_emit(buf, "%llu\n", p->dirty_shutdown_counter); 1232} 1233DEVICE_ATTR_RO(dirty_shutdown); 1234 1235static umode_t papr_nd_attribute_visible(struct kobject *kobj, 1236 struct attribute *attr, int n) 1237{ 1238 struct device *dev = kobj_to_dev(kobj); 1239 struct nvdimm *nvdimm = to_nvdimm(dev); 1240 struct papr_scm_priv *p = nvdimm_provider_data(nvdimm); 1241 1242 /* For if perf-stats not available remove perf_stats sysfs */ 1243 if (attr == &dev_attr_perf_stats.attr && p->stat_buffer_len == 0) 1244 return 0; 1245 1246 return attr->mode; 1247} 1248 1249/* papr_scm specific dimm attributes */ 1250static struct attribute *papr_nd_attributes[] = { 1251 &dev_attr_flags.attr, 1252 &dev_attr_perf_stats.attr, 1253 &dev_attr_dirty_shutdown.attr, 1254 &dev_attr_health_bitmap_inject.attr, 1255 NULL, 1256}; 1257 1258static const struct attribute_group papr_nd_attribute_group = { 1259 .name = "papr", 1260 .is_visible = papr_nd_attribute_visible, 1261 .attrs = papr_nd_attributes, 1262}; 1263 1264static const struct attribute_group *papr_nd_attr_groups[] = { 1265 &papr_nd_attribute_group, 1266 NULL, 1267}; 1268 1269static int papr_scm_nvdimm_init(struct papr_scm_priv *p) 1270{ 1271 struct device *dev = &p->pdev->dev; 1272 struct nd_mapping_desc mapping; 1273 struct nd_region_desc ndr_desc; 1274 unsigned long dimm_flags; 1275 int target_nid, online_nid; 1276 1277 p->bus_desc.ndctl = papr_scm_ndctl; 1278 p->bus_desc.module = THIS_MODULE; 1279 p->bus_desc.of_node = 
p->pdev->dev.of_node; 1280 p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL); 1281 1282 /* Set the dimm command family mask to accept PDSMs */ 1283 set_bit(NVDIMM_FAMILY_PAPR, &p->bus_desc.dimm_family_mask); 1284 1285 if (!p->bus_desc.provider_name) 1286 return -ENOMEM; 1287 1288 p->bus = nvdimm_bus_register(NULL, &p->bus_desc); 1289 if (!p->bus) { 1290 dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn); 1291 kfree(p->bus_desc.provider_name); 1292 return -ENXIO; 1293 } 1294 1295 dimm_flags = 0; 1296 set_bit(NDD_LABELING, &dimm_flags); 1297 1298 /* 1299 * Check if the nvdimm is unarmed. No locking needed as we are still 1300 * initializing. Ignore error encountered if any. 1301 */ 1302 __drc_pmem_query_health(p); 1303 1304 if (p->health_bitmap & PAPR_PMEM_UNARMED_MASK) 1305 set_bit(NDD_UNARMED, &dimm_flags); 1306 1307 p->nvdimm = nvdimm_create(p->bus, p, papr_nd_attr_groups, 1308 dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL); 1309 if (!p->nvdimm) { 1310 dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn); 1311 goto err; 1312 } 1313 1314 if (nvdimm_bus_check_dimm_count(p->bus, 1)) 1315 goto err; 1316 1317 /* now add the region */ 1318 1319 memset(&mapping, 0, sizeof(mapping)); 1320 mapping.nvdimm = p->nvdimm; 1321 mapping.start = 0; 1322 mapping.size = p->blocks * p->block_size; // XXX: potential overflow? 
1323 1324 memset(&ndr_desc, 0, sizeof(ndr_desc)); 1325 target_nid = dev_to_node(&p->pdev->dev); 1326 online_nid = numa_map_to_online_node(target_nid); 1327 ndr_desc.numa_node = online_nid; 1328 ndr_desc.target_node = target_nid; 1329 ndr_desc.res = &p->res; 1330 ndr_desc.of_node = p->dn; 1331 ndr_desc.provider_data = p; 1332 ndr_desc.mapping = &mapping; 1333 ndr_desc.num_mappings = 1; 1334 ndr_desc.nd_set = &p->nd_set; 1335 1336 if (p->hcall_flush_required) { 1337 set_bit(ND_REGION_ASYNC, &ndr_desc.flags); 1338 ndr_desc.flush = papr_scm_pmem_flush; 1339 } 1340 1341 if (p->is_volatile) 1342 p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc); 1343 else { 1344 set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags); 1345 p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc); 1346 } 1347 if (!p->region) { 1348 dev_err(dev, "Error registering region %pR from %pOF\n", 1349 ndr_desc.res, p->dn); 1350 goto err; 1351 } 1352 if (target_nid != online_nid) 1353 dev_info(dev, "Region registered with target node %d and online node %d", 1354 target_nid, online_nid); 1355 1356 mutex_lock(&papr_ndr_lock); 1357 list_add_tail(&p->region_list, &papr_nd_regions); 1358 mutex_unlock(&papr_ndr_lock); 1359 1360 return 0; 1361 1362err: nvdimm_bus_unregister(p->bus); 1363 kfree(p->bus_desc.provider_name); 1364 return -ENXIO; 1365} 1366 1367static void papr_scm_add_badblock(struct nd_region *region, 1368 struct nvdimm_bus *bus, u64 phys_addr) 1369{ 1370 u64 aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES); 1371 1372 if (nvdimm_bus_add_badrange(bus, aligned_addr, L1_CACHE_BYTES)) { 1373 pr_err("Bad block registration for 0x%llx failed\n", phys_addr); 1374 return; 1375 } 1376 1377 pr_debug("Add memory range (0x%llx - 0x%llx) as bad range\n", 1378 aligned_addr, aligned_addr + L1_CACHE_BYTES); 1379 1380 nvdimm_region_notify(region, NVDIMM_REVALIDATE_POISON); 1381} 1382 1383static int handle_mce_ue(struct notifier_block *nb, unsigned long val, 1384 void *data) 1385{ 1386 struct 
machine_check_event *evt = data; 1387 struct papr_scm_priv *p; 1388 u64 phys_addr; 1389 bool found = false; 1390 1391 if (evt->error_type != MCE_ERROR_TYPE_UE) 1392 return NOTIFY_DONE; 1393 1394 if (list_empty(&papr_nd_regions)) 1395 return NOTIFY_DONE; 1396 1397 /* 1398 * The physical address obtained here is PAGE_SIZE aligned, so get the 1399 * exact address from the effective address 1400 */ 1401 phys_addr = evt->u.ue_error.physical_address + 1402 (evt->u.ue_error.effective_address & ~PAGE_MASK); 1403 1404 if (!evt->u.ue_error.physical_address_provided || 1405 !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT))) 1406 return NOTIFY_DONE; 1407 1408 /* mce notifier is called from a process context, so mutex is safe */ 1409 mutex_lock(&papr_ndr_lock); 1410 list_for_each_entry(p, &papr_nd_regions, region_list) { 1411 if (phys_addr >= p->res.start && phys_addr <= p->res.end) { 1412 found = true; 1413 break; 1414 } 1415 } 1416 1417 if (found) 1418 papr_scm_add_badblock(p->region, p->bus, phys_addr); 1419 1420 mutex_unlock(&papr_ndr_lock); 1421 1422 return found ? 
NOTIFY_OK : NOTIFY_DONE; 1423} 1424 1425static struct notifier_block mce_ue_nb = { 1426 .notifier_call = handle_mce_ue 1427}; 1428 1429static int papr_scm_probe(struct platform_device *pdev) 1430{ 1431 struct device_node *dn = pdev->dev.of_node; 1432 u32 drc_index, metadata_size; 1433 u64 blocks, block_size; 1434 struct papr_scm_priv *p; 1435 u8 uuid_raw[UUID_SIZE]; 1436 const char *uuid_str; 1437 ssize_t stat_size; 1438 uuid_t uuid; 1439 int rc; 1440 1441 /* check we have all the required DT properties */ 1442 if (of_property_read_u32(dn, "ibm,my-drc-index", &drc_index)) { 1443 dev_err(&pdev->dev, "%pOF: missing drc-index!\n", dn); 1444 return -ENODEV; 1445 } 1446 1447 if (of_property_read_u64(dn, "ibm,block-size", &block_size)) { 1448 dev_err(&pdev->dev, "%pOF: missing block-size!\n", dn); 1449 return -ENODEV; 1450 } 1451 1452 if (of_property_read_u64(dn, "ibm,number-of-blocks", &blocks)) { 1453 dev_err(&pdev->dev, "%pOF: missing number-of-blocks!\n", dn); 1454 return -ENODEV; 1455 } 1456 1457 if (of_property_read_string(dn, "ibm,unit-guid", &uuid_str)) { 1458 dev_err(&pdev->dev, "%pOF: missing unit-guid!\n", dn); 1459 return -ENODEV; 1460 } 1461 1462 1463 p = kzalloc(sizeof(*p), GFP_KERNEL); 1464 if (!p) 1465 return -ENOMEM; 1466 1467 /* Initialize the dimm mutex */ 1468 mutex_init(&p->health_mutex); 1469 1470 /* optional DT properties */ 1471 of_property_read_u32(dn, "ibm,metadata-size", &metadata_size); 1472 1473 p->dn = dn; 1474 p->drc_index = drc_index; 1475 p->block_size = block_size; 1476 p->blocks = blocks; 1477 p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required"); 1478 p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required"); 1479 1480 if (of_property_read_u64(dn, "ibm,persistence-failed-count", 1481 &p->dirty_shutdown_counter)) 1482 p->dirty_shutdown_counter = 0; 1483 1484 /* We just need to ensure that set cookies are unique across */ 1485 uuid_parse(uuid_str, &uuid); 1486 1487 /* 1488 * The cookie1 and cookie2 
are not really little endian. 1489 * We store a raw buffer representation of the 1490 * uuid string so that we can compare this with the label 1491 * area cookie irrespective of the endian configuration 1492 * with which the kernel is built. 1493 * 1494 * Historically we stored the cookie in the below format. 1495 * for a uuid string 72511b67-0b3b-42fd-8d1d-5be3cae8bcaa 1496 * cookie1 was 0xfd423b0b671b5172 1497 * cookie2 was 0xaabce8cae35b1d8d 1498 */ 1499 export_uuid(uuid_raw, &uuid); 1500 p->nd_set.cookie1 = get_unaligned_le64(&uuid_raw[0]); 1501 p->nd_set.cookie2 = get_unaligned_le64(&uuid_raw[8]); 1502 1503 /* might be zero */ 1504 p->metadata_size = metadata_size; 1505 p->pdev = pdev; 1506 1507 /* request the hypervisor to bind this region to somewhere in memory */ 1508 rc = drc_pmem_bind(p); 1509 1510 /* If phyp says drc memory still bound then force unbound and retry */ 1511 if (rc == H_OVERLAP) 1512 rc = drc_pmem_query_n_bind(p); 1513 1514 if (rc != H_SUCCESS) { 1515 dev_err(&p->pdev->dev, "bind err: %d\n", rc); 1516 rc = -ENXIO; 1517 goto err; 1518 } 1519 1520 /* setup the resource for the newly bound range */ 1521 p->res.start = p->bound_addr; 1522 p->res.end = p->bound_addr + p->blocks * p->block_size - 1; 1523 p->res.name = pdev->name; 1524 p->res.flags = IORESOURCE_MEM; 1525 1526 /* Try retrieving the stat buffer and see if its supported */ 1527 stat_size = drc_pmem_query_stats(p, NULL, 0); 1528 if (stat_size > 0) { 1529 p->stat_buffer_len = stat_size; 1530 dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n", 1531 p->stat_buffer_len); 1532 } 1533 1534 rc = papr_scm_nvdimm_init(p); 1535 if (rc) 1536 goto err2; 1537 1538 platform_set_drvdata(pdev, p); 1539 papr_scm_pmu_register(p); 1540 1541 return 0; 1542 1543err2: drc_pmem_unbind(p); 1544err: kfree(p); 1545 return rc; 1546} 1547 1548static int papr_scm_remove(struct platform_device *pdev) 1549{ 1550 struct papr_scm_priv *p = platform_get_drvdata(pdev); 1551 1552 mutex_lock(&papr_ndr_lock); 1553 
list_del(&p->region_list); 1554 mutex_unlock(&papr_ndr_lock); 1555 1556 nvdimm_bus_unregister(p->bus); 1557 drc_pmem_unbind(p); 1558 1559 if (pdev->archdata.priv) 1560 unregister_nvdimm_pmu(pdev->archdata.priv); 1561 1562 pdev->archdata.priv = NULL; 1563 kfree(p->nvdimm_events_map); 1564 kfree(p->bus_desc.provider_name); 1565 kfree(p); 1566 1567 return 0; 1568} 1569 1570static const struct of_device_id papr_scm_match[] = { 1571 { .compatible = "ibm,pmemory" }, 1572 { .compatible = "ibm,pmemory-v2" }, 1573 { }, 1574}; 1575 1576static struct platform_driver papr_scm_driver = { 1577 .probe = papr_scm_probe, 1578 .remove = papr_scm_remove, 1579 .driver = { 1580 .name = "papr_scm", 1581 .of_match_table = papr_scm_match, 1582 }, 1583}; 1584 1585static int __init papr_scm_init(void) 1586{ 1587 int ret; 1588 1589 ret = platform_driver_register(&papr_scm_driver); 1590 if (!ret) 1591 mce_register_notifier(&mce_ue_nb); 1592 1593 return ret; 1594} 1595module_init(papr_scm_init); 1596 1597static void __exit papr_scm_exit(void) 1598{ 1599 mce_unregister_notifier(&mce_ue_nb); 1600 platform_driver_unregister(&papr_scm_driver); 1601} 1602module_exit(papr_scm_exit); 1603 1604MODULE_DEVICE_TABLE(of, papr_scm_match); 1605MODULE_LICENSE("GPL"); 1606MODULE_AUTHOR("IBM Corporation");