idle.c (39068B)
1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * PowerNV cpuidle code 4 * 5 * Copyright 2015 IBM Corp. 6 */ 7 8#include <linux/types.h> 9#include <linux/mm.h> 10#include <linux/slab.h> 11#include <linux/of.h> 12#include <linux/device.h> 13#include <linux/cpu.h> 14 15#include <asm/firmware.h> 16#include <asm/interrupt.h> 17#include <asm/machdep.h> 18#include <asm/opal.h> 19#include <asm/cputhreads.h> 20#include <asm/cpuidle.h> 21#include <asm/code-patching.h> 22#include <asm/smp.h> 23#include <asm/runlatch.h> 24#include <asm/dbell.h> 25 26#include "powernv.h" 27#include "subcore.h" 28 29/* Power ISA 3.0 allows for stop states 0x0 - 0xF */ 30#define MAX_STOP_STATE 0xF 31 32#define P9_STOP_SPR_MSR 2000 33#define P9_STOP_SPR_PSSCR 855 34 35static u32 supported_cpuidle_states; 36struct pnv_idle_states_t *pnv_idle_states; 37int nr_pnv_idle_states; 38 39/* 40 * The default stop state that will be used by ppc_md.power_save 41 * function on platforms that support stop instruction. 42 */ 43static u64 pnv_default_stop_val; 44static u64 pnv_default_stop_mask; 45static bool default_stop_found; 46 47/* 48 * First stop state levels when SPR and TB loss can occur. 49 */ 50static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1; 51static u64 deep_spr_loss_state = MAX_STOP_STATE + 1; 52 53/* 54 * psscr value and mask of the deepest stop idle state. 55 * Used when a cpu is offlined. 56 */ 57static u64 pnv_deepest_stop_psscr_val; 58static u64 pnv_deepest_stop_psscr_mask; 59static u64 pnv_deepest_stop_flag; 60static bool deepest_stop_found; 61 62static unsigned long power7_offline_type; 63 64static int __init pnv_save_sprs_for_deep_states(void) 65{ 66 int cpu; 67 int rc; 68 69 /* 70 * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across 71 * all cpus at boot. Get these reg values of current cpu and use the 72 * same across all cpus. 
73 */ 74 uint64_t lpcr_val = mfspr(SPRN_LPCR); 75 uint64_t hid0_val = mfspr(SPRN_HID0); 76 uint64_t hmeer_val = mfspr(SPRN_HMEER); 77 uint64_t msr_val = MSR_IDLE; 78 uint64_t psscr_val = pnv_deepest_stop_psscr_val; 79 80 for_each_present_cpu(cpu) { 81 uint64_t pir = get_hard_smp_processor_id(cpu); 82 uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu]; 83 84 rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val); 85 if (rc != 0) 86 return rc; 87 88 rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); 89 if (rc != 0) 90 return rc; 91 92 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 93 rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val); 94 if (rc) 95 return rc; 96 97 rc = opal_slw_set_reg(pir, 98 P9_STOP_SPR_PSSCR, psscr_val); 99 100 if (rc) 101 return rc; 102 } 103 104 /* HIDs are per core registers */ 105 if (cpu_thread_in_core(cpu) == 0) { 106 107 rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val); 108 if (rc != 0) 109 return rc; 110 111 rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val); 112 if (rc != 0) 113 return rc; 114 115 /* Only p8 needs to set extra HID registers */ 116 if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 117 uint64_t hid1_val = mfspr(SPRN_HID1); 118 uint64_t hid4_val = mfspr(SPRN_HID4); 119 uint64_t hid5_val = mfspr(SPRN_HID5); 120 121 rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val); 122 if (rc != 0) 123 return rc; 124 125 rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val); 126 if (rc != 0) 127 return rc; 128 129 rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val); 130 if (rc != 0) 131 return rc; 132 } 133 } 134 } 135 136 return 0; 137} 138 139u32 pnv_get_supported_cpuidle_states(void) 140{ 141 return supported_cpuidle_states; 142} 143EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states); 144 145static void pnv_fastsleep_workaround_apply(void *info) 146 147{ 148 int cpu = smp_processor_id(); 149 int rc; 150 int *err = info; 151 152 if (cpu_first_thread_sibling(cpu) != cpu) 153 return; 154 155 rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP, 156 
OPAL_CONFIG_IDLE_APPLY); 157 if (rc) 158 *err = 1; 159} 160 161static bool power7_fastsleep_workaround_entry = true; 162static bool power7_fastsleep_workaround_exit = true; 163 164/* 165 * Used to store fastsleep workaround state 166 * 0 - Workaround applied/undone at fastsleep entry/exit path (Default) 167 * 1 - Workaround applied once, never undone. 168 */ 169static u8 fastsleep_workaround_applyonce; 170 171static ssize_t show_fastsleep_workaround_applyonce(struct device *dev, 172 struct device_attribute *attr, char *buf) 173{ 174 return sprintf(buf, "%u\n", fastsleep_workaround_applyonce); 175} 176 177static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, 178 struct device_attribute *attr, const char *buf, 179 size_t count) 180{ 181 int err; 182 u8 val; 183 184 if (kstrtou8(buf, 0, &val) || val != 1) 185 return -EINVAL; 186 187 if (fastsleep_workaround_applyonce == 1) 188 return count; 189 190 /* 191 * fastsleep_workaround_applyonce = 1 implies 192 * fastsleep workaround needs to be left in 'applied' state on all 193 * the cores. Do this by- 194 * 1. Disable the 'undo' workaround in fastsleep exit path 195 * 2. Sendi IPIs to all the cores which have at least one online thread 196 * 3. Disable the 'apply' workaround in fastsleep entry path 197 * 198 * There is no need to send ipi to cores which have all threads 199 * offlined, as last thread of the core entering fastsleep or deeper 200 * state would have applied workaround. 
201 */ 202 power7_fastsleep_workaround_exit = false; 203 204 cpus_read_lock(); 205 on_each_cpu(pnv_fastsleep_workaround_apply, &err, 1); 206 cpus_read_unlock(); 207 if (err) { 208 pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply"); 209 goto fail; 210 } 211 212 power7_fastsleep_workaround_entry = false; 213 214 fastsleep_workaround_applyonce = 1; 215 216 return count; 217fail: 218 return -EIO; 219} 220 221static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600, 222 show_fastsleep_workaround_applyonce, 223 store_fastsleep_workaround_applyonce); 224 225static inline void atomic_start_thread_idle(void) 226{ 227 int cpu = raw_smp_processor_id(); 228 int first = cpu_first_thread_sibling(cpu); 229 int thread_nr = cpu_thread_in_core(cpu); 230 unsigned long *state = &paca_ptrs[first]->idle_state; 231 232 clear_bit(thread_nr, state); 233} 234 235static inline void atomic_stop_thread_idle(void) 236{ 237 int cpu = raw_smp_processor_id(); 238 int first = cpu_first_thread_sibling(cpu); 239 int thread_nr = cpu_thread_in_core(cpu); 240 unsigned long *state = &paca_ptrs[first]->idle_state; 241 242 set_bit(thread_nr, state); 243} 244 245static inline void atomic_lock_thread_idle(void) 246{ 247 int cpu = raw_smp_processor_id(); 248 int first = cpu_first_thread_sibling(cpu); 249 unsigned long *state = &paca_ptrs[first]->idle_state; 250 251 while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state))) 252 barrier(); 253} 254 255static inline void atomic_unlock_and_stop_thread_idle(void) 256{ 257 int cpu = raw_smp_processor_id(); 258 int first = cpu_first_thread_sibling(cpu); 259 unsigned long thread = 1UL << cpu_thread_in_core(cpu); 260 unsigned long *state = &paca_ptrs[first]->idle_state; 261 u64 s = READ_ONCE(*state); 262 u64 new, tmp; 263 264 BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT)); 265 BUG_ON(s & thread); 266 267again: 268 new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT; 269 tmp = cmpxchg(state, s, new); 270 if (unlikely(tmp 
!= s)) {
		/* Someone else changed the word: retry with the fresh value */
		s = tmp;
		goto again;
	}
}

/* Release the core idle_state lock bit; BUGs if it was not held. */
static inline void atomic_unlock_thread_idle(void)
{
	int cpu = raw_smp_processor_id();
	int first = cpu_first_thread_sibling(cpu);
	unsigned long *state = &paca_ptrs[first]->idle_state;

	BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state));
	clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state);
}

/* P7 and P8 */
struct p7_sprs {
	/* per core */
	u64 tscr;
	u64 worc;

	/* per subcore */
	u64 sdr1;
	u64 rpr;

	/* per thread */
	u64 lpcr;
	u64 hfscr;
	u64 fscr;
	u64 purr;
	u64 spurr;
	u64 dscr;
	u64 wort;

	/* per thread SPRs that get lost in shallow states */
	u64 amr;
	u64 iamr;
	u64 uamor;
	/* amor is restored to constant ~0 */
};

/*
 * Enter the idle state @type via isa206_idle_insn_mayloss() and, on wakeup,
 * restore whatever state the wakeup reports as lost.
 *
 * For types other than PNV_THREAD_NAP the core idle_state word is updated
 * under the core idle lock, both before idling and after wakeup. For
 * PNV_THREAD_WINKLE the SPRs in struct p7_sprs are saved before idling and
 * written back after a wakeup with HV state loss.
 *
 * Returns the SRR1 value produced by the idle instruction.
 */
static unsigned long power7_idle_insn(unsigned long type)
{
	int cpu = raw_smp_processor_id();
	int first = cpu_first_thread_sibling(cpu);
	unsigned long *state = &paca_ptrs[first]->idle_state;
	unsigned long thread = 1UL << cpu_thread_in_core(cpu);
	unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
	unsigned long srr1;
	bool full_winkle;
	struct p7_sprs sprs = {}; /* avoid false use-uninitialised */
	bool sprs_saved = false;
	int rc;

	if (unlikely(type != PNV_THREAD_NAP)) {
		atomic_lock_thread_idle();

		BUG_ON(!(*state & thread));
		*state &= ~thread;

		if (power7_fastsleep_workaround_entry) {
			/* Last thread of the core going idle applies it */
			if ((*state & core_thread_mask) == 0) {
				rc = opal_config_cpu_idle_state(
						OPAL_CONFIG_IDLE_FASTSLEEP,
						OPAL_CONFIG_IDLE_APPLY);
				BUG_ON(rc);
			}
		}

		if (type == PNV_THREAD_WINKLE) {
			sprs.tscr	= mfspr(SPRN_TSCR);
			sprs.worc	= mfspr(SPRN_WORC);

			sprs.sdr1	= mfspr(SPRN_SDR1);
			sprs.rpr	= mfspr(SPRN_RPR);

			sprs.lpcr	= mfspr(SPRN_LPCR);
			if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
				sprs.hfscr	= mfspr(SPRN_HFSCR);
				sprs.fscr	= mfspr(SPRN_FSCR);
			}
			sprs.purr	= mfspr(SPRN_PURR);
			sprs.spurr	= mfspr(SPRN_SPURR);
			sprs.dscr	= mfspr(SPRN_DSCR);
			sprs.wort	= mfspr(SPRN_WORT);

			sprs_saved = true;

			/*
			 * Increment winkle counter and set all winkle bits if
			 * all threads are winkling. This allows wakeup side to
			 * distinguish between fast sleep and winkle state
			 * loss. Fast sleep still has to resync the timebase so
			 * this may not be a really big win.
			 */
			*state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
			if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS)
					>> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT
					== threads_per_core)
				*state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS;
			WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
		}

		atomic_unlock_thread_idle();
	}

	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
		sprs.amr	= mfspr(SPRN_AMR);
		sprs.iamr	= mfspr(SPRN_IAMR);
		sprs.uamor	= mfspr(SPRN_UAMOR);
	}

	local_paca->thread_idle_state = type;
	srr1 = isa206_idle_insn_mayloss(type);		/* go idle */
	local_paca->thread_idle_state = PNV_THREAD_RUNNING;

	WARN_ON_ONCE(!srr1);
	WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));

	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
		if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
			/*
			 * We don't need an isync after the mtsprs here because
			 * the upcoming mtmsrd is execution synchronizing.
			 */
			mtspr(SPRN_AMR,		sprs.amr);
			mtspr(SPRN_IAMR,	sprs.iamr);
			mtspr(SPRN_AMOR,	~0);
			mtspr(SPRN_UAMOR,	sprs.uamor);
		}
	}

	if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
		hmi_exception_realmode(NULL);

	/* No HV state loss: just undo the idle_state bookkeeping */
	if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) {
		if (unlikely(type != PNV_THREAD_NAP)) {
			atomic_lock_thread_idle();
			if (type == PNV_THREAD_WINKLE) {
				WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
				*state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
				*state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
			}
			atomic_unlock_and_stop_thread_idle();
		}
		return srr1;
	}

	/* HV state loss */
	BUG_ON(type == PNV_THREAD_NAP);

	atomic_lock_thread_idle();

	full_winkle = false;
	if (type == PNV_THREAD_WINKLE) {
		WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
		*state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
		/* This thread's winkle bit is only set if the whole core winkled */
		if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) {
			*state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
			full_winkle = true;
			BUG_ON(!sprs_saved);
		}
	}

	WARN_ON(*state & thread);

	/* Another thread of the core is already marked as not idle */
	if ((*state & core_thread_mask) != 0)
		goto core_woken;

	/* Per-core SPRs */
	if (full_winkle) {
		mtspr(SPRN_TSCR,	sprs.tscr);
		mtspr(SPRN_WORC,	sprs.worc);
	}

	if (power7_fastsleep_workaround_exit) {
		rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
						OPAL_CONFIG_IDLE_UNDO);
		BUG_ON(rc);
	}

	/* TB */
	if (opal_resync_timebase() != OPAL_SUCCESS)
		BUG();

core_woken:
	if (!full_winkle)
		goto subcore_woken;

	if ((*state & local_paca->subcore_sibling_mask) != 0)
		goto subcore_woken;

	/* Per-subcore SPRs */
	mtspr(SPRN_SDR1,	sprs.sdr1);
	mtspr(SPRN_RPR,		sprs.rpr);

subcore_woken:
	/*
	 * isync after restoring shared SPRs and before unlocking. Unlock
	 * only contains hwsync which does not necessarily do the right
	 * thing for SPRs.
	 */
	isync();
	atomic_unlock_and_stop_thread_idle();

	/* Fast sleep does not lose SPRs */
	if (!full_winkle)
		return srr1;

	/* Per-thread SPRs */
	mtspr(SPRN_LPCR,	sprs.lpcr);
	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
		mtspr(SPRN_HFSCR,	sprs.hfscr);
		mtspr(SPRN_FSCR,	sprs.fscr);
	}
	mtspr(SPRN_PURR,	sprs.purr);
	mtspr(SPRN_SPURR,	sprs.spurr);
	mtspr(SPRN_DSCR,	sprs.dscr);
	mtspr(SPRN_WORT,	sprs.wort);

	mtspr(SPRN_SPRG3,	local_paca->sprg_vdso);

#ifdef CONFIG_PPC_64S_HASH_MMU
	/*
	 * The SLB has to be restored here, but it sometimes still
	 * contains entries, so the __ variant must be used to prevent
	 * multi hits.
	 */
	__slb_restore_bolted_realmode();
#endif

	return srr1;
}

extern unsigned long idle_kvm_start_guest(unsigned long srr1);

#ifdef CONFIG_HOTPLUG_CPU
/*
 * Idle entry for an offlined CPU: runs power7_idle_insn() with
 * power7_offline_type under MSR_IDLE and returns the resulting SRR1.
 */
static unsigned long power7_offline(void)
{
	unsigned long srr1;

	mtmsr(MSR_IDLE);

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	/* Tell KVM we're entering idle. */
	/******************************************************/
	/*  N O T E   W E L L    ! ! !    N O T E   W E L L   */
	/* The following store to HSTATE_HWTHREAD_STATE(r13)  */
	/* MUST occur in real mode, i.e. with the MMU off,    */
	/* and the MMU must stay off until we clear this flag */
	/* and test HSTATE_HWTHREAD_REQ(r13) in               */
	/* pnv_powersave_wakeup in this file.                 */
	/* The reason is that another thread can switch the   */
	/* MMU to a guest context whenever this flag is set   */
	/* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on,    */
	/* that would potentially cause this thread to start  */
	/* executing instructions from guest memory in        */
	/* hypervisor mode, leading to a host crash or data   */
	/* corruption, or worse.
*/
	/******************************************************/
	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
#endif

	__ppc64_runlatch_off();
	srr1 = power7_idle_insn(power7_offline_type);
	__ppc64_runlatch_on();

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
	/* Order setting hwthread_state vs. testing hwthread_req */
	smp_mb();
	if (local_paca->kvm_hstate.hwthread_req)
		srr1 = idle_kvm_start_guest(srr1);
#endif

	mtmsr(MSR_KERNEL);

	return srr1;
}
#endif

/*
 * Enter the POWER7/POWER8 idle state @type from the idle loop.
 *
 * Does nothing if prep_irq_for_idle_irqsoff() refuses. Otherwise idles with
 * MSR_IDLE and the runlatch off, then passes the wakeup SRR1 to
 * irq_set_pending_from_srr1().
 */
void power7_idle_type(unsigned long type)
{
	unsigned long srr1;

	if (!prep_irq_for_idle_irqsoff())
		return;

	mtmsr(MSR_IDLE);
	__ppc64_runlatch_off();
	srr1 = power7_idle_insn(type);
	__ppc64_runlatch_on();
	mtmsr(MSR_KERNEL);

	fini_irq_for_idle_irqsoff();
	irq_set_pending_from_srr1(srr1);
}

/* Parameterless nap entry; a no-op unless powersave_nap is set. */
static void power7_idle(void)
{
	if (!powersave_nap)
		return;

	power7_idle_type(PNV_THREAD_NAP);
}

/* SPRs saved and restored around POWER9 stop states by power9_idle_stop() */
struct p9_sprs {
	/* per core */
	u64 ptcr;
	u64 rpr;
	u64 tscr;
	u64 ldbar;

	/* per thread */
	u64 lpcr;
	u64 hfscr;
	u64 fscr;
	u64 pid;
	u64 purr;
	u64 spurr;
	u64 dscr;
	u64 ciabr;

	u64 mmcra;
	u32 mmcr0;
	u32 mmcr1;
	u64 mmcr2;

	/* per thread SPRs that get lost in shallow states */
	u64 amr;
	u64 iamr;
	u64 amor;
	u64 uamor;
};

/*
 * Execute the stop instruction with @psscr on POWER9 and restore lost state
 * after wakeup.
 *
 * With EC=ESL=0 the no-loss variant is used and 0 is returned on a normal
 * wakeup. Otherwise SPRs are saved first when the requested level is at or
 * beyond deep_spr_loss_state, and restored when the PLS field read back from
 * PSSCR shows such a level was reached.
 *
 * Returns the wakeup SRR1 value, or 0 if the thread did not stop because
 * dont_stop was set (CPU_FTR_P9_TM_XER_SO_BUG case).
 */
static unsigned long power9_idle_stop(unsigned long psscr)
{
	int cpu = raw_smp_processor_id();
	int first = cpu_first_thread_sibling(cpu);
	unsigned long *state = &paca_ptrs[first]->idle_state;
	unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
	unsigned long srr1;
	unsigned long pls;
	unsigned long mmcr0 = 0;
	unsigned long mmcra = 0;
	struct p9_sprs sprs = {}; /* avoid false used-uninitialised */
	bool sprs_saved = false;

	if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
		/* EC=ESL=0 case */

		/*
		 * Wake synchronously. SRESET via xscom may still cause
		 * a 0x100 powersave wakeup with SRR1 reason!
		 */
		srr1 = isa300_idle_stop_noloss(psscr);		/* go idle */
		if (likely(!srr1))
			return 0;

		/*
		 * Registers not saved, can't recover!
		 * This would be a hardware bug
		 */
		BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);

		goto out;
	}

	/* EC=ESL=1 case */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) {
		local_paca->requested_psscr = psscr;
		/* order setting requested_psscr vs testing dont_stop */
		smp_mb();
		if (atomic_read(&local_paca->dont_stop)) {
			local_paca->requested_psscr = 0;
			return 0;
		}
	}
#endif

	if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
		 /*
		  * POWER9 DD2 can incorrectly set PMAO when waking up
		  * after a state-loss idle. Saving and restoring MMCR0
		  * over idle is a workaround.
		  */
		mmcr0		= mfspr(SPRN_MMCR0);
	}

	if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
		sprs.lpcr	= mfspr(SPRN_LPCR);
		sprs.hfscr	= mfspr(SPRN_HFSCR);
		sprs.fscr	= mfspr(SPRN_FSCR);
		sprs.pid	= mfspr(SPRN_PID);
		sprs.purr	= mfspr(SPRN_PURR);
		sprs.spurr	= mfspr(SPRN_SPURR);
		sprs.dscr	= mfspr(SPRN_DSCR);
		sprs.ciabr	= mfspr(SPRN_CIABR);

		sprs.mmcra	= mfspr(SPRN_MMCRA);
		sprs.mmcr0	= mfspr(SPRN_MMCR0);
		sprs.mmcr1	= mfspr(SPRN_MMCR1);
		sprs.mmcr2	= mfspr(SPRN_MMCR2);

		sprs.ptcr	= mfspr(SPRN_PTCR);
		sprs.rpr	= mfspr(SPRN_RPR);
		sprs.tscr	= mfspr(SPRN_TSCR);
		if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
			sprs.ldbar = mfspr(SPRN_LDBAR);

		sprs_saved = true;

		atomic_start_thread_idle();
	}

	sprs.amr	= mfspr(SPRN_AMR);
	sprs.iamr	= mfspr(SPRN_IAMR);
	sprs.uamor	= mfspr(SPRN_UAMOR);

	srr1 = isa300_idle_stop_mayloss(psscr);		/* go idle */

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	local_paca->requested_psscr = 0;
#endif

	/* Re-read PSSCR: its PLS field is tested below */
	psscr = mfspr(SPRN_PSSCR);

	WARN_ON_ONCE(!srr1);
	WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));

	if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
		/*
		 * We don't need an isync after the mtsprs here because the
		 * upcoming mtmsrd is execution synchronizing.
		 */
		mtspr(SPRN_AMR,		sprs.amr);
		mtspr(SPRN_IAMR,	sprs.iamr);
		mtspr(SPRN_AMOR,	~0);
		mtspr(SPRN_UAMOR,	sprs.uamor);

		/*
		 * Workaround for POWER9 DD2.0, if we lost resources, the ERAT
		 * might have been corrupted and needs flushing. We also need
		 * to reload MMCR0 (see mmcr0 comment above).
		 */
		if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
			asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT);
			mtspr(SPRN_MMCR0, mmcr0);
		}

		/*
		 * DD2.2 and earlier need to set then clear bit 60 in MMCRA
		 * to ensure the PMU starts running.
		 */
		mmcra = mfspr(SPRN_MMCRA);
		mmcra |= PPC_BIT(60);
		mtspr(SPRN_MMCRA, mmcra);
		mmcra &= ~PPC_BIT(60);
		mtspr(SPRN_MMCRA, mmcra);
	}

	if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
		hmi_exception_realmode(NULL);

	/*
	 * On POWER9, SRR1 bits do not match exactly as expected.
	 * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
	 * just always test PSSCR for SPR/TB state loss.
	 */
	pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
	if (likely(pls < deep_spr_loss_state)) {
		if (sprs_saved)
			atomic_stop_thread_idle();
		goto out;
	}

	/* HV state loss */
	BUG_ON(!sprs_saved);

	atomic_lock_thread_idle();

	/* Another thread of the core is already marked as not idle */
	if ((*state & core_thread_mask) != 0)
		goto core_woken;

	/* Per-core SPRs */
	mtspr(SPRN_PTCR,	sprs.ptcr);
	mtspr(SPRN_RPR,		sprs.rpr);
	mtspr(SPRN_TSCR,	sprs.tscr);

	if (pls >= pnv_first_tb_loss_level) {
		/* TB loss */
		if (opal_resync_timebase() != OPAL_SUCCESS)
			BUG();
	}

	/*
	 * isync after restoring shared SPRs and before unlocking. Unlock
	 * only contains hwsync which does not necessarily do the right
	 * thing for SPRs.
	 */
	isync();

core_woken:
	atomic_unlock_and_stop_thread_idle();

	/* Per-thread SPRs */
	mtspr(SPRN_LPCR,	sprs.lpcr);
	mtspr(SPRN_HFSCR,	sprs.hfscr);
	mtspr(SPRN_FSCR,	sprs.fscr);
	mtspr(SPRN_PID,		sprs.pid);
	mtspr(SPRN_PURR,	sprs.purr);
	mtspr(SPRN_SPURR,	sprs.spurr);
	mtspr(SPRN_DSCR,	sprs.dscr);
	mtspr(SPRN_CIABR,	sprs.ciabr);

	mtspr(SPRN_MMCRA,	sprs.mmcra);
	mtspr(SPRN_MMCR0,	sprs.mmcr0);
	mtspr(SPRN_MMCR1,	sprs.mmcr1);
	mtspr(SPRN_MMCR2,	sprs.mmcr2);
	if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
		mtspr(SPRN_LDBAR, sprs.ldbar);

	mtspr(SPRN_SPRG3,	local_paca->sprg_vdso);

	if (!radix_enabled())
		__slb_restore_bolted_realmode();

out:
	mtmsr(MSR_KERNEL);

	return srr1;
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
 * This is used in working around bugs in thread reconfiguration
 * on POWER9 (at least up to Nimbus DD2.2) relating to transactional
 * memory and the way that XER[SO] is checkpointed.
 * This function forces the core into SMT4 by asking
 * all other threads not to stop, and sending a message to any
 * that are in a stop state.
 * Must be called with preemption disabled.
*/
void pnv_power9_force_smt4_catch(void)
{
	int cpu, cpu0, thr;
	int awake_threads = 1;		/* this thread is awake */
	int poke_threads = 0;
	int need_awake = threads_per_core;

	cpu = smp_processor_id();
	cpu0 = cpu & ~(threads_per_core - 1);
	/* Ask every other thread of this core not to stop */
	for (thr = 0; thr < threads_per_core; ++thr) {
		if (cpu != cpu0 + thr)
			atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
	}
	/* order setting dont_stop vs testing requested_psscr */
	smp_mb();
	/*
	 * NOTE(review): this loop does not skip the calling thread, so the
	 * caller is counted here as well as in the initial value of
	 * awake_threads whenever its requested_psscr is 0 - confirm that
	 * this is intended.
	 */
	for (thr = 0; thr < threads_per_core; ++thr) {
		if (!paca_ptrs[cpu0+thr]->requested_psscr)
			++awake_threads;
		else
			poke_threads |= (1 << thr);
	}

	/* If at least 3 threads are awake, the core is in SMT4 already */
	if (awake_threads < need_awake) {
		/* We have to wake some threads; we'll use msgsnd */
		for (thr = 0; thr < threads_per_core; ++thr) {
			if (poke_threads & (1 << thr)) {
				ppc_msgsnd_sync();
				ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
					   paca_ptrs[cpu0+thr]->hw_cpu_id);
			}
		}
		/* now spin until at least 3 threads are awake */
		do {
			for (thr = 0; thr < threads_per_core; ++thr) {
				if ((poke_threads & (1 << thr)) &&
				    !paca_ptrs[cpu0+thr]->requested_psscr) {
					++awake_threads;
					poke_threads &= ~(1 << thr);
				}
			}
		} while (awake_threads < need_awake);
	}
}
EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch);

/*
 * Undo pnv_power9_force_smt4_catch(): decrement dont_stop on every other
 * thread of the calling thread's core.
 */
void pnv_power9_force_smt4_release(void)
{
	int cpu, cpu0, thr;

	cpu = smp_processor_id();
	cpu0 = cpu & ~(threads_per_core - 1);

	/* clear all the dont_stop flags */
	for (thr = 0; thr < threads_per_core; ++thr) {
		if (cpu != cpu0 + thr)
			atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop);
	}
}
EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

struct p10_sprs {
	/*
	 * SPRs that get lost in shallow states:
	 *
	 * P10 loses CR, LR, CTR, FPSCR, VSCR, XER, TAR, SPRG2, and HSPRG1
	 * isa300 idle routines restore CR, LR.
	 * CTR is volatile
	 * idle thread doesn't use FP or VEC
	 * kernel doesn't use TAR
	 * HSPRG1 is only live in HV interrupt entry
	 * SPRG2 is only live in KVM guests, KVM handles it.
	 */
};

/*
 * Execute the stop instruction with @psscr on POWER10.
 *
 * Follows the same structure as power9_idle_stop(), but no SPRs are saved
 * or restored yet (see the XXX markers). Returns the wakeup SRR1 value, or
 * 0 after a normal wakeup from the EC=ESL=0 variant.
 */
static unsigned long power10_idle_stop(unsigned long psscr)
{
	int cpu = raw_smp_processor_id();
	int first = cpu_first_thread_sibling(cpu);
	unsigned long *state = &paca_ptrs[first]->idle_state;
	unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
	unsigned long srr1;
	unsigned long pls;
//	struct p10_sprs sprs = {}; /* avoid false used-uninitialised */
	bool sprs_saved = false;

	if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
		/* EC=ESL=0 case */

		/*
		 * Wake synchronously. SRESET via xscom may still cause
		 * a 0x100 powersave wakeup with SRR1 reason!
		 */
		srr1 = isa300_idle_stop_noloss(psscr);		/* go idle */
		if (likely(!srr1))
			return 0;

		/*
		 * Registers not saved, can't recover!
		 * This would be a hardware bug
		 */
		BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);

		goto out;
	}

	/* EC=ESL=1 case */
	if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
		/* XXX: save SPRs for deep state loss here. */

		sprs_saved = true;

		atomic_start_thread_idle();
	}

	srr1 = isa300_idle_stop_mayloss(psscr);		/* go idle */

	/* Re-read PSSCR: its PLS field is tested below */
	psscr = mfspr(SPRN_PSSCR);

	WARN_ON_ONCE(!srr1);
	WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));

	if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
		hmi_exception_realmode(NULL);

	/*
	 * On POWER10, SRR1 bits do not match exactly as expected.
	 * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
	 * just always test PSSCR for SPR/TB state loss.
	 */
	pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
	if (likely(pls < deep_spr_loss_state)) {
		if (sprs_saved)
			atomic_stop_thread_idle();
		goto out;
	}

	/* HV state loss */
	BUG_ON(!sprs_saved);

	atomic_lock_thread_idle();

	/* Another thread of the core is already marked as not idle */
	if ((*state & core_thread_mask) != 0)
		goto core_woken;

	/* XXX: restore per-core SPRs here */

	if (pls >= pnv_first_tb_loss_level) {
		/* TB loss */
		if (opal_resync_timebase() != OPAL_SUCCESS)
			BUG();
	}

	/*
	 * isync after restoring shared SPRs and before unlocking. Unlock
	 * only contains hwsync which does not necessarily do the right
	 * thing for SPRs.
	 */
	isync();

core_woken:
	atomic_unlock_and_stop_thread_idle();

	/* XXX: restore per-thread SPRs here */

	if (!radix_enabled())
		__slb_restore_bolted_realmode();

out:
	mtmsr(MSR_KERNEL);

	return srr1;
}

#ifdef CONFIG_HOTPLUG_CPU
/* Dispatch an offline stop request to the POWER10 or POWER9 stop routine. */
static unsigned long arch300_offline_stop(unsigned long psscr)
{
	unsigned long srr1;

	if (cpu_has_feature(CPU_FTR_ARCH_31))
		srr1 = power10_idle_stop(psscr);
	else
		srr1 = power9_idle_stop(psscr);

	return srr1;
}
#endif

/*
 * Enter a stop state from the idle loop.
 *
 * The PSSCR value is the current PSSCR with the bits in @stop_psscr_mask
 * replaced by @stop_psscr_val. Does nothing if prep_irq_for_idle_irqsoff()
 * refuses; otherwise the wakeup SRR1 is passed to
 * irq_set_pending_from_srr1().
 */
void arch300_idle_type(unsigned long stop_psscr_val,
				      unsigned long stop_psscr_mask)
{
	unsigned long psscr;
	unsigned long srr1;

	if (!prep_irq_for_idle_irqsoff())
		return;

	psscr = mfspr(SPRN_PSSCR);
	psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;

	__ppc64_runlatch_off();
	if (cpu_has_feature(CPU_FTR_ARCH_31))
		srr1 = power10_idle_stop(psscr);
	else
		srr1 = power9_idle_stop(psscr);
	__ppc64_runlatch_on();

	fini_irq_for_idle_irqsoff();

	irq_set_pending_from_srr1(srr1);
}

/*
 * Used for ppc_md.power_save which needs a function with no parameters
 */
static void arch300_idle(void)
{
	arch300_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
}

#ifdef CONFIG_HOTPLUG_CPU

/*
 * Write @lpcr_val to the LPCR of the executing CPU and, when states that
 * lose full context are supported, also program it for @cpu through
 * opal_slw_set_reg(). The result of that call is not checked.
 */
void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
{
	u64 pir = get_hard_smp_processor_id(cpu);

	mtspr(SPRN_LPCR, lpcr_val);

	/*
	 * Program the LPCR via stop-api only if the deepest stop state
	 * can lose hypervisor context.
	 */
	if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
		opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
}

/*
 * pnv_cpu_offline: A function that puts the CPU into the deepest
 * available platform idle state on a CPU-Offline.
 * interrupts hard disabled and no lazy irq pending.
 *
 * Returns the wakeup SRR1 value, or 0 when the busy-wait fallback was used.
 */
unsigned long pnv_cpu_offline(unsigned int cpu)
{
	unsigned long srr1;

	__ppc64_runlatch_off();

	if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
		unsigned long psscr;

		psscr = mfspr(SPRN_PSSCR);
		psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
						pnv_deepest_stop_psscr_val;
		srr1 = arch300_offline_stop(psscr);
	} else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) {
		srr1 = power7_offline();
	} else {
		/* This is the fallback method. We emulate snooze */
		while (!generic_check_cpu_restart(cpu)) {
			HMT_low();
			HMT_very_low();
		}
		srr1 = 0;
		HMT_medium();
	}

	__ppc64_runlatch_on();

	return srr1;
}
#endif

/*
 * Power ISA 3.0 idle initialization.
 *
 * POWER ISA 3.0 defines a new SPR Processor stop Status and Control
 * Register (PSSCR) to control idle behavior.
 *
 * PSSCR layout:
 * ----------------------------------------------------------
 * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL |
 * ----------------------------------------------------------
 * 0      4     41   42    43   44     48    54   56    60
 *
 * PSSCR key fields:
 *	Bits 0:3  - Power-Saving Level Status (PLS).
This field indicates the 1102 * lowest power-saving state the thread entered since stop instruction was 1103 * last executed. 1104 * 1105 * Bit 41 - Status Disable(SD) 1106 * 0 - Shows PLS entries 1107 * 1 - PLS entries are all 0 1108 * 1109 * Bit 42 - Enable State Loss 1110 * 0 - No state is lost irrespective of other fields 1111 * 1 - Allows state loss 1112 * 1113 * Bit 43 - Exit Criterion 1114 * 0 - Exit from power-save mode on any interrupt 1115 * 1 - Exit from power-save mode controlled by LPCR's PECE bits 1116 * 1117 * Bits 44:47 - Power-Saving Level Limit 1118 * This limits the power-saving level that can be entered into. 1119 * 1120 * Bits 60:63 - Requested Level 1121 * Used to specify which power-saving level must be entered on executing 1122 * stop instruction 1123 */ 1124 1125int __init validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) 1126{ 1127 int err = 0; 1128 1129 /* 1130 * psscr_mask == 0xf indicates an older firmware. 1131 * Set remaining fields of psscr to the default values. 1132 * See NOTE above definition of PSSCR_HV_DEFAULT_VAL 1133 */ 1134 if (*psscr_mask == 0xf) { 1135 *psscr_val = *psscr_val | PSSCR_HV_DEFAULT_VAL; 1136 *psscr_mask = PSSCR_HV_DEFAULT_MASK; 1137 return err; 1138 } 1139 1140 /* 1141 * New firmware is expected to set the psscr_val bits correctly. 1142 * Validate that the following invariants are correctly maintained by 1143 * the new firmware. 1144 * - ESL bit value matches the EC bit value. 1145 * - ESL bit is set for all the deep stop states. 1146 */ 1147 if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val)) { 1148 err = ERR_EC_ESL_MISMATCH; 1149 } else if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) && 1150 GET_PSSCR_ESL(*psscr_val) == 0) { 1151 err = ERR_DEEP_STATE_ESL_MISMATCH; 1152 } 1153 1154 return err; 1155} 1156 1157/* 1158 * pnv_arch300_idle_init: Initializes the default idle state, first 1159 * deep idle state and deepest idle state on 1160 * ISA 3.0 CPUs. 
1161 * 1162 * @np: /ibm,opal/power-mgt device node 1163 * @flags: cpu-idle-state-flags array 1164 * @dt_idle_states: Number of idle state entries 1165 * Returns 0 on success 1166 */ 1167static void __init pnv_arch300_idle_init(void) 1168{ 1169 u64 max_residency_ns = 0; 1170 int i; 1171 1172 /* stop is not really architected, we only have p9,p10 drivers */ 1173 if (!pvr_version_is(PVR_POWER10) && !pvr_version_is(PVR_POWER9)) 1174 return; 1175 1176 /* 1177 * pnv_deepest_stop_{val,mask} should be set to values corresponding to 1178 * the deepest stop state. 1179 * 1180 * pnv_default_stop_{val,mask} should be set to values corresponding to 1181 * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state. 1182 */ 1183 pnv_first_tb_loss_level = MAX_STOP_STATE + 1; 1184 deep_spr_loss_state = MAX_STOP_STATE + 1; 1185 for (i = 0; i < nr_pnv_idle_states; i++) { 1186 int err; 1187 struct pnv_idle_states_t *state = &pnv_idle_states[i]; 1188 u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK; 1189 1190 /* No deep loss driver implemented for POWER10 yet */ 1191 if (pvr_version_is(PVR_POWER10) && 1192 state->flags & (OPAL_PM_TIMEBASE_STOP|OPAL_PM_LOSE_FULL_CONTEXT)) 1193 continue; 1194 1195 if ((state->flags & OPAL_PM_TIMEBASE_STOP) && 1196 (pnv_first_tb_loss_level > psscr_rl)) 1197 pnv_first_tb_loss_level = psscr_rl; 1198 1199 if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) && 1200 (deep_spr_loss_state > psscr_rl)) 1201 deep_spr_loss_state = psscr_rl; 1202 1203 /* 1204 * The idle code does not deal with TB loss occurring 1205 * in a shallower state than SPR loss, so force it to 1206 * behave like SPRs are lost if TB is lost. POWER9 would 1207 * never encounter this, but a POWER8 core would if it 1208 * implemented the stop instruction. So this is for forward 1209 * compatibility. 
1210 */ 1211 if ((state->flags & OPAL_PM_TIMEBASE_STOP) && 1212 (deep_spr_loss_state > psscr_rl)) 1213 deep_spr_loss_state = psscr_rl; 1214 1215 err = validate_psscr_val_mask(&state->psscr_val, 1216 &state->psscr_mask, 1217 state->flags); 1218 if (err) { 1219 report_invalid_psscr_val(state->psscr_val, err); 1220 continue; 1221 } 1222 1223 state->valid = true; 1224 1225 if (max_residency_ns < state->residency_ns) { 1226 max_residency_ns = state->residency_ns; 1227 pnv_deepest_stop_psscr_val = state->psscr_val; 1228 pnv_deepest_stop_psscr_mask = state->psscr_mask; 1229 pnv_deepest_stop_flag = state->flags; 1230 deepest_stop_found = true; 1231 } 1232 1233 if (!default_stop_found && 1234 (state->flags & OPAL_PM_STOP_INST_FAST)) { 1235 pnv_default_stop_val = state->psscr_val; 1236 pnv_default_stop_mask = state->psscr_mask; 1237 default_stop_found = true; 1238 WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT); 1239 } 1240 } 1241 1242 if (unlikely(!default_stop_found)) { 1243 pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n"); 1244 } else { 1245 ppc_md.power_save = arch300_idle; 1246 pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n", 1247 pnv_default_stop_val, pnv_default_stop_mask); 1248 } 1249 1250 if (unlikely(!deepest_stop_found)) { 1251 pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. 
Offlined CPUs will busy wait"); 1252 } else { 1253 pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n", 1254 pnv_deepest_stop_psscr_val, 1255 pnv_deepest_stop_psscr_mask); 1256 } 1257 1258 pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n", 1259 deep_spr_loss_state); 1260 1261 pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n", 1262 pnv_first_tb_loss_level); 1263} 1264 1265static void __init pnv_disable_deep_states(void) 1266{ 1267 /* 1268 * The stop-api is unable to restore hypervisor 1269 * resources on wakeup from platform idle states which 1270 * lose full context. So disable such states. 1271 */ 1272 supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT; 1273 pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n"); 1274 pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n"); 1275 1276 if (cpu_has_feature(CPU_FTR_ARCH_300) && 1277 (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) { 1278 /* 1279 * Use the default stop state for CPU-Hotplug 1280 * if available. 
1281 */ 1282 if (default_stop_found) { 1283 pnv_deepest_stop_psscr_val = pnv_default_stop_val; 1284 pnv_deepest_stop_psscr_mask = pnv_default_stop_mask; 1285 pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n", 1286 pnv_deepest_stop_psscr_val); 1287 } else { /* Fallback to snooze loop for CPU-Hotplug */ 1288 deepest_stop_found = false; 1289 pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n"); 1290 } 1291 } 1292} 1293 1294/* 1295 * Probe device tree for supported idle states 1296 */ 1297static void __init pnv_probe_idle_states(void) 1298{ 1299 int i; 1300 1301 if (nr_pnv_idle_states < 0) { 1302 pr_warn("cpuidle-powernv: no idle states found in the DT\n"); 1303 return; 1304 } 1305 1306 if (cpu_has_feature(CPU_FTR_ARCH_300)) 1307 pnv_arch300_idle_init(); 1308 1309 for (i = 0; i < nr_pnv_idle_states; i++) 1310 supported_cpuidle_states |= pnv_idle_states[i].flags; 1311} 1312 1313/* 1314 * This function parses device-tree and populates all the information 1315 * into pnv_idle_states structure. It also sets up nr_pnv_idle_states 1316 * which is the number of cpuidle states discovered through device-tree. 
1317 */ 1318 1319static int __init pnv_parse_cpuidle_dt(void) 1320{ 1321 struct device_node *np; 1322 int nr_idle_states, i; 1323 int rc = 0; 1324 u32 *temp_u32; 1325 u64 *temp_u64; 1326 const char **temp_string; 1327 1328 np = of_find_node_by_path("/ibm,opal/power-mgt"); 1329 if (!np) { 1330 pr_warn("opal: PowerMgmt Node not found\n"); 1331 return -ENODEV; 1332 } 1333 nr_idle_states = of_property_count_u32_elems(np, 1334 "ibm,cpu-idle-state-flags"); 1335 1336 pnv_idle_states = kcalloc(nr_idle_states, sizeof(*pnv_idle_states), 1337 GFP_KERNEL); 1338 temp_u32 = kcalloc(nr_idle_states, sizeof(u32), GFP_KERNEL); 1339 temp_u64 = kcalloc(nr_idle_states, sizeof(u64), GFP_KERNEL); 1340 temp_string = kcalloc(nr_idle_states, sizeof(char *), GFP_KERNEL); 1341 1342 if (!(pnv_idle_states && temp_u32 && temp_u64 && temp_string)) { 1343 pr_err("Could not allocate memory for dt parsing\n"); 1344 rc = -ENOMEM; 1345 goto out; 1346 } 1347 1348 /* Read flags */ 1349 if (of_property_read_u32_array(np, "ibm,cpu-idle-state-flags", 1350 temp_u32, nr_idle_states)) { 1351 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n"); 1352 rc = -EINVAL; 1353 goto out; 1354 } 1355 for (i = 0; i < nr_idle_states; i++) 1356 pnv_idle_states[i].flags = temp_u32[i]; 1357 1358 /* Read latencies */ 1359 if (of_property_read_u32_array(np, "ibm,cpu-idle-state-latencies-ns", 1360 temp_u32, nr_idle_states)) { 1361 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n"); 1362 rc = -EINVAL; 1363 goto out; 1364 } 1365 for (i = 0; i < nr_idle_states; i++) 1366 pnv_idle_states[i].latency_ns = temp_u32[i]; 1367 1368 /* Read residencies */ 1369 if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns", 1370 temp_u32, nr_idle_states)) { 1371 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n"); 1372 rc = -EINVAL; 1373 goto out; 1374 } 1375 for (i = 0; i < nr_idle_states; i++) 1376 pnv_idle_states[i].residency_ns = temp_u32[i]; 1377 1378 /* For power9 
and later */ 1379 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1380 /* Read pm_crtl_val */ 1381 if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr", 1382 temp_u64, nr_idle_states)) { 1383 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n"); 1384 rc = -EINVAL; 1385 goto out; 1386 } 1387 for (i = 0; i < nr_idle_states; i++) 1388 pnv_idle_states[i].psscr_val = temp_u64[i]; 1389 1390 /* Read pm_crtl_mask */ 1391 if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr-mask", 1392 temp_u64, nr_idle_states)) { 1393 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n"); 1394 rc = -EINVAL; 1395 goto out; 1396 } 1397 for (i = 0; i < nr_idle_states; i++) 1398 pnv_idle_states[i].psscr_mask = temp_u64[i]; 1399 } 1400 1401 /* 1402 * power8 specific properties ibm,cpu-idle-state-pmicr-mask and 1403 * ibm,cpu-idle-state-pmicr-val were never used and there is no 1404 * plan to use it in near future. Hence, not parsing these properties 1405 */ 1406 1407 if (of_property_read_string_array(np, "ibm,cpu-idle-state-names", 1408 temp_string, nr_idle_states) < 0) { 1409 pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n"); 1410 rc = -EINVAL; 1411 goto out; 1412 } 1413 for (i = 0; i < nr_idle_states; i++) 1414 strlcpy(pnv_idle_states[i].name, temp_string[i], 1415 PNV_IDLE_NAME_LEN); 1416 nr_pnv_idle_states = nr_idle_states; 1417 rc = 0; 1418out: 1419 kfree(temp_u32); 1420 kfree(temp_u64); 1421 kfree(temp_string); 1422 return rc; 1423} 1424 1425static int __init pnv_init_idle_states(void) 1426{ 1427 int cpu; 1428 int rc = 0; 1429 1430 /* Set up PACA fields */ 1431 for_each_present_cpu(cpu) { 1432 struct paca_struct *p = paca_ptrs[cpu]; 1433 1434 p->idle_state = 0; 1435 if (cpu == cpu_first_thread_sibling(cpu)) 1436 p->idle_state = (1 << threads_per_core) - 1; 1437 1438 if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 1439 /* P7/P8 nap */ 1440 p->thread_idle_state = PNV_THREAD_RUNNING; 1441 } else if (pvr_version_is(PVR_POWER9)) { 1442 
/* P9 stop workarounds */ 1443#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 1444 p->requested_psscr = 0; 1445 atomic_set(&p->dont_stop, 0); 1446#endif 1447 } 1448 } 1449 1450 /* In case we error out nr_pnv_idle_states will be zero */ 1451 nr_pnv_idle_states = 0; 1452 supported_cpuidle_states = 0; 1453 1454 if (cpuidle_disable != IDLE_NO_OVERRIDE) 1455 goto out; 1456 rc = pnv_parse_cpuidle_dt(); 1457 if (rc) 1458 return rc; 1459 pnv_probe_idle_states(); 1460 1461 if (!cpu_has_feature(CPU_FTR_ARCH_300)) { 1462 if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { 1463 power7_fastsleep_workaround_entry = false; 1464 power7_fastsleep_workaround_exit = false; 1465 } else { 1466 /* 1467 * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that 1468 * workaround is needed to use fastsleep. Provide sysfs 1469 * control to choose how this workaround has to be 1470 * applied. 1471 */ 1472 device_create_file(cpu_subsys.dev_root, 1473 &dev_attr_fastsleep_workaround_applyonce); 1474 } 1475 1476 update_subcore_sibling_mask(); 1477 1478 if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) { 1479 ppc_md.power_save = power7_idle; 1480 power7_offline_type = PNV_THREAD_NAP; 1481 } 1482 1483 if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) && 1484 (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)) 1485 power7_offline_type = PNV_THREAD_WINKLE; 1486 else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) || 1487 (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) 1488 power7_offline_type = PNV_THREAD_SLEEP; 1489 } 1490 1491 if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) { 1492 if (pnv_save_sprs_for_deep_states()) 1493 pnv_disable_deep_states(); 1494 } 1495 1496out: 1497 return 0; 1498} 1499machine_subsys_initcall(powernv, pnv_init_idle_states);