perf_event.c
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.

#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>

#define CSKY_PMU_MAX_EVENTS 32
#define DEFAULT_COUNT_WIDTH 48

#define HPCR		"<0, 0x0>"	/* PMU Control reg */
#define HPSPR		"<0, 0x1>"	/* Start PC reg */
#define HPEPR		"<0, 0x2>"	/* End PC reg */
#define HPSIR		"<0, 0x3>"	/* Soft Counter reg */
#define HPCNTENR	"<0, 0x4>"	/* Count Enable reg */
#define HPINTENR	"<0, 0x5>"	/* Interrupt Enable reg */
#define HPOFSR		"<0, 0x6>"	/* Interrupt Status reg */

/* The events for a given PMU register set. */
struct pmu_hw_events {
	/*
	 * The events that are active on the PMU for the given index.
	 */
	struct perf_event *events[CSKY_PMU_MAX_EVENTS];

	/*
	 * A 1 bit for an index indicates that the counter is being used for
	 * an event. A 0 means that the counter can be used.
	 */
	unsigned long used_mask[BITS_TO_LONGS(CSKY_PMU_MAX_EVENTS)];
};

static uint64_t (*hw_raw_read_mapping[CSKY_PMU_MAX_EVENTS])(void);
static void (*hw_raw_write_mapping[CSKY_PMU_MAX_EVENTS])(uint64_t val);

static struct csky_pmu_t {
	struct pmu			pmu;
	struct pmu_hw_events __percpu	*hw_events;
	struct platform_device		*plat_device;
	uint32_t			count_width;
	uint32_t			hpcr;
	u64				max_period;
} csky_pmu;
static int csky_pmu_irq;

#define to_csky_pmu(p)  (container_of(p, struct csky_pmu, pmu))

/*
 * Accessors for the PMU coprocessor: cprgr/cpwgr read and write the
 * coprocessor general registers (the counters), cprcr/cpwcr the
 * coprocessor control registers (HPCR and friends).
 */
#define cprgr(reg)				\
({						\
	unsigned int tmp;			\
	asm volatile("cprgr %0, "reg"\n"	\
		     : "=r"(tmp)		\
		     :				\
		     : "memory");		\
	tmp;					\
})

#define cpwgr(reg, val)		\
({				\
	asm volatile(		\
	"cpwgr %0, "reg"\n"	\
	:			\
	: "r"(val)		\
	: "memory");		\
})

#define cprcr(reg)				\
({						\
	unsigned int tmp;			\
	asm volatile("cprcr %0, "reg"\n"	\
		     : "=r"(tmp)		\
		     :				\
		     : "memory");		\
	tmp;					\
})

#define cpwcr(reg, val)		\
({				\
	asm volatile(		\
	"cpwcr %0, "reg"\n"	\
	:			\
	: "r"(val)		\
	: "memory");		\
})

/*
 * Each 64-bit counter is read as two 32-bit halves: the high word is
 * sampled before and after the low word, and the read is retried if it
 * changed in between (i.e. the low word wrapped during the sequence).
 * Every counter below uses the same pattern.
 */
/* cycle counter */
uint64_t csky_pmu_read_cc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x3>");
		lo = cprgr("<0, 0x2>");
		hi = cprgr("<0, 0x3>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_cc(uint64_t val)
{
	cpwgr("<0, 0x2>", (uint32_t) val);
	cpwgr("<0, 0x3>", (uint32_t) (val >> 32));
}

/* instruction counter */
static uint64_t csky_pmu_read_ic(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x5>");
		lo = cprgr("<0, 0x4>");
		hi = cprgr("<0, 0x5>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_ic(uint64_t val)
{
	cpwgr("<0, 0x4>", (uint32_t) val);
	cpwgr("<0, 0x5>", (uint32_t) (val >> 32));
}

/* l1 icache access counter */
static uint64_t csky_pmu_read_icac(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x7>");
		lo = cprgr("<0, 0x6>");
		hi = cprgr("<0, 0x7>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_icac(uint64_t val)
{
	cpwgr("<0, 0x6>", (uint32_t) val);
	cpwgr("<0, 0x7>", (uint32_t) (val >> 32));
}
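
/*
 * Illustrative note (not part of the original source): the counter
 * numbers used below double as raw perf event codes. Once
 * init_hw_perf_events() has installed the mapping tables, a
 * PERF_TYPE_RAW event with attr.config == 0x4, e.g.
 *
 *	perf stat -e r4 -a sleep 1
 *
 * is serviced through hw_raw_read_mapping[0x4](), i.e.
 * csky_pmu_read_icmc() below.
 */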

/* l1 icache miss counter */
static uint64_t csky_pmu_read_icmc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x9>");
		lo = cprgr("<0, 0x8>");
		hi = cprgr("<0, 0x9>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_icmc(uint64_t val)
{
	cpwgr("<0, 0x8>", (uint32_t) val);
	cpwgr("<0, 0x9>", (uint32_t) (val >> 32));
}

/* l1 dcache access counter */
static uint64_t csky_pmu_read_dcac(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0xb>");
		lo = cprgr("<0, 0xa>");
		hi = cprgr("<0, 0xb>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_dcac(uint64_t val)
{
	cpwgr("<0, 0xa>", (uint32_t) val);
	cpwgr("<0, 0xb>", (uint32_t) (val >> 32));
}

/* l1 dcache miss counter */
static uint64_t csky_pmu_read_dcmc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0xd>");
		lo = cprgr("<0, 0xc>");
		hi = cprgr("<0, 0xd>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_dcmc(uint64_t val)
{
	cpwgr("<0, 0xc>", (uint32_t) val);
	cpwgr("<0, 0xd>", (uint32_t) (val >> 32));
}

/* l2 cache access counter */
static uint64_t csky_pmu_read_l2ac(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0xf>");
		lo = cprgr("<0, 0xe>");
		hi = cprgr("<0, 0xf>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_l2ac(uint64_t val)
{
	cpwgr("<0, 0xe>", (uint32_t) val);
	cpwgr("<0, 0xf>", (uint32_t) (val >> 32));
}

/* l2 cache miss counter */
static uint64_t csky_pmu_read_l2mc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x11>");
		lo = cprgr("<0, 0x10>");
		hi = cprgr("<0, 0x11>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_l2mc(uint64_t val)
{
	cpwgr("<0, 0x10>", (uint32_t) val);
	cpwgr("<0, 0x11>", (uint32_t) (val >> 32));
}

/* I-UTLB miss counter */
static uint64_t csky_pmu_read_iutlbmc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x15>");
		lo = cprgr("<0, 0x14>");
		hi = cprgr("<0, 0x15>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_iutlbmc(uint64_t val)
{
	cpwgr("<0, 0x14>", (uint32_t) val);
	cpwgr("<0, 0x15>", (uint32_t) (val >> 32));
}

/* D-UTLB miss counter */
static uint64_t csky_pmu_read_dutlbmc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x17>");
		lo = cprgr("<0, 0x16>");
		hi = cprgr("<0, 0x17>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_dutlbmc(uint64_t val)
{
	cpwgr("<0, 0x16>", (uint32_t) val);
	cpwgr("<0, 0x17>", (uint32_t) (val >> 32));
}

/* JTLB miss counter */
static uint64_t csky_pmu_read_jtlbmc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x19>");
		lo = cprgr("<0, 0x18>");
		hi = cprgr("<0, 0x19>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_jtlbmc(uint64_t val)
{
	cpwgr("<0, 0x18>", (uint32_t) val);
	cpwgr("<0, 0x19>", (uint32_t) (val >> 32));
}

/* software counter */
static uint64_t csky_pmu_read_softc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x1b>");
		lo = cprgr("<0, 0x1a>");
		hi = cprgr("<0, 0x1b>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_softc(uint64_t val)
{
	cpwgr("<0, 0x1a>", (uint32_t) val);
	cpwgr("<0, 0x1b>", (uint32_t) (val >> 32));
}

/* conditional branch mispredict counter */
static uint64_t csky_pmu_read_cbmc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x1d>");
		lo = cprgr("<0, 0x1c>");
		hi = cprgr("<0, 0x1d>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_cbmc(uint64_t val)
{
	cpwgr("<0, 0x1c>", (uint32_t) val);
	cpwgr("<0, 0x1d>", (uint32_t) (val >> 32));
}

/* conditional branch instruction counter */
static uint64_t csky_pmu_read_cbic(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x1f>");
		lo = cprgr("<0, 0x1e>");
		hi = cprgr("<0, 0x1f>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_cbic(uint64_t val)
{
	cpwgr("<0, 0x1e>", (uint32_t) val);
	cpwgr("<0, 0x1f>", (uint32_t) (val >> 32));
}

/* indirect branch mispredict counter */
static uint64_t csky_pmu_read_ibmc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x21>");
		lo = cprgr("<0, 0x20>");
		hi = cprgr("<0, 0x21>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_ibmc(uint64_t val)
{
	cpwgr("<0, 0x20>", (uint32_t) val);
	cpwgr("<0, 0x21>", (uint32_t) (val >> 32));
}

/* indirect branch instruction counter */
static uint64_t csky_pmu_read_ibic(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x23>");
		lo = cprgr("<0, 0x22>");
		hi = cprgr("<0, 0x23>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_ibic(uint64_t val)
{
	cpwgr("<0, 0x22>", (uint32_t) val);
	cpwgr("<0, 0x23>", (uint32_t) (val >> 32));
}

/* LSU spec fail counter */
static uint64_t csky_pmu_read_lsfc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x25>");
		lo = cprgr("<0, 0x24>");
		hi = cprgr("<0, 0x25>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_lsfc(uint64_t val)
{
	cpwgr("<0, 0x24>", (uint32_t) val);
	cpwgr("<0, 0x25>", (uint32_t) (val >> 32));
}

/* store instruction counter */
static uint64_t csky_pmu_read_sic(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x27>");
		lo = cprgr("<0, 0x26>");
		hi = cprgr("<0, 0x27>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_sic(uint64_t val)
{
	cpwgr("<0, 0x26>", (uint32_t) val);
	cpwgr("<0, 0x27>", (uint32_t) (val >> 32));
}

/* dcache read access counter */
static uint64_t csky_pmu_read_dcrac(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x29>");
		lo = cprgr("<0, 0x28>");
		hi = cprgr("<0, 0x29>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_dcrac(uint64_t val)
{
	cpwgr("<0, 0x28>", (uint32_t) val);
	cpwgr("<0, 0x29>", (uint32_t) (val >> 32));
}

/* dcache read miss counter */
static uint64_t csky_pmu_read_dcrmc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x2b>");
		lo = cprgr("<0, 0x2a>");
		hi = cprgr("<0, 0x2b>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_dcrmc(uint64_t val)
{
	cpwgr("<0, 0x2a>", (uint32_t) val);
	cpwgr("<0, 0x2b>", (uint32_t) (val >> 32));
}

/* dcache write access counter */
static uint64_t csky_pmu_read_dcwac(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x2d>");
		lo = cprgr("<0, 0x2c>");
		hi = cprgr("<0, 0x2d>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_dcwac(uint64_t val)
{
	cpwgr("<0, 0x2c>", (uint32_t) val);
	cpwgr("<0, 0x2d>", (uint32_t) (val >> 32));
}

/* dcache write miss counter */
static uint64_t csky_pmu_read_dcwmc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x2f>");
		lo = cprgr("<0, 0x2e>");
		hi = cprgr("<0, 0x2f>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_dcwmc(uint64_t val)
{
	cpwgr("<0, 0x2e>", (uint32_t) val);
	cpwgr("<0, 0x2f>", (uint32_t) (val >> 32));
}

/* l2cache read access counter */
static uint64_t csky_pmu_read_l2rac(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x31>");
		lo = cprgr("<0, 0x30>");
		hi = cprgr("<0, 0x31>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_l2rac(uint64_t val)
{
	cpwgr("<0, 0x30>", (uint32_t) val);
	cpwgr("<0, 0x31>", (uint32_t) (val >> 32));
}

/* l2cache read miss counter */
static uint64_t csky_pmu_read_l2rmc(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x33>");
		lo = cprgr("<0, 0x32>");
		hi = cprgr("<0, 0x33>");
	} while (hi != tmp);

	result = (uint64_t) (hi) << 32;
	result |= lo;

	return result;
}

static void csky_pmu_write_l2rmc(uint64_t val)
{
	cpwgr("<0, 0x32>", (uint32_t) val);
	cpwgr("<0, 0x33>", (uint32_t) (val >> 32));
}

/* l2cache write access counter */
static uint64_t csky_pmu_read_l2wac(void)
{
	uint32_t lo, hi, tmp;
	uint64_t result;

	do {
		tmp = cprgr("<0, 0x35>");
		lo = cprgr("<0, 0x34>");
cprgr("<0, 0x34>"); 674 hi = cprgr("<0, 0x35>"); 675 } while (hi != tmp); 676 677 result = (uint64_t) (hi) << 32; 678 result |= lo; 679 680 return result; 681} 682 683static void csky_pmu_write_l2wac(uint64_t val) 684{ 685 cpwgr("<0, 0x34>", (uint32_t) val); 686 cpwgr("<0, 0x35>", (uint32_t) (val >> 32)); 687} 688 689/* l2cache write miss counter */ 690static uint64_t csky_pmu_read_l2wmc(void) 691{ 692 uint32_t lo, hi, tmp; 693 uint64_t result; 694 695 do { 696 tmp = cprgr("<0, 0x37>"); 697 lo = cprgr("<0, 0x36>"); 698 hi = cprgr("<0, 0x37>"); 699 } while (hi != tmp); 700 701 result = (uint64_t) (hi) << 32; 702 result |= lo; 703 704 return result; 705} 706 707static void csky_pmu_write_l2wmc(uint64_t val) 708{ 709 cpwgr("<0, 0x36>", (uint32_t) val); 710 cpwgr("<0, 0x37>", (uint32_t) (val >> 32)); 711} 712 713#define HW_OP_UNSUPPORTED 0xffff 714static const int csky_pmu_hw_map[PERF_COUNT_HW_MAX] = { 715 [PERF_COUNT_HW_CPU_CYCLES] = 0x1, 716 [PERF_COUNT_HW_INSTRUCTIONS] = 0x2, 717 [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, 718 [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, 719 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0xf, 720 [PERF_COUNT_HW_BRANCH_MISSES] = 0xe, 721 [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, 722 [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED, 723 [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED, 724 [PERF_COUNT_HW_REF_CPU_CYCLES] = HW_OP_UNSUPPORTED, 725}; 726 727#define C(_x) PERF_COUNT_HW_CACHE_##_x 728#define CACHE_OP_UNSUPPORTED 0xffff 729static const int csky_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { 730 [C(L1D)] = { 731#ifdef CONFIG_CPU_CK810 732 [C(OP_READ)] = { 733 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 734 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 735 }, 736 [C(OP_WRITE)] = { 737 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 738 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 739 }, 740 [C(OP_PREFETCH)] = { 741 [C(RESULT_ACCESS)] = 0x5, 742 [C(RESULT_MISS)] = 0x6, 743 }, 744#else 745 [C(OP_READ)] = { 746 [C(RESULT_ACCESS)] = 0x14, 747 [C(RESULT_MISS)] = 0x15, 748 }, 749 [C(OP_WRITE)] = { 750 [C(RESULT_ACCESS)] = 0x16, 751 [C(RESULT_MISS)] = 0x17, 752 }, 753 [C(OP_PREFETCH)] = { 754 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 755 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 756 }, 757#endif 758 }, 759 [C(L1I)] = { 760 [C(OP_READ)] = { 761 [C(RESULT_ACCESS)] = 0x3, 762 [C(RESULT_MISS)] = 0x4, 763 }, 764 [C(OP_WRITE)] = { 765 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 766 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 767 }, 768 [C(OP_PREFETCH)] = { 769 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 770 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 771 }, 772 }, 773 [C(LL)] = { 774#ifdef CONFIG_CPU_CK810 775 [C(OP_READ)] = { 776 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 777 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 778 }, 779 [C(OP_WRITE)] = { 780 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 781 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 782 }, 783 [C(OP_PREFETCH)] = { 784 [C(RESULT_ACCESS)] = 0x7, 785 [C(RESULT_MISS)] = 0x8, 786 }, 787#else 788 [C(OP_READ)] = { 789 [C(RESULT_ACCESS)] = 0x18, 790 [C(RESULT_MISS)] = 0x19, 791 }, 792 [C(OP_WRITE)] = { 793 [C(RESULT_ACCESS)] = 0x1a, 794 [C(RESULT_MISS)] = 0x1b, 795 }, 796 [C(OP_PREFETCH)] = { 797 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 798 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 799 }, 800#endif 801 }, 802 [C(DTLB)] = { 803#ifdef CONFIG_CPU_CK810 804 [C(OP_READ)] = { 805 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 806 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 807 }, 808 [C(OP_WRITE)] = { 809 
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
#else
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= 0x14,
			[C(RESULT_MISS)]	= 0xb,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= 0x16,
			[C(RESULT_MISS)]	= 0xb,
		},
#endif
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(ITLB)] = {
#ifdef CONFIG_CPU_CK810
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
#else
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= 0x3,
			[C(RESULT_MISS)]	= 0xa,
		},
#endif
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
	[C(NODE)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
		},
	},
};

int csky_pmu_event_set_period(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0;

	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (left > (s64)csky_pmu.max_period)
		left = csky_pmu.max_period;

	/*
	 * The hw event starts counting from this event offset,
	 * mark it to be able to extract future "deltas":
	 */
	local64_set(&hwc->prev_count, (u64)(-left));

	if (hw_raw_write_mapping[hwc->idx] != NULL)
		hw_raw_write_mapping[hwc->idx]((u64)(-left) &
						csky_pmu.max_period);

	cpwcr(HPOFSR, ~BIT(hwc->idx) & cprcr(HPOFSR));

	perf_event_update_userpage(event);

	return ret;
}

static void csky_perf_event_update(struct perf_event *event,
				   struct hw_perf_event *hwc)
{
	uint64_t prev_raw_count = local64_read(&hwc->prev_count);
	/*
	 * Sign extend count value to 64bit, otherwise delta calculation
	 * would be incorrect when overflow occurs.
	 */
	uint64_t new_raw_count = sign_extend64(
		hw_raw_read_mapping[hwc->idx](), csky_pmu.count_width - 1);
	int64_t delta = new_raw_count - prev_raw_count;

	/*
	 * We aren't afraid of hwc->prev_count changing beneath our feet
	 * because there's no way for us to re-enter this function anytime.
	 */
	local64_set(&hwc->prev_count, new_raw_count);
	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);
}

static void csky_pmu_reset(void *info)
{
	cpwcr(HPCR, BIT(31) | BIT(30) | BIT(1));
}

static void csky_pmu_read(struct perf_event *event)
{
	csky_perf_event_update(event, &event->hw);
}

static int csky_pmu_cache_event(u64 config)
{
	unsigned int cache_type, cache_op, cache_result;

	cache_type	= (config >>  0) & 0xff;
	cache_op	= (config >>  8) & 0xff;
	cache_result	= (config >> 16) & 0xff;

	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return -EINVAL;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return -EINVAL;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	return csky_pmu_cache_map[cache_type][cache_op][cache_result];
}

static int csky_pmu_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int ret;

	switch (event->attr.type) {
	case PERF_TYPE_HARDWARE:
		if (event->attr.config >= PERF_COUNT_HW_MAX)
			return -ENOENT;
		ret = csky_pmu_hw_map[event->attr.config];
		if (ret == HW_OP_UNSUPPORTED)
			return -ENOENT;
		hwc->idx = ret;
		break;
	case PERF_TYPE_HW_CACHE:
		ret = csky_pmu_cache_event(event->attr.config);
		if (ret == CACHE_OP_UNSUPPORTED)
			return -ENOENT;
		hwc->idx = ret;
		break;
	case PERF_TYPE_RAW:
		if (hw_raw_read_mapping[event->attr.config] == NULL)
			return -ENOENT;
		hwc->idx = event->attr.config;
		break;
	default:
		return -ENOENT;
	}

	/*
	 * Select which privilege levels get counted from the event's
	 * exclude_* attributes, then set the HPCR enable bits.
	 */
	if (event->attr.exclude_user)
		csky_pmu.hpcr = BIT(2);
	else if (event->attr.exclude_kernel)
		csky_pmu.hpcr = BIT(3);
	else
		csky_pmu.hpcr = BIT(2) | BIT(3);

	csky_pmu.hpcr |= BIT(1) | BIT(0);

	return 0;
}

/* starts all counters */
static void csky_pmu_enable(struct pmu *pmu)
{
	cpwcr(HPCR, csky_pmu.hpcr);
}

/* stops all counters */
static void csky_pmu_disable(struct pmu *pmu)
{
	cpwcr(HPCR, BIT(1));
}

static void csky_pmu_start(struct perf_event *event, int flags)
{
	unsigned long flg;
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (WARN_ON_ONCE(idx == -1))
		return;

	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

	hwc->state = 0;

	csky_pmu_event_set_period(event);

	local_irq_save(flg);

	cpwcr(HPINTENR, BIT(idx) | cprcr(HPINTENR));
	cpwcr(HPCNTENR, BIT(idx) | cprcr(HPCNTENR));

	local_irq_restore(flg);
}

static void csky_pmu_stop_event(struct perf_event *event)
{
	unsigned long flg;
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	local_irq_save(flg);

	cpwcr(HPINTENR, ~BIT(idx) & cprcr(HPINTENR));
	cpwcr(HPCNTENR, ~BIT(idx) & cprcr(HPCNTENR));

	local_irq_restore(flg);
}

static void csky_pmu_stop(struct perf_event *event, int flags)
{
	if (!(event->hw.state & PERF_HES_STOPPED)) {
		csky_pmu_stop_event(event);
		event->hw.state |= PERF_HES_STOPPED;
	}

	if ((flags & PERF_EF_UPDATE) &&
	    !(event->hw.state & PERF_HES_UPTODATE)) {
		csky_perf_event_update(event, &event->hw);
		event->hw.state |= PERF_HES_UPTODATE;
	}
}

static void csky_pmu_del(struct perf_event *event, int flags)
{
	struct pmu_hw_events *hw_events = this_cpu_ptr(csky_pmu.hw_events);
	struct hw_perf_event *hwc = &event->hw;

	csky_pmu_stop(event, PERF_EF_UPDATE);

	hw_events->events[hwc->idx] = NULL;

	perf_event_update_userpage(event);
}

/* allocate hardware counter and optionally start counting */
static int csky_pmu_add(struct perf_event *event, int flags)
{
	struct pmu_hw_events *hw_events = this_cpu_ptr(csky_pmu.hw_events);
	struct hw_perf_event *hwc = &event->hw;

	hw_events->events[hwc->idx] = event;

	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	if (flags & PERF_EF_START)
		csky_pmu_start(event, PERF_EF_RELOAD);

	perf_event_update_userpage(event);

	return 0;
}

static irqreturn_t csky_pmu_handle_irq(int irq_num, void *dev)
{
	struct perf_sample_data data;
	struct pmu_hw_events *cpuc = this_cpu_ptr(csky_pmu.hw_events);
	struct pt_regs *regs;
	int idx;

	/*
	 * Did an overflow occur?
	 */
	if (!cprcr(HPOFSR))
		return IRQ_NONE;

	/*
	 * Handle the counter(s) overflow(s)
	 */
	regs = get_irq_regs();

	csky_pmu_disable(&csky_pmu.pmu);

	for (idx = 0; idx < CSKY_PMU_MAX_EVENTS; ++idx) {
		struct perf_event *event = cpuc->events[idx];
		struct hw_perf_event *hwc;

		/* Ignore if we don't have an event. */
		if (!event)
			continue;
		/*
		 * We have a single interrupt for all counters. Check that
		 * each counter has overflowed before we process it.
		 */
		if (!(cprcr(HPOFSR) & BIT(idx)))
			continue;

		hwc = &event->hw;
		csky_perf_event_update(event, &event->hw);
		perf_sample_data_init(&data, 0, hwc->last_period);
		csky_pmu_event_set_period(event);

		if (perf_event_overflow(event, &data, regs))
			csky_pmu_stop_event(event);
	}

	csky_pmu_enable(&csky_pmu.pmu);

	/*
	 * Handle the pending perf events.
	 *
	 * Note: this call *must* be run with interrupts disabled. For
	 * platforms that can have the PMU interrupts raised as an NMI, this
	 * will not work.
	 */
	irq_work_run();

	return IRQ_HANDLED;
}

static int csky_pmu_request_irq(irq_handler_t handler)
{
	int err, irqs;
	struct platform_device *pmu_device = csky_pmu.plat_device;

	if (!pmu_device)
		return -ENODEV;

	irqs = min(pmu_device->num_resources, num_possible_cpus());
	if (irqs < 1) {
		pr_err("no irqs for PMUs defined\n");
		return -ENODEV;
	}

	csky_pmu_irq = platform_get_irq(pmu_device, 0);
	if (csky_pmu_irq < 0)
		return -ENODEV;
	err = request_percpu_irq(csky_pmu_irq, handler, "csky-pmu",
				 this_cpu_ptr(csky_pmu.hw_events));
	if (err) {
		pr_err("unable to request IRQ%d for CSKY PMU counters\n",
		       csky_pmu_irq);
		return err;
	}

	return 0;
}

static void csky_pmu_free_irq(void)
{
	int irq;
	struct platform_device *pmu_device = csky_pmu.plat_device;

	irq = platform_get_irq(pmu_device, 0);
	if (irq >= 0)
		free_percpu_irq(irq, this_cpu_ptr(csky_pmu.hw_events));
}

int init_hw_perf_events(void)
{
	csky_pmu.hw_events = alloc_percpu_gfp(struct pmu_hw_events,
					      GFP_KERNEL);
	if (!csky_pmu.hw_events) {
		pr_info("failed to allocate per-cpu PMU data.\n");
		return -ENOMEM;
	}

	csky_pmu.pmu = (struct pmu) {
		.pmu_enable	= csky_pmu_enable,
		.pmu_disable	= csky_pmu_disable,
		.event_init	= csky_pmu_event_init,
		.add		= csky_pmu_add,
		.del		= csky_pmu_del,
		.start		= csky_pmu_start,
		.stop		= csky_pmu_stop,
		.read		= csky_pmu_read,
	};

	memset((void *)hw_raw_read_mapping, 0, sizeof(hw_raw_read_mapping));

	hw_raw_read_mapping[0x1]  = csky_pmu_read_cc;
	hw_raw_read_mapping[0x2]  = csky_pmu_read_ic;
	hw_raw_read_mapping[0x3]  = csky_pmu_read_icac;
	hw_raw_read_mapping[0x4]  = csky_pmu_read_icmc;
	hw_raw_read_mapping[0x5]  = csky_pmu_read_dcac;
	hw_raw_read_mapping[0x6]  = csky_pmu_read_dcmc;
	hw_raw_read_mapping[0x7]  = csky_pmu_read_l2ac;
	hw_raw_read_mapping[0x8]  = csky_pmu_read_l2mc;
	hw_raw_read_mapping[0xa]  = csky_pmu_read_iutlbmc;
	hw_raw_read_mapping[0xb]  = csky_pmu_read_dutlbmc;
	hw_raw_read_mapping[0xc]  = csky_pmu_read_jtlbmc;
	hw_raw_read_mapping[0xd]  = csky_pmu_read_softc;
	hw_raw_read_mapping[0xe]  = csky_pmu_read_cbmc;
	hw_raw_read_mapping[0xf]  = csky_pmu_read_cbic;
	hw_raw_read_mapping[0x10] = csky_pmu_read_ibmc;
	hw_raw_read_mapping[0x11] = csky_pmu_read_ibic;
	hw_raw_read_mapping[0x12] = csky_pmu_read_lsfc;
	hw_raw_read_mapping[0x13] = csky_pmu_read_sic;
	hw_raw_read_mapping[0x14] = csky_pmu_read_dcrac;
	hw_raw_read_mapping[0x15] = csky_pmu_read_dcrmc;
	hw_raw_read_mapping[0x16] = csky_pmu_read_dcwac;
	hw_raw_read_mapping[0x17] = csky_pmu_read_dcwmc;
	hw_raw_read_mapping[0x18] = csky_pmu_read_l2rac;
	hw_raw_read_mapping[0x19] = csky_pmu_read_l2rmc;
	hw_raw_read_mapping[0x1a] = csky_pmu_read_l2wac;
	hw_raw_read_mapping[0x1b] = csky_pmu_read_l2wmc;

	memset((void *)hw_raw_write_mapping, 0, sizeof(hw_raw_write_mapping));

	hw_raw_write_mapping[0x1]  = csky_pmu_write_cc;
	hw_raw_write_mapping[0x2]  = csky_pmu_write_ic;
	hw_raw_write_mapping[0x3]  = csky_pmu_write_icac;
	hw_raw_write_mapping[0x4]  = csky_pmu_write_icmc;
	hw_raw_write_mapping[0x5]  = csky_pmu_write_dcac;
	hw_raw_write_mapping[0x6]  = csky_pmu_write_dcmc;
	hw_raw_write_mapping[0x7]  = csky_pmu_write_l2ac;
	hw_raw_write_mapping[0x8]  = csky_pmu_write_l2mc;
	hw_raw_write_mapping[0xa]  = csky_pmu_write_iutlbmc;
	hw_raw_write_mapping[0xb]  = csky_pmu_write_dutlbmc;
	hw_raw_write_mapping[0xc]  = csky_pmu_write_jtlbmc;
	hw_raw_write_mapping[0xd]  = csky_pmu_write_softc;
	hw_raw_write_mapping[0xe]  = csky_pmu_write_cbmc;
	hw_raw_write_mapping[0xf]  = csky_pmu_write_cbic;
	hw_raw_write_mapping[0x10] = csky_pmu_write_ibmc;
	hw_raw_write_mapping[0x11] = csky_pmu_write_ibic;
	hw_raw_write_mapping[0x12] = csky_pmu_write_lsfc;
	hw_raw_write_mapping[0x13] = csky_pmu_write_sic;
	hw_raw_write_mapping[0x14] = csky_pmu_write_dcrac;
	hw_raw_write_mapping[0x15] = csky_pmu_write_dcrmc;
	hw_raw_write_mapping[0x16] = csky_pmu_write_dcwac;
	hw_raw_write_mapping[0x17] = csky_pmu_write_dcwmc;
	hw_raw_write_mapping[0x18] = csky_pmu_write_l2rac;
	hw_raw_write_mapping[0x19] = csky_pmu_write_l2rmc;
	hw_raw_write_mapping[0x1a] = csky_pmu_write_l2wac;
	hw_raw_write_mapping[0x1b] = csky_pmu_write_l2wmc;

	return 0;
}

static int csky_pmu_starting_cpu(unsigned int cpu)
{
	enable_percpu_irq(csky_pmu_irq, 0);
	return 0;
}

static int csky_pmu_dying_cpu(unsigned int cpu)
{
	disable_percpu_irq(csky_pmu_irq);
	return 0;
}

int csky_pmu_device_probe(struct platform_device *pdev,
			  const struct of_device_id *of_table)
{
	struct device_node *node = pdev->dev.of_node;
	int ret;

	ret = init_hw_perf_events();
	if (ret) {
		pr_notice("[perf] failed to probe PMU!\n");
		return ret;
	}

	if (of_property_read_u32(node, "count-width",
				 &csky_pmu.count_width)) {
		csky_pmu.count_width = DEFAULT_COUNT_WIDTH;
	}
	csky_pmu.max_period = BIT_ULL(csky_pmu.count_width) - 1;

	csky_pmu.plat_device = pdev;

	/* Ensure the PMU has sane values out of reset. */
	on_each_cpu(csky_pmu_reset, &csky_pmu, 1);

	ret = csky_pmu_request_irq(csky_pmu_handle_irq);
	if (ret) {
		csky_pmu.pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
		pr_notice("[perf] PMU request irq fail!\n");
	}

	ret = cpuhp_setup_state(CPUHP_AP_PERF_CSKY_ONLINE, "AP_PERF_ONLINE",
				csky_pmu_starting_cpu,
				csky_pmu_dying_cpu);
	if (ret) {
		csky_pmu_free_irq();
		free_percpu(csky_pmu.hw_events);
		return ret;
	}

	ret = perf_pmu_register(&csky_pmu.pmu, "cpu", PERF_TYPE_RAW);
	if (ret) {
		csky_pmu_free_irq();
		free_percpu(csky_pmu.hw_events);
	}

	return ret;
}

static const struct of_device_id csky_pmu_of_device_ids[] = {
	{.compatible = "csky,csky-pmu"},
	{},
};

static int csky_pmu_dev_probe(struct platform_device *pdev)
{
	return csky_pmu_device_probe(pdev, csky_pmu_of_device_ids);
}

static struct platform_driver csky_pmu_driver = {
	.driver = {
		.name		= "csky-pmu",
		.of_match_table	= csky_pmu_of_device_ids,
	},
	.probe = csky_pmu_dev_probe,
};

static int __init csky_pmu_probe(void)
{
	int ret;

	ret = platform_driver_register(&csky_pmu_driver);
	if (ret)
		pr_notice("[perf] PMU initialization failed\n");
	else
		pr_notice("[perf] PMU initialization done\n");

	return ret;
}

device_initcall(csky_pmu_probe);
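
/*
 * Illustrative device tree node for this driver (a sketch, not from the
 * original source): the compatible string and the optional "count-width"
 * property match what csky_pmu_device_probe() reads above; the interrupt
 * specifier and parent are board specific and assumed here.
 *
 *	pmu: pmu {
 *		compatible = "csky,csky-pmu";
 *		interrupt-parent = <&intc>;
 *		interrupts = <23>;
 *		count-width = <48>;
 *	};
 *
 * If "count-width" is absent, the driver falls back to
 * DEFAULT_COUNT_WIDTH (48 bits), which bounds max_period.
 */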