radix_tlb.c (42543B)
1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * TLB flush routines for radix kernels. 4 * 5 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation. 6 */ 7 8#include <linux/mm.h> 9#include <linux/hugetlb.h> 10#include <linux/memblock.h> 11#include <linux/mmu_context.h> 12#include <linux/sched/mm.h> 13#include <linux/debugfs.h> 14 15#include <asm/ppc-opcode.h> 16#include <asm/tlb.h> 17#include <asm/tlbflush.h> 18#include <asm/trace.h> 19#include <asm/cputhreads.h> 20#include <asm/plpar_wrappers.h> 21 22#include "internal.h" 23 24/* 25 * tlbiel instruction for radix, set invalidation 26 * i.e., r=1 and is=01 or is=10 or is=11 27 */ 28static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is, 29 unsigned int pid, 30 unsigned int ric, unsigned int prs) 31{ 32 unsigned long rb; 33 unsigned long rs; 34 35 rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53)); 36 rs = ((unsigned long)pid << PPC_BITLSHIFT(31)); 37 38 asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1) 39 : : "r"(rb), "r"(rs), "i"(ric), "i"(prs) 40 : "memory"); 41} 42 43static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) 44{ 45 unsigned int set; 46 47 asm volatile("ptesync": : :"memory"); 48 49 /* 50 * Flush the first set of the TLB, and the entire Page Walk Cache 51 * and partition table entries. Then flush the remaining sets of the 52 * TLB. 53 */ 54 55 if (early_cpu_has_feature(CPU_FTR_HVMODE)) { 56 /* MSR[HV] should flush partition scope translations first. */ 57 tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0); 58 59 if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) { 60 for (set = 1; set < num_sets; set++) 61 tlbiel_radix_set_isa300(set, is, 0, 62 RIC_FLUSH_TLB, 0); 63 } 64 } 65 66 /* Flush process scoped entries. */ 67 tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1); 68 69 if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) { 70 for (set = 1; set < num_sets; set++) 71 tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1); 72 } 73 74 ppc_after_tlbiel_barrier(); 75} 76 77void radix__tlbiel_all(unsigned int action) 78{ 79 unsigned int is; 80 81 switch (action) { 82 case TLB_INVAL_SCOPE_GLOBAL: 83 is = 3; 84 break; 85 case TLB_INVAL_SCOPE_LPID: 86 is = 2; 87 break; 88 default: 89 BUG(); 90 } 91 92 if (early_cpu_has_feature(CPU_FTR_ARCH_300)) 93 tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is); 94 else 95 WARN(1, "%s called on pre-POWER9 CPU\n", __func__); 96 97 asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory"); 98} 99 100static __always_inline void __tlbiel_pid(unsigned long pid, int set, 101 unsigned long ric) 102{ 103 unsigned long rb,rs,prs,r; 104 105 rb = PPC_BIT(53); /* IS = 1 */ 106 rb |= set << PPC_BITLSHIFT(51); 107 rs = ((unsigned long)pid) << PPC_BITLSHIFT(31); 108 prs = 1; /* process scoped */ 109 r = 1; /* radix format */ 110 111 asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) 112 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 113 trace_tlbie(0, 1, rb, rs, ric, prs, r); 114} 115 116static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric) 117{ 118 unsigned long rb,rs,prs,r; 119 120 rb = PPC_BIT(53); /* IS = 1 */ 121 rs = pid << PPC_BITLSHIFT(31); 122 prs = 1; /* process scoped */ 123 r = 1; /* radix format */ 124 125 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 126 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 127 trace_tlbie(0, 0, rb, rs, ric, prs, r); 128} 129 130static __always_inline void __tlbie_pid_lpid(unsigned long pid, 131 unsigned long lpid, 132 unsigned long ric) 133{ 134 unsigned long rb, rs, prs, r; 135 136 rb = PPC_BIT(53); /* IS = 1 */ 137 rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); 138 prs = 1; /* process scoped */ 139 r = 1; /* radix format */ 140 141 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 142 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 143 trace_tlbie(0, 0, rb, rs, ric, prs, r); 144} 145static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric) 146{ 147 unsigned long rb,rs,prs,r; 148 149 rb = PPC_BIT(52); /* IS = 2 */ 150 rs = lpid; 151 prs = 0; /* partition scoped */ 152 r = 1; /* radix format */ 153 154 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 155 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 156 trace_tlbie(lpid, 0, rb, rs, ric, prs, r); 157} 158 159static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric) 160{ 161 unsigned long rb,rs,prs,r; 162 163 rb = PPC_BIT(52); /* IS = 2 */ 164 rs = lpid; 165 prs = 1; /* process scoped */ 166 r = 1; /* radix format */ 167 168 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 169 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 170 trace_tlbie(lpid, 0, rb, rs, ric, prs, r); 171} 172 173static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid, 174 unsigned long ap, unsigned long ric) 175{ 176 unsigned long rb,rs,prs,r; 177 178 rb = va & ~(PPC_BITMASK(52, 63)); 179 rb |= ap << PPC_BITLSHIFT(58); 180 rs = pid << PPC_BITLSHIFT(31); 181 prs = 1; /* process scoped */ 182 r = 1; /* radix format */ 183 184 asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) 185 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 186 trace_tlbie(0, 1, rb, rs, ric, prs, r); 187} 188 189static __always_inline void __tlbie_va(unsigned long va, unsigned long pid, 190 unsigned long ap, unsigned long ric) 191{ 192 unsigned long rb,rs,prs,r; 193 194 rb = va & ~(PPC_BITMASK(52, 63)); 195 rb |= ap << PPC_BITLSHIFT(58); 196 rs = pid << PPC_BITLSHIFT(31); 197 prs = 1; /* process scoped */ 198 r = 1; /* radix format */ 199 200 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 201 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 202 trace_tlbie(0, 0, rb, rs, ric, prs, r); 203} 204 205static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid, 206 unsigned long lpid, 207 unsigned long ap, unsigned long ric) 208{ 209 unsigned long rb, rs, prs, r; 210 211 rb = va & ~(PPC_BITMASK(52, 63)); 212 rb |= ap << PPC_BITLSHIFT(58); 213 rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); 214 prs = 1; /* process scoped */ 215 r = 1; /* radix format */ 216 217 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 218 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 219 trace_tlbie(0, 0, rb, rs, ric, prs, r); 220} 221 222static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid, 223 unsigned long ap, unsigned long ric) 224{ 225 unsigned long rb,rs,prs,r; 226 227 rb = va & ~(PPC_BITMASK(52, 63)); 228 rb |= ap << PPC_BITLSHIFT(58); 229 rs = lpid; 230 prs = 0; /* partition scoped */ 231 r = 1; /* radix format */ 232 233 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 234 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); 235 trace_tlbie(lpid, 0, rb, rs, ric, prs, r); 236} 237 238 239static inline void fixup_tlbie_va(unsigned long va, unsigned long pid, 240 unsigned long ap) 241{ 242 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 243 asm volatile("ptesync": : :"memory"); 244 __tlbie_va(va, 0, ap, RIC_FLUSH_TLB); 245 } 246 247 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 248 asm volatile("ptesync": : :"memory"); 249 __tlbie_va(va, pid, ap, RIC_FLUSH_TLB); 250 } 251} 252 253static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid, 254 unsigned long ap) 255{ 256 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 257 asm volatile("ptesync": : :"memory"); 258 __tlbie_pid(0, RIC_FLUSH_TLB); 259 } 260 261 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 262 asm volatile("ptesync": : :"memory"); 263 __tlbie_va(va, pid, ap, RIC_FLUSH_TLB); 264 } 265} 266 267static inline void fixup_tlbie_va_range_lpid(unsigned long va, 268 unsigned long pid, 269 unsigned long lpid, 270 unsigned long ap) 271{ 272 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 273 asm volatile("ptesync" : : : "memory"); 274 __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); 275 } 276 277 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 278 asm volatile("ptesync" : : : "memory"); 279 __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB); 280 } 281} 282 283static inline void fixup_tlbie_pid(unsigned long pid) 284{ 285 /* 286 * We can use any address for the invalidation, pick one which is 287 * probably unused as an optimisation. 288 */ 289 unsigned long va = ((1UL << 52) - 1); 290 291 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 292 asm volatile("ptesync": : :"memory"); 293 __tlbie_pid(0, RIC_FLUSH_TLB); 294 } 295 296 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 297 asm volatile("ptesync": : :"memory"); 298 __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); 299 } 300} 301 302static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid) 303{ 304 /* 305 * We can use any address for the invalidation, pick one which is 306 * probably unused as an optimisation. 307 */ 308 unsigned long va = ((1UL << 52) - 1); 309 310 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 311 asm volatile("ptesync" : : : "memory"); 312 __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); 313 } 314 315 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 316 asm volatile("ptesync" : : : "memory"); 317 __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K), 318 RIC_FLUSH_TLB); 319 } 320} 321 322static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid, 323 unsigned long ap) 324{ 325 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 326 asm volatile("ptesync": : :"memory"); 327 __tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB); 328 } 329 330 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 331 asm volatile("ptesync": : :"memory"); 332 __tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB); 333 } 334} 335 336static inline void fixup_tlbie_lpid(unsigned long lpid) 337{ 338 /* 339 * We can use any address for the invalidation, pick one which is 340 * probably unused as an optimisation. 341 */ 342 unsigned long va = ((1UL << 52) - 1); 343 344 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 345 asm volatile("ptesync": : :"memory"); 346 __tlbie_lpid(0, RIC_FLUSH_TLB); 347 } 348 349 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 350 asm volatile("ptesync": : :"memory"); 351 __tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); 352 } 353} 354 355/* 356 * We use 128 set in radix mode and 256 set in hpt mode. 357 */ 358static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) 359{ 360 int set; 361 362 asm volatile("ptesync": : :"memory"); 363 364 switch (ric) { 365 case RIC_FLUSH_PWC: 366 367 /* For PWC, only one flush is needed */ 368 __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); 369 ppc_after_tlbiel_barrier(); 370 return; 371 case RIC_FLUSH_TLB: 372 __tlbiel_pid(pid, 0, RIC_FLUSH_TLB); 373 break; 374 case RIC_FLUSH_ALL: 375 default: 376 /* 377 * Flush the first set of the TLB, and if 378 * we're doing a RIC_FLUSH_ALL, also flush 379 * the entire Page Walk Cache. 380 */ 381 __tlbiel_pid(pid, 0, RIC_FLUSH_ALL); 382 } 383 384 if (!cpu_has_feature(CPU_FTR_ARCH_31)) { 385 /* For the remaining sets, just flush the TLB */ 386 for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) 387 __tlbiel_pid(pid, set, RIC_FLUSH_TLB); 388 } 389 390 ppc_after_tlbiel_barrier(); 391 asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory"); 392} 393 394static inline void _tlbie_pid(unsigned long pid, unsigned long ric) 395{ 396 asm volatile("ptesync": : :"memory"); 397 398 /* 399 * Workaround the fact that the "ric" argument to __tlbie_pid 400 * must be a compile-time constraint to match the "i" constraint 401 * in the asm statement. 402 */ 403 switch (ric) { 404 case RIC_FLUSH_TLB: 405 __tlbie_pid(pid, RIC_FLUSH_TLB); 406 fixup_tlbie_pid(pid); 407 break; 408 case RIC_FLUSH_PWC: 409 __tlbie_pid(pid, RIC_FLUSH_PWC); 410 break; 411 case RIC_FLUSH_ALL: 412 default: 413 __tlbie_pid(pid, RIC_FLUSH_ALL); 414 fixup_tlbie_pid(pid); 415 } 416 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 417} 418 419static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid, 420 unsigned long ric) 421{ 422 asm volatile("ptesync" : : : "memory"); 423 424 /* 425 * Workaround the fact that the "ric" argument to __tlbie_pid 426 * must be a compile-time contraint to match the "i" constraint 427 * in the asm statement. 428 */ 429 switch (ric) { 430 case RIC_FLUSH_TLB: 431 __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); 432 fixup_tlbie_pid_lpid(pid, lpid); 433 break; 434 case RIC_FLUSH_PWC: 435 __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); 436 break; 437 case RIC_FLUSH_ALL: 438 default: 439 __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL); 440 fixup_tlbie_pid_lpid(pid, lpid); 441 } 442 asm volatile("eieio; tlbsync; ptesync" : : : "memory"); 443} 444struct tlbiel_pid { 445 unsigned long pid; 446 unsigned long ric; 447}; 448 449static void do_tlbiel_pid(void *info) 450{ 451 struct tlbiel_pid *t = info; 452 453 if (t->ric == RIC_FLUSH_TLB) 454 _tlbiel_pid(t->pid, RIC_FLUSH_TLB); 455 else if (t->ric == RIC_FLUSH_PWC) 456 _tlbiel_pid(t->pid, RIC_FLUSH_PWC); 457 else 458 _tlbiel_pid(t->pid, RIC_FLUSH_ALL); 459} 460 461static inline void _tlbiel_pid_multicast(struct mm_struct *mm, 462 unsigned long pid, unsigned long ric) 463{ 464 struct cpumask *cpus = mm_cpumask(mm); 465 struct tlbiel_pid t = { .pid = pid, .ric = ric }; 466 467 on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1); 468 /* 469 * Always want the CPU translations to be invalidated with tlbiel in 470 * these paths, so while coprocessors must use tlbie, we can not 471 * optimise away the tlbiel component. 472 */ 473 if (atomic_read(&mm->context.copros) > 0) 474 _tlbie_pid(pid, RIC_FLUSH_ALL); 475} 476 477static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric) 478{ 479 asm volatile("ptesync": : :"memory"); 480 481 /* 482 * Workaround the fact that the "ric" argument to __tlbie_pid 483 * must be a compile-time contraint to match the "i" constraint 484 * in the asm statement. 485 */ 486 switch (ric) { 487 case RIC_FLUSH_TLB: 488 __tlbie_lpid(lpid, RIC_FLUSH_TLB); 489 fixup_tlbie_lpid(lpid); 490 break; 491 case RIC_FLUSH_PWC: 492 __tlbie_lpid(lpid, RIC_FLUSH_PWC); 493 break; 494 case RIC_FLUSH_ALL: 495 default: 496 __tlbie_lpid(lpid, RIC_FLUSH_ALL); 497 fixup_tlbie_lpid(lpid); 498 } 499 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 500} 501 502static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric) 503{ 504 /* 505 * Workaround the fact that the "ric" argument to __tlbie_pid 506 * must be a compile-time contraint to match the "i" constraint 507 * in the asm statement. 508 */ 509 switch (ric) { 510 case RIC_FLUSH_TLB: 511 __tlbie_lpid_guest(lpid, RIC_FLUSH_TLB); 512 break; 513 case RIC_FLUSH_PWC: 514 __tlbie_lpid_guest(lpid, RIC_FLUSH_PWC); 515 break; 516 case RIC_FLUSH_ALL: 517 default: 518 __tlbie_lpid_guest(lpid, RIC_FLUSH_ALL); 519 } 520 fixup_tlbie_lpid(lpid); 521 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 522} 523 524static inline void __tlbiel_va_range(unsigned long start, unsigned long end, 525 unsigned long pid, unsigned long page_size, 526 unsigned long psize) 527{ 528 unsigned long addr; 529 unsigned long ap = mmu_get_ap(psize); 530 531 for (addr = start; addr < end; addr += page_size) 532 __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB); 533} 534 535static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid, 536 unsigned long psize, unsigned long ric) 537{ 538 unsigned long ap = mmu_get_ap(psize); 539 540 asm volatile("ptesync": : :"memory"); 541 __tlbiel_va(va, pid, ap, ric); 542 ppc_after_tlbiel_barrier(); 543} 544 545static inline void _tlbiel_va_range(unsigned long start, unsigned long end, 546 unsigned long pid, unsigned long page_size, 547 unsigned long psize, bool also_pwc) 548{ 549 asm volatile("ptesync": : :"memory"); 550 if (also_pwc) 551 __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); 552 __tlbiel_va_range(start, end, pid, page_size, psize); 553 ppc_after_tlbiel_barrier(); 554} 555 556static inline void __tlbie_va_range(unsigned long start, unsigned long end, 557 unsigned long pid, unsigned long page_size, 558 unsigned long psize) 559{ 560 unsigned long addr; 561 unsigned long ap = mmu_get_ap(psize); 562 563 for (addr = start; addr < end; addr += page_size) 564 __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB); 565 566 fixup_tlbie_va_range(addr - page_size, pid, ap); 567} 568 569static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end, 570 unsigned long pid, unsigned long lpid, 571 unsigned long page_size, 572 unsigned long psize) 573{ 574 unsigned long addr; 575 unsigned long ap = mmu_get_ap(psize); 576 577 for (addr = start; addr < end; addr += page_size) 578 __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB); 579 580 fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap); 581} 582 583static __always_inline void _tlbie_va(unsigned long va, unsigned long pid, 584 unsigned long psize, unsigned long ric) 585{ 586 unsigned long ap = mmu_get_ap(psize); 587 588 asm volatile("ptesync": : :"memory"); 589 __tlbie_va(va, pid, ap, ric); 590 fixup_tlbie_va(va, pid, ap); 591 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 592} 593 594struct tlbiel_va { 595 unsigned long pid; 596 unsigned long va; 597 unsigned long psize; 598 unsigned long ric; 599}; 600 601static void do_tlbiel_va(void *info) 602{ 603 struct tlbiel_va *t = info; 604 605 if (t->ric == RIC_FLUSH_TLB) 606 _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB); 607 else if (t->ric == RIC_FLUSH_PWC) 608 _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC); 609 else 610 _tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL); 611} 612 613static inline void _tlbiel_va_multicast(struct mm_struct *mm, 614 unsigned long va, unsigned long pid, 615 unsigned long psize, unsigned long ric) 616{ 617 struct cpumask *cpus = mm_cpumask(mm); 618 struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric }; 619 on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1); 620 if (atomic_read(&mm->context.copros) > 0) 621 _tlbie_va(va, pid, psize, RIC_FLUSH_TLB); 622} 623 624struct tlbiel_va_range { 625 unsigned long pid; 626 unsigned long start; 627 unsigned long end; 628 unsigned long page_size; 629 unsigned long psize; 630 bool also_pwc; 631}; 632 633static void do_tlbiel_va_range(void *info) 634{ 635 struct tlbiel_va_range *t = info; 636 637 _tlbiel_va_range(t->start, t->end, t->pid, t->page_size, 638 t->psize, t->also_pwc); 639} 640 641static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid, 642 unsigned long psize, unsigned long ric) 643{ 644 unsigned long ap = mmu_get_ap(psize); 645 646 asm volatile("ptesync": : :"memory"); 647 __tlbie_lpid_va(va, lpid, ap, ric); 648 fixup_tlbie_lpid_va(va, lpid, ap); 649 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 650} 651 652static inline void _tlbie_va_range(unsigned long start, unsigned long end, 653 unsigned long pid, unsigned long page_size, 654 unsigned long psize, bool also_pwc) 655{ 656 asm volatile("ptesync": : :"memory"); 657 if (also_pwc) 658 __tlbie_pid(pid, RIC_FLUSH_PWC); 659 __tlbie_va_range(start, end, pid, page_size, psize); 660 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 661} 662 663static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end, 664 unsigned long pid, unsigned long lpid, 665 unsigned long page_size, 666 unsigned long psize, bool also_pwc) 667{ 668 asm volatile("ptesync" : : : "memory"); 669 if (also_pwc) 670 __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); 671 __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize); 672 asm volatile("eieio; tlbsync; ptesync" : : : "memory"); 673} 674 675static inline void _tlbiel_va_range_multicast(struct mm_struct *mm, 676 unsigned long start, unsigned long end, 677 unsigned long pid, unsigned long page_size, 678 unsigned long psize, bool also_pwc) 679{ 680 struct cpumask *cpus = mm_cpumask(mm); 681 struct tlbiel_va_range t = { .start = start, .end = end, 682 .pid = pid, .page_size = page_size, 683 .psize = psize, .also_pwc = also_pwc }; 684 685 on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1); 686 if (atomic_read(&mm->context.copros) > 0) 687 _tlbie_va_range(start, end, pid, page_size, psize, also_pwc); 688} 689 690/* 691 * Base TLB flushing operations: 692 * 693 * - flush_tlb_mm(mm) flushes the specified mm context TLB's 694 * - flush_tlb_page(vma, vmaddr) flushes one page 695 * - flush_tlb_range(vma, start, end) flushes a range of pages 696 * - flush_tlb_kernel_range(start, end) flushes kernel pages 697 * 698 * - local_* variants of page and mm only apply to the current 699 * processor 700 */ 701void radix__local_flush_tlb_mm(struct mm_struct *mm) 702{ 703 unsigned long pid; 704 705 preempt_disable(); 706 pid = mm->context.id; 707 if (pid != MMU_NO_CONTEXT) 708 _tlbiel_pid(pid, RIC_FLUSH_TLB); 709 preempt_enable(); 710} 711EXPORT_SYMBOL(radix__local_flush_tlb_mm); 712 713#ifndef CONFIG_SMP 714void radix__local_flush_all_mm(struct mm_struct *mm) 715{ 716 unsigned long pid; 717 718 preempt_disable(); 719 pid = mm->context.id; 720 if (pid != MMU_NO_CONTEXT) 721 _tlbiel_pid(pid, RIC_FLUSH_ALL); 722 preempt_enable(); 723} 724EXPORT_SYMBOL(radix__local_flush_all_mm); 725 726static void __flush_all_mm(struct mm_struct *mm, bool fullmm) 727{ 728 radix__local_flush_all_mm(mm); 729} 730#endif /* CONFIG_SMP */ 731 732void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, 733 int psize) 734{ 735 unsigned long pid; 736 737 preempt_disable(); 738 pid = mm->context.id; 739 if (pid != MMU_NO_CONTEXT) 740 _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); 741 preempt_enable(); 742} 743 744void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) 745{ 746#ifdef CONFIG_HUGETLB_PAGE 747 /* need the return fix for nohash.c */ 748 if (is_vm_hugetlb_page(vma)) 749 return radix__local_flush_hugetlb_page(vma, vmaddr); 750#endif 751 radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize); 752} 753EXPORT_SYMBOL(radix__local_flush_tlb_page); 754 755static bool mm_needs_flush_escalation(struct mm_struct *mm) 756{ 757 /* 758 * P9 nest MMU has issues with the page walk cache 759 * caching PTEs and not flushing them properly when 760 * RIC = 0 for a PID/LPID invalidate 761 */ 762 if (atomic_read(&mm->context.copros) > 0) 763 return true; 764 return false; 765} 766 767/* 768 * If always_flush is true, then flush even if this CPU can't be removed 769 * from mm_cpumask. 770 */ 771void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush) 772{ 773 unsigned long pid = mm->context.id; 774 int cpu = smp_processor_id(); 775 776 /* 777 * A kthread could have done a mmget_not_zero() after the flushing CPU 778 * checked mm_cpumask, and be in the process of kthread_use_mm when 779 * interrupted here. In that case, current->mm will be set to mm, 780 * because kthread_use_mm() setting ->mm and switching to the mm is 781 * done with interrupts off. 782 */ 783 if (current->mm == mm) 784 goto out; 785 786 if (current->active_mm == mm) { 787 WARN_ON_ONCE(current->mm != NULL); 788 /* Is a kernel thread and is using mm as the lazy tlb */ 789 mmgrab(&init_mm); 790 current->active_mm = &init_mm; 791 switch_mm_irqs_off(mm, &init_mm, current); 792 mmdrop(mm); 793 } 794 795 /* 796 * This IPI may be initiated from any source including those not 797 * running the mm, so there may be a racing IPI that comes after 798 * this one which finds the cpumask already clear. Check and avoid 799 * underflowing the active_cpus count in that case. The race should 800 * not otherwise be a problem, but the TLB must be flushed because 801 * that's what the caller expects. 802 */ 803 if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { 804 atomic_dec(&mm->context.active_cpus); 805 cpumask_clear_cpu(cpu, mm_cpumask(mm)); 806 always_flush = true; 807 } 808 809out: 810 if (always_flush) 811 _tlbiel_pid(pid, RIC_FLUSH_ALL); 812} 813 814#ifdef CONFIG_SMP 815static void do_exit_flush_lazy_tlb(void *arg) 816{ 817 struct mm_struct *mm = arg; 818 exit_lazy_flush_tlb(mm, true); 819} 820 821static void exit_flush_lazy_tlbs(struct mm_struct *mm) 822{ 823 /* 824 * Would be nice if this was async so it could be run in 825 * parallel with our local flush, but generic code does not 826 * give a good API for it. Could extend the generic code or 827 * make a special powerpc IPI for flushing TLBs. 828 * For now it's not too performance critical. 829 */ 830 smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb, 831 (void *)mm, 1); 832} 833 834#else /* CONFIG_SMP */ 835static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { } 836#endif /* CONFIG_SMP */ 837 838static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock); 839 840/* 841 * Interval between flushes at which we send out IPIs to check whether the 842 * mm_cpumask can be trimmed for the case where it's not a single-threaded 843 * process flushing its own mm. The intent is to reduce the cost of later 844 * flushes. Don't want this to be so low that it adds noticable cost to TLB 845 * flushing, or so high that it doesn't help reduce global TLBIEs. 846 */ 847static unsigned long tlb_mm_cpumask_trim_timer = 1073; 848 849static bool tick_and_test_trim_clock(void) 850{ 851 if (__this_cpu_inc_return(mm_cpumask_trim_clock) == 852 tlb_mm_cpumask_trim_timer) { 853 __this_cpu_write(mm_cpumask_trim_clock, 0); 854 return true; 855 } 856 return false; 857} 858 859enum tlb_flush_type { 860 FLUSH_TYPE_NONE, 861 FLUSH_TYPE_LOCAL, 862 FLUSH_TYPE_GLOBAL, 863}; 864 865static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm) 866{ 867 int active_cpus = atomic_read(&mm->context.active_cpus); 868 int cpu = smp_processor_id(); 869 870 if (active_cpus == 0) 871 return FLUSH_TYPE_NONE; 872 if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) { 873 if (current->mm != mm) { 874 /* 875 * Asynchronous flush sources may trim down to nothing 876 * if the process is not running, so occasionally try 877 * to trim. 878 */ 879 if (tick_and_test_trim_clock()) { 880 exit_lazy_flush_tlb(mm, true); 881 return FLUSH_TYPE_NONE; 882 } 883 } 884 return FLUSH_TYPE_LOCAL; 885 } 886 887 /* Coprocessors require TLBIE to invalidate nMMU. */ 888 if (atomic_read(&mm->context.copros) > 0) 889 return FLUSH_TYPE_GLOBAL; 890 891 /* 892 * In the fullmm case there's no point doing the exit_flush_lazy_tlbs 893 * because the mm is being taken down anyway, and a TLBIE tends to 894 * be faster than an IPI+TLBIEL. 895 */ 896 if (fullmm) 897 return FLUSH_TYPE_GLOBAL; 898 899 /* 900 * If we are running the only thread of a single-threaded process, 901 * then we should almost always be able to trim off the rest of the 902 * CPU mask (except in the case of use_mm() races), so always try 903 * trimming the mask. 904 */ 905 if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) { 906 exit_flush_lazy_tlbs(mm); 907 /* 908 * use_mm() race could prevent IPIs from being able to clear 909 * the cpumask here, however those users are established 910 * after our first check (and so after the PTEs are removed), 911 * and the TLB still gets flushed by the IPI, so this CPU 912 * will only require a local flush. 913 */ 914 return FLUSH_TYPE_LOCAL; 915 } 916 917 /* 918 * Occasionally try to trim down the cpumask. It's possible this can 919 * bring the mask to zero, which results in no flush. 920 */ 921 if (tick_and_test_trim_clock()) { 922 exit_flush_lazy_tlbs(mm); 923 if (current->mm == mm) 924 return FLUSH_TYPE_LOCAL; 925 if (cpumask_test_cpu(cpu, mm_cpumask(mm))) 926 exit_lazy_flush_tlb(mm, true); 927 return FLUSH_TYPE_NONE; 928 } 929 930 return FLUSH_TYPE_GLOBAL; 931} 932 933#ifdef CONFIG_SMP 934void radix__flush_tlb_mm(struct mm_struct *mm) 935{ 936 unsigned long pid; 937 enum tlb_flush_type type; 938 939 pid = mm->context.id; 940 if (unlikely(pid == MMU_NO_CONTEXT)) 941 return; 942 943 preempt_disable(); 944 /* 945 * Order loads of mm_cpumask (in flush_type_needed) vs previous 946 * stores to clear ptes before the invalidate. See barrier in 947 * switch_mm_irqs_off 948 */ 949 smp_mb(); 950 type = flush_type_needed(mm, false); 951 if (type == FLUSH_TYPE_LOCAL) { 952 _tlbiel_pid(pid, RIC_FLUSH_TLB); 953 } else if (type == FLUSH_TYPE_GLOBAL) { 954 if (!mmu_has_feature(MMU_FTR_GTSE)) { 955 unsigned long tgt = H_RPTI_TARGET_CMMU; 956 957 if (atomic_read(&mm->context.copros) > 0) 958 tgt |= H_RPTI_TARGET_NMMU; 959 pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, 960 H_RPTI_PAGE_ALL, 0, -1UL); 961 } else if (cputlb_use_tlbie()) { 962 if (mm_needs_flush_escalation(mm)) 963 _tlbie_pid(pid, RIC_FLUSH_ALL); 964 else 965 _tlbie_pid(pid, RIC_FLUSH_TLB); 966 } else { 967 _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB); 968 } 969 } 970 preempt_enable(); 971} 972EXPORT_SYMBOL(radix__flush_tlb_mm); 973 974static void __flush_all_mm(struct mm_struct *mm, bool fullmm) 975{ 976 unsigned long pid; 977 enum tlb_flush_type type; 978 979 pid = mm->context.id; 980 if (unlikely(pid == MMU_NO_CONTEXT)) 981 return; 982 983 preempt_disable(); 984 smp_mb(); /* see radix__flush_tlb_mm */ 985 type = flush_type_needed(mm, fullmm); 986 if (type == FLUSH_TYPE_LOCAL) { 987 _tlbiel_pid(pid, RIC_FLUSH_ALL); 988 } else if (type == FLUSH_TYPE_GLOBAL) { 989 if (!mmu_has_feature(MMU_FTR_GTSE)) { 990 unsigned long tgt = H_RPTI_TARGET_CMMU; 991 unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | 992 H_RPTI_TYPE_PRT; 993 994 if (atomic_read(&mm->context.copros) > 0) 995 tgt |= H_RPTI_TARGET_NMMU; 996 pseries_rpt_invalidate(pid, tgt, type, 997 H_RPTI_PAGE_ALL, 0, -1UL); 998 } else if (cputlb_use_tlbie()) 999 _tlbie_pid(pid, RIC_FLUSH_ALL); 1000 else 1001 _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL); 1002 } 1003 preempt_enable(); 1004} 1005 1006void radix__flush_all_mm(struct mm_struct *mm) 1007{ 1008 __flush_all_mm(mm, false); 1009} 1010EXPORT_SYMBOL(radix__flush_all_mm); 1011 1012void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, 1013 int psize) 1014{ 1015 unsigned long pid; 1016 enum tlb_flush_type type; 1017 1018 pid = mm->context.id; 1019 if (unlikely(pid == MMU_NO_CONTEXT)) 1020 return; 1021 1022 preempt_disable(); 1023 smp_mb(); /* see radix__flush_tlb_mm */ 1024 type = flush_type_needed(mm, false); 1025 if (type == FLUSH_TYPE_LOCAL) { 1026 _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); 1027 } else if (type == FLUSH_TYPE_GLOBAL) { 1028 if (!mmu_has_feature(MMU_FTR_GTSE)) { 1029 unsigned long tgt, pg_sizes, size; 1030 1031 tgt = H_RPTI_TARGET_CMMU; 1032 pg_sizes = psize_to_rpti_pgsize(psize); 1033 size = 1UL << mmu_psize_to_shift(psize); 1034 1035 if (atomic_read(&mm->context.copros) > 0) 1036 tgt |= H_RPTI_TARGET_NMMU; 1037 pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, 1038 pg_sizes, vmaddr, 1039 vmaddr + size); 1040 } else if (cputlb_use_tlbie()) 1041 _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB); 1042 else 1043 _tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB); 1044 } 1045 preempt_enable(); 1046} 1047 1048void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) 1049{ 1050#ifdef CONFIG_HUGETLB_PAGE 1051 if (is_vm_hugetlb_page(vma)) 1052 return radix__flush_hugetlb_page(vma, vmaddr); 1053#endif 1054 radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize); 1055} 1056EXPORT_SYMBOL(radix__flush_tlb_page); 1057 1058#endif /* CONFIG_SMP */ 1059 1060static void do_tlbiel_kernel(void *info) 1061{ 1062 _tlbiel_pid(0, RIC_FLUSH_ALL); 1063} 1064 1065static inline void _tlbiel_kernel_broadcast(void) 1066{ 1067 on_each_cpu(do_tlbiel_kernel, NULL, 1); 1068 if (tlbie_capable) { 1069 /* 1070 * Coherent accelerators don't refcount kernel memory mappings, 1071 * so have to always issue a tlbie for them. This is quite a 1072 * slow path anyway. 1073 */ 1074 _tlbie_pid(0, RIC_FLUSH_ALL); 1075 } 1076} 1077 1078/* 1079 * If kernel TLBIs ever become local rather than global, then 1080 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it 1081 * assumes kernel TLBIs are global. 1082 */ 1083void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) 1084{ 1085 if (!mmu_has_feature(MMU_FTR_GTSE)) { 1086 unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU; 1087 unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | 1088 H_RPTI_TYPE_PRT; 1089 1090 pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL, 1091 start, end); 1092 } else if (cputlb_use_tlbie()) 1093 _tlbie_pid(0, RIC_FLUSH_ALL); 1094 else 1095 _tlbiel_kernel_broadcast(); 1096} 1097EXPORT_SYMBOL(radix__flush_tlb_kernel_range); 1098 1099#define TLB_FLUSH_ALL -1UL 1100 1101/* 1102 * Number of pages above which we invalidate the entire PID rather than 1103 * flush individual pages, for local and global flushes respectively. 1104 * 1105 * tlbie goes out to the interconnect and individual ops are more costly. 1106 * It also does not iterate over sets like the local tlbiel variant when 1107 * invalidating a full PID, so it has a far lower threshold to change from 1108 * individual page flushes to full-pid flushes. 1109 */ 1110static u32 tlb_single_page_flush_ceiling __read_mostly = 33; 1111static u32 tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2; 1112 1113static inline void __radix__flush_tlb_range(struct mm_struct *mm, 1114 unsigned long start, unsigned long end) 1115{ 1116 unsigned long pid; 1117 unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift; 1118 unsigned long page_size = 1UL << page_shift; 1119 unsigned long nr_pages = (end - start) >> page_shift; 1120 bool fullmm = (end == TLB_FLUSH_ALL); 1121 bool flush_pid, flush_pwc = false; 1122 enum tlb_flush_type type; 1123 1124 pid = mm->context.id; 1125 if (unlikely(pid == MMU_NO_CONTEXT)) 1126 return; 1127 1128 preempt_disable(); 1129 smp_mb(); /* see radix__flush_tlb_mm */ 1130 type = flush_type_needed(mm, fullmm); 1131 if (type == FLUSH_TYPE_NONE) 1132 goto out; 1133 1134 if (fullmm) 1135 flush_pid = true; 1136 else if (type == FLUSH_TYPE_GLOBAL) 1137 flush_pid = nr_pages > tlb_single_page_flush_ceiling; 1138 else 1139 flush_pid = nr_pages > tlb_local_single_page_flush_ceiling; 1140 /* 1141 * full pid flush already does the PWC flush. if it is not full pid 1142 * flush check the range is more than PMD and force a pwc flush 1143 * mremap() depends on this behaviour. 1144 */ 1145 if (!flush_pid && (end - start) >= PMD_SIZE) 1146 flush_pwc = true; 1147 1148 if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) { 1149 unsigned long type = H_RPTI_TYPE_TLB; 1150 unsigned long tgt = H_RPTI_TARGET_CMMU; 1151 unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize); 1152 1153 if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) 1154 pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M); 1155 if (atomic_read(&mm->context.copros) > 0) 1156 tgt |= H_RPTI_TARGET_NMMU; 1157 if (flush_pwc) 1158 type |= H_RPTI_TYPE_PWC; 1159 pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end); 1160 } else if (flush_pid) { 1161 /* 1162 * We are now flushing a range larger than PMD size force a RIC_FLUSH_ALL 1163 */ 1164 if (type == FLUSH_TYPE_LOCAL) { 1165 _tlbiel_pid(pid, RIC_FLUSH_ALL); 1166 } else { 1167 if (cputlb_use_tlbie()) { 1168 _tlbie_pid(pid, RIC_FLUSH_ALL); 1169 } else { 1170 _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL); 1171 } 1172 } 1173 } else { 1174 bool hflush = false; 1175 unsigned long hstart, hend; 1176 1177 if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { 1178 hstart = (start + PMD_SIZE - 1) & PMD_MASK; 1179 hend = end & PMD_MASK; 1180 if (hstart < hend) 1181 hflush = true; 1182 } 1183 1184 if (type == FLUSH_TYPE_LOCAL) { 1185 asm volatile("ptesync": : :"memory"); 1186 if (flush_pwc) 1187 /* For PWC, only one flush is needed */ 1188 __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); 1189 __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize); 1190 if (hflush) 1191 __tlbiel_va_range(hstart, hend, pid, 1192 PMD_SIZE, MMU_PAGE_2M); 1193 ppc_after_tlbiel_barrier(); 1194 } else if (cputlb_use_tlbie()) { 1195 asm volatile("ptesync": : :"memory"); 1196 if (flush_pwc) 1197 __tlbie_pid(pid, RIC_FLUSH_PWC); 1198 __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize); 1199 if (hflush) 1200 __tlbie_va_range(hstart, hend, pid, 1201 PMD_SIZE, MMU_PAGE_2M); 1202 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 1203 } else { 1204 _tlbiel_va_range_multicast(mm, 1205 start, end, pid, page_size, mmu_virtual_psize, flush_pwc); 1206 if (hflush) 1207 _tlbiel_va_range_multicast(mm, 1208 hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, flush_pwc); 1209 } 1210 } 1211out: 1212 preempt_enable(); 1213} 1214 1215void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, 1216 unsigned long end) 1217 1218{ 1219#ifdef CONFIG_HUGETLB_PAGE 1220 if (is_vm_hugetlb_page(vma)) 1221 return radix__flush_hugetlb_tlb_range(vma, start, end); 1222#endif 1223 1224 __radix__flush_tlb_range(vma->vm_mm, start, end); 1225} 1226EXPORT_SYMBOL(radix__flush_tlb_range); 1227 1228static int radix_get_mmu_psize(int page_size) 1229{ 1230 int psize; 1231 1232 if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift)) 1233 psize = mmu_virtual_psize; 1234 else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift)) 1235 psize = MMU_PAGE_2M; 1236 else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift)) 1237 psize = MMU_PAGE_1G; 1238 else 1239 return -1; 1240 return psize; 1241} 1242 1243/* 1244 * Flush partition scoped LPID address translation for all CPUs. 1245 */ 1246void radix__flush_tlb_lpid_page(unsigned int lpid, 1247 unsigned long addr, 1248 unsigned long page_size) 1249{ 1250 int psize = radix_get_mmu_psize(page_size); 1251 1252 _tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB); 1253} 1254EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page); 1255 1256/* 1257 * Flush partition scoped PWC from LPID for all CPUs. 1258 */ 1259void radix__flush_pwc_lpid(unsigned int lpid) 1260{ 1261 _tlbie_lpid(lpid, RIC_FLUSH_PWC); 1262} 1263EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid); 1264 1265/* 1266 * Flush partition scoped translations from LPID (=LPIDR) 1267 */ 1268void radix__flush_all_lpid(unsigned int lpid) 1269{ 1270 _tlbie_lpid(lpid, RIC_FLUSH_ALL); 1271} 1272EXPORT_SYMBOL_GPL(radix__flush_all_lpid); 1273 1274/* 1275 * Flush process scoped translations from LPID (=LPIDR) 1276 */ 1277void radix__flush_all_lpid_guest(unsigned int lpid) 1278{ 1279 _tlbie_lpid_guest(lpid, RIC_FLUSH_ALL); 1280} 1281 1282void radix__tlb_flush(struct mmu_gather *tlb) 1283{ 1284 int psize = 0; 1285 struct mm_struct *mm = tlb->mm; 1286 int page_size = tlb->page_size; 1287 unsigned long start = tlb->start; 1288 unsigned long end = tlb->end; 1289 1290 /* 1291 * if page size is not something we understand, do a full mm flush 1292 * 1293 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush 1294 * that flushes the process table entry cache upon process teardown. 1295 * See the comment for radix in arch_exit_mmap(). 1296 */ 1297 if (tlb->fullmm || tlb->need_flush_all) { 1298 __flush_all_mm(mm, true); 1299 } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) { 1300 if (!tlb->freed_tables) 1301 radix__flush_tlb_mm(mm); 1302 else 1303 radix__flush_all_mm(mm); 1304 } else { 1305 if (!tlb->freed_tables) 1306 radix__flush_tlb_range_psize(mm, start, end, psize); 1307 else 1308 radix__flush_tlb_pwc_range_psize(mm, start, end, psize); 1309 } 1310} 1311 1312static void __radix__flush_tlb_range_psize(struct mm_struct *mm, 1313 unsigned long start, unsigned long end, 1314 int psize, bool also_pwc) 1315{ 1316 unsigned long pid; 1317 unsigned int page_shift = mmu_psize_defs[psize].shift; 1318 unsigned long page_size = 1UL << page_shift; 1319 unsigned long nr_pages = (end - start) >> page_shift; 1320 bool fullmm = (end == TLB_FLUSH_ALL); 1321 bool flush_pid; 1322 enum tlb_flush_type type; 1323 1324 pid = mm->context.id; 1325 if (unlikely(pid == MMU_NO_CONTEXT)) 1326 return; 1327 1328 fullmm = (end == TLB_FLUSH_ALL); 1329 1330 preempt_disable(); 1331 smp_mb(); /* see radix__flush_tlb_mm */ 1332 type = flush_type_needed(mm, fullmm); 1333 if (type == FLUSH_TYPE_NONE) 1334 goto out; 1335 1336 if (fullmm) 1337 flush_pid = true; 1338 else if (type == FLUSH_TYPE_GLOBAL) 1339 flush_pid = nr_pages > tlb_single_page_flush_ceiling; 1340 else 1341 flush_pid = nr_pages > tlb_local_single_page_flush_ceiling; 1342 1343 if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) { 1344 unsigned long tgt = H_RPTI_TARGET_CMMU; 1345 unsigned long type = H_RPTI_TYPE_TLB; 1346 unsigned long pg_sizes = psize_to_rpti_pgsize(psize); 1347 1348 if (also_pwc) 1349 type |= H_RPTI_TYPE_PWC; 1350 if (atomic_read(&mm->context.copros) > 0) 1351 tgt |= H_RPTI_TARGET_NMMU; 1352 pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end); 1353 } else if (flush_pid) { 1354 if (type == FLUSH_TYPE_LOCAL) { 1355 _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); 1356 } else { 1357 if (cputlb_use_tlbie()) { 1358 if (mm_needs_flush_escalation(mm)) 1359 also_pwc = true; 1360 1361 _tlbie_pid(pid, 1362 also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); 1363 } else { 1364 _tlbiel_pid_multicast(mm, pid, 1365 also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); 1366 } 1367 1368 } 1369 } else { 1370 if (type == FLUSH_TYPE_LOCAL) 1371 _tlbiel_va_range(start, end, pid, page_size, psize, also_pwc); 1372 else if (cputlb_use_tlbie()) 1373 _tlbie_va_range(start, end, pid, page_size, psize, also_pwc); 1374 else 1375 _tlbiel_va_range_multicast(mm, 1376 start, end, pid, page_size, psize, also_pwc); 1377 } 1378out: 1379 preempt_enable(); 1380} 1381 1382void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, 1383 unsigned long end, int psize) 1384{ 1385 return __radix__flush_tlb_range_psize(mm, start, end, psize, false); 1386} 1387 1388void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start, 1389 unsigned long end, int psize) 1390{ 1391 __radix__flush_tlb_range_psize(mm, start, end, psize, true); 1392} 1393 1394#ifdef CONFIG_TRANSPARENT_HUGEPAGE 1395void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) 1396{ 1397 unsigned long pid, end; 1398 enum tlb_flush_type type; 1399 1400 pid = mm->context.id; 1401 if (unlikely(pid == MMU_NO_CONTEXT)) 1402 return; 1403 1404 /* 4k page size, just blow the world */ 1405 if (PAGE_SIZE == 0x1000) { 1406 radix__flush_all_mm(mm); 1407 return; 1408 } 1409 1410 end = addr + HPAGE_PMD_SIZE; 1411 1412 /* Otherwise first do the PWC, then iterate the pages. */ 1413 preempt_disable(); 1414 smp_mb(); /* see radix__flush_tlb_mm */ 1415 type = flush_type_needed(mm, false); 1416 if (type == FLUSH_TYPE_LOCAL) { 1417 _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); 1418 } else if (type == FLUSH_TYPE_GLOBAL) { 1419 if (!mmu_has_feature(MMU_FTR_GTSE)) { 1420 unsigned long tgt, type, pg_sizes; 1421 1422 tgt = H_RPTI_TARGET_CMMU; 1423 type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | 1424 H_RPTI_TYPE_PRT; 1425 pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize); 1426 1427 if (atomic_read(&mm->context.copros) > 0) 1428 tgt |= H_RPTI_TARGET_NMMU; 1429 pseries_rpt_invalidate(pid, tgt, type, pg_sizes, 1430 addr, end); 1431 } else if (cputlb_use_tlbie()) 1432 _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); 1433 else 1434 _tlbiel_va_range_multicast(mm, 1435 addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); 1436 } 1437 1438 preempt_enable(); 1439} 1440#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ 1441 1442void radix__flush_pmd_tlb_range(struct vm_area_struct *vma, 1443 unsigned long start, unsigned long end) 1444{ 1445 radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M); 1446} 1447EXPORT_SYMBOL(radix__flush_pmd_tlb_range); 1448 1449void radix__flush_tlb_all(void) 1450{ 1451 unsigned long rb,prs,r,rs; 1452 unsigned long ric = RIC_FLUSH_ALL; 1453 1454 rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */ 1455 prs = 0; /* partition scoped */ 1456 r = 1; /* radix format */ 1457 rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */ 1458 1459 asm volatile("ptesync": : :"memory"); 1460 /* 1461 * now flush guest entries by passing PRS = 1 and LPID != 0 1462 */ 1463 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 1464 : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory"); 1465 /* 1466 * now flush host entires by passing PRS = 0 and LPID == 0 1467 */ 1468 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 1469 : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory"); 1470 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 1471} 1472 1473#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 1474/* 1475 * Performs process-scoped invalidations for a given LPID 1476 * as part of H_RPT_INVALIDATE hcall. 1477 */ 1478void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid, 1479 unsigned long type, unsigned long pg_sizes, 1480 unsigned long start, unsigned long end) 1481{ 1482 unsigned long psize, nr_pages; 1483 struct mmu_psize_def *def; 1484 bool flush_pid; 1485 1486 /* 1487 * A H_RPTI_TYPE_ALL request implies RIC=3, hence 1488 * do a single IS=1 based flush. 1489 */ 1490 if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) { 1491 _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL); 1492 return; 1493 } 1494 1495 if (type & H_RPTI_TYPE_PWC) 1496 _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); 1497 1498 /* Full PID flush */ 1499 if (start == 0 && end == -1) 1500 return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); 1501 1502 /* Do range invalidation for all the valid page sizes */ 1503 for (psize = 0; psize < MMU_PAGE_COUNT; psize++) { 1504 def = &mmu_psize_defs[psize]; 1505 if (!(pg_sizes & def->h_rpt_pgsize)) 1506 continue; 1507 1508 nr_pages = (end - start) >> def->shift; 1509 flush_pid = nr_pages > tlb_single_page_flush_ceiling; 1510 1511 /* 1512 * If the number of pages spanning the range is above 1513 * the ceiling, convert the request into a full PID flush. 1514 * And since PID flush takes out all the page sizes, there 1515 * is no need to consider remaining page sizes. 1516 */ 1517 if (flush_pid) { 1518 _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); 1519 return; 1520 } 1521 _tlbie_va_range_lpid(start, end, pid, lpid, 1522 (1UL << def->shift), psize, false); 1523 } 1524} 1525EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt); 1526 1527#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 1528 1529static int __init create_tlb_single_page_flush_ceiling(void) 1530{ 1531 debugfs_create_u32("tlb_single_page_flush_ceiling", 0600, 1532 arch_debugfs_dir, &tlb_single_page_flush_ceiling); 1533 debugfs_create_u32("tlb_local_single_page_flush_ceiling", 0600, 1534 arch_debugfs_dir, &tlb_local_single_page_flush_ceiling); 1535 return 0; 1536} 1537late_initcall(create_tlb_single_page_flush_ceiling); 1538