mlock.c
// SPDX-License-Identifier: GPL-2.0
/*
 *	linux/mm/mlock.c
 *
 *  (C) Copyright 1995 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 */

#include <linux/capability.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/sched/user.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/pagewalk.h>
#include <linux/mempolicy.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
#include <linux/export.h>
#include <linux/rmap.h>
#include <linux/mmzone.h>
#include <linux/hugetlb.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/secretmem.h>

#include "internal.h"

struct mlock_pvec {
	local_lock_t lock;
	struct pagevec vec;
};

static DEFINE_PER_CPU(struct mlock_pvec, mlock_pvec) = {
	.lock = INIT_LOCAL_LOCK(lock),
};

bool can_do_mlock(void)
{
	if (rlimit(RLIMIT_MEMLOCK) != 0)
		return true;
	if (capable(CAP_IPC_LOCK))
		return true;
	return false;
}
EXPORT_SYMBOL(can_do_mlock);

/*
 * Mlocked pages are marked with PageMlocked() flag for efficient testing
 * in vmscan and, possibly, the fault path; and to support semi-accurate
 * statistics.
 *
 * An mlocked page [PageMlocked(page)] is unevictable.  As such, it will
 * be placed on the LRU "unevictable" list, rather than the [in]active lists.
 * The unevictable list is an LRU sibling list to the [in]active lists.
 * PageUnevictable is set to indicate the unevictable state.
 */

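/*
 * __mlock_page(), __mlock_new_page() and __munlock_page() below do the
 * actual LRU and mlock_count work for one page.  They are called only
 * from mlock_pagevec(), with a reference on the page held by the
 * pagevec, and take (or re-take) the lruvec lock themselves via
 * folio_lruvec_relock_irq().
 */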
static struct lruvec *__mlock_page(struct page *page, struct lruvec *lruvec)
{
	/* There is nothing more we can do while it's off LRU */
	if (!TestClearPageLRU(page))
		return lruvec;

	lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec);

	if (unlikely(page_evictable(page))) {
		/*
		 * This is a little surprising, but quite possible:
		 * PageMlocked must have got cleared already by another CPU.
		 * Could this page be on the Unevictable LRU?  I'm not sure,
		 * but move it now if so.
		 */
		if (PageUnevictable(page)) {
			del_page_from_lru_list(page, lruvec);
			ClearPageUnevictable(page);
			add_page_to_lru_list(page, lruvec);
			__count_vm_events(UNEVICTABLE_PGRESCUED,
					  thp_nr_pages(page));
		}
		goto out;
	}

	if (PageUnevictable(page)) {
		if (PageMlocked(page))
			page->mlock_count++;
		goto out;
	}

	del_page_from_lru_list(page, lruvec);
	ClearPageActive(page);
	SetPageUnevictable(page);
	page->mlock_count = !!PageMlocked(page);
	add_page_to_lru_list(page, lruvec);
	__count_vm_events(UNEVICTABLE_PGCULLED, thp_nr_pages(page));
out:
	SetPageLRU(page);
	return lruvec;
}

static struct lruvec *__mlock_new_page(struct page *page, struct lruvec *lruvec)
{
	VM_BUG_ON_PAGE(PageLRU(page), page);

	lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec);

	/* As above, this is a little surprising, but possible */
	if (unlikely(page_evictable(page)))
		goto out;

	SetPageUnevictable(page);
	page->mlock_count = !!PageMlocked(page);
	__count_vm_events(UNEVICTABLE_PGCULLED, thp_nr_pages(page));
out:
	add_page_to_lru_list(page, lruvec);
	SetPageLRU(page);
	return lruvec;
}

static struct lruvec *__munlock_page(struct page *page, struct lruvec *lruvec)
{
	int nr_pages = thp_nr_pages(page);
	bool isolated = false;

	if (!TestClearPageLRU(page))
		goto munlock;

	isolated = true;
	lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec);

	if (PageUnevictable(page)) {
		/* Then mlock_count is maintained, but might undercount */
		if (page->mlock_count)
			page->mlock_count--;
		if (page->mlock_count)
			goto out;
	}
	/* else assume that was the last mlock: reclaim will fix it if not */

munlock:
	if (TestClearPageMlocked(page)) {
		__mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
		if (isolated || !PageUnevictable(page))
			__count_vm_events(UNEVICTABLE_PGMUNLOCKED, nr_pages);
		else
			__count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
	}

	/* page_evictable() has to be checked *after* clearing Mlocked */
	if (isolated && PageUnevictable(page) && page_evictable(page)) {
		del_page_from_lru_list(page, lruvec);
		ClearPageUnevictable(page);
		add_page_to_lru_list(page, lruvec);
		__count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages);
	}
out:
	if (isolated)
		SetPageLRU(page);
	return lruvec;
}

/*
 * Flags held in the low bits of a struct page pointer on the mlock_pvec.
 */
#define LRU_PAGE 0x1
#define NEW_PAGE 0x2
static inline struct page *mlock_lru(struct page *page)
{
	return (struct page *)((unsigned long)page + LRU_PAGE);
}

static inline struct page *mlock_new(struct page *page)
{
	return (struct page *)((unsigned long)page + NEW_PAGE);
}

/*
 * mlock_pagevec() is derived from pagevec_lru_move_fn():
 * perhaps that can make use of such page pointer flags in future,
 * but for now just keep it for mlock.  We could use three separate
 * pagevecs instead, but one feels better (munlocking a full pagevec
 * does not need to drain mlocking pagevecs first).
 */
static void mlock_pagevec(struct pagevec *pvec)
{
	struct lruvec *lruvec = NULL;
	unsigned long mlock;
	struct page *page;
	int i;

	for (i = 0; i < pagevec_count(pvec); i++) {
		page = pvec->pages[i];
		mlock = (unsigned long)page & (LRU_PAGE | NEW_PAGE);
		page = (struct page *)((unsigned long)page - mlock);
		pvec->pages[i] = page;

		if (mlock & LRU_PAGE)
			lruvec = __mlock_page(page, lruvec);
		else if (mlock & NEW_PAGE)
			lruvec = __mlock_new_page(page, lruvec);
		else
			lruvec = __munlock_page(page, lruvec);
	}

	if (lruvec)
		unlock_page_lruvec_irq(lruvec);
	release_pages(pvec->pages, pvec->nr);
	pagevec_reinit(pvec);
}

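/*
 * Draining the per-CPU mlock pagevec: mlock_page_drain_local() empties
 * the current CPU's pagevec under the local lock; mlock_page_drain_remote()
 * empties the pagevec of a CPU that has gone offline; and
 * need_mlock_page_drain() reports whether a CPU has anything queued.
 */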
void mlock_page_drain_local(void)
{
	struct pagevec *pvec;

	local_lock(&mlock_pvec.lock);
	pvec = this_cpu_ptr(&mlock_pvec.vec);
	if (pagevec_count(pvec))
		mlock_pagevec(pvec);
	local_unlock(&mlock_pvec.lock);
}

void mlock_page_drain_remote(int cpu)
{
	struct pagevec *pvec;

	WARN_ON_ONCE(cpu_online(cpu));
	pvec = &per_cpu(mlock_pvec.vec, cpu);
	if (pagevec_count(pvec))
		mlock_pagevec(pvec);
}

bool need_mlock_page_drain(int cpu)
{
	return pagevec_count(&per_cpu(mlock_pvec.vec, cpu));
}

/**
 * mlock_folio - mlock a folio already on (or temporarily off) LRU
 * @folio: folio to be mlocked.
 */
void mlock_folio(struct folio *folio)
{
	struct pagevec *pvec;

	local_lock(&mlock_pvec.lock);
	pvec = this_cpu_ptr(&mlock_pvec.vec);

	if (!folio_test_set_mlocked(folio)) {
		int nr_pages = folio_nr_pages(folio);

		zone_stat_mod_folio(folio, NR_MLOCK, nr_pages);
		__count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
	}

	folio_get(folio);
	if (!pagevec_add(pvec, mlock_lru(&folio->page)) ||
	    folio_test_large(folio) || lru_cache_disabled())
		mlock_pagevec(pvec);
	local_unlock(&mlock_pvec.lock);
}

/**
 * mlock_new_page - mlock a newly allocated page not yet on LRU
 * @page: page to be mlocked, either a normal page or a THP head.
 */
void mlock_new_page(struct page *page)
{
	struct pagevec *pvec;
	int nr_pages = thp_nr_pages(page);

	local_lock(&mlock_pvec.lock);
	pvec = this_cpu_ptr(&mlock_pvec.vec);
	SetPageMlocked(page);
	mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
	__count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);

	get_page(page);
	if (!pagevec_add(pvec, mlock_new(page)) ||
	    PageHead(page) || lru_cache_disabled())
		mlock_pagevec(pvec);
	local_unlock(&mlock_pvec.lock);
}

/**
 * munlock_page - munlock a page
 * @page: page to be munlocked, either a normal page or a THP head.
 */
void munlock_page(struct page *page)
{
	struct pagevec *pvec;

	local_lock(&mlock_pvec.lock);
	pvec = this_cpu_ptr(&mlock_pvec.vec);
	/*
	 * TestClearPageMlocked(page) must be left to __munlock_page(),
	 * which will check whether the page is multiply mlocked.
	 */

	get_page(page);
	if (!pagevec_add(pvec, page) ||
	    PageHead(page) || lru_cache_disabled())
		mlock_pagevec(pvec);
	local_unlock(&mlock_pvec.lock);
}

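/*
 * mlock_pte_range() is the pmd_entry callback for the page walk in
 * mlock_vma_pages_range() below: depending on whether the vma is (still)
 * VM_LOCKED, it mlocks or munlocks a pmd-mapped THP, or each normal page
 * mapped by the pte range.  PTE-mapped compound pages are skipped.
 */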
static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
			   unsigned long end, struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	spinlock_t *ptl;
	pte_t *start_pte, *pte;
	struct page *page;

	ptl = pmd_trans_huge_lock(pmd, vma);
	if (ptl) {
		if (!pmd_present(*pmd))
			goto out;
		if (is_huge_zero_pmd(*pmd))
			goto out;
		page = pmd_page(*pmd);
		if (vma->vm_flags & VM_LOCKED)
			mlock_folio(page_folio(page));
		else
			munlock_page(page);
		goto out;
	}

	start_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	for (pte = start_pte; addr != end; pte++, addr += PAGE_SIZE) {
		if (!pte_present(*pte))
			continue;
		page = vm_normal_page(vma, addr, *pte);
		if (!page)
			continue;
		if (PageTransCompound(page))
			continue;
		if (vma->vm_flags & VM_LOCKED)
			mlock_folio(page_folio(page));
		else
			munlock_page(page);
	}
	pte_unmap(start_pte);
out:
	spin_unlock(ptl);
	cond_resched();
	return 0;
}

/*
 * mlock_vma_pages_range() - mlock any pages already in the range,
 * or munlock all pages in the range.
 * @vma - vma containing range to be mlock()ed or munlock()ed
 * @start - start address in @vma of the range
 * @end - end of range in @vma
 * @newflags - the new set of flags for @vma.
 *
 * Called for mlock(), mlock2() and mlockall(), to set @vma VM_LOCKED;
 * called for munlock() and munlockall(), to clear VM_LOCKED from @vma.
 */
static void mlock_vma_pages_range(struct vm_area_struct *vma,
	unsigned long start, unsigned long end, vm_flags_t newflags)
{
	static const struct mm_walk_ops mlock_walk_ops = {
		.pmd_entry = mlock_pte_range,
	};

	/*
	 * There is a slight chance that concurrent page migration,
	 * or page reclaim finding a page of this now-VM_LOCKED vma,
	 * will call mlock_vma_page() and raise page's mlock_count:
	 * double counting, leaving the page unevictable indefinitely.
	 * Communicate this danger to mlock_vma_page() with VM_IO,
	 * which is a VM_SPECIAL flag not allowed on VM_LOCKED vmas.
	 * mmap_lock is held in write mode here, so this weird
	 * combination should not be visible to other mmap_lock users;
	 * but WRITE_ONCE so rmap walkers must see VM_IO if VM_LOCKED.
	 */
	if (newflags & VM_LOCKED)
		newflags |= VM_IO;
	WRITE_ONCE(vma->vm_flags, newflags);

	lru_add_drain();
	walk_page_range(vma->vm_mm, start, end, &mlock_walk_ops, NULL);
	lru_add_drain();

	if (newflags & VM_IO) {
		newflags &= ~VM_IO;
		WRITE_ONCE(vma->vm_flags, newflags);
	}
}

/*
 * mlock_fixup - handle mlock[all]/munlock[all] requests.
 *
 * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
 * munlock is a no-op.  However, for some special vmas, we go ahead and
 * populate the ptes.
 *
 * For vmas that pass the filters, merge/split as appropriate.
 */
static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
	unsigned long start, unsigned long end, vm_flags_t newflags)
{
	struct mm_struct *mm = vma->vm_mm;
	pgoff_t pgoff;
	int nr_pages;
	int ret = 0;
	vm_flags_t oldflags = vma->vm_flags;

	if (newflags == oldflags || (oldflags & VM_SPECIAL) ||
	    is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) ||
	    vma_is_dax(vma) || vma_is_secretmem(vma))
		/* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */
		goto out;

	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
			  vma->vm_file, pgoff, vma_policy(vma),
			  vma->vm_userfaultfd_ctx, anon_vma_name(vma));
	if (*prev) {
		vma = *prev;
		goto success;
	}

	if (start != vma->vm_start) {
		ret = split_vma(mm, vma, start, 1);
		if (ret)
			goto out;
	}

	if (end != vma->vm_end) {
		ret = split_vma(mm, vma, end, 0);
		if (ret)
			goto out;
	}

success:
	/*
	 * Keep track of amount of locked VM.
	 */
	nr_pages = (end - start) >> PAGE_SHIFT;
	if (!(newflags & VM_LOCKED))
		nr_pages = -nr_pages;
	else if (oldflags & VM_LOCKED)
		nr_pages = 0;
	mm->locked_vm += nr_pages;

	/*
	 * vm_flags is protected by the mmap_lock held in write mode.
	 * It's okay if try_to_unmap_one unmaps a page just after we
	 * set VM_LOCKED, populate_vma_page_range will bring it back.
	 */

	if ((newflags & VM_LOCKED) && (oldflags & VM_LOCKED)) {
		/* No work to do, and mlocking twice would be wrong */
		vma->vm_flags = newflags;
	} else {
		mlock_vma_pages_range(vma, start, end, newflags);
	}
out:
	*prev = vma;
	return ret;
}

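/*
 * apply_vma_lock_flags() applies the VM_LOCKED/VM_LOCKONFAULT bits in
 * @flags to every vma covering [start, start + len), splitting or merging
 * vmas via mlock_fixup().  Returns 0 on success, -ENOMEM if the range is
 * not fully covered by vmas, or an error from mlock_fixup().
 */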
static int apply_vma_lock_flags(unsigned long start, size_t len,
				vm_flags_t flags)
{
	unsigned long nstart, end, tmp;
	struct vm_area_struct *vma, *prev;
	int error;

	VM_BUG_ON(offset_in_page(start));
	VM_BUG_ON(len != PAGE_ALIGN(len));
	end = start + len;
	if (end < start)
		return -EINVAL;
	if (end == start)
		return 0;
	vma = find_vma(current->mm, start);
	if (!vma || vma->vm_start > start)
		return -ENOMEM;

	prev = vma->vm_prev;
	if (start > vma->vm_start)
		prev = vma;

	for (nstart = start ; ; ) {
		vm_flags_t newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;

		newflags |= flags;

		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
		tmp = vma->vm_end;
		if (tmp > end)
			tmp = end;
		error = mlock_fixup(vma, &prev, nstart, tmp, newflags);
		if (error)
			break;
		nstart = tmp;
		if (nstart < prev->vm_end)
			nstart = prev->vm_end;
		if (nstart >= end)
			break;

		vma = prev->vm_next;
		if (!vma || vma->vm_start != nstart) {
			error = -ENOMEM;
			break;
		}
	}
	return error;
}

/*
 * Go through the vmas intersecting [start, start + len) and sum the size
 * of the already-mlocked (VM_LOCKED) parts.  Note that the deferred memory
 * locking case, mlock2() with MLOCK_ONFAULT, is also counted.
 *
 * Return value: count of previously mlocked pages.
 */
static unsigned long count_mm_mlocked_page_nr(struct mm_struct *mm,
		unsigned long start, size_t len)
{
	struct vm_area_struct *vma;
	unsigned long count = 0;

	if (mm == NULL)
		mm = current->mm;

	vma = find_vma(mm, start);
	if (vma == NULL)
		return 0;

	for (; vma ; vma = vma->vm_next) {
		if (start >= vma->vm_end)
			continue;
		if (start + len <= vma->vm_start)
			break;
		if (vma->vm_flags & VM_LOCKED) {
			if (start > vma->vm_start)
				count -= (start - vma->vm_start);
			if (start + len < vma->vm_end) {
				count += start + len - vma->vm_start;
				break;
			}
			count += vma->vm_end - vma->vm_start;
		}
	}

	return count >> PAGE_SHIFT;
}

/*
 * convert get_user_pages() return value to posix mlock() error
 */
static int __mlock_posix_error_return(long retval)
{
	if (retval == -EFAULT)
		retval = -ENOMEM;
	else if (retval == -ENOMEM)
		retval = -EAGAIN;
	return retval;
}

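/*
 * do_mlock() is the common implementation of the mlock() and mlock2()
 * syscalls: check permissions and RLIMIT_MEMLOCK, apply the requested
 * VM_LOCKED bits to the vmas under mmap_lock, then fault the pages in
 * with __mm_populate(), converting its errors to POSIX mlock() errors.
 */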
static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t flags)
{
	unsigned long locked;
	unsigned long lock_limit;
	int error = -ENOMEM;

	start = untagged_addr(start);

	if (!can_do_mlock())
		return -EPERM;

	len = PAGE_ALIGN(len + (offset_in_page(start)));
	start &= PAGE_MASK;

	lock_limit = rlimit(RLIMIT_MEMLOCK);
	lock_limit >>= PAGE_SHIFT;
	locked = len >> PAGE_SHIFT;

	if (mmap_write_lock_killable(current->mm))
		return -EINTR;

	locked += current->mm->locked_vm;
	if ((locked > lock_limit) && (!capable(CAP_IPC_LOCK))) {
		/*
		 * It is possible that the regions requested intersect with
		 * previously mlocked areas: that part is already accounted
		 * in "mm->locked_vm" and should not be counted again towards
		 * the new mlock total, so adjust the locked count if necessary.
		 */
		locked -= count_mm_mlocked_page_nr(current->mm,
				start, len);
	}

	/* check against resource limits */
	if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
		error = apply_vma_lock_flags(start, len, flags);

	mmap_write_unlock(current->mm);
	if (error)
		return error;

	error = __mm_populate(start, len, 0);
	if (error)
		return __mlock_posix_error_return(error);
	return 0;
}

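/*
 * The mlock(), mlock2() and munlock() syscalls below are thin wrappers
 * around do_mlock() and apply_vma_lock_flags().  A rough sketch of the
 * userspace view (illustrative only):
 *
 *	if (mlock2(addr, len, MLOCK_ONFAULT))
 *		perror("mlock2");	// lock pages as they are faulted in
 *	...
 *	munlock(addr, len);
 *
 * mlock(addr, len) behaves like mlock2(addr, len, 0): the range is
 * populated and locked immediately.
 */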
SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
{
	return do_mlock(start, len, VM_LOCKED);
}

SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags)
{
	vm_flags_t vm_flags = VM_LOCKED;

	if (flags & ~MLOCK_ONFAULT)
		return -EINVAL;

	if (flags & MLOCK_ONFAULT)
		vm_flags |= VM_LOCKONFAULT;

	return do_mlock(start, len, vm_flags);
}

SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
{
	int ret;

	start = untagged_addr(start);

	len = PAGE_ALIGN(len + (offset_in_page(start)));
	start &= PAGE_MASK;

	if (mmap_write_lock_killable(current->mm))
		return -EINTR;
	ret = apply_vma_lock_flags(start, len, 0);
	mmap_write_unlock(current->mm);

	return ret;
}

/*
 * Take the MCL_* flags passed into mlockall (or 0 if called from munlockall)
 * and translate into the appropriate modifications to mm->def_flags and/or the
 * flags for all current VMAs.
 *
 * There are a couple of subtleties with this.  If mlockall() is called multiple
 * times with different flags, the values do not necessarily stack.  If mlockall
 * is called once including the MCL_FUTURE flag and then a second time without
 * it, VM_LOCKED and VM_LOCKONFAULT will be cleared from mm->def_flags.
 */
static int apply_mlockall_flags(int flags)
{
	struct vm_area_struct *vma, *prev = NULL;
	vm_flags_t to_add = 0;

	current->mm->def_flags &= VM_LOCKED_CLEAR_MASK;
	if (flags & MCL_FUTURE) {
		current->mm->def_flags |= VM_LOCKED;

		if (flags & MCL_ONFAULT)
			current->mm->def_flags |= VM_LOCKONFAULT;

		if (!(flags & MCL_CURRENT))
			goto out;
	}

	if (flags & MCL_CURRENT) {
		to_add |= VM_LOCKED;
		if (flags & MCL_ONFAULT)
			to_add |= VM_LOCKONFAULT;
	}

	for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
		vm_flags_t newflags;

		newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;
		newflags |= to_add;

		/* Ignore errors */
		mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
		cond_resched();
	}
out:
	return 0;
}

SYSCALL_DEFINE1(mlockall, int, flags)
{
	unsigned long lock_limit;
	int ret;

	if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)) ||
	    flags == MCL_ONFAULT)
		return -EINVAL;

	if (!can_do_mlock())
		return -EPERM;

	lock_limit = rlimit(RLIMIT_MEMLOCK);
	lock_limit >>= PAGE_SHIFT;

	if (mmap_write_lock_killable(current->mm))
		return -EINTR;

	ret = -ENOMEM;
	if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
	    capable(CAP_IPC_LOCK))
		ret = apply_mlockall_flags(flags);
	mmap_write_unlock(current->mm);
	if (!ret && (flags & MCL_CURRENT))
		mm_populate(0, TASK_SIZE);

	return ret;
}

SYSCALL_DEFINE0(munlockall)
{
	int ret;

	if (mmap_write_lock_killable(current->mm))
		return -EINTR;
	ret = apply_mlockall_flags(0);
	mmap_write_unlock(current->mm);
	return ret;
}

/*
 * Objects with different lifetime than processes (SHM_LOCK and SHM_HUGETLB
 * shm segments) get accounted against the user_struct instead.
 */
static DEFINE_SPINLOCK(shmlock_user_lock);

int user_shm_lock(size_t size, struct ucounts *ucounts)
{
	unsigned long lock_limit, locked;
	long memlock;
	int allowed = 0;

	locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	lock_limit = rlimit(RLIMIT_MEMLOCK);
	if (lock_limit != RLIM_INFINITY)
		lock_limit >>= PAGE_SHIFT;
	spin_lock(&shmlock_user_lock);
	memlock = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);

	if ((memlock == LONG_MAX || memlock > lock_limit) && !capable(CAP_IPC_LOCK)) {
		dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
		goto out;
	}
	if (!get_ucounts(ucounts)) {
		dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
		allowed = 0;
		goto out;
	}
	allowed = 1;
out:
	spin_unlock(&shmlock_user_lock);
	return allowed;
}

void user_shm_unlock(size_t size, struct ucounts *ucounts)
{
	spin_lock(&shmlock_user_lock);
	dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, (size + PAGE_SIZE - 1) >> PAGE_SHIFT);
	spin_unlock(&shmlock_user_lock);
	put_ucounts(ucounts);
}