cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

page_counter.c (6884B)


// SPDX-License-Identifier: GPL-2.0
/*
 * Lockless hierarchical page accounting & limiting
 *
 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 */

#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <asm/page.h>

static void propagate_protected_usage(struct page_counter *c,
				      unsigned long usage)
{
	unsigned long protected, old_protected;
	unsigned long low, min;
	long delta;

	if (!c->parent)
		return;

	min = READ_ONCE(c->min);
	if (min || atomic_long_read(&c->min_usage)) {
		protected = min(usage, min);
		old_protected = atomic_long_xchg(&c->min_usage, protected);
		delta = protected - old_protected;
		if (delta)
			atomic_long_add(delta, &c->parent->children_min_usage);
	}

	low = READ_ONCE(c->low);
	if (low || atomic_long_read(&c->low_usage)) {
		protected = min(usage, low);
		old_protected = atomic_long_xchg(&c->low_usage, protected);
		delta = protected - old_protected;
		if (delta)
			atomic_long_add(delta, &c->parent->children_low_usage);
	}
}
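
/*
 * Illustrative walk-through (numbers chosen for this example only):
 * with c->min == 100 pages and c's usage rising from 80 to 120, the
 * protected amount min(usage, min) goes from 80 to 100, so a delta of
 * 20 is added to the parent's children_min_usage; when usage later
 * drops back to 80, the same 20 pages are subtracted again.
 */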

/**
 * page_counter_cancel - take pages out of the local counter
 * @counter: counter
 * @nr_pages: number of pages to cancel
 */
void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
{
	long new;

	new = atomic_long_sub_return(nr_pages, &counter->usage);
	/* More uncharges than charges? */
	if (WARN_ONCE(new < 0, "page_counter underflow: %ld nr_pages=%lu\n",
		      new, nr_pages)) {
		new = 0;
		atomic_long_set(&counter->usage, new);
	}
	propagate_protected_usage(counter, new);
}

/**
 * page_counter_charge - hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 *
 * NOTE: This does not consider any configured counter limits.
 */
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;

		new = atomic_long_add_return(nr_pages, &c->usage);
		propagate_protected_usage(c, new);
		/*
		 * This is indeed racy, but we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > READ_ONCE(c->watermark))
			WRITE_ONCE(c->watermark, new);
	}
}
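
/*
 * Usage sketch (illustrative only, kept out of the build; the
 * example_account() name is made up): unlike page_counter_try_charge(),
 * page_counter_charge() never fails, so it suits accounting that must
 * not be refused; every charge is expected to be balanced by an
 * uncharge of the same size later on.
 */
#if 0
static void example_account(struct page_counter *counter,
			    unsigned long nr_pages)
{
	page_counter_charge(counter, nr_pages);		/* may exceed ->max */
	/* ... make use of the pages ... */
	page_counter_uncharge(counter, nr_pages);	/* balance the charge */
}
#endif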

/**
 * page_counter_try_charge - try to hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 * @fail: points to the first counter to hit its limit, if any
 *
 * Returns %true on success, or %false and @fail if the counter or one
 * of its ancestors has hit its configured limit.
 */
bool page_counter_try_charge(struct page_counter *counter,
			     unsigned long nr_pages,
			     struct page_counter **fail)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;
		/*
		 * Charge speculatively to avoid an expensive CAS.  If
		 * a bigger charge fails, it might falsely lock out a
		 * racing smaller charge and send it into reclaim
		 * early, but the error is limited to the difference
		 * between the two sizes, which is less than 2M/4M in
		 * case of a THP locking out a regular page charge.
		 *
		 * The atomic_long_add_return() implies a full memory
		 * barrier between incrementing the count and reading
		 * the limit.  When racing with page_counter_set_max(),
		 * we either see the new limit or the setter sees the
		 * counter has changed and retries.
		 */
		new = atomic_long_add_return(nr_pages, &c->usage);
		if (new > c->max) {
			atomic_long_sub(nr_pages, &c->usage);
			/*
			 * This is racy, but we can live with some
			 * inaccuracy in the failcnt which is only used
			 * to report stats.
			 */
			data_race(c->failcnt++);
			*fail = c;
			goto failed;
		}
		propagate_protected_usage(c, new);
		/*
		 * Just like with failcnt, we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > READ_ONCE(c->watermark))
			WRITE_ONCE(c->watermark, new);
	}
	return true;

failed:
	for (c = counter; c != *fail; c = c->parent)
		page_counter_cancel(c, nr_pages);

	return false;
}
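
/*
 * Usage sketch (illustrative only, kept out of the build; the
 * example_* names are made up, and the two-argument page_counter_init()
 * from <linux/page_counter.h> is assumed): charging a child counter
 * also charges every ancestor, and on failure @fail names the level
 * whose limit was hit, which is where a real controller would target
 * reclaim before retrying.
 */
#if 0
static struct page_counter example_parent, example_child;

static int example_init(void)
{
	page_counter_init(&example_parent, NULL);
	page_counter_init(&example_child, &example_parent);
	return page_counter_set_max(&example_parent, 1024);	/* 4M with 4K pages */
}

static int example_try_charge(unsigned long nr_pages)
{
	struct page_counter *fail;

	if (!page_counter_try_charge(&example_child, nr_pages, &fail))
		return -ENOMEM;	/* @fail is the counter that hit ->max */
	return 0;
}

static void example_uncharge(unsigned long nr_pages)
{
	page_counter_uncharge(&example_child, nr_pages);
}
#endif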

/**
 * page_counter_uncharge - hierarchically uncharge pages
 * @counter: counter
 * @nr_pages: number of pages to uncharge
 */
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent)
		page_counter_cancel(c, nr_pages);
}

/**
 * page_counter_set_max - set the maximum number of pages allowed
 * @counter: counter
 * @nr_pages: limit to set
 *
 * Returns 0 on success, -EBUSY if the current number of pages on the
 * counter already exceeds the specified limit.
 *
 * The caller must serialize invocations on the same counter.
 */
int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages)
{
	for (;;) {
		unsigned long old;
		long usage;

		/*
		 * Update the limit while making sure that it's not
		 * below the concurrently-changing counter value.
		 *
		 * The xchg implies two full memory barriers before
		 * and after, so the read-swap-read is ordered and
		 * ensures coherency with page_counter_try_charge():
		 * that function modifies the count before checking
		 * the limit, so if it sees the old limit, we see the
		 * modified counter and retry.
		 */
		usage = page_counter_read(counter);

		if (usage > nr_pages)
			return -EBUSY;

		old = xchg(&counter->max, nr_pages);

		if (page_counter_read(counter) <= usage)
			return 0;

		counter->max = old;
		cond_resched();
	}
}
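
/*
 * Usage sketch (illustrative only, kept out of the build; the
 * example_shrink_max() name and retry count are made up): shrinking
 * the limit below current usage returns -EBUSY, so a caller lowering a
 * live limit typically reclaims and retries a bounded number of times,
 * a pattern similar to how the memory controller resizes limits.
 */
#if 0
static int example_shrink_max(struct page_counter *counter,
			      unsigned long new_max)
{
	int retries = 5;

	while (page_counter_set_max(counter, new_max) == -EBUSY) {
		if (!retries--)
			return -EBUSY;
		/* a real controller would reclaim pages here before retrying */
		cond_resched();
	}
	return 0;
}
#endif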

/**
 * page_counter_set_min - set the amount of protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	WRITE_ONCE(counter->min, nr_pages);

	for (c = counter; c; c = c->parent)
		propagate_protected_usage(c, atomic_long_read(&c->usage));
}

/**
 * page_counter_set_low - set the amount of protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	WRITE_ONCE(counter->low, nr_pages);

	for (c = counter; c; c = c->parent)
		propagate_protected_usage(c, atomic_long_read(&c->usage));
}

/**
 * page_counter_memparse - memparse() for page counter limits
 * @buf: string to parse
 * @max: string meaning maximum possible value
 * @nr_pages: returns the result in number of pages
 *
 * Returns -EINVAL, or 0 and @nr_pages on success.  @nr_pages will be
 * limited to %PAGE_COUNTER_MAX.
 */
int page_counter_memparse(const char *buf, const char *max,
			  unsigned long *nr_pages)
{
	char *end;
	u64 bytes;

	if (!strcmp(buf, max)) {
		*nr_pages = PAGE_COUNTER_MAX;
		return 0;
	}

	bytes = memparse(buf, &end);
	if (*end != '\0')
		return -EINVAL;

	*nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);

	return 0;
}
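
/*
 * Usage sketch (illustrative only, kept out of the build; the
 * example_write_max() name and its file-write plumbing are made up):
 * a typical caller strips the user string, lets page_counter_memparse()
 * map "max" to PAGE_COUNTER_MAX or a byte value to pages, and then
 * applies the result with page_counter_set_max().
 */
#if 0
static ssize_t example_write_max(struct page_counter *counter,
				 char *buf, size_t nbytes)
{
	unsigned long nr_pages;
	int err;

	buf = strstrip(buf);
	err = page_counter_memparse(buf, "max", &nr_pages);
	if (err)
		return err;	/* -EINVAL if the string has trailing garbage */

	err = page_counter_set_max(counter, nr_pages);
	return err ?: nbytes;
}
#endif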