cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

mapping_dirty_helpers.c (10801B)


// SPDX-License-Identifier: GPL-2.0
#include <linux/pagewalk.h>
#include <linux/hugetlb.h>
#include <linux/bitops.h>
#include <linux/mmu_notifier.h>
#include <linux/mm_inline.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

/**
 * struct wp_walk - Private struct for pagetable walk callbacks
 * @range: Range for mmu notifiers
 * @tlbflush_start: Address of first modified pte
 * @tlbflush_end: Address of last modified pte + 1
 * @total: Total number of modified ptes
 */
struct wp_walk {
	struct mmu_notifier_range range;
	unsigned long tlbflush_start;
	unsigned long tlbflush_end;
	unsigned long total;
};

/**
 * wp_pte - Write-protect a pte
 * @pte: Pointer to the pte
 * @addr: The start of the virtual address range to write-protect
 * @end: The end of the virtual address range to write-protect
 * @walk: pagetable walk callback argument
 *
 * The function write-protects a pte and records the range in
 * virtual address space of touched ptes for efficient range TLB flushes.
 */
static int wp_pte(pte_t *pte, unsigned long addr, unsigned long end,
		  struct mm_walk *walk)
{
	struct wp_walk *wpwalk = walk->private;
	pte_t ptent = *pte;

	if (pte_write(ptent)) {
		pte_t old_pte = ptep_modify_prot_start(walk->vma, addr, pte);

		ptent = pte_wrprotect(old_pte);
		ptep_modify_prot_commit(walk->vma, addr, pte, old_pte, ptent);
		wpwalk->total++;
		wpwalk->tlbflush_start = min(wpwalk->tlbflush_start, addr);
		wpwalk->tlbflush_end = max(wpwalk->tlbflush_end,
					   addr + PAGE_SIZE);
	}

	return 0;
}

/**
 * struct clean_walk - Private struct for the clean_record_pte function.
 * @base: struct wp_walk we derive from
 * @bitmap_pgoff: Address_space page offset of the first bit in @bitmap
 * @bitmap: Bitmap with one bit for each page offset in the address_space range
 * covered.
 * @start: Address_space page offset of first modified pte relative
 * to @bitmap_pgoff
 * @end: Address_space page offset of last modified pte relative
 * to @bitmap_pgoff
 */
struct clean_walk {
	struct wp_walk base;
	pgoff_t bitmap_pgoff;
	unsigned long *bitmap;
	pgoff_t start;
	pgoff_t end;
};

#define to_clean_walk(_wpwalk) container_of(_wpwalk, struct clean_walk, base)

/**
 * clean_record_pte - Clean a pte and record its address space offset in a
 * bitmap
 * @pte: Pointer to the pte
 * @addr: The start of the virtual address range to be cleaned
 * @end: The end of the virtual address range to be cleaned
 * @walk: pagetable walk callback argument
 *
 * The function cleans a pte and records the range in
 * virtual address space of touched ptes for efficient TLB flushes.
 * It also records dirty ptes in a bitmap representing page offsets
 * in the address_space, as well as the first and last of the bits
 * touched.
 */
static int clean_record_pte(pte_t *pte, unsigned long addr,
			    unsigned long end, struct mm_walk *walk)
{
	struct wp_walk *wpwalk = walk->private;
	struct clean_walk *cwalk = to_clean_walk(wpwalk);
	pte_t ptent = *pte;

	if (pte_dirty(ptent)) {
		pgoff_t pgoff = ((addr - walk->vma->vm_start) >> PAGE_SHIFT) +
			walk->vma->vm_pgoff - cwalk->bitmap_pgoff;
		pte_t old_pte = ptep_modify_prot_start(walk->vma, addr, pte);

		ptent = pte_mkclean(old_pte);
		ptep_modify_prot_commit(walk->vma, addr, pte, old_pte, ptent);

		wpwalk->total++;
		wpwalk->tlbflush_start = min(wpwalk->tlbflush_start, addr);
		wpwalk->tlbflush_end = max(wpwalk->tlbflush_end,
					   addr + PAGE_SIZE);

		__set_bit(pgoff, cwalk->bitmap);
		cwalk->start = min(cwalk->start, pgoff);
		cwalk->end = max(cwalk->end, pgoff + 1);
	}

	return 0;
}

/*
 * wp_clean_pmd_entry - The pagewalk pmd callback.
 *
 * Dirty-tracking should take place on the PTE level, so
 * WARN() if encountering a dirty huge pmd.
 * Furthermore, never split huge pmds, since that currently
 * causes dirty info loss. The pagefault handler should do
 * that if needed.
 */
static int wp_clean_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	pmd_t pmdval = pmd_read_atomic(pmd);

	if (!pmd_trans_unstable(&pmdval))
		return 0;

	if (pmd_none(pmdval)) {
		walk->action = ACTION_AGAIN;
		return 0;
	}

	/* Huge pmd, present or migrated */
	walk->action = ACTION_CONTINUE;
	if (pmd_trans_huge(pmdval) || pmd_devmap(pmdval))
		WARN_ON(pmd_write(pmdval) || pmd_dirty(pmdval));

	return 0;
}

/*
 * wp_clean_pud_entry - The pagewalk pud callback.
 *
 * Dirty-tracking should take place on the PTE level, so
 * WARN() if encountering a dirty huge pud.
 * Furthermore, never split huge puds, since that currently
 * causes dirty info loss. The pagefault handler should do
 * that if needed.
 */
static int wp_clean_pud_entry(pud_t *pud, unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	pud_t pudval = READ_ONCE(*pud);

	if (!pud_trans_unstable(&pudval))
		return 0;

	if (pud_none(pudval)) {
		walk->action = ACTION_AGAIN;
		return 0;
	}

#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
	/* Huge pud */
	walk->action = ACTION_CONTINUE;
	if (pud_trans_huge(pudval) || pud_devmap(pudval))
		WARN_ON(pud_write(pudval) || pud_dirty(pudval));
#endif

	return 0;
}

/*
 * wp_clean_pre_vma - The pagewalk pre_vma callback.
 *
 * The pre_vma callback performs the cache flush, stages the tlb flush
 * and calls the necessary mmu notifiers.
 */
static int wp_clean_pre_vma(unsigned long start, unsigned long end,
			    struct mm_walk *walk)
{
	struct wp_walk *wpwalk = walk->private;

	wpwalk->tlbflush_start = end;
	wpwalk->tlbflush_end = start;

	mmu_notifier_range_init(&wpwalk->range, MMU_NOTIFY_PROTECTION_PAGE, 0,
				walk->vma, walk->mm, start, end);
	mmu_notifier_invalidate_range_start(&wpwalk->range);
	flush_cache_range(walk->vma, start, end);

	/*
	 * We're not using tlb_gather_mmu() since typically
	 * only a small subrange of PTEs are affected, whereas
	 * tlb_gather_mmu() records the full range.
	 */
	inc_tlb_flush_pending(walk->mm);

	return 0;
}

/*
 * wp_clean_post_vma - The pagewalk post_vma callback.
 *
 * The post_vma callback performs the tlb flush and calls necessary mmu
 * notifiers.
 */
static void wp_clean_post_vma(struct mm_walk *walk)
{
	struct wp_walk *wpwalk = walk->private;

	if (mm_tlb_flush_nested(walk->mm))
		flush_tlb_range(walk->vma, wpwalk->range.start,
				wpwalk->range.end);
	else if (wpwalk->tlbflush_end > wpwalk->tlbflush_start)
		flush_tlb_range(walk->vma, wpwalk->tlbflush_start,
				wpwalk->tlbflush_end);

	mmu_notifier_invalidate_range_end(&wpwalk->range);
	dec_tlb_flush_pending(walk->mm);
}
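
/*
 * Note on wp_clean_post_vma() above: when another deferred TLB flush is
 * pending on the mm (mm_tlb_flush_nested()), the flush conservatively
 * covers the whole notifier range rather than only the
 * tlbflush_start..tlbflush_end sub-range touched by this walk.
 */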

/*
 * wp_clean_test_walk - The pagewalk test_walk callback.
 *
 * Won't perform dirty-tracking on COW, read-only or HUGETLB vmas.
 */
static int wp_clean_test_walk(unsigned long start, unsigned long end,
			      struct mm_walk *walk)
{
	unsigned long vm_flags = READ_ONCE(walk->vma->vm_flags);

	/* Skip non-applicable VMAs */
	if ((vm_flags & (VM_SHARED | VM_MAYWRITE | VM_HUGETLB)) !=
	    (VM_SHARED | VM_MAYWRITE))
		return 1;

	return 0;
}

static const struct mm_walk_ops clean_walk_ops = {
	.pte_entry = clean_record_pte,
	.pmd_entry = wp_clean_pmd_entry,
	.pud_entry = wp_clean_pud_entry,
	.test_walk = wp_clean_test_walk,
	.pre_vma = wp_clean_pre_vma,
	.post_vma = wp_clean_post_vma
};

static const struct mm_walk_ops wp_walk_ops = {
	.pte_entry = wp_pte,
	.pmd_entry = wp_clean_pmd_entry,
	.pud_entry = wp_clean_pud_entry,
	.test_walk = wp_clean_test_walk,
	.pre_vma = wp_clean_pre_vma,
	.post_vma = wp_clean_post_vma
};
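
/*
 * The two walk ops tables above differ only in .pte_entry: wp_walk_ops
 * write-protects writable ptes, while clean_walk_ops clears the dirty bit
 * and records the touched page offsets in the caller's bitmap.
 */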

/**
 * wp_shared_mapping_range - Write-protect all ptes in an address space range
 * @mapping: The address_space we want to write protect
 * @first_index: The first page offset in the range
 * @nr: Number of incremental page offsets to cover
 *
 * Note: This function currently skips transhuge page-table entries, since
 * it's intended for dirty-tracking on the PTE level. It will warn on
 * encountering transhuge write-enabled entries, though, and can easily be
 * extended to handle them as well.
 *
 * Return: The number of ptes actually write-protected. Note that
 * already write-protected ptes are not counted.
 */
unsigned long wp_shared_mapping_range(struct address_space *mapping,
				      pgoff_t first_index, pgoff_t nr)
{
	struct wp_walk wpwalk = { .total = 0 };

	i_mmap_lock_read(mapping);
	WARN_ON(walk_page_mapping(mapping, first_index, nr, &wp_walk_ops,
				  &wpwalk));
	i_mmap_unlock_read(mapping);

	return wpwalk.total;
}
EXPORT_SYMBOL_GPL(wp_shared_mapping_range);
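
/*
 * Usage sketch (illustrative only; "my_mapping", "my_first_pgoff" and
 * "my_nr_pages" are placeholder names, not identifiers from this file):
 * a driver doing software dirty tracking on a shared mapping can
 * write-protect the range it cares about and let its page_mkwrite() or
 * pfn_mkwrite() handler re-arm tracking on the next CPU write.
 *
 *	unsigned long wp_count;
 *
 *	wp_count = wp_shared_mapping_range(my_mapping, my_first_pgoff,
 *					   my_nr_pages);
 *	// wp_count is the number of ptes that were writable and are now
 *	// write-protected; ptes that were already read-only are not counted.
 */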

/**
 * clean_record_shared_mapping_range - Clean and record all ptes in an
 * address space range
 * @mapping: The address_space we want to clean
 * @first_index: The first page offset in the range
 * @nr: Number of incremental page offsets to cover
 * @bitmap_pgoff: The page offset of the first bit in @bitmap
 * @bitmap: Pointer to a bitmap of at least @nr bits. The bitmap needs to
 * cover the whole range @first_index..@first_index + @nr.
 * @start: Pointer to the page offset, relative to @bitmap_pgoff, of the
 * first set bit in @bitmap, or to a value >= *@end if no bits are set.
 * The value is modified as new bits are set by the function.
 * @end: Pointer to one past the page offset, relative to @bitmap_pgoff, of
 * the last set bit in @bitmap, or to a value <= *@start if no bits are set.
 * The value is modified as new bits are set by the function.
 *
 * Note: When this function returns there is no guarantee that a CPU has
 * not already dirtied new ptes. However it will not clean any ptes not
 * reported in the bitmap. The guarantees are as follows:
 * a) All ptes dirty when the function starts executing will end up recorded
 *    in the bitmap.
 * b) All ptes dirtied after that will either remain dirty, be recorded in the
 *    bitmap or both.
 *
 * If a caller needs to make sure all dirty ptes are picked up and no
 * additional ones are added, it first needs to write-protect the
 * address-space range and make sure new writers are blocked in
 * page_mkwrite() or pfn_mkwrite(). Then, after a TLB flush following the
 * write-protection, it can pick up all dirty bits.
 *
 * This function currently skips transhuge page-table entries, since
 * it's intended for dirty-tracking on the PTE level. It will warn on
 * encountering transhuge dirty entries, though, and can easily be extended
 * to handle them as well.
 *
 * Return: The number of dirty ptes actually cleaned.
 */
unsigned long clean_record_shared_mapping_range(struct address_space *mapping,
						pgoff_t first_index, pgoff_t nr,
						pgoff_t bitmap_pgoff,
						unsigned long *bitmap,
						pgoff_t *start,
						pgoff_t *end)
{
	bool none_set = (*start >= *end);
	struct clean_walk cwalk = {
		.base = { .total = 0 },
		.bitmap_pgoff = bitmap_pgoff,
		.bitmap = bitmap,
		.start = none_set ? nr : *start,
		.end = none_set ? 0 : *end,
	};

	i_mmap_lock_read(mapping);
	WARN_ON(walk_page_mapping(mapping, first_index, nr, &clean_walk_ops,
				  &cwalk.base));
	i_mmap_unlock_read(mapping);

	*start = cwalk.start;
	*end = cwalk.end;

	return cwalk.base.total;
}
EXPORT_SYMBOL_GPL(clean_record_shared_mapping_range);
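
/*
 * Usage sketch (illustrative only; "my_mapping", "my_first_pgoff",
 * "my_nr_pages" and "my_bitmap" are placeholder names, not identifiers from
 * this file): collect the page offsets dirtied since the last scan into a
 * caller-owned bitmap of at least my_nr_pages bits.
 *
 *	pgoff_t start = my_nr_pages, end = 0;	// start >= end: no bits set yet
 *	unsigned long cleaned;
 *
 *	cleaned = clean_record_shared_mapping_range(my_mapping, my_first_pgoff,
 *						    my_nr_pages, my_first_pgoff,
 *						    my_bitmap, &start, &end);
 *	// Bits [start, end) of my_bitmap now mark page offsets, relative to
 *	// my_first_pgoff, whose ptes were dirty and have been cleaned.
 */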