cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

drm_cache.c (9837B)


/**************************************************************************
 *
 * Copyright (c) 2006-2007 Tungsten Graphics, Inc., Cedar Park, TX., USA
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
 */
#include <linux/cc_platform.h>
#include <linux/export.h>
#include <linux/highmem.h>
#include <linux/ioport.h>
#include <linux/iosys-map.h>
#include <xen/xen.h>

#include <drm/drm_cache.h>

/* A small bounce buffer that fits on the stack. */
#define MEMCPY_BOUNCE_SIZE 128

#if defined(CONFIG_X86)
#include <asm/smp.h>

/*
 * clflushopt is an unordered instruction which needs fencing with mfence or
 * sfence to avoid ordering issues.  For drm_clflush_page this fencing happens
 * in the caller.
 */
static void
drm_clflush_page(struct page *page)
{
	uint8_t *page_virtual;
	unsigned int i;
	const int size = boot_cpu_data.x86_clflush_size;

	if (unlikely(page == NULL))
		return;

	page_virtual = kmap_atomic(page);
	for (i = 0; i < PAGE_SIZE; i += size)
		clflushopt(page_virtual + i);
	kunmap_atomic(page_virtual);
}

static void drm_cache_flush_clflush(struct page *pages[],
				    unsigned long num_pages)
{
	unsigned long i;

	mb(); /* Full memory barrier used before so that CLFLUSH is ordered */
	for (i = 0; i < num_pages; i++)
		drm_clflush_page(*pages++);
	mb(); /* Also used after CLFLUSH so that all cache is flushed */
}
#endif

/**
 * drm_clflush_pages - Flush dcache lines of a set of pages.
 * @pages: List of pages to be flushed.
 * @num_pages: Number of pages in the array.
 *
 * Flush every data cache line entry that points to an address belonging
 * to a page in the array.
 */
void
drm_clflush_pages(struct page *pages[], unsigned long num_pages)
{

#if defined(CONFIG_X86)
	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
		drm_cache_flush_clflush(pages, num_pages);
		return;
	}

	if (wbinvd_on_all_cpus())
		pr_err("Timed out waiting for cache flush\n");

#elif defined(__powerpc__)
	unsigned long i;

	for (i = 0; i < num_pages; i++) {
		struct page *page = pages[i];
		void *page_virtual;

		if (unlikely(page == NULL))
			continue;

		page_virtual = kmap_atomic(page);
		flush_dcache_range((unsigned long)page_virtual,
				   (unsigned long)page_virtual + PAGE_SIZE);
		kunmap_atomic(page_virtual);
	}
#else
	WARN_ONCE(1, "Architecture has no drm_cache.c support\n");
#endif
}
EXPORT_SYMBOL(drm_clflush_pages);
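/*
 * Usage sketch (illustrative only; the calling context and the page
 * array are hypothetical): a driver flushing CPU caches for a pinned
 * page array before a device reads it without snooping.
 *
 *	struct page *pages[16];
 *
 *	... allocate/pin the pages and fill them from the CPU ...
 *	drm_clflush_pages(pages, ARRAY_SIZE(pages));
 *	... hand the pages to the device ...
 */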

/**
 * drm_clflush_sg - Flush dcache lines pointing to a scatter-gather list.
 * @st: struct sg_table to flush.
 *
 * Flush every data cache line entry that points to an address in the
 * sg table.
 */
void
drm_clflush_sg(struct sg_table *st)
{
#if defined(CONFIG_X86)
	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
		struct sg_page_iter sg_iter;

		mb(); /* CLFLUSH is ordered only by using memory barriers */
		for_each_sgtable_page(st, &sg_iter, 0)
			drm_clflush_page(sg_page_iter_page(&sg_iter));
		mb(); /* Make sure that every cache line entry is flushed */

		return;
	}

	if (wbinvd_on_all_cpus())
		pr_err("Timed out waiting for cache flush\n");
#else
	WARN_ONCE(1, "Architecture has no drm_cache.c support\n");
#endif
}
EXPORT_SYMBOL(drm_clflush_sg);
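/*
 * Usage sketch (illustrative only; "sgt" and how it is obtained are
 * hypothetical): flushing an object's backing pages through its
 * scatter-gather table before non-coherent device access.
 *
 *	struct sg_table *sgt = obj->pages;	(hypothetical backing store)
 *
 *	drm_clflush_sg(sgt);
 */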

/**
 * drm_clflush_virt_range - Flush dcache lines of a region
 * @addr: Initial kernel memory address.
 * @length: Region size.
 *
 * Flush every data cache line entry that points to an address in the
 * region requested.
 */
void
drm_clflush_virt_range(void *addr, unsigned long length)
{
#if defined(CONFIG_X86)
	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
		const int size = boot_cpu_data.x86_clflush_size;
		void *end = addr + length;

		addr = (void *)(((unsigned long)addr) & -size);
		mb(); /* CLFLUSH is only ordered with a full memory barrier */
		for (; addr < end; addr += size)
			clflushopt(addr);
		clflushopt(end - 1); /* force serialisation */
		mb(); /* Ensure that every data cache line entry is flushed */
		return;
	}

	if (wbinvd_on_all_cpus())
		pr_err("Timed out waiting for cache flush\n");
#else
	WARN_ONCE(1, "Architecture has no drm_cache.c support\n");
#endif
}
EXPORT_SYMBOL(drm_clflush_virt_range);
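/*
 * Usage sketch (illustrative only; "vaddr", "cmds" and "size" are
 * hypothetical): flushing a CPU-written sub-range of a kernel mapping,
 * e.g. after patching a command buffer in place.
 *
 *	memcpy(vaddr, cmds, size);
 *	drm_clflush_virt_range(vaddr, size);
 */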

bool drm_need_swiotlb(int dma_bits)
{
	struct resource *tmp;
	resource_size_t max_iomem = 0;

	/*
	 * Xen paravirtual hosts require swiotlb regardless of requested dma
	 * transfer size.
	 *
	 * NOTE: Really, what it requires is use of the dma_alloc_coherent
	 *       allocator used in ttm_dma_populate() instead of
	 *       ttm_populate_and_map_pages(), which bounce buffers so much in
	 *       Xen it leads to swiotlb buffer exhaustion.
	 */
	if (xen_pv_domain())
		return true;

	/*
	 * Enforce dma_alloc_coherent when memory encryption is active as well
	 * for the same reasons as for Xen paravirtual hosts.
	 */
	if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
		return true;

	for (tmp = iomem_resource.child; tmp; tmp = tmp->sibling)
		max_iomem = max(max_iomem, tmp->end);

	return max_iomem > ((u64)1 << dma_bits);
}
EXPORT_SYMBOL(drm_need_swiotlb);
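/*
 * Usage sketch (illustrative only): a driver whose device can address
 * only 40 bits of DMA checking whether swiotlb bounce buffering will be
 * needed on this system.
 *
 *	bool need_swiotlb = drm_need_swiotlb(40);
 */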

static void memcpy_fallback(struct iosys_map *dst,
			    const struct iosys_map *src,
			    unsigned long len)
{
	if (!dst->is_iomem && !src->is_iomem) {
		memcpy(dst->vaddr, src->vaddr, len);
	} else if (!src->is_iomem) {
		iosys_map_memcpy_to(dst, 0, src->vaddr, len);
	} else if (!dst->is_iomem) {
		memcpy_fromio(dst->vaddr, src->vaddr_iomem, len);
	} else {
		/*
		 * Bounce size is not performance tuned, but using a
		 * bounce buffer like this is significantly faster than
		 * resorting to ioreadxx() + iowritexx().
		 */
		char bounce[MEMCPY_BOUNCE_SIZE];
		void __iomem *_src = src->vaddr_iomem;
		void __iomem *_dst = dst->vaddr_iomem;

		while (len >= MEMCPY_BOUNCE_SIZE) {
			memcpy_fromio(bounce, _src, MEMCPY_BOUNCE_SIZE);
			memcpy_toio(_dst, bounce, MEMCPY_BOUNCE_SIZE);
			_src += MEMCPY_BOUNCE_SIZE;
			_dst += MEMCPY_BOUNCE_SIZE;
			len -= MEMCPY_BOUNCE_SIZE;
		}
		if (len) {
			memcpy_fromio(bounce, _src, len);
			memcpy_toio(_dst, bounce, len);
		}
	}
}
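/*
 * Worked example for the bounce path above: copying 300 bytes between
 * two iomem mappings performs two full 128-byte bounce iterations
 * (256 bytes) followed by one tail copy of the remaining 44 bytes.
 */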

#ifdef CONFIG_X86

static DEFINE_STATIC_KEY_FALSE(has_movntdqa);

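/*
 * Note: @len below is the number of 16-byte chunks, not bytes (the
 * caller shifts the byte count right by 4), and both @dst and @src must
 * be 16-byte aligned for the movntdqa/movaps pair.
 */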
static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len)
{
	kernel_fpu_begin();

	while (len >= 4) {
		asm("movntdqa	(%0), %%xmm0\n"
		    "movntdqa 16(%0), %%xmm1\n"
		    "movntdqa 32(%0), %%xmm2\n"
		    "movntdqa 48(%0), %%xmm3\n"
		    "movaps %%xmm0,   (%1)\n"
		    "movaps %%xmm1, 16(%1)\n"
		    "movaps %%xmm2, 32(%1)\n"
		    "movaps %%xmm3, 48(%1)\n"
		    :: "r" (src), "r" (dst) : "memory");
		src += 64;
		dst += 64;
		len -= 4;
	}
	while (len--) {
		asm("movntdqa (%0), %%xmm0\n"
		    "movaps %%xmm0, (%1)\n"
		    :: "r" (src), "r" (dst) : "memory");
		src += 16;
		dst += 16;
	}

	kernel_fpu_end();
}

/*
 * __drm_memcpy_from_wc copies @len bytes from @src to @dst using
 * non-temporal instructions where available. Note that all arguments
 * (@src, @dst) must be aligned to 16 bytes and @len must be a multiple
 * of 16.
 */
static void __drm_memcpy_from_wc(void *dst, const void *src, unsigned long len)
{
	if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15))
		memcpy(dst, src, len);
	else if (likely(len))
		__memcpy_ntdqa(dst, src, len >> 4);
}

/**
 * drm_memcpy_from_wc - Perform the fastest available memcpy from a source
 * that may be WC.
 * @dst: The destination pointer
 * @src: The source pointer
 * @len: The size of the area to transfer in bytes
 *
 * Tries an arch-optimized memcpy suited to reading out of a WC region,
 * and if no such beast is available, falls back to a normal memcpy.
 */
void drm_memcpy_from_wc(struct iosys_map *dst,
			const struct iosys_map *src,
			unsigned long len)
{
	if (WARN_ON(in_interrupt())) {
		memcpy_fallback(dst, src, len);
		return;
	}

	if (static_branch_likely(&has_movntdqa)) {
		__drm_memcpy_from_wc(dst->is_iomem ?
				     (void __force *)dst->vaddr_iomem :
				     dst->vaddr,
				     src->is_iomem ?
				     (void const __force *)src->vaddr_iomem :
				     src->vaddr,
				     len);
		return;
	}

	memcpy_fallback(dst, src, len);
}
EXPORT_SYMBOL(drm_memcpy_from_wc);
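/*
 * Usage sketch (illustrative only; "bar_vaddr", "shadow_buf" and "size"
 * are hypothetical): copying out of a write-combined iomem mapping into
 * system memory.
 *
 *	struct iosys_map src_map, dst_map;
 *
 *	iosys_map_set_vaddr_iomem(&src_map, bar_vaddr);
 *	iosys_map_set_vaddr(&dst_map, shadow_buf);
 *	drm_memcpy_from_wc(&dst_map, &src_map, size);
 */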

/*
 * drm_memcpy_init_early - One time initialization of the WC memcpy code
 */
void drm_memcpy_init_early(void)
{
	/*
	 * Some hypervisors (e.g. KVM) don't support emulation of VEX-prefixed
	 * instructions, so don't enable movntdqa in hypervisor guests.
	 */
	if (static_cpu_has(X86_FEATURE_XMM4_1) &&
	    !boot_cpu_has(X86_FEATURE_HYPERVISOR))
		static_branch_enable(&has_movntdqa);
}
#else
void drm_memcpy_from_wc(struct iosys_map *dst,
			const struct iosys_map *src,
			unsigned long len)
{
	WARN_ON(in_interrupt());

	memcpy_fallback(dst, src, len);
}
EXPORT_SYMBOL(drm_memcpy_from_wc);

void drm_memcpy_init_early(void)
{
}
#endif /* CONFIG_X86 */