cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

page.c (19359B)
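Below: arch/mips/mm/page.c, which uses the uasm micro-assembler to synthesize CPU-tuned clear_page/copy_page routines at boot.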


/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
 * Copyright (C) 2007  Maciej W. Rozycki
 * Copyright (C) 2008  Thiemo Seufer
 * Copyright (C) 2012  MIPS Technologies, Inc.
 */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>

#include <asm/bugs.h>
#include <asm/cacheops.h>
#include <asm/cpu-type.h>
#include <asm/inst.h>
#include <asm/io.h>
#include <asm/page.h>
#include <asm/prefetch.h>
#include <asm/bootinfo.h>
#include <asm/mipsregs.h>
#include <asm/mmu_context.h>
#include <asm/cpu.h>

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
#include <asm/sibyte/sb1250.h>
#include <asm/sibyte/sb1250_regs.h>
#include <asm/sibyte/sb1250_dma.h>
#endif

#include <asm/uasm.h>

/* Registers used in the assembled routines. */
#define ZERO 0
#define AT 2
#define A0 4
#define A1 5
#define A2 6
#define T0 8
#define T1 9
#define T2 10
#define T3 11
#define T9 25
#define RA 31
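/*
 * These are raw MIPS GPR numbers for the uasm emitters: $4-$6 are the
 * a0-a2 argument registers, $8-$11 the t0-t3 temporaries, $25 is t9 and
 * $31 is ra.  Despite its name, AT is GPR $2 here; it only holds the
 * KSEG1 base for the R4600 V2 cacheop workaround below.
 */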

/* Handle labels (which must be positive integers). */
enum label_id {
	label_clear_nopref = 1,
	label_clear_pref,
	label_copy_nopref,
	label_copy_pref_both,
	label_copy_pref_store,
};

UASM_L_LA(_clear_nopref)
UASM_L_LA(_clear_pref)
UASM_L_LA(_copy_nopref)
UASM_L_LA(_copy_pref_both)
UASM_L_LA(_copy_pref_store)

/* We need one branch and therefore one relocation per target label. */
static struct uasm_label labels[5];
static struct uasm_reloc relocs[5];

#define cpu_is_r4600_v1_x()	((read_c0_prid() & 0xfffffff0) == 0x00002010)
#define cpu_is_r4600_v2_x()	((read_c0_prid() & 0xfffffff0) == 0x00002020)

/*
 * MIPS R6 reduced the pref instruction's offset field to 9 signed bits.
 * Skip the prefetch if the offset doesn't fit in [-256, 255].
 */
#define _uasm_i_pref(a, b, c, d)		\
do {						\
	if (cpu_has_mips_r6) {			\
		if (c <= 0xff && c >= -0x100)	\
			uasm_i_pref(a, b, c, d);\
	} else {				\
		uasm_i_pref(a, b, c, d);	\
	}					\
} while (0)

static int pref_bias_clear_store;
static int pref_bias_copy_load;
static int pref_bias_copy_store;

static u32 pref_src_mode;
static u32 pref_dst_mode;

static int clear_word_size;
static int copy_word_size;

static int half_clear_loop_size;
static int half_copy_loop_size;

static int cache_line_size;
#define cache_line_mask() (cache_line_size - 1)

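/*
 * Emit "reg1 = reg2 + off" for offsets that may not fit the 16-bit
 * signed immediate of (d)addiu.  When the R4000/R4400 daddiu errata
 * workaround applies, the constant is always built in T9 and added with
 * daddu so no daddiu with a problematic immediate is ever generated.
 */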
static inline void
pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off)
{
	if (cpu_has_64bit_gp_regs &&
	    IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS) &&
	    r4k_daddiu_bug()) {
		if (off > 0x7fff) {
			uasm_i_lui(buf, T9, uasm_rel_hi(off));
			uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
		} else
			uasm_i_addiu(buf, T9, ZERO, off);
		uasm_i_daddu(buf, reg1, reg2, T9);
	} else {
		if (off > 0x7fff) {
			uasm_i_lui(buf, T9, uasm_rel_hi(off));
			uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
			UASM_i_ADDU(buf, reg1, reg2, T9);
		} else
			UASM_i_ADDIU(buf, reg1, reg2, off);
	}
}

static void set_prefetch_parameters(void)
{
	if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg)
		clear_word_size = 8;
	else
		clear_word_size = 4;

	if (cpu_has_64bit_gp_regs)
		copy_word_size = 8;
	else
		copy_word_size = 4;

	/*
	 * The prefetches used here use "streaming" hints, which cause the
	 * copied data to be evicted from the cache sooner.  A page copy
	 * often ends up copying a lot more data than is commonly used, so
	 * this seems to make sense in terms of reducing cache pollution,
	 * but I've no real performance data to back this up.
	 */
	if (cpu_has_prefetch) {
		/*
		 * XXX: Most prefetch bias values in here are based on
		 * guesswork.
		 */
		cache_line_size = cpu_dcache_line_size();
		switch (current_cpu_type()) {
		case CPU_R5500:
		case CPU_TX49XX:
			/* These processors support only the Pref_Load hint. */
			pref_bias_copy_load = 256;
			break;

		case CPU_R10000:
		case CPU_R12000:
		case CPU_R14000:
		case CPU_R16000:
			/*
			 * These values have been experimentally tuned for an
			 * Origin 200.
			 */
			pref_bias_clear_store = 512;
			pref_bias_copy_load = 256;
			pref_bias_copy_store = 256;
			pref_src_mode = Pref_LoadStreamed;
			pref_dst_mode = Pref_StoreStreamed;
			break;

		case CPU_SB1:
		case CPU_SB1A:
			pref_bias_clear_store = 128;
			pref_bias_copy_load = 128;
			pref_bias_copy_store = 128;
			/*
			 * SB1 pass1 Pref_LoadStreamed/Pref_StoreStreamed
			 * hints are broken.
			 */
			if (current_cpu_type() == CPU_SB1 &&
			    (current_cpu_data.processor_id & 0xff) < 0x02) {
				pref_src_mode = Pref_Load;
				pref_dst_mode = Pref_Store;
			} else {
				pref_src_mode = Pref_LoadStreamed;
				pref_dst_mode = Pref_StoreStreamed;
			}
			break;

		case CPU_LOONGSON64:
			/* Loongson-3 supports only Pref_Load/Pref_Store. */
			pref_bias_clear_store = 128;
			pref_bias_copy_load = 128;
			pref_bias_copy_store = 128;
			pref_src_mode = Pref_Load;
			pref_dst_mode = Pref_Store;
			break;

		default:
			pref_bias_clear_store = 128;
			pref_bias_copy_load = 256;
			pref_bias_copy_store = 128;
			pref_src_mode = Pref_LoadStreamed;
			if (cpu_has_mips_r6)
				/*
				 * Bit 30 (Pref_PrepareForStore) has been
				 * removed from MIPS R6.  Use bit 5
				 * (Pref_StoreStreamed) instead.
				 */
				pref_dst_mode = Pref_StoreStreamed;
			else
				pref_dst_mode = Pref_PrepareForStore;
			break;
		}
	} else {
		if (cpu_has_cache_cdex_s)
			cache_line_size = cpu_scache_line_size();
		else if (cpu_has_cache_cdex_p)
			cache_line_size = cpu_dcache_line_size();
	}
	/*
	 * Too much unrolling will overflow the available space between
	 * __clear_page_start/__clear_page_end and
	 * __copy_page_start/__copy_page_end.
	 */
	half_clear_loop_size = min(16 * clear_word_size,
				   max(cache_line_size >> 1,
				       4 * clear_word_size));
	half_copy_loop_size = min(16 * copy_word_size,
				  max(cache_line_size >> 1,
				      4 * copy_word_size));
}
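/*
 * Worked example (hypothetical configuration): a 64-bit CPU with
 * 32-byte D-cache lines hitting the default case above gives
 * clear_word_size = 8 and half_clear_loop_size =
 * min(16 * 8, max(32 / 2, 4 * 8)) = 32, i.e. each half of the clear
 * loop is unrolled into four 64-bit stores covering one cache line.
 */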

static void build_clear_store(u32 **buf, int off)
{
	if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) {
		uasm_i_sd(buf, ZERO, off, A0);
	} else {
		uasm_i_sw(buf, ZERO, off, A0);
	}
}

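/*
 * Emit at most one prefetch or cacheop per cache line; offsets that
 * aren't line-aligned return early.  With a store-prefetch bias this
 * prefetches ahead of the current store position.  Without one, and
 * when a full loop iteration covers exactly one cache line, a "create
 * dirty exclusive" cacheop establishes the line without reading memory,
 * preceded by the R4600 V1 (four nops) or V2 (uncached load via AT)
 * workaround where required.
 */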
static inline void build_clear_pref(u32 **buf, int off)
{
	if (off & cache_line_mask())
		return;

	if (pref_bias_clear_store) {
		_uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off,
			    A0);
	} else if (cache_line_size == (half_clear_loop_size << 1)) {
		if (cpu_has_cache_cdex_s) {
			uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
		} else if (cpu_has_cache_cdex_p) {
			if (IS_ENABLED(CONFIG_WAR_R4600_V1_HIT_CACHEOP) &&
			    cpu_is_r4600_v1_x()) {
				uasm_i_nop(buf);
				uasm_i_nop(buf);
				uasm_i_nop(buf);
				uasm_i_nop(buf);
			}

			if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) &&
			    cpu_is_r4600_v2_x())
				uasm_i_lw(buf, ZERO, ZERO, AT);

			uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
		}
	}
}

extern u32 __clear_page_start;
extern u32 __clear_page_end;
extern u32 __copy_page_start;
extern u32 __copy_page_end;
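/*
 * These symbols delimit space reserved in the kernel image (in mainline,
 * the bodies of the clear_page/copy_page stubs in page-funcs.S) into
 * which the builders below write the generated instructions; the
 * BUG_ON()s further down guard against overflowing it.
 */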

void build_clear_page(void)
{
	int off;
	u32 *buf = &__clear_page_start;
	struct uasm_label *l = labels;
	struct uasm_reloc *r = relocs;
	int i;
	static atomic_t run_once = ATOMIC_INIT(0);

	if (atomic_xchg(&run_once, 1)) {
		return;
	}

	memset(labels, 0, sizeof(labels));
	memset(relocs, 0, sizeof(relocs));

	set_prefetch_parameters();

	/*
	 * This algorithm makes the following assumptions:
	 *   - The prefetch bias is a multiple of 2 words.
	 *   - The prefetch bias is less than one page.
	 */
	BUG_ON(pref_bias_clear_store % (2 * clear_word_size));
	BUG_ON(PAGE_SIZE < pref_bias_clear_store);

	off = PAGE_SIZE - pref_bias_clear_store;
	if (off > 0xffff || !pref_bias_clear_store)
		pg_addiu(&buf, A2, A0, off);
	else
		uasm_i_ori(&buf, A2, A0, off);

	if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && cpu_is_r4600_v2_x())
		uasm_i_lui(&buf, AT, uasm_rel_hi(0xa0000000));

	off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size)
				* cache_line_size : 0;
	while (off) {
		build_clear_pref(&buf, -off);
		off -= cache_line_size;
	}
	uasm_l_clear_pref(&l, buf);
	do {
		build_clear_pref(&buf, off);
		build_clear_store(&buf, off);
		off += clear_word_size;
	} while (off < half_clear_loop_size);
	pg_addiu(&buf, A0, A0, 2 * off);
	off = -off;
	do {
		build_clear_pref(&buf, off);
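		/*
		 * The loop branch is emitted one instruction early so
		 * that the final store below lands in the branch delay
		 * slot of the generated code.
		 */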
		if (off == -clear_word_size)
			uasm_il_bne(&buf, &r, A0, A2, label_clear_pref);
		build_clear_store(&buf, off);
		off += clear_word_size;
	} while (off < 0);

	if (pref_bias_clear_store) {
		pg_addiu(&buf, A2, A0, pref_bias_clear_store);
		uasm_l_clear_nopref(&l, buf);
		off = 0;
		do {
			build_clear_store(&buf, off);
			off += clear_word_size;
		} while (off < half_clear_loop_size);
		pg_addiu(&buf, A0, A0, 2 * off);
		off = -off;
		do {
			if (off == -clear_word_size)
				uasm_il_bne(&buf, &r, A0, A2,
					    label_clear_nopref);
			build_clear_store(&buf, off);
			off += clear_word_size;
		} while (off < 0);
	}

	uasm_i_jr(&buf, RA);
	uasm_i_nop(&buf);

	BUG_ON(buf > &__clear_page_end);

	uasm_resolve_relocs(relocs, labels);

	pr_debug("Synthesized clear page handler (%u instructions).\n",
		 (u32)(buf - &__clear_page_start));

	pr_debug("\t.set push\n");
	pr_debug("\t.set noreorder\n");
	for (i = 0; i < (buf - &__clear_page_start); i++)
		pr_debug("\t.word 0x%08x\n", (&__clear_page_start)[i]);
	pr_debug("\t.set pop\n");
}
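/*
 * Usage sketch (an assumption about the surrounding kernel, not taken
 * from this file): the builders run once during boot-time memory setup,
 * after which the clear_page/copy_page entry points execute the
 * generated code:
 *
 *	build_clear_page();
 *	build_copy_page();
 *	clear_page(addr);		// runs the synthesized loop
 *
 * Repeated calls are no-ops thanks to the run_once guards.
 */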

static void build_copy_load(u32 **buf, int reg, int off)
{
	if (cpu_has_64bit_gp_regs) {
		uasm_i_ld(buf, reg, off, A1);
	} else {
		uasm_i_lw(buf, reg, off, A1);
	}
}

static void build_copy_store(u32 **buf, int reg, int off)
{
	if (cpu_has_64bit_gp_regs) {
		uasm_i_sd(buf, reg, off, A0);
	} else {
		uasm_i_sw(buf, reg, off, A0);
	}
}

static inline void build_copy_load_pref(u32 **buf, int off)
{
	if (off & cache_line_mask())
		return;

	if (pref_bias_copy_load)
		_uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, A1);
}

static inline void build_copy_store_pref(u32 **buf, int off)
{
	if (off & cache_line_mask())
		return;

	if (pref_bias_copy_store) {
		_uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off,
			    A0);
	} else if (cache_line_size == (half_copy_loop_size << 1)) {
		if (cpu_has_cache_cdex_s) {
			uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
		} else if (cpu_has_cache_cdex_p) {
			if (IS_ENABLED(CONFIG_WAR_R4600_V1_HIT_CACHEOP) &&
			    cpu_is_r4600_v1_x()) {
				uasm_i_nop(buf);
				uasm_i_nop(buf);
				uasm_i_nop(buf);
				uasm_i_nop(buf);
			}

			if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) &&
			    cpu_is_r4600_v2_x())
				uasm_i_lw(buf, ZERO, ZERO, AT);

			uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
		}
	}
}

void build_copy_page(void)
{
	int off;
	u32 *buf = &__copy_page_start;
	struct uasm_label *l = labels;
	struct uasm_reloc *r = relocs;
	int i;
	static atomic_t run_once = ATOMIC_INIT(0);

	if (atomic_xchg(&run_once, 1)) {
		return;
	}

	memset(labels, 0, sizeof(labels));
	memset(relocs, 0, sizeof(relocs));

	set_prefetch_parameters();

	/*
	 * This algorithm makes the following assumptions:
	 *   - All prefetch biases are multiples of 8 words.
	 *   - The prefetch biases are less than one page.
	 *   - The store prefetch bias isn't greater than the load
	 *     prefetch bias.
	 */
	BUG_ON(pref_bias_copy_load % (8 * copy_word_size));
	BUG_ON(pref_bias_copy_store % (8 * copy_word_size));
	BUG_ON(PAGE_SIZE < pref_bias_copy_load);
	BUG_ON(pref_bias_copy_store > pref_bias_copy_load);

	off = PAGE_SIZE - pref_bias_copy_load;
	if (off > 0xffff || !pref_bias_copy_load)
		pg_addiu(&buf, A2, A0, off);
	else
		uasm_i_ori(&buf, A2, A0, off);

	if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && cpu_is_r4600_v2_x())
		uasm_i_lui(&buf, AT, uasm_rel_hi(0xa0000000));

	off = cache_line_size ? min(8, pref_bias_copy_load / cache_line_size) *
				cache_line_size : 0;
	while (off) {
		build_copy_load_pref(&buf, -off);
		off -= cache_line_size;
	}
	off = cache_line_size ? min(8, pref_bias_copy_store / cache_line_size) *
				cache_line_size : 0;
	while (off) {
		build_copy_store_pref(&buf, -off);
		off -= cache_line_size;
	}
	uasm_l_copy_pref_both(&l, buf);
	do {
		build_copy_load_pref(&buf, off);
		build_copy_load(&buf, T0, off);
		build_copy_load_pref(&buf, off + copy_word_size);
		build_copy_load(&buf, T1, off + copy_word_size);
		build_copy_load_pref(&buf, off + 2 * copy_word_size);
		build_copy_load(&buf, T2, off + 2 * copy_word_size);
		build_copy_load_pref(&buf, off + 3 * copy_word_size);
		build_copy_load(&buf, T3, off + 3 * copy_word_size);
		build_copy_store_pref(&buf, off);
		build_copy_store(&buf, T0, off);
		build_copy_store_pref(&buf, off + copy_word_size);
		build_copy_store(&buf, T1, off + copy_word_size);
		build_copy_store_pref(&buf, off + 2 * copy_word_size);
		build_copy_store(&buf, T2, off + 2 * copy_word_size);
		build_copy_store_pref(&buf, off + 3 * copy_word_size);
		build_copy_store(&buf, T3, off + 3 * copy_word_size);
		off += 4 * copy_word_size;
	} while (off < half_copy_loop_size);
	pg_addiu(&buf, A1, A1, 2 * off);
	pg_addiu(&buf, A0, A0, 2 * off);
	off = -off;
	do {
		build_copy_load_pref(&buf, off);
		build_copy_load(&buf, T0, off);
		build_copy_load_pref(&buf, off + copy_word_size);
		build_copy_load(&buf, T1, off + copy_word_size);
		build_copy_load_pref(&buf, off + 2 * copy_word_size);
		build_copy_load(&buf, T2, off + 2 * copy_word_size);
		build_copy_load_pref(&buf, off + 3 * copy_word_size);
		build_copy_load(&buf, T3, off + 3 * copy_word_size);
		build_copy_store_pref(&buf, off);
		build_copy_store(&buf, T0, off);
		build_copy_store_pref(&buf, off + copy_word_size);
		build_copy_store(&buf, T1, off + copy_word_size);
		build_copy_store_pref(&buf, off + 2 * copy_word_size);
		build_copy_store(&buf, T2, off + 2 * copy_word_size);
		build_copy_store_pref(&buf, off + 3 * copy_word_size);
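		/*
		 * As in build_clear_page: branch early so the last
		 * store executes in the delay slot.
		 */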
		if (off == -(4 * copy_word_size))
			uasm_il_bne(&buf, &r, A2, A0, label_copy_pref_both);
		build_copy_store(&buf, T3, off + 3 * copy_word_size);
		off += 4 * copy_word_size;
	} while (off < 0);

	if (pref_bias_copy_load - pref_bias_copy_store) {
		pg_addiu(&buf, A2, A0,
			 pref_bias_copy_load - pref_bias_copy_store);
		uasm_l_copy_pref_store(&l, buf);
		off = 0;
		do {
			build_copy_load(&buf, T0, off);
			build_copy_load(&buf, T1, off + copy_word_size);
			build_copy_load(&buf, T2, off + 2 * copy_word_size);
			build_copy_load(&buf, T3, off + 3 * copy_word_size);
			build_copy_store_pref(&buf, off);
			build_copy_store(&buf, T0, off);
			build_copy_store_pref(&buf, off + copy_word_size);
			build_copy_store(&buf, T1, off + copy_word_size);
			build_copy_store_pref(&buf, off + 2 * copy_word_size);
			build_copy_store(&buf, T2, off + 2 * copy_word_size);
			build_copy_store_pref(&buf, off + 3 * copy_word_size);
			build_copy_store(&buf, T3, off + 3 * copy_word_size);
			off += 4 * copy_word_size;
		} while (off < half_copy_loop_size);
		pg_addiu(&buf, A1, A1, 2 * off);
		pg_addiu(&buf, A0, A0, 2 * off);
		off = -off;
		do {
			build_copy_load(&buf, T0, off);
			build_copy_load(&buf, T1, off + copy_word_size);
			build_copy_load(&buf, T2, off + 2 * copy_word_size);
			build_copy_load(&buf, T3, off + 3 * copy_word_size);
			build_copy_store_pref(&buf, off);
			build_copy_store(&buf, T0, off);
			build_copy_store_pref(&buf, off + copy_word_size);
			build_copy_store(&buf, T1, off + copy_word_size);
			build_copy_store_pref(&buf, off + 2 * copy_word_size);
			build_copy_store(&buf, T2, off + 2 * copy_word_size);
			build_copy_store_pref(&buf, off + 3 * copy_word_size);
			if (off == -(4 * copy_word_size))
				uasm_il_bne(&buf, &r, A2, A0,
					    label_copy_pref_store);
			build_copy_store(&buf, T3, off + 3 * copy_word_size);
			off += 4 * copy_word_size;
		} while (off < 0);
	}

	if (pref_bias_copy_store) {
		pg_addiu(&buf, A2, A0, pref_bias_copy_store);
		uasm_l_copy_nopref(&l, buf);
		off = 0;
		do {
			build_copy_load(&buf, T0, off);
			build_copy_load(&buf, T1, off + copy_word_size);
			build_copy_load(&buf, T2, off + 2 * copy_word_size);
			build_copy_load(&buf, T3, off + 3 * copy_word_size);
			build_copy_store(&buf, T0, off);
			build_copy_store(&buf, T1, off + copy_word_size);
			build_copy_store(&buf, T2, off + 2 * copy_word_size);
			build_copy_store(&buf, T3, off + 3 * copy_word_size);
			off += 4 * copy_word_size;
		} while (off < half_copy_loop_size);
		pg_addiu(&buf, A1, A1, 2 * off);
		pg_addiu(&buf, A0, A0, 2 * off);
		off = -off;
		do {
			build_copy_load(&buf, T0, off);
			build_copy_load(&buf, T1, off + copy_word_size);
			build_copy_load(&buf, T2, off + 2 * copy_word_size);
			build_copy_load(&buf, T3, off + 3 * copy_word_size);
			build_copy_store(&buf, T0, off);
			build_copy_store(&buf, T1, off + copy_word_size);
			build_copy_store(&buf, T2, off + 2 * copy_word_size);
			if (off == -(4 * copy_word_size))
				uasm_il_bne(&buf, &r, A2, A0,
					    label_copy_nopref);
			build_copy_store(&buf, T3, off + 3 * copy_word_size);
			off += 4 * copy_word_size;
		} while (off < 0);
	}

	uasm_i_jr(&buf, RA);
	uasm_i_nop(&buf);

	BUG_ON(buf > &__copy_page_end);

	uasm_resolve_relocs(relocs, labels);

	pr_debug("Synthesized copy page handler (%u instructions).\n",
		 (u32)(buf - &__copy_page_start));

	pr_debug("\t.set push\n");
	pr_debug("\t.set noreorder\n");
	for (i = 0; i < (buf - &__copy_page_start); i++)
		pr_debug("\t.word 0x%08x\n", (&__copy_page_start)[i]);
	pr_debug("\t.set pop\n");
}

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
extern void clear_page_cpu(void *page);
extern void copy_page_cpu(void *to, void *from);

/*
 * Pad descriptors to cacheline, since each is exclusively owned by a
 * particular CPU.
 */
struct dmadscr {
	u64 dscr_a;
	u64 dscr_b;
	u64 pad_a;
	u64 pad_b;
} ____cacheline_aligned_in_smp page_descr[DM_NUM_CHANNELS];

void clear_page(void *page)
{
	u64 to_phys = CPHYSADDR((unsigned long)page);
	unsigned int cpu = smp_processor_id();

	/* If the page is not in KSEG0, fall back to the CPU version. */
	if ((long)KSEGX((unsigned long)page) != (long)CKSEG0)
		return clear_page_cpu(page);

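	/*
	 * Program this CPU's data mover descriptor: zero PAGE_SIZE bytes
	 * at the page's physical address, allocate into the L2, and set
	 * the completion flag; writing the descriptor count kicks off
	 * the engine.
	 */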
	page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM |
				 M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
	page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
	__raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

	/*
	 * Don't really want to do it this way, but there's no
	 * reliable way to delay completion detection.
	 */
	while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
		 & M_DM_DSCR_BASE_INTERRUPT))
		;
	__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}
EXPORT_SYMBOL(clear_page);

void copy_page(void *to, void *from)
{
	u64 from_phys = CPHYSADDR((unsigned long)from);
	u64 to_phys = CPHYSADDR((unsigned long)to);
	unsigned int cpu = smp_processor_id();

	/* If either page is not in KSEG0, fall back to the CPU version. */
	if ((long)KSEGX((unsigned long)to) != (long)CKSEG0
	    || (long)KSEGX((unsigned long)from) != (long)CKSEG0)
		return copy_page_cpu(to, from);

	page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST |
				 M_DM_DSCRA_INTERRUPT;
	page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
	__raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

	/*
	 * Don't really want to do it this way, but there's no
	 * reliable way to delay completion detection.
	 */
	while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
		 & M_DM_DSCR_BASE_INTERRUPT))
		;
	__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}
EXPORT_SYMBOL(copy_page);

#endif /* CONFIG_SIBYTE_DMA_PAGEOPS */