cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

pacache.S (32014B)


/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *  PARISC TLB and cache flushing support
 *  Copyright (C) 2000-2001 Hewlett-Packard (John Marvin)
 *  Copyright (C) 2001 Matthew Wilcox (willy at parisc-linux.org)
 *  Copyright (C) 2002 Richard Hirst (rhirst with parisc-linux.org)
 */

/*
 * NOTE: fdc, fic, and pdc instructions that use base register modification
 *       should only use index and base registers that are not shadowed,
 *       so that the fast path emulation in the non-access miss handler
 *       can be used.
 */
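/*
 * (For orientation: per the PA-RISC architecture the shadowed general
 * registers are %r1, %r8, %r9, %r16, %r17, %r24 and %r25.  The miss
 * handler fast path uses those as scratch, so a flush instruction that
 * may need emulation must keep its base and index out of that set.)
 */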

#ifdef CONFIG_64BIT
	.level	2.0w
#else
	.level	2.0
#endif

#include <asm/psw.h>
#include <asm/assembly.h>
#include <asm/cache.h>
#include <asm/ldcw.h>
#include <asm/alternative.h>
#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/pgtable.h>

	.section .text.hot
	.align	16

ENTRY_CFI(flush_tlb_all_local)
	/*
	 * The pitlbe and pdtlbe instructions should only be used to
	 * flush the entire tlb. Also, there needs to be no intervening
	 * tlb operations, e.g. tlb misses, so the operation needs
	 * to happen in real mode with all interruptions disabled.
	 */
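	/*
	 * In effect, the sequence below "returns" into real mode:
	 * clearing the PSW Q-bit makes the interruption address queues
	 * writable, %cr17/%cr18 are loaded with the physical address of
	 * 1f, %ipsw gets REAL_MODE_PSW, and rfi resumes there with
	 * translation off.
	 */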

	/* pcxt_ssm_bug	- relied upon translation! PA 2.0 Arch. F-4 and F-5 */
	rsm		PSW_SM_I, %r19		/* save I-bit state */
	load32		PA(1f), %r1
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		REAL_MODE_PSW, %r1
	mtctl		%r1, %ipsw
	rfi
	nop

1:	load32		PA(cache_info), %r1

	/* Flush Instruction Tlb */

88:	LDREG		ITLB_SID_BASE(%r1), %r20
	LDREG		ITLB_SID_STRIDE(%r1), %r21
	LDREG		ITLB_SID_COUNT(%r1), %r22
	LDREG		ITLB_OFF_BASE(%r1), %arg0
	LDREG		ITLB_OFF_STRIDE(%r1), %arg1
	LDREG		ITLB_OFF_COUNT(%r1), %arg2
	LDREG		ITLB_LOOP(%r1), %arg3

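	/*
	 * The flush is a three-level loop: the outer loop walks the TLB
	 * spaces (SID base/stride/count), the middle loop walks the page
	 * offsets within a space, and the inner loop issues LOOP purge
	 * instructions per step.  LOOP == 1 takes the simpler
	 * fitoneloop/fitonemiddle path below.
	 */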
	addib,COND(=)		-1, %arg3, fitoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fitdone	/* If loop < 0, skip */
	copy		%arg0, %r28		/* Init base addr */

fitmanyloop:					/* Loop if LOOP >= 2 */
	mtsp		%r20, %sr1
	add		%r21, %r20, %r20	/* increment space */
	copy		%arg2, %r29		/* Init middle loop count */

fitmanymiddle:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fitmanymiddle	/* Adjusted inner loop decr */
	pitlbe		%r0(%sr1, %r28)
	pitlbe,m	%arg1(%sr1, %r28)	/* Last pitlbe and addr adjust */
	addib,COND(>)		-1, %r29, fitmanymiddle	/* Middle loop decr */
	copy		%arg3, %r31		/* Re-init inner loop count */

	movb,tr		%arg0, %r28, fitmanyloop /* Re-init base addr */
	addib,COND(<=),n	-1, %r22, fitdone	/* Outer loop count decr */

fitoneloop:					/* Loop if LOOP = 1 */
	mtsp		%r20, %sr1
	copy		%arg0, %r28		/* init base addr */
	copy		%arg2, %r29		/* init middle loop count */

fitonemiddle:					/* Loop if LOOP = 1 */
	addib,COND(>)		-1, %r29, fitonemiddle	/* Middle loop count decr */
	pitlbe,m	%arg1(%sr1, %r28)	/* pitlbe for one loop */

	addib,COND(>)		-1, %r22, fitoneloop	/* Outer loop count decr */
	add		%r21, %r20, %r20	/* increment space */

fitdone:
	ALTERNATIVE(88b, fitdone, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
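	/*
	 * The ALTERNATIVE above is resolved at boot: on machines without
	 * a split I/D TLB the patcher overwrites everything from 88b up
	 * to fitdone with NOPs, so only the data-TLB flush below runs.
	 */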

	/* Flush Data Tlb */

	LDREG		DTLB_SID_BASE(%r1), %r20
	LDREG		DTLB_SID_STRIDE(%r1), %r21
	LDREG		DTLB_SID_COUNT(%r1), %r22
	LDREG		DTLB_OFF_BASE(%r1), %arg0
	LDREG		DTLB_OFF_STRIDE(%r1), %arg1
	LDREG		DTLB_OFF_COUNT(%r1), %arg2
	LDREG		DTLB_LOOP(%r1), %arg3

	addib,COND(=)		-1, %arg3, fdtoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fdtdone	/* If loop < 0, skip */
	copy		%arg0, %r28		/* Init base addr */

fdtmanyloop:					/* Loop if LOOP >= 2 */
	mtsp		%r20, %sr1
	add		%r21, %r20, %r20	/* increment space */
	copy		%arg2, %r29		/* Init middle loop count */

fdtmanymiddle:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fdtmanymiddle	/* Adjusted inner loop decr */
	pdtlbe		%r0(%sr1, %r28)
	pdtlbe,m	%arg1(%sr1, %r28)	/* Last pdtlbe and addr adjust */
	addib,COND(>)		-1, %r29, fdtmanymiddle	/* Middle loop decr */
	copy		%arg3, %r31		/* Re-init inner loop count */

	movb,tr		%arg0, %r28, fdtmanyloop /* Re-init base addr */
	addib,COND(<=),n	-1, %r22, fdtdone	/* Outer loop count decr */

fdtoneloop:					/* Loop if LOOP = 1 */
	mtsp		%r20, %sr1
	copy		%arg0, %r28		/* init base addr */
	copy		%arg2, %r29		/* init middle loop count */

fdtonemiddle:					/* Loop if LOOP = 1 */
	addib,COND(>)		-1, %r29, fdtonemiddle	/* Middle loop count decr */
	pdtlbe,m	%arg1(%sr1, %r28)	/* pdtlbe for one loop */

	addib,COND(>)		-1, %r22, fdtoneloop	/* Outer loop count decr */
	add		%r21, %r20, %r20	/* increment space */

fdtdone:
	/*
	 * Switch back to virtual mode
	 */
	/* pcxt_ssm_bug */
	rsm		PSW_SM_I, %r0
	load32		2f, %r1
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		KERNEL_PSW, %r1
	or		%r1, %r19, %r1		/* I-bit to state on entry */
	mtctl		%r1, %ipsw		/* restore I-bit (entire PSW) */
	rfi
	nop

2:	bv		%r0(%r2)
	nop

	/*
	 * When running under qemu, drop the whole flush_tlb_all_local
	 * function and replace it with a single pdtlbe instruction, for
	 * which QEMU will drop all local TLB entries.
	 */
3:	pdtlbe		%r0(%sr1,%r0)
	bv,n		%r0(%r2)
	ALTERNATIVE_CODE(flush_tlb_all_local, 2, ALT_COND_RUN_ON_QEMU, 3b)
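	/*
	 * ALTERNATIVE_CODE arranges for the boot-time patcher, when the
	 * kernel detects it is running under QEMU, to copy the two
	 * instructions at 3b over the entry of flush_tlb_all_local, so
	 * the real-mode sequence above is bypassed entirely.
	 */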
ENDPROC_CFI(flush_tlb_all_local)

	.import cache_info, data

ENTRY_CFI(flush_instruction_cache_local)
88:	load32		cache_info, %r1

	/* Flush Instruction Cache */

	LDREG		ICACHE_BASE(%r1), %arg0
	LDREG		ICACHE_STRIDE(%r1), %arg1
	LDREG		ICACHE_COUNT(%r1), %arg2
	LDREG		ICACHE_LOOP(%r1), %arg3
	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop */
	mtsp		%r0, %sr1
	addib,COND(=)		-1, %arg3, fioneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fisync	/* If loop < 0, do sync */

fimanyloop:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fimanyloop	/* Adjusted inner loop decr */
	fice		%r0(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)	/* Last fice and addr adjust */
	movb,tr		%arg3, %r31, fimanyloop	/* Re-init inner loop count */
	addib,COND(<=),n	-1, %arg2, fisync	/* Outer loop decr */

fioneloop:					/* Loop if LOOP = 1 */
	/* Some implementations may flush with a single fice instruction */
	cmpib,COND(>>=),n	15, %arg2, fioneloop2

fioneloop1:
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	fice,m		%arg1(%sr1, %arg0)
	addib,COND(>)	-16, %arg2, fioneloop1
	fice,m		%arg1(%sr1, %arg0)

	/* Check if done */
	cmpb,COND(=),n	%arg2, %r0, fisync	/* Predict branch taken */

fioneloop2:
	addib,COND(>)	-1, %arg2, fioneloop2	/* Outer loop count decr */
	fice,m		%arg1(%sr1, %arg0)	/* Fice for one loop */

fisync:
	sync
	mtsm		%r22			/* restore I-bit */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_instruction_cache_local)


	.import cache_info, data
ENTRY_CFI(flush_data_cache_local)
88:	load32		cache_info, %r1

	/* Flush Data Cache */

	LDREG		DCACHE_BASE(%r1), %arg0
	LDREG		DCACHE_STRIDE(%r1), %arg1
	LDREG		DCACHE_COUNT(%r1), %arg2
	LDREG		DCACHE_LOOP(%r1), %arg3
	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop */
	mtsp		%r0, %sr1
	addib,COND(=)		-1, %arg3, fdoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fdsync	/* If loop < 0, do sync */

fdmanyloop:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fdmanyloop	/* Adjusted inner loop decr */
	fdce		%r0(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)	/* Last fdce and addr adjust */
	movb,tr		%arg3, %r31, fdmanyloop	/* Re-init inner loop count */
	addib,COND(<=),n	-1, %arg2, fdsync	/* Outer loop decr */

fdoneloop:					/* Loop if LOOP = 1 */
	/* Some implementations may flush with a single fdce instruction */
	cmpib,COND(>>=),n	15, %arg2, fdoneloop2

fdoneloop1:
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	fdce,m		%arg1(%sr1, %arg0)
	addib,COND(>)	-16, %arg2, fdoneloop1
	fdce,m		%arg1(%sr1, %arg0)

	/* Check if done */
	cmpb,COND(=),n	%arg2, %r0, fdsync	/* Predict branch taken */

fdoneloop2:
	addib,COND(>)	-1, %arg2, fdoneloop2	/* Outer loop count decr */
	fdce,m		%arg1(%sr1, %arg0)	/* Fdce for one loop */

fdsync:
	sync
	mtsm		%r22			/* restore I-bit */
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_data_cache_local)

/* Clear page using kernel mapping.  */

ENTRY_CFI(clear_page_asm)
#ifdef CONFIG_64BIT

	/* Unroll the loop.  */
	ldi		(PAGE_SIZE / 128), %r1

1:
	std		%r0, 0(%r26)
	std		%r0, 8(%r26)
	std		%r0, 16(%r26)
	std		%r0, 24(%r26)
	std		%r0, 32(%r26)
	std		%r0, 40(%r26)
	std		%r0, 48(%r26)
	std		%r0, 56(%r26)
	std		%r0, 64(%r26)
	std		%r0, 72(%r26)
	std		%r0, 80(%r26)
	std		%r0, 88(%r26)
	std		%r0, 96(%r26)
	std		%r0, 104(%r26)
	std		%r0, 112(%r26)
	std		%r0, 120(%r26)

	/* Note reverse branch hint for addib is taken.  */
	addib,COND(>),n	-1, %r1, 1b
	ldo		128(%r26), %r26

#else

	/*
	 * Note that until (if) we start saving the full 64-bit register
	 * values on interrupt, we can't use std on a 32-bit kernel.
	 */
	ldi		(PAGE_SIZE / 64), %r1

1:
	stw		%r0, 0(%r26)
	stw		%r0, 4(%r26)
	stw		%r0, 8(%r26)
	stw		%r0, 12(%r26)
	stw		%r0, 16(%r26)
	stw		%r0, 20(%r26)
	stw		%r0, 24(%r26)
	stw		%r0, 28(%r26)
	stw		%r0, 32(%r26)
	stw		%r0, 36(%r26)
	stw		%r0, 40(%r26)
	stw		%r0, 44(%r26)
	stw		%r0, 48(%r26)
	stw		%r0, 52(%r26)
	stw		%r0, 56(%r26)
	stw		%r0, 60(%r26)

	addib,COND(>),n	-1, %r1, 1b
	ldo		64(%r26), %r26
#endif
	bv		%r0(%r2)
	nop
ENDPROC_CFI(clear_page_asm)

/* Copy page using kernel mapping.  */

ENTRY_CFI(copy_page_asm)
#ifdef CONFIG_64BIT
	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
	 * Unroll the loop by hand and arrange insn appropriately.
	 * Prefetch doesn't improve performance on rp3440.
	 * GCC probably can do this just as well...
	 */
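	/* The loop below is software-pipelined: each group issues the
	 * loads for a later store pair while storing data fetched by the
	 * previous group, which keeps the two-load/one-store issue slots
	 * of the PA8x00 busy. */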

	ldi		(PAGE_SIZE / 128), %r1

1:	ldd		0(%r25), %r19
	ldd		8(%r25), %r20

	ldd		16(%r25), %r21
	ldd		24(%r25), %r22
	std		%r19, 0(%r26)
	std		%r20, 8(%r26)

	ldd		32(%r25), %r19
	ldd		40(%r25), %r20
	std		%r21, 16(%r26)
	std		%r22, 24(%r26)

	ldd		48(%r25), %r21
	ldd		56(%r25), %r22
	std		%r19, 32(%r26)
	std		%r20, 40(%r26)

	ldd		64(%r25), %r19
	ldd		72(%r25), %r20
	std		%r21, 48(%r26)
	std		%r22, 56(%r26)

	ldd		80(%r25), %r21
	ldd		88(%r25), %r22
	std		%r19, 64(%r26)
	std		%r20, 72(%r26)

	ldd		 96(%r25), %r19
	ldd		104(%r25), %r20
	std		%r21, 80(%r26)
	std		%r22, 88(%r26)

	ldd		112(%r25), %r21
	ldd		120(%r25), %r22
	ldo		128(%r25), %r25
	std		%r19, 96(%r26)
	std		%r20, 104(%r26)

	std		%r21, 112(%r26)
	std		%r22, 120(%r26)

	/* Note reverse branch hint for addib is taken.  */
	addib,COND(>),n	-1, %r1, 1b
	ldo		128(%r26), %r26

#else

	/*
	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
	 * bundles (very restricted rules for bundling).
	 * Note that until (if) we start saving
	 * the full 64-bit register values on interrupt, we can't
	 * use ldd/std on a 32-bit kernel.
	 */
	ldw		0(%r25), %r19
	ldi		(PAGE_SIZE / 64), %r1

1:
	ldw		4(%r25), %r20
	ldw		8(%r25), %r21
	ldw		12(%r25), %r22
	stw		%r19, 0(%r26)
	stw		%r20, 4(%r26)
	stw		%r21, 8(%r26)
	stw		%r22, 12(%r26)
	ldw		16(%r25), %r19
	ldw		20(%r25), %r20
	ldw		24(%r25), %r21
	ldw		28(%r25), %r22
	stw		%r19, 16(%r26)
	stw		%r20, 20(%r26)
	stw		%r21, 24(%r26)
	stw		%r22, 28(%r26)
	ldw		32(%r25), %r19
	ldw		36(%r25), %r20
	ldw		40(%r25), %r21
	ldw		44(%r25), %r22
	stw		%r19, 32(%r26)
	stw		%r20, 36(%r26)
	stw		%r21, 40(%r26)
	stw		%r22, 44(%r26)
	ldw		48(%r25), %r19
	ldw		52(%r25), %r20
	ldw		56(%r25), %r21
	ldw		60(%r25), %r22
	stw		%r19, 48(%r26)
	stw		%r20, 52(%r26)
	ldo		64(%r25), %r25
	stw		%r21, 56(%r26)
	stw		%r22, 60(%r26)
	ldo		64(%r26), %r26
	addib,COND(>),n	-1, %r1, 1b
	ldw		0(%r25), %r19
#endif
	bv		%r0(%r2)
	nop
ENDPROC_CFI(copy_page_asm)

/*
 * NOTE: Code in clear_user_page has a hard-coded dependency on the
 *       maximum alias boundary being 4 MB. We've been assured by the
 *       parisc chip designers that there will not ever be a parisc
 *       chip with a larger alias boundary (Never say never :-) ).
 *
 *       Yah, what about the PA8800 and PA8900 processors?
 *
 *       Subtle: the dtlb miss handlers support the temp alias region by
 *       "knowing" that if a dtlb miss happens within the temp alias
 *       region it must have occurred while in clear_user_page. Since
 *       this routine makes use of processor-local translations, we
 *       don't want to insert them into the kernel page table. Instead,
 *       we load up some general registers (they need to be registers
 *       which aren't shadowed) with the physical page numbers (preshifted
 *       for tlb insertion) needed to insert the translations. When we
 *       miss on the translation, the dtlb miss handler inserts the
 *       translation into the tlb using these values:
 *
 *          %r26 physical address of "to" translation
 *          %r23 physical address of "from" translation
 */
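/*
 *       Put differently: the alias virtual address is TMPALIAS_MAP_START
 *       with the relevant bits of the user virtual address deposited in,
 *       so it lands on the same cache colour as the user mapping, and
 *       the dtlb miss fast path builds the translation on demand from
 *       the preloaded physical page numbers in %r26/%r23.
 */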

	/*
	 * copy_user_page_asm() performs a page copy using mappings
	 * equivalent to the user page mappings.  It can be used to
	 * implement copy_user_page() but unfortunately both the `from'
	 * and `to' pages need to be flushed through mappings equivalent
	 * to the user mappings after the copy because the kernel accesses
	 * the `from' page through the kmap kernel mapping and the `to'
	 * page needs to be flushed since code can be copied.  As a
	 * result, this implementation is less efficient than the simpler
	 * copy using the kernel mapping.  It only needs the `from' page
	 * to be flushed via the user mapping.  The kunmap routines handle
	 * the flushes needed for the kernel mapping.
	 *
	 * I'm still keeping this around because it may be possible to
	 * use it if more information is passed into copy_user_page().
	 * Have to do some measurements to see if it is worthwhile to
	 * lobby for such a change.
	 */

ENTRY_CFI(copy_user_page_asm)
	/* Convert virtual `to' and `from' addresses to physical addresses.
	   Move `from' physical address to a non-shadowed register.  */
	ldil		L%(__PAGE_OFFSET), %r1
	sub		%r26, %r1, %r26
	sub		%r25, %r1, %r23

	ldil		L%(TMPALIAS_MAP_START), %r28
	dep_safe	%r24, 31,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depi_safe	0, 31,PAGE_SHIFT, %r28			/* Clear any offset bits */
	copy		%r28, %r29
	depi_safe	1, 31-TMPALIAS_SIZE_BITS,1, %r29	/* Form aliased virtual address 'from' */

	/* Purge any old translations */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
	pdtlb,l		%r0(%r29)
#else
0:	pdtlb		%r0(%r28)
1:	pdtlb		%r0(%r29)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif
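	/*
	 * Pattern note: PA 2.0 always has the local purge form (pdtlb,l).
	 * On older CPUs the plain pdtlb broadcasts on SMP; the
	 * ALTERNATIVE entries let the boot-time patcher flip each purge
	 * to its local form (INSN_PxTLB) when only one CPU is present.
	 */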

#ifdef CONFIG_64BIT
	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
	 * Unroll the loop by hand and arrange insn appropriately.
	 * GCC probably can do this just as well.
	 */

	ldd		0(%r29), %r19
	ldi		(PAGE_SIZE / 128), %r1

1:	ldd		8(%r29), %r20

	ldd		16(%r29), %r21
	ldd		24(%r29), %r22
	std		%r19, 0(%r28)
	std		%r20, 8(%r28)

	ldd		32(%r29), %r19
	ldd		40(%r29), %r20
	std		%r21, 16(%r28)
	std		%r22, 24(%r28)

	ldd		48(%r29), %r21
	ldd		56(%r29), %r22
	std		%r19, 32(%r28)
	std		%r20, 40(%r28)

	ldd		64(%r29), %r19
	ldd		72(%r29), %r20
	std		%r21, 48(%r28)
	std		%r22, 56(%r28)

	ldd		80(%r29), %r21
	ldd		88(%r29), %r22
	std		%r19, 64(%r28)
	std		%r20, 72(%r28)

	ldd		 96(%r29), %r19
	ldd		104(%r29), %r20
	std		%r21, 80(%r28)
	std		%r22, 88(%r28)

	ldd		112(%r29), %r21
	ldd		120(%r29), %r22
	std		%r19, 96(%r28)
	std		%r20, 104(%r28)

	ldo		128(%r29), %r29
	std		%r21, 112(%r28)
	std		%r22, 120(%r28)
	ldo		128(%r28), %r28

	/* conditional branches nullify on forward taken branch, and on
	 * non-taken backward branch. Note that .+4 is a backwards branch.
	 * The ldd should only get executed if the branch is taken.
	 */
	addib,COND(>),n	-1, %r1, 1b		/* bundle 10 */
	ldd		0(%r29), %r19		/* start next loads */

#else
	ldi		(PAGE_SIZE / 64), %r1

	/*
	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
	 * bundles (very restricted rules for bundling). It probably
	 * does OK on PCXU and better, but we could do better with
	 * ldd/std instructions. Note that until (if) we start saving
	 * the full 64-bit register values on interrupt, we can't
	 * use ldd/std on a 32-bit kernel.
	 */

1:	ldw		0(%r29), %r19
	ldw		4(%r29), %r20
	ldw		8(%r29), %r21
	ldw		12(%r29), %r22
	stw		%r19, 0(%r28)
	stw		%r20, 4(%r28)
	stw		%r21, 8(%r28)
	stw		%r22, 12(%r28)
	ldw		16(%r29), %r19
	ldw		20(%r29), %r20
	ldw		24(%r29), %r21
	ldw		28(%r29), %r22
	stw		%r19, 16(%r28)
	stw		%r20, 20(%r28)
	stw		%r21, 24(%r28)
	stw		%r22, 28(%r28)
	ldw		32(%r29), %r19
	ldw		36(%r29), %r20
	ldw		40(%r29), %r21
	ldw		44(%r29), %r22
	stw		%r19, 32(%r28)
	stw		%r20, 36(%r28)
	stw		%r21, 40(%r28)
	stw		%r22, 44(%r28)
	ldw		48(%r29), %r19
	ldw		52(%r29), %r20
	ldw		56(%r29), %r21
	ldw		60(%r29), %r22
	stw		%r19, 48(%r28)
	stw		%r20, 52(%r28)
	stw		%r21, 56(%r28)
	stw		%r22, 60(%r28)
	ldo		64(%r28), %r28

	addib,COND(>)		-1, %r1, 1b
	ldo		64(%r29), %r29
#endif

	bv		%r0(%r2)
	nop
ENDPROC_CFI(copy_user_page_asm)

ENTRY_CFI(clear_user_page_asm)
	tophys_r1	%r26

	ldil		L%(TMPALIAS_MAP_START), %r28
	dep_safe	%r25, 31,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depi_safe	0, 31,PAGE_SHIFT, %r28			/* Clear any offset bits */

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

#ifdef CONFIG_64BIT
	ldi		(PAGE_SIZE / 128), %r1

	/* PREFETCH (Write) has not (yet) been proven to help here */
	/* #define	PREFETCHW_OP	ldd		256(%0), %r0 */

1:	std		%r0, 0(%r28)
	std		%r0, 8(%r28)
	std		%r0, 16(%r28)
	std		%r0, 24(%r28)
	std		%r0, 32(%r28)
	std		%r0, 40(%r28)
	std		%r0, 48(%r28)
	std		%r0, 56(%r28)
	std		%r0, 64(%r28)
	std		%r0, 72(%r28)
	std		%r0, 80(%r28)
	std		%r0, 88(%r28)
	std		%r0, 96(%r28)
	std		%r0, 104(%r28)
	std		%r0, 112(%r28)
	std		%r0, 120(%r28)
	addib,COND(>)		-1, %r1, 1b
	ldo		128(%r28), %r28

#else	/* ! CONFIG_64BIT */
	ldi		(PAGE_SIZE / 64), %r1

1:	stw		%r0, 0(%r28)
	stw		%r0, 4(%r28)
	stw		%r0, 8(%r28)
	stw		%r0, 12(%r28)
	stw		%r0, 16(%r28)
	stw		%r0, 20(%r28)
	stw		%r0, 24(%r28)
	stw		%r0, 28(%r28)
	stw		%r0, 32(%r28)
	stw		%r0, 36(%r28)
	stw		%r0, 40(%r28)
	stw		%r0, 44(%r28)
	stw		%r0, 48(%r28)
	stw		%r0, 52(%r28)
	stw		%r0, 56(%r28)
	stw		%r0, 60(%r28)
	addib,COND(>)		-1, %r1, 1b
	ldo		64(%r28), %r28
#endif	/* CONFIG_64BIT */

	bv		%r0(%r2)
	nop
ENDPROC_CFI(clear_user_page_asm)

ENTRY_CFI(flush_dcache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
	dep_safe	%r25, 31,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depi_safe	0, 31,PAGE_SHIFT, %r28			/* Clear any offset bits */

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r31

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25

1:	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	fdc,m		%r31(%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_dcache_page_asm)

ENTRY_CFI(purge_dcache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
	dep_safe	%r25, 31,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depi_safe	0, 31,PAGE_SHIFT, %r28			/* Clear any offset bits */

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
#else
0:	pdtlb		%r0(%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif

88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r31

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25

1:	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	pdc,m		%r31(%r28)
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	pdc,m		%r31(%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_dcache_page_asm)

ENTRY_CFI(flush_icache_page_asm)
	ldil		L%(TMPALIAS_MAP_START), %r28
	dep_safe	%r25, 31,TMPALIAS_SIZE_BITS, %r28	/* Form aliased virtual address 'to' */
	depi_safe	0, 31,PAGE_SHIFT, %r28			/* Clear any offset bits */

	/* Purge any old translation.  Note that the FIC instruction
	 * may use either the instruction or data TLB.  Given that we
	 * have a flat address space, it's not clear which TLB will be
	 * used.  So, we purge both entries.  */
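	/* (The ALT_COND_NO_SPLIT_TLB entries below similarly let the
	 * boot-time patcher NOP out the pitlb on machines with a
	 * unified TLB, where the pdtlb alone suffices.) */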

#ifdef CONFIG_PA20
	pdtlb,l		%r0(%r28)
1:	pitlb,l		%r0(%sr4,%r28)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
#else
0:	pdtlb		%r0(%r28)
1:	pitlb		%r0(%sr4,%r28)
	ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
	ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
#endif

88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r31

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25

	/* fic only has the type 26 form on PA1.1, requiring an
	 * explicit space specification, so use %sr4 */
1:	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	cmpb,COND(>>)	%r25, %r28, 1b /* predict taken */
	fic,m		%r31(%sr4,%r28)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_icache_page_asm)

ENTRY_CFI(flush_kernel_dcache_page_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25

1:	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	fdc,m		%r23(%r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_dcache_page_asm)

ENTRY_CFI(purge_kernel_dcache_page_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25

1:	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	pdc,m		%r23(%r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_kernel_dcache_page_asm)

ENTRY_CFI(flush_user_dcache_range_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
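	/*
	 * At this point %r26 has been aligned down to a cache line and
	 * %r21 holds 16 * stride, the span of one unrolled iteration.
	 * %r22 tracks the address one iteration ahead: the unrolled loop
	 * runs while that stays within the range, and the tail loop at
	 * 2: flushes any remaining lines one at a time.
	 */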
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	fdc,m		%r23(%sr3, %r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fdc,m		%r23(%sr3, %r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b
	fdc,m		%r23(%sr3, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_user_dcache_range_asm)

ENTRY_CFI(flush_kernel_dcache_range_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fdc,m		%r23(%r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	fdc,m		%r23(%r26)

	sync
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_dcache_range_asm)

ENTRY_CFI(purge_kernel_dcache_range_asm)
88:	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	pdc,m		%r23(%r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	pdc,m		%r23(%r26)

	sync
89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP)
	bv		%r0(%r2)
	nop
ENDPROC_CFI(purge_kernel_dcache_range_asm)

ENTRY_CFI(flush_user_icache_range_asm)
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	fic,m		%r23(%sr3, %r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fic,m		%r23(%sr3, %r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b
	fic,m		%r23(%sr3, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_user_icache_range_asm)

ENTRY_CFI(flush_kernel_icache_page)
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23

#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25

1:	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	cmpb,COND(>>)	%r25, %r26, 1b /* predict taken */
	fic,m		%r23(%sr4, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_icache_page)

ENTRY_CFI(flush_kernel_icache_range_asm)
88:	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23
	ldo		-1(%r23), %r21
	ANDCM		%r26, %r21, %r26

#ifdef CONFIG_64BIT
	depd,z		%r23, 59, 60, %r21
#else
	depw,z		%r23, 27, 28, %r21
#endif
	add		%r26, %r21, %r22
	cmpb,COND(>>),n	%r22, %r25, 2f /* predict not taken */
1:	add		%r22, %r21, %r22
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	cmpb,COND(<<=)	%r22, %r25, 1b /* predict taken */
	fic,m		%r23(%sr4, %r26)

2:	cmpb,COND(>>),n	%r25, %r26, 2b /* predict taken */
	fic,m		%r23(%sr4, %r26)

89:	ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP)
	sync
	bv		%r0(%r2)
	nop
ENDPROC_CFI(flush_kernel_icache_range_asm)

	.text

	/* align should cover use of rfi in disable_sr_hashing_asm and
	 * srdis_done.
	 */
	.align	256
ENTRY_CFI(disable_sr_hashing_asm)
	/*
	 * Switch to real mode
	 */
	/* pcxt_ssm_bug */
	rsm		PSW_SM_I, %r0
	load32		PA(1f), %r1
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		REAL_MODE_PSW, %r1
	mtctl		%r1, %ipsw
	rfi
	nop

1:	cmpib,=,n	SRHASH_PCXST, %r26, srdis_pcxs
	cmpib,=,n	SRHASH_PCXL, %r26, srdis_pcxl
	cmpib,=,n	SRHASH_PA20, %r26, srdis_pa20
	b,n		srdis_done

srdis_pcxs:

	/* Disable Space Register Hashing for PCXS,PCXT,PCXT' */

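	/* The raw .word values in the stubs below are hand-assembled
	 * mfdiag/mtdiag encodings; these diagnose instructions are
	 * implementation specific and have no assembler mnemonics. */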
	.word		0x141c1a00		/* mfdiag %dr0, %r28 */
	.word		0x141c1a00		/* must issue twice */
	depwi		0,18,1, %r28		/* Clear DHE (dcache hash enable) */
	depwi		0,20,1, %r28		/* Clear IHE (icache hash enable) */
	.word		0x141c1600		/* mtdiag %r28, %dr0 */
	.word		0x141c1600		/* must issue twice */
	b,n		srdis_done

srdis_pcxl:

	/* Disable Space Register Hashing for PCXL */

	.word		0x141c0600		/* mfdiag %dr0, %r28 */
	depwi		0,28,2, %r28		/* Clear DHASH_EN & IHASH_EN */
	.word		0x141c0240		/* mtdiag %r28, %dr0 */
	b,n		srdis_done

srdis_pa20:

	/* Disable Space Register Hashing for PCXU,PCXU+,PCXW,PCXW+,PCXW2 */

	.word		0x144008bc		/* mfdiag %dr2, %r28 */
	depdi		0, 54,1, %r28		/* clear DIAG_SPHASH_ENAB (bit 54) */
	.word		0x145c1840		/* mtdiag %r28, %dr2 */

srdis_done:
	/* Switch back to virtual mode */
	rsm		PSW_SM_I, %r0		/* prep to load iia queue */
	load32		2f, %r1
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		KERNEL_PSW, %r1
	mtctl		%r1, %ipsw
	rfi
	nop

2:	bv		%r0(%r2)
	nop
ENDPROC_CFI(disable_sr_hashing_asm)

	.end