cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

g98.fuc0s (12988B)


      1/*
      2 *  fuc microcode for g98 sec engine
      3 *  Copyright (C) 2010  Marcin Kościelnicki
      4 *
      5 *  This program is free software; you can redistribute it and/or modify
      6 *  it under the terms of the GNU General Public License as published by
      7 *  the Free Software Foundation; either version 2 of the License, or
      8 *  (at your option) any later version.
      9 *
     10 *  This program is distributed in the hope that it will be useful,
     11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13 *  GNU General Public License for more details.
     14 *
     15 *  You should have received a copy of the GNU General Public License
     16 *  along with this program; if not, write to the Free Software
     17 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
     18 */
     19
     20.section #g98_sec_data	// data segment: channel context, DMA scratch buffer and dispatch tables
     21
     22ctx_dma:	// base of the DMA object array: filled by the DMA_* methods, replayed to the I/O space by the context-load loop in ih
     23ctx_dma_query:		.b32 0	// index 0; cmd_query_get selects it as "target 0, DMA_QUERY"
     24ctx_dma_src:		.b32 0	// index 1; presumably the source object - TODO confirm against the $xtargets value used in sec_cmd_length
     25ctx_dma_dst:		.b32 0	// index 2; presumably the destination object - TODO confirm
     26.equ #dma_count 3	// number of entries in the ctx_dma array
     27ctx_query_address_high:	.b32 0	// set by method (only bits 0-7 accepted); read by cmd_query_get
     28ctx_query_address_low:	.b32 0	// set by method (low 4 bits must be 0); read by cmd_query_get
     29ctx_query_counter:	.b32 0	// set by method; copied into word 0 of the query record
     30ctx_cond_address_high:	.b32 0	// set by method (only bits 0-7 accepted); read by cmd_cond_mode
     31ctx_cond_address_low:	.b32 0	// set by method (low 4 bits must be 0); read by cmd_cond_mode
     32ctx_cond_off:		.b32 0	// written by cmd_cond_mode; while bit 0 is set, dtable handlers of type 1 are skipped
     33ctx_src_address_high:	.b32 0	// set by method; rewritten by sec_cmd_length ("update src address")
     34ctx_src_address_low:	.b32 0	// set by method (low 4 bits must be 0); rewritten by sec_cmd_length
     35ctx_dst_address_high:	.b32 0	// set by method; rewritten by sec_cmd_length ("update dst address")
     36ctx_dst_address_low:	.b32 0	// set by method (low 4 bits must be 0); rewritten by sec_cmd_length
     37ctx_mode:		.b32 0	// index into sec_dtable; only stored by sec_cmd_mode when < 0xf
     38.align 16
     39ctx_key:		.skip 16	// 16-byte key, written one word at a time through engine_cmd_dtable
     40ctx_iv:			.skip 16	// 16-byte IV; refreshed by the "pull updated IV" step at the end of sec_cmd_length
     41
     42.align 0x80
     43swap:	// 32-byte scratch buffer: local end of the DMA transfers (query records, data blocks)
     44.skip 32
     45
     46.align 8
     47common_cmd_dtable:	// methods 0x200-0x21c (index 0x80-0x87). Entry = two words: (type << 16 | data/code address), then the mask of data bits that must be clear
     48.b32 #ctx_query_address_high + 0x20000 ~0xff	// 0x200: type 2 = store the method data into the context variable
     49.b32 #ctx_query_address_low + 0x20000 ~0xfffffff0	// 0x204
     50.b32 #ctx_query_counter + 0x20000 ~0xffffffff	// 0x208
     51.b32 #cmd_query_get + 0x00000 ~1	// 0x20c: type 0 = always call the handler; only data bit 0 allowed
     52.b32 #ctx_cond_address_high + 0x20000 ~0xff	// 0x210
     53.b32 #ctx_cond_address_low + 0x20000 ~0xfffffff0	// 0x214
     54.b32 #cmd_cond_mode + 0x00000 ~7	// 0x218: only data bits 0-2 allowed
     55.b32 #cmd_wrcache_flush + 0x00000 ~0	// 0x21c: any non-zero data is rejected as an invalid bitfield
     56.equ #common_cmd_max 0x88	// one past the last method index served by this table (0x80 + 8 entries)
     57
     58
     59.align 8
     60engine_cmd_dtable:	// methods 0x300-0x334 (index 0xc0-0xcd). Entry = two words: (type << 16 | data/code address), then the mask of data bits that must be clear
     61.b32 #ctx_key + 0x0 + 0x20000 ~0xffffffff	// 0x300: type 2 = store the method data; key word 0
     62.b32 #ctx_key + 0x4 + 0x20000 ~0xffffffff	// 0x304: key word 1
     63.b32 #ctx_key + 0x8 + 0x20000 ~0xffffffff	// 0x308: key word 2
     64.b32 #ctx_key + 0xc + 0x20000 ~0xffffffff	// 0x30c: key word 3
     65.b32 #ctx_iv + 0x0 + 0x20000 ~0xffffffff	// 0x310: IV word 0
     66.b32 #ctx_iv + 0x4 + 0x20000 ~0xffffffff	// 0x314: IV word 1
     67.b32 #ctx_iv + 0x8 + 0x20000 ~0xffffffff	// 0x318: IV word 2
     68.b32 #ctx_iv + 0xc + 0x20000 ~0xffffffff	// 0x31c: IV word 3
     69.b32 #ctx_src_address_high + 0x20000 ~0xff	// 0x320
     70.b32 #ctx_src_address_low + 0x20000 ~0xfffffff0	// 0x324: low 4 bits must be 0
     71.b32 #ctx_dst_address_high + 0x20000 ~0xff	// 0x328
     72.b32 #ctx_dst_address_low + 0x20000 ~0xfffffff0	// 0x32c: low 4 bits must be 0
     73.b32 #sec_cmd_mode + 0x00000 ~0xf	// 0x330: type 0 = always call the handler
     74.b32 #sec_cmd_length + 0x10000 ~0x0ffffff0	// 0x334: type 1 = call unless bit 0 of ctx_cond_off is set; length must be a multiple of 16
     75.equ #engine_cmd_max 0xce	// one past the last method index served by this table (0xc0 + 14 entries)
     76
     77.align 4
     78sec_dtable:	// indexed by ctx_mode (4 bytes per entry): 16-bit address of the prep routine, then of the transfer loop; sec_cmd_length calls both in that order
     79.b16 #sec_copy_prep #sec_do_inout	// mode 0x0
     80.b16 #sec_store_prep #sec_do_out	// mode 0x1
     81.b16 #sec_ecb_e_prep #sec_do_inout	// mode 0x2
     82.b16 #sec_ecb_d_prep #sec_do_inout	// mode 0x3
     83.b16 #sec_cbc_e_prep #sec_do_inout	// mode 0x4
     84.b16 #sec_cbc_d_prep #sec_do_inout	// mode 0x5
     85.b16 #sec_pcbc_e_prep #sec_do_inout	// mode 0x6
     86.b16 #sec_pcbc_d_prep #sec_do_inout	// mode 0x7
     87.b16 #sec_cfb_e_prep #sec_do_inout	// mode 0x8
     88.b16 #sec_cfb_d_prep #sec_do_inout	// mode 0x9
     89.b16 #sec_ofb_prep #sec_do_inout	// mode 0xa
     90.b16 #sec_ctr_prep #sec_do_inout	// mode 0xb
     91.b16 #sec_cbc_mac_prep #sec_do_in	// mode 0xc
     92.b16 #sec_cmac_finish_complete_prep #sec_do_in	// mode 0xd
     93.b16 #sec_cmac_finish_partial_prep #sec_do_in	// mode 0xe - last valid mode; sec_cmd_mode rejects >= 0xf
     94
     95.align 0x100
     96
     97.section #g98_sec_code	// code segment; execution falls straight into the init sequence below
     98
     99	// $r0 is always set to 0 in our code - this allows some space savings.
    100	clear b32 $r0
    101
    102	// set up the interrupt handler
    103	mov $r1 #ih
    104	mov $iv0 $r1	// vector 0 -> ih
    105
    106	// init stack pointer
    107	mov $sp $r0	// $sp = 0
    108
    109	// set interrupt dispatch - route timer, fifo, ctxswitch to i0, others to host
    110	movw $r1 0xfff0	// low half of the routing mask
    111	sethi $r1 0	// high half = 0, so $r1 = 0x0000fff0
    112	mov $r2 0x400
    113	iowr I[$r2 + 0x300] $r1	// I[0x700] = routing mask
    114
    115	// enable the interrupts
    116	or $r1 0xc	// add bits 2 and 3: the fifo (0x4) and ctxswitch (0x8) bits tested in ih
    117	iowr I[$r2] $r1	// I[0x400] = 0xfffc
    118
    119	// enable fifo access and context switching
    120	mov $r1 3
    121	mov $r2 0x1200
    122	iowr I[$r2] $r1
    123
    124	// enable i0 delivery
    125	bset $flags ie0
    126
    127	// sleep forever, waking only for interrupts.
    128	bset $flags $p0	// $p0 is never cleared in this file, so every wake-up goes back to sleep
    129	spin:
    130	sleep $p0
    131	bra #spin	// all real work happens in ih
    132
    133// i0 handler
    134ih:	// services context-switch requests (bit 3) and incoming FIFO methods (bit 2), then acks both
    135	// see which interrupts we got
    136	iord $r1 I[$r0 + 0x200]	// $r1 stays live until the ack at nocmd
    137
    138	and $r2 $r1 0x8	// bit 3: context switch request
    139	cmpu b32 $r2 0
    140	bra e #noctx
    141
    142		// context switch... prepare the regs for xfer
    143		mov $r2 0x7700
    144		mov $xtargets $r2	// NOTE(review): meaning of the 0x7700 target selection is not visible here
    145		mov $xdbase $r0	// external base = 0
    146		// 128-byte context.
    147		mov $r2 0
    148		sethi $r2 0x50000	// $r2 = xfer descriptor: local data offset 0, size code 5 in the upper half
    149
    150		// read current channel
    151		mov $r3 0x1400	// +0x000 current channel, +0x100 next channel, +0x200 PFIFO acknowledge (see uses below)
    152		iord $r4 I[$r3]
    153		// if bit 30 set, it's active, so we have to unload it first.
    154		shl b32 $r5 $r4 1	// move bit 30 into the sign position
    155		cmps b32 $r5 0
    156		bra nc #ctxload	// taken when bit 30 is clear, i.e. nothing to unload
    157
    158			// unload the current channel - save the context
    159			xdst $r0 $r2	// store the 128-byte context at external offset 0
    160			xdwait
    161			// and clear bit 30, then write back
    162			bclr $r4 0x1e
    163			iowr I[$r3] $r4
    164			// tell PFIFO we unloaded
    165			mov $r4 1
    166			iowr I[$r3 + 0x200] $r4	// acknowledge code 1 = unload done
    167
    168		bra #noctx	// a load, if needed, is handled on a later interrupt
    169
    170		ctxload:
    171			// no channel loaded - perhaps we're requested to load one
    172			iord $r4 I[$r3 + 0x100]	// $r4 = next channel
    173			shl b32 $r15 $r4 1	// same bit-30 test as above
    174			cmps b32 $r15 0
    175			// if bit 30 of next channel not set, probably PFIFO is just
    176			// killing a context. do a faux load, without the active bit.
    177			bra nc #dummyload
    178
    179				// ok, do a real context load.
    180				xdld $r0 $r2	// fetch the 128-byte context from external offset 0
    181				xdwait
    182				mov $r5 #ctx_dma
    183				mov $r6 #dma_count - 1	// walk the DMA slots from the last one down to 0
    184				ctxload_dma_loop:
    185					ld b32 $r7 D[$r5 + $r6 * 4]	// $r7 = saved DMA object for slot $r6
    186					add b32 $r8 $r6 0x180
    187					shl b32 $r8 8	// I/O address (0x180 + slot) << 8, same registers the DMA_* methods write
    188					iowr I[$r8] $r7
    189					sub b32 $r6 1
    190				bra nc #ctxload_dma_loop	// loop until the decrement goes below slot 0
    191
    192			dummyload:
    193			// tell PFIFO we're done
    194			mov $r5 2
    195			iowr I[$r3 + 0x200] $r5	// acknowledge code 2 = load done
    196
    197	noctx:
    198	and $r2 $r1 0x4	// bit 2: a FIFO method is waiting
    199	cmpu b32 $r2 0
    200	bra e #nocmd
    201
    202		// incoming fifo command.
    203		mov $r3 0x1900
    204		iord $r2 I[$r3 + 0x100]	// $r2 = method address word (I[0x1a00])
    205		iord $r3 I[$r3]	// $r3 = method data (I[0x1900])
    206		// extract the method
    207		and $r4 $r2 0x7ff	// $r4 = method index = method byte address / 4
    208		// shift the addr to proper position if we need to interrupt later
    209		shl b32 $r2 0x10	// low 16 bits are now free for the error code OR-ed in by the handlers
    210
    211		// mthd 0 and 0x100 [NAME, NOP]: ignore
    212		and $r5 $r4 0x7bf	// masks out bit 6, so indices 0 and 0x40 both give 0
    213		cmpu b32 $r5 0
    214		bra e #cmddone
    215
    216		mov $r5 #engine_cmd_dtable - 0xc0 * 8	// table base pre-biased so that base + index * 8 hits the entry
    217		mov $r6 #engine_cmd_max
    218		cmpu b32 $r4 0xc0
    219		bra nc #dtable_cmd	// index >= 0xc0: engine method table
    220		mov $r5 #common_cmd_dtable - 0x80 * 8
    221		mov $r6 #common_cmd_max
    222		cmpu b32 $r4 0x80
    223		bra nc #dtable_cmd	// 0x80 <= index < 0xc0: common method table
    224		cmpu b32 $r4 0x60
    225		bra nc #dma_cmd	// 0x60 <= index < 0x80: DMA object methods
    226		cmpu b32 $r4 0x50
    227		bra ne #illegal_mthd	// below 0x60 only index 0x50 is implemented
    228
    229			// mthd 0x140: PM_TRIGGER
    230			mov $r2 0x2200
    231			clear b32 $r3
    232			sethi $r3 0x20000	// $r3 = 0x00020000
    233			iowr I[$r2] $r3
    234			bra #cmddone
    235
    236		dma_cmd:
    237			// mthd 0x180...: DMA_*
    238			cmpu b32 $r4 0x60+#dma_count
    239			bra nc #illegal_mthd	// only dma_count slots exist
    240			shl b32 $r5 $r4 2
    241			add b32 $r5 ((#ctx_dma - 0x60 * 4) & 0xffff)	// $r5 = &ctx_dma[index - 0x60]
    242			bset $r3 0x1e	// bit 30 is forced on in the stored object
    243			st b32 D[$r5] $r3	// remember it in the context for later reloads
    244			add b32 $r4 0x180 - 0x60
    245			shl b32 $r4 8	// I/O address (0x180 + slot) << 8
    246			iowr I[$r4] $r3	// and apply it right away
    247			bra #cmddone
    248
    249		dtable_cmd:
    250			cmpu b32 $r4 $r6
    251			bra nc #illegal_mthd	// index past the end of the selected table
    252			shl b32 $r4 3	// 8 bytes per table entry
    253			add b32 $r4 $r5	// $r4 = address of the entry
    254			ld b32 $r5 D[$r4 + 4]	// second word: bits that must be clear in the data
    255			and $r5 $r3
    256			cmpu b32 $r5 0
    257			bra ne #invalid_bitfield
    258			ld b16 $r5 D[$r4]	// low half: context variable or handler address
    259			ld b16 $r6 D[$r4 + 2]	// high half: entry type (0, 1 or 2)
    260			cmpu b32 $r6 2
    261			bra e #cmd_setctx	// type 2: plain store into the context
    262			ld b32 $r7 D[$r0 + #ctx_cond_off]
    263			and $r6 $r7
    264			cmpu b32 $r6 1
    265			bra e #cmddone	// type 1 handler while ctx_cond_off bit 0 is set: silently skipped
    266			call $r5	// handler sees $r2 = addr << 16, $r3 = data; it reports an error/interrupt request via $p1
    267			bra $p1 #dispatch_error
    268			bra #cmddone
    269
    270		cmd_setctx:
    271			st b32 D[$r5] $r3	// context variable = method data
    272			bra #cmddone
    273
    274
    275		invalid_bitfield:
    276			or $r2 1	// code 1 in the low bits of the report word
    277		dispatch_error:
    278		illegal_mthd:
    279			mov $r4 0x1000
    280			iowr I[$r4] $r2	// I[0x1000] = (method addr << 16) | code
    281			iowr I[$r4 + 0x100] $r3	// I[0x1100] = method data
    282			mov $r4 0x40
    283			iowr I[$r0] $r4	// write bit 6 to I[0] - presumably raises the interrupt towards the host, TODO confirm
    284
    285			im_loop:
    286				iord $r4 I[$r0 + 0x200]
    287				and $r4 0x40
    288				cmpu b32 $r4 0
    289			bra ne #im_loop	// spin until bit 6 of the pending-interrupt word reads back as 0
    290
    291		cmddone:
    292		// remove the command from FIFO
    293		mov $r3 0x1d00
    294		mov $r4 1
    295		iowr I[$r3] $r4
    296
    297	nocmd:
    298	// ack the processed interrupts
    299	and $r1 $r1 0xc	// only the two bits this handler services
    300	iowr I[$r0 + 0x100] $r1
    301iret
    302
    303cmd_query_get:	// method handler: writes a 16-byte record {counter, 0, timer low, timer high} through DMA_QUERY. In: $r2 = addr << 16, $r3 = data
    304	// if bit 0 of param set, trigger interrupt afterwards.
    305	setp $p1 $r3	// the dispatcher branches to dispatch_error when $p1 is set
    306	or $r2 3	// code 3 goes into the report word if that interrupt is raised
    307
    308	// read PTIMER, beware of races...
    309	mov $r4 0xb00
    310	ptimer_retry:
    311		iord $r6 I[$r4 + 0x100]	// I[0xc00], first read
    312		iord $r5 I[$r4]	// I[0xb00]
    313		iord $r7 I[$r4 + 0x100]	// I[0xc00], second read
    314		cmpu b32 $r6 $r7
    315	bra ne #ptimer_retry	// I[0xc00] changed while I[0xb00] was being read: sample again
    316
    317	// prepare the query structure
    318	ld b32 $r4 D[$r0 + #ctx_query_counter]
    319	st b32 D[$r0 + #swap + 0x0] $r4	// word 0: counter
    320	st b32 D[$r0 + #swap + 0x4] $r0	// word 1: 0
    321	st b32 D[$r0 + #swap + 0x8] $r5	// word 2: value read from I[0xb00]
    322	st b32 D[$r0 + #swap + 0xc] $r6	// word 3: value read from I[0xc00]
    323
    324	// will use target 0, DMA_QUERY.
    325	mov $xtargets $r0
    326
    327	ld b32 $r4 D[$r0 + #ctx_query_address_high]
    328	shl b32 $r4 0x18	// the base carries only the high address byte, in bits 24-31
    329	mov $xdbase $r4
    330
    331	ld b32 $r4 D[$r0 + #ctx_query_address_low]	// the full low word is used as the external offset
    332	mov $r5 #swap
    333	sethi $r5 0x20000	// xfer descriptor: local address swap, size code 2 in the upper half
    334	xdst $r4 $r5	// store the record
    335	xdwait
    336
    337	ret
    338
    339cmd_cond_mode:	// method handler: computes ctx_cond_off from the requested mode ($r3 = 0..4). In: $r2 = addr << 16, $r3 = data
    340	// if >= 5, INVALID_ENUM
    341	bset $flags $p1	// assume failure until the range check passes
    342	or $r2 2	// code 2 in the report word
    343	cmpu b32 $r3 5
    344	bra nc #return
    345
    346	// otherwise, no error.
    347	bclr $flags $p1
    348
    349	// if < 2, no QUERY object is involved
    350	cmpu b32 $r3 2
    351	bra nc #cmd_cond_mode_queryful
    352
    353		xor $r3 1	// mode 0 -> ctx_cond_off = 1, mode 1 -> ctx_cond_off = 0
    354		st b32 D[$r0 + #ctx_cond_off] $r3
    355	return:
    356		ret
    357
    358	cmd_cond_mode_queryful:
    359	// ok, will need to pull a QUERY object, prepare offsets
    360	ld b32 $r4 D[$r0 + #ctx_cond_address_high]
    361	ld b32 $r5 D[$r0 + #ctx_cond_address_low]
    362	and $r6 $r5 0xff	// $r6 = offset: low byte of the address
    363	shr b32 $r5 8
    364	shl b32 $r4 0x18
    365	or $r4 $r5	// $r4 = base: 40-bit address >> 8
    366	mov $xdbase $r4
    367	mov $xtargets $r0	// target 0
    368
    369	// pull the first one
    370	mov $r5 #swap
    371	sethi $r5 0x20000	// xfer descriptor: local address swap, size code 2 in the upper half
    372	xdld $r6 $r5
    373
    374	// if == 2, only a single QUERY is involved...
    375	cmpu b32 $r3 2
    376	bra ne #cmd_cond_mode_double
    377
    378		xdwait
    379		ld b32 $r4 D[$r0 + #swap + 4]	// word 1 of the query record
    380		cmpu b32 $r4 0
    381		xbit $r4 $flags z	// $r4 = zero flag of the compare - presumably 1 when word 1 == 0, TODO confirm xbit semantics
    382		st b32 D[$r0 + #ctx_cond_off] $r4
    383		ret
    384
    385	// ok, we'll need to pull second one too
    386	cmd_cond_mode_double:
    387	add b32 $r6 0x10	// second record: 0x10 further in external memory...
    388	add b32 $r5 0x10	// ...and 0x10 further in swap
    389	xdld $r6 $r5
    390	xdwait	// also covers the first load, which was never waited for on this path
    391
    392	// compare COUNTERs
    393	ld b32 $r5 D[$r0 + #swap + 0x00]
    394	ld b32 $r6 D[$r0 + #swap + 0x10]
    395	cmpu b32 $r5 $r6
    396	xbit $r4 $flags z	// $r4 = zero flag (presumably 1 when both word 0 values match)
    397
    398	// compare RESen
    399	ld b32 $r5 D[$r0 + #swap + 0x04]
    400	ld b32 $r6 D[$r0 + #swap + 0x14]
    401	cmpu b32 $r5 $r6
    402	xbit $r5 $flags z
    403	and $r4 $r5	// $r4 = both comparisons reported equal
    404
    405	// and negate or not, depending on mode
    406	cmpu b32 $r3 3
    407	xbit $r5 $flags z	// $r5 = zero flag of (mode == 3)
    408	xor $r4 $r5	// mode 3 inverts the result, mode 4 keeps it
    409	st b32 D[$r0 + #ctx_cond_off] $r4
    410	ret
    411
    412cmd_wrcache_flush:	// method handler: writes 0x00010000 to I[0x2200]; never reports an error
    413	bclr $flags $p1	// make sure the dispatcher does not take the dispatch_error branch
    414	mov $r2 0x2200	// $r2 can be reused: no error report will be made
    415	clear b32 $r3
    416	sethi $r3 0x10000	// $r3 = 0x00010000 (PM_TRIGGER in ih writes 0x00020000 to the same register)
    417	iowr I[$r2] $r3
    418	ret
    419
    420sec_cmd_mode:	// method handler: stores the requested mode ($r3) in ctx_mode when it is a valid sec_dtable index
    421	// if >= 0xf, INVALID_ENUM
    422	bset $flags $p1	// assume failure until the range check passes
    423	or $r2 2	// code 2 in the report word
    424	cmpu b32 $r3 0xf
    425	bra nc #sec_cmd_mode_return	// sec_dtable has 15 entries, so 0xf and above are rejected
    426
    427		bclr $flags $p1	// valid: no error
    428		st b32 D[$r0 + #ctx_mode] $r3
    429
    430	sec_cmd_mode_return:	// shared return point, also used by sec_cmd_length
    431	ret
    432
    433sec_cmd_length:	// method handler: runs the operation selected by ctx_mode over $r3 bytes (non-zero multiple of 16), then writes back the advanced src/dst addresses and the IV. In: $r3 = length
    434	bclr $flags $p1	// this handler never reports an error: drop any stale $p1 left by an earlier method so the dispatcher cannot take dispatch_error
    435	cmpu b32 $r3 0	// nop if length == 0
    436	bra e #sec_cmd_mode_return
    437
    438	// init key, IV
    439	cxset 3	// NOTE(review): presumably routes the next transfers to the crypto unit - confirm cxset semantics
    440	mov $r4 #ctx_key
    441	sethi $r4 0x70000
    442	xdst $r0 $r4	// push the 16-byte key
    443	mov $r4 #ctx_iv
    444	sethi $r4 0x60000
    445	xdst $r0 $r4	// push the 16-byte IV
    446	xdwait
    447	ckeyreg $c7	// select $c7 as the key register
    448
    449	// prepare the targets
    450	mov $r4 0x2100
    451	mov $xtargets $r4
    452
    453	// prepare src address
    454	ld b32 $r4 D[$r0 + #ctx_src_address_high]
    455	ld b32 $r5 D[$r0 + #ctx_src_address_low]
    456	shr b32 $r8 $r5 8
    457	shl b32 $r4 0x18
    458	or $r4 $r8	// $r4 = src base: 40-bit address >> 8
    459	and $r5 $r5 0xff	// $r5 = src offset: low address byte
    460
    461	// prepare dst address
    462	ld b32 $r6 D[$r0 + #ctx_dst_address_high]
    463	ld b32 $r7 D[$r0 + #ctx_dst_address_low]
    464	shr b32 $r8 $r7 8
    465	shl b32 $r6 0x18
    466	or $r6 $r8	// $r6 = dst base: 40-bit address >> 8
    467	and $r7 $r7 0xff	// $r7 = dst offset: low address byte
    468
    469	// find the proper prep & do functions
    470	ld b32 $r8 D[$r0 + #ctx_mode]
    471	shl b32 $r8 2	// 4 bytes per sec_dtable entry
    472
    473	// run prep
    474	ld b16 $r9 D[$r8 + #sec_dtable]
    475	call $r9	// prep routines only touch crypto state, so $r3-$r8 survive
    476
    477	// do it
    478	ld b16 $r9 D[$r8 + #sec_dtable + 2]
    479	call $r9	// transfer loop: consumes $r3-$r7, advances $r5 and/or $r7 by the length
    480	cxset 1
    481	xdwait
    482	cxset 0x61
    483	xdwait
    484	xdwait
    485
    486	// update src address
    487	shr b32 $r8 $r4 0x18	// high byte back out of the base
    488	shl b32 $r9 $r4 8	// low word, without the offset byte
    489	add b32 $r9 $r5	// plus the advanced offset
    490	adc b32 $r8 0	// carry into the high part
    491	st b32 D[$r0 + #ctx_src_address_high] $r8
    492	st b32 D[$r0 + #ctx_src_address_low] $r9
    493
    494	// update dst address
    495	shr b32 $r8 $r6 0x18
    496	shl b32 $r9 $r6 8
    497	add b32 $r9 $r7
    498	adc b32 $r8 0
    499	st b32 D[$r0 + #ctx_dst_address_high] $r8
    500	st b32 D[$r0 + #ctx_dst_address_low] $r9
    501
    502	// pull updated IV
    503	cxset 2
    504	mov $r4 #ctx_iv
    505	sethi $r4 0x60000
    506	xdld $r0 $r4	// same descriptor as the IV push above, opposite direction
    507	xdwait
    508
    509	ret
    510
    511
    512sec_copy_prep:	// mode 0x0, paired with sec_do_inout: per-block script that passes the data through $c0 unchanged
    513	cs0begin 2	// the next 2 crypto instructions form the script - presumably recorded here and run by cs0exec in the transfer loop, TODO confirm
    514		cxsin $c0	// $c0 = next input block (meaning inferred from the mnemonic)
    515		cxsout $c0	// emit $c0
    516	ret
    517
    518sec_store_prep:	// mode 0x1, paired with sec_do_out (no input is read): per-block script that only emits $c6
    519	cs0begin 1	// the next instruction forms the script - presumably run by cs0exec in the transfer loop, TODO confirm
    520		cxsout $c6	// emit $c6 - presumably the register holding the IV pushed by sec_cmd_length, TODO confirm
    521	ret
    522
    523sec_ecb_e_prep:	// mode 0x2, paired with sec_do_inout: per-block script for ECB encryption (mnemonic meanings inferred from their names - confirm)
    524	cs0begin 3	// the next 3 crypto instructions form the script
    525		cxsin $c0	// $c0 = next input block
    526		cenc $c0 $c0	// encrypt $c0 in place
    527		cxsout $c0	// emit the result
    528	ret
    529
    530sec_ecb_d_prep:	// mode 0x3, paired with sec_do_inout: per-block script for ECB decryption (mnemonic meanings inferred from their names - confirm)
    531	ckexp $c7 $c7	// done once, outside the script - presumably converts the key in $c7 to its decryption form, TODO confirm
    532	cs0begin 3	// the next 3 crypto instructions form the script
    533		cxsin $c0	// $c0 = next input block
    534		cdec $c0 $c0	// decrypt $c0 in place
    535		cxsout $c0	// emit the result
    536	ret
    537
    538sec_cbc_e_prep:	// mode 0x4, paired with sec_do_inout: per-block script for CBC encryption; $c6 carries the chaining value (mnemonic meanings inferred - confirm)
    539	cs0begin 4	// the next 4 crypto instructions form the script
    540		cxsin $c0	// $c0 = next input block
    541		cxor $c6 $c0	// chain ^= input
    542		cenc $c6 $c6	// chain = E(chain)
    543		cxsout $c6	// emit it; it stays in $c6 for the next block
    544	ret
    545
    546sec_cbc_d_prep:	// mode 0x5, paired with sec_do_inout: per-block script for CBC decryption; $c6 carries the chaining value (mnemonic meanings inferred - confirm)
    547	ckexp $c7 $c7	// done once, outside the script - presumably converts the key in $c7 to its decryption form, TODO confirm
    548	cs0begin 5	// the next 5 crypto instructions form the script
    549		cmov $c2 $c6	// keep the previous chaining value
    550		cxsin $c6	// the input block becomes the new chaining value
    551		cdec $c0 $c6	// $c0 = D(input)
    552		cxor $c0 $c2	// xor with the previous chaining value
    553		cxsout $c0	// emit the result
    554	ret
    555
    556sec_pcbc_e_prep:	// mode 0x6, paired with sec_do_inout: per-block script for PCBC encryption; $c6 carries the chaining value (mnemonic meanings inferred - confirm)
    557	cs0begin 5	// the next 5 crypto instructions form the script
    558		cxsin $c0	// $c0 = next input block
    559		cxor $c6 $c0	// chain ^= input
    560		cenc $c6 $c6	// chain = E(chain)
    561		cxsout $c6	// emit it
    562		cxor $c6 $c0	// fold the input block into the chain for the next round
    563	ret
    564
    565sec_pcbc_d_prep:	// mode 0x7, paired with sec_do_inout: per-block script for PCBC decryption; $c6 carries the chaining value (mnemonic meanings inferred - confirm)
    566	ckexp $c7 $c7	// done once, outside the script - presumably converts the key in $c7 to its decryption form, TODO confirm
    567	cs0begin 5	// the next 5 crypto instructions form the script
    568		cxsin $c0	// $c0 = next input block
    569		cdec $c1 $c0	// $c1 = D(input)
    570		cxor $c6 $c1	// chain ^= D(input)
    571		cxsout $c6	// emit it
    572		cxor $c6 $c0	// fold the input block into the chain for the next round
    573	ret
    574
    575sec_cfb_e_prep:	// mode 0x8, paired with sec_do_inout: per-block script for CFB encryption; $c6 carries the feedback value (mnemonic meanings inferred - confirm)
    576	cs0begin 4	// the next 4 crypto instructions form the script
    577		cenc $c6 $c6	// feedback = E(feedback)
    578		cxsin $c0	// $c0 = next input block
    579		cxor $c6 $c0	// feedback ^= input
    580		cxsout $c6	// emit it; it stays in $c6 as the next feedback value
    581	ret
    582
    583sec_cfb_d_prep:	// mode 0x9, paired with sec_do_inout: per-block script for CFB decryption; uses cenc, so no ckexp is needed (mnemonic meanings inferred - confirm)
    584	cs0begin 4	// the next 4 crypto instructions form the script
    585		cenc $c0 $c6	// $c0 = E(feedback)
    586		cxsin $c6	// the input block becomes the next feedback value
    587		cxor $c0 $c6	// $c0 ^= input
    588		cxsout $c0	// emit the result
    589	ret
    590
    591sec_ofb_prep:	// mode 0xa, paired with sec_do_inout: per-block script for OFB (same script for both directions); $c6 carries the keystream state (mnemonic meanings inferred - confirm)
    592	cs0begin 4	// the next 4 crypto instructions form the script
    593		cenc $c6 $c6	// state = E(state)
    594		cxsin $c0	// $c0 = next input block
    595		cxor $c0 $c6	// $c0 ^= state
    596		cxsout $c0	// emit the result
    597	ret
    598
    599sec_ctr_prep:	// mode 0xb, paired with sec_do_inout: per-block script for CTR; $c6 carries the counter (mnemonic meanings inferred - confirm)
    600	cs0begin 5	// the next 5 crypto instructions form the script
    601		cenc $c1 $c6	// $c1 = E(counter)
    602		cadd $c6 1	// counter += 1
    603		cxsin $c0	// $c0 = next input block
    604		cxor $c0 $c1	// $c0 ^= E(counter)
    605		cxsout $c0	// emit the result
    606	ret
    607
    608sec_cbc_mac_prep:	// mode 0xc, paired with sec_do_in (nothing is emitted per block): per-block script accumulating a CBC-MAC in $c6 (mnemonic meanings inferred - confirm)
    609	cs0begin 3	// the next 3 crypto instructions form the script
    610		cxsin $c0	// $c0 = next input block
    611		cxor $c6 $c0	// mac ^= input
    612		cenc $c6 $c6	// mac = E(mac)
    613	ret
    614
    615sec_cmac_finish_complete_prep:	// mode 0xd, paired with sec_do_in: final CMAC step; applies cprecmac once, the partial variant twice (mnemonic meanings inferred - confirm)
    616	cs0begin 7	// the next 7 crypto instructions form the script
    617		cxsin $c0	// $c0 = next input block
    618		cxor $c6 $c0	// mac ^= input
    619		cxor $c0 $c0	// $c0 = 0
    620		cenc $c0 $c0	// $c0 = E(0)
    621		cprecmac $c0 $c0	// presumably derives the CMAC subkey from E(0) - TODO confirm
    622		cxor $c6 $c0	// mac ^= subkey
    623		cenc $c6 $c6	// mac = E(mac)
    624	ret
    625
    626sec_cmac_finish_partial_prep:	// mode 0xe, paired with sec_do_in: final CMAC step; applies cprecmac twice, the complete variant once (mnemonic meanings inferred - confirm)
    627	cs0begin 8	// the next 8 crypto instructions form the script
    628		cxsin $c0	// $c0 = next input block
    629		cxor $c6 $c0	// mac ^= input
    630		cxor $c0 $c0	// $c0 = 0
    631		cenc $c0 $c0	// $c0 = E(0)
    632		cprecmac $c0 $c0	// presumably derives the first CMAC subkey from E(0) - TODO confirm
    633		cprecmac $c0 $c0	// second application - presumably the second subkey
    634		cxor $c6 $c0	// mac ^= subkey
    635		cenc $c6 $c6	// mac = E(mac)
    636	ret
    637
    638// TODO
    639sec_do_in:	// input-only transfer loop. In: $r3 = length, $r4 = src base, $r5 = src offset. Out: $r5 advanced by the length. Clobbers $r3, $r9
    640	add b32 $r3 $r5	// $r3 = end offset
    641	mov $xdbase $r4
    642	mov $r9 #swap
    643	sethi $r9 0x20000	// xfer descriptor: local address swap, size code 2 in the upper half
    644	sec_do_in_loop:
    645		xdld $r5 $r9	// fetch one source block into swap
    646		xdwait
    647		cxset 0x22	// NOTE(review): cxset operand meaning is not derivable from this file - presumably redirects the next transfers to the crypto unit
    648		xdst $r0 $r9	// hand the block over from swap
    649		cs0exec 1	// run the script set up by the prep routine once
    650		xdwait
    651		add b32 $r5 0x10	// next 16-byte block
    652		cmpu b32 $r5 $r3
    653	bra ne #sec_do_in_loop	// terminates because the length is a non-zero multiple of 16
    654	cxset 1
    655	xdwait
    656	ret
    657
    658sec_do_out:	// output-only transfer loop. In: $r3 = length, $r6 = dst base, $r7 = dst offset. Out: $r7 advanced by the length. Clobbers $r3, $r9
    659	add b32 $r3 $r7	// $r3 = end offset
    660	mov $xdbase $r6
    661	mov $r9 #swap
    662	sethi $r9 0x20000	// xfer descriptor: local address swap, size code 2 in the upper half
    663	sec_do_out_loop:
    664		cs0exec 1	// run the script set up by the prep routine once
    665		cxset 0x61	// NOTE(review): cxset operand meaning is not derivable from this file - presumably redirects the next transfers to the crypto unit
    666		xdld $r7 $r9
    667		xdst $r7 $r9	// together with the xdld above this moves the emitted block to the destination - exact pairing TODO confirm
    668		cxset 1
    669		xdwait
    670		add b32 $r7 0x10	// next 16-byte block
    671		cmpu b32 $r7 $r3
    672	bra ne #sec_do_out_loop	// terminates because the length is a non-zero multiple of 16
    673	ret
    674
    675sec_do_inout:	// input+output transfer loop. In: $r3 = length, $r4/$r5 = src base/offset, $r6/$r7 = dst base/offset. Out: $r5 and $r7 advanced by the length. Clobbers $r3, $r9
    676	add b32 $r3 $r5	// $r3 = end offset on the source side
    677	mov $r9 #swap
    678	sethi $r9 0x20000	// xfer descriptor: local address swap, size code 2 in the upper half
    679	sec_do_inout_loop:
    680		mov $xdbase $r4	// the base is switched to the source for the read...
    681		xdld $r5 $r9	// fetch one source block into swap
    682		xdwait
    683		cxset 0x21	// NOTE(review): cxset operand meaning is not derivable from this file - presumably redirects the next transfers to the crypto unit
    684		xdst $r0 $r9	// hand the block over from swap
    685		cs0exec 1	// run the script set up by the prep routine once
    686		cxset 0x61
    687		mov $xdbase $r6	// ...and to the destination for the write
    688		xdld $r7 $r9
    689		xdst $r7 $r9	// together with the xdld above this moves the emitted block to the destination - exact pairing TODO confirm
    690		cxset 1
    691		xdwait
    692		add b32 $r5 0x10	// both sides advance by one 16-byte block
    693		add b32 $r7 0x10
    694		cmpu b32 $r5 $r3
    695	bra ne #sec_do_inout_loop	// terminates because the length is a non-zero multiple of 16
    696	ret
    697
    698.align 0x100