cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

com.fuc (20800B)


      1/* fuc microcode for copy engine on gt215- chipsets
      2 *
      3 * Copyright 2011 Red Hat Inc.
      4 *
      5 * Permission is hereby granted, free of charge, to any person obtaining a
      6 * copy of this software and associated documentation files (the "Software"),
      7 * to deal in the Software without restriction, including without limitation
      8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      9 * and/or sell copies of the Software, and to permit persons to whom the
     10 * Software is furnished to do so, subject to the following conditions:
     11 *
     12 * The above copyright notice and this permission notice shall be included in
     13 * all copies or substantial portions of the Software.
     14 *
     15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     21 * OTHER DEALINGS IN THE SOFTWARE.
     22 *
     23 * Authors: Ben Skeggs
     24 */
     25
     26#ifdef GT215
     27.section #gt215_ce_data
     28#else
     29.section #gf100_ce_data
     30#endif
     31
       // Context state block.  Every field is a single 32-bit word.  Fields are
       // filled in either by the method dispatcher (state entries in
       // dispatch_table store the method's data word here) or directly by the
       // cmd_* handlers.  The src_* and dst_* groups use the same field order,
       // so the surface setup code addresses either group as
       // D[$r5 + #ctx_src_*] with $r5 = 0 (src) or
       // $r5 = #ctx_dst_address_high - #ctx_src_address_high (dst).
     32ctx_object:                   .b32 0
     33#ifdef GT215
     34ctx_dma:
     35ctx_dma_query:                .b32 0
     36ctx_dma_src:                  .b32 0
     37ctx_dma_dst:                  .b32 0
     38#endif
     39.equ #ctx_dma_count 3  // number of ctx_dma_* words; only referenced inside GT215 blocks
     40ctx_query_address_high:       .b32 0
     41ctx_query_address_low:        .b32 0
     42ctx_query_counter:            .b32 0
     43ctx_src_address_high:         .b32 0
     44ctx_src_address_low:          .b32 0
     45ctx_src_pitch:                .b32 0
     46ctx_src_tile_mode:            .b32 0
     47ctx_src_xsize:                .b32 0
     48ctx_src_ysize:                .b32 0
     49ctx_src_zsize:                .b32 0
     50ctx_src_zoff:                 .b32 0
     51ctx_src_xoff:                 .b32 0
     52ctx_src_yoff:                 .b32 0
     53ctx_src_cpp:                  .b32 0  // no method writes this; set by cmd_exec / cmd_exec_set_format
     54ctx_dst_address_high:         .b32 0
     55ctx_dst_address_low:          .b32 0
     56ctx_dst_pitch:                .b32 0
     57ctx_dst_tile_mode:            .b32 0
     58ctx_dst_xsize:                .b32 0
     59ctx_dst_ysize:                .b32 0
     60ctx_dst_zsize:                .b32 0
     61ctx_dst_zoff:                 .b32 0
     62ctx_dst_xoff:                 .b32 0
     63ctx_dst_yoff:                 .b32 0
     64ctx_dst_cpp:                  .b32 0  // no method writes this; set by cmd_exec / cmd_exec_set_format
     65ctx_format:                   .b32 0
     66ctx_swz_const0:               .b32 0
     67ctx_swz_const1:               .b32 0
     68ctx_xcnt:                     .b32 0
     69ctx_ycnt:                     .b32 0
     70.align 256  // swctx treats the start of the data segment as a 256-byte context
     71
     72dispatch_table:
       // Table format, as consumed by dispatch:
       //   .b16 <first method index> <count>    range header (index = mthd >> 2)
       //   .b32 <target> <reserved-bit mask>    one 8-byte entry, repeated <count> times
       // <target>, low 16 bits : data offset (state entry) or code address (handler)
       // <target>, high 16 bits: non-zero (the 0x00010000 below) means "call the
       //                         handler"; zero means "store the data word at the
       //                         data offset"
       // <reserved-bit mask>   : written as ~(valid bits); a method whose data
       //                         word has any of these bits set is rejected as
       //                         INVALID_BITFIELD
       // Ranges are listed in ascending index order.  The final "0x800 0" header
       // terminates the search: dispatch masks the method index to 0x7ff, so it
       // is always below 0x800 and falls out as ILLEGAL_MTHD.
     73// mthd 0x0000, NAME
     74.b16 0x000 1
     75.b32 #ctx_object                     ~0xffffffff
     76// mthd 0x0100, NOP
     77.b16 0x040 1
     78.b32 0x00010000 + #cmd_nop           ~0xffffffff
     79// mthd 0x0140, PM_TRIGGER
     80.b16 0x050 1
     81.b32 0x00010000 + #cmd_pm_trigger    ~0xffffffff
     82#ifdef GT215
     83// mthd 0x0180-0x0188, DMA_ (ctx_dma_count entries: query, src, dst)
     84.b16 0x060 #ctx_dma_count
     85dispatch_dma:
     86.b32 0x00010000 + #cmd_dma           ~0xffffffff
     87.b32 0x00010000 + #cmd_dma           ~0xffffffff
     88.b32 0x00010000 + #cmd_dma           ~0xffffffff
     89#endif
     90// mthd 0x0200-0x0218, SRC_TILE
     91.b16 0x80 7
     92.b32 #ctx_src_tile_mode              ~0x00000fff
     93.b32 #ctx_src_xsize                  ~0x0007ffff
     94.b32 #ctx_src_ysize                  ~0x00001fff
     95.b32 #ctx_src_zsize                  ~0x000007ff
     96.b32 #ctx_src_zoff                   ~0x00000fff
     97.b32 #ctx_src_xoff                   ~0x0007ffff
     98.b32 #ctx_src_yoff                   ~0x00001fff
     99// mthd 0x0220-0x0238, DST_TILE
    100.b16 0x88 7
    101.b32 #ctx_dst_tile_mode              ~0x00000fff
    102.b32 #ctx_dst_xsize                  ~0x0007ffff
    103.b32 #ctx_dst_ysize                  ~0x00001fff
    104.b32 #ctx_dst_zsize                  ~0x000007ff
    105.b32 #ctx_dst_zoff                   ~0x00000fff
    106.b32 #ctx_dst_xoff                   ~0x0007ffff
    107.b32 #ctx_dst_yoff                   ~0x00001fff
    108// mthd 0x0300-0x0304, EXEC, WRCACHE_FLUSH
    109.b16 0xc0 2
    110.b32 0x00010000 + #cmd_exec          ~0xffffffff
    111.b32 0x00010000 + #cmd_wrcache_flush ~0xffffffff
    112// mthd 0x030c-0x0340, various stuff
    113.b16 0xc3 14
    114.b32 #ctx_src_address_high           ~0x000000ff
    115.b32 #ctx_src_address_low            ~0xffffffff
    116.b32 #ctx_dst_address_high           ~0x000000ff
    117.b32 #ctx_dst_address_low            ~0xffffffff
    118.b32 #ctx_src_pitch                  ~0x0007ffff
    119.b32 #ctx_dst_pitch                  ~0x0007ffff
    120.b32 #ctx_xcnt                       ~0x0000ffff
    121.b32 #ctx_ycnt                       ~0x00001fff
    122.b32 #ctx_format                     ~0x0333ffff
    123.b32 #ctx_swz_const0                 ~0xffffffff
    124.b32 #ctx_swz_const1                 ~0xffffffff
    125.b32 #ctx_query_address_high         ~0x000000ff
    126.b32 #ctx_query_address_low          ~0xffffffff
    127.b32 #ctx_query_counter              ~0xffffffff
    128.b16 0x800 0
    129
    130#ifdef GT215
    131.section #gt215_ce_code
    132#else
    133.section #gf100_ce_code
    134#endif
    135
       // Entry point: installs the interrupt handler, enables the fifo and
       // context-switch interrupts, enables fifo access, then idles.  All real
       // work is done from ih.
    136main:
    137   clear b32 $r0  // $r0 is expected to hold zero from here on (used as zero base/value)
    138   mov $sp $r0
    139
    140   // setup i0 handler and route fifo and ctxswitch to it
    141   mov $r1 #ih
    142   mov $iv0 $r1
    143   mov $r1 0x400
    144   movw $r2 0xfff3
    145   sethi $r2 0  // $r2 = 0x0000fff3: every low-16 bit set except bits 2 and 3
    146   iowr I[$r1 + 0x300] $r2
    147
    148   // enable interrupts
    149   or $r2 0xc  // adds bits 2 and 3, the two bits ih tests (fifo command / ctxswitch)
    150   iowr I[$r1] $r2
    151   bset $flags ie0
    152
    153   // enable fifo access and context switching
    154   mov $r1 0x1200
    155   mov $r2 3
    156   iowr I[$r1] $r2
    157
    158   // sleep forever, waking for interrupts
    159   bset $flags $p0
    160   spin:
    161      sleep $p0
    162      bra #spin
    163
    164// i0 handler
       //
       // Reads the interrupt status word from I[0x200] into $r1, services a
       // context-switch request (bit 3) and/or a fifo command (bit 2), then
       // writes those two bits back to I[0x100] (presumably the acknowledge
       // register - TODO confirm) and returns from the interrupt.
       // $r1 has to survive both calls; the cmd_* headers call it "irqh state".
       // $r2 only receives the AND results whose flags drive the branches.
    165ih:
    166   iord $r1 I[$r0 + 0x200]
    167
    168   and $r2 $r1 0x00000008
    169   bra e #ih_no_chsw  // bit 3 clear: no context switch pending
    170      call #chsw
    171   ih_no_chsw:
    172   and $r2 $r1 0x00000004
    173   bra e #ih_no_cmd  // bit 2 clear: no fifo command pending
    174      call #dispatch
    175
    176   ih_no_cmd:
    177   and $r1 $r1 0x0000000c  // keep only the two bits handled above
    178   iowr I[$r0 + 0x100] $r1
    179   iret
    180
    181// $p1 direction (0 = unload, 1 = load)
    182// $r3 channel
       //
       // Transfers the context block at the start of the data segment to
       // ($p1 = 0, xdst) or from ($p1 = 1, xdld) the channel's context storage
       // and waits (xdwait) before returning.
       // Clobbers $r4; the non-GT215 path also clobbers $r5, $r6, $r14 and $r15
       // and uses a 256-byte-aligned scratch area below $sp.
    183swctx:
    184   mov $r4 0x7700
    185   mov $xtargets $r4
    186#ifdef GT215
    187   // target 7 hardcoded to ctx dma object
    188   mov $xdbase $r0
    189#else
    190   // read SCRATCH3 to decide if we are PCOPY0 or PCOPY1
    191   mov $r4 0x2100
    192   iord $r4 I[$r4 + 0]
    193   and $r4 1
    194   shl b32 $r4 4
    195   add b32 $r4 0x30  // $r4 = 0x30 or 0x40: offset of this engine's 16-byte info
    196
    197   // channel is in vram
    198   mov $r15 0x61c
    199   shl b32 $r15 6  // $r15 = 0x18700
    200   mov $r5 0x114
    201   iowrs I[$r15] $r5
    202
    203   // read 16-byte PCOPYn info, containing context pointer, from channel
    204   shl b32 $r5 $r3 4
    205   add b32 $r5 2
    206   mov $xdbase $r5
    207   mov $r5 $sp
    208   // get a chunk of stack space, aligned to 256 byte boundary
    209   sub b32 $r5 0x100
    210   mov $r6 0xff
    211   not b32 $r6
    212   and $r5 $r6
    213   sethi $r5 0x00020000  // presumably the transfer-size field for the 16-byte read - TODO confirm
    214   xdld $r4 $r5
    215   xdwait
    216   sethi $r5 0  // strip the upper half again so $r5 is a plain data address
    217
    218   // set context pointer, from within channel VM
    219   mov $r14 0
    220   iowrs I[$r15] $r14
    221   ld b32 $r4 D[$r5 + 0]
    222   shr b32 $r4 8
    223   ld b32 $r6 D[$r5 + 4]
    224   shl b32 $r6 24
    225   or $r4 $r6  // $r4 = bits 8..39 of the 64-bit value read at D[$r5]
    226   mov $xdbase $r4
    227#endif
    228   // 256-byte context, at start of data segment
    229   mov b32 $r4 $r0
    230   sethi $r4 0x60000  // presumably the transfer-size field for 256 bytes - TODO confirm
    231
    232   // swap!
    233   bra $p1 #swctx_load
    234      xdst $r0 $r4
    235      bra #swctx_done
    236   swctx_load:
    237      xdld $r0 $r4
    238   swctx_done:
    239   xdwait
    240   ret
    241
    242chsw:
       // Context switch handler, called from ih.
       // One invocation either unloads the current channel (and returns early)
       // or loads the pending channel, never both.  Bit 30 (0x1e) of the
       // channel words read from I[0x1400] / I[0x1500] is the "valid" test.
       // Clobbers $r2-$r4, $r13, $r15, $p1, everything swctx clobbers and, on
       // GT215, $r5-$r8.
    243   // read current channel
    244   mov $r2 0x1400
    245   iord $r3 I[$r2]
    246
    247   // if it's active, unload it and return
    248   xbit $r15 $r3 0x1e  // only the resulting flags are used; $r15 is scratch
    249   bra e #chsw_no_unload
    250      bclr $flags $p1  // $p1 = 0: unload
    251      call #swctx
    252      bclr $r3 0x1e
    253      iowr I[$r2] $r3  // write the channel word back with bit 30 cleared
    254      mov $r4 1
    255      iowr I[$r2 + 0x200] $r4  // writes 1 to I[0x1600] after an unload (2 is written after a load)
    256      ret
    257
    258   // read next channel
    259   chsw_no_unload:
    260   iord $r3 I[$r2 + 0x100]
    261
    262   // is there a channel waiting to be loaded?
    263   xbit $r13 $r3 0x1e  // only the resulting flags are used; $r13 is scratch
    264   bra e #chsw_finish_load
    265      bset $flags $p1  // $p1 = 1: load
    266      call #swctx
    267#ifdef GT215
    268      // load dma objects back into TARGET regs
          // ($r6 + 0x180) << 8 yields the same I/O addresses cmd_dma writes to
          // (0x18000 + index * 0x100).
          // NOTE(review): $r6 starts at ctx_dma_count (3), not ctx_dma_count - 1,
          // and the loop continues while the subtract does not borrow, so it
          // appears to run for indices 3, 2, 1, 0.  Index 3 is the word after
          // ctx_dma_dst (ctx_query_address_high), written to I[0x18300].
          // Confirm whether that extra iteration is intended.
    269      mov $r5 #ctx_dma
    270      mov $r6 #ctx_dma_count
    271      chsw_load_ctx_dma:
    272         ld b32 $r7 D[$r5 + $r6 * 4]
    273         add b32 $r8 $r6 0x180
    274         shl b32 $r8 8
    275         iowr I[$r8] $r7
    276         sub b32 $r6 1
    277         bra nc #chsw_load_ctx_dma
    278#endif
    279   chsw_finish_load:
    280   mov $r3 2
    281   iowr I[$r2 + 0x200] $r3
    282   ret
    283
    284dispatch:
       // Fifo method dispatcher, called from ih.
       // Register use:
       //    $r2: method word from I[0x1a00], shifted left by 16, plus error code
       //    $r3: method data from I[0x1900]
       //    $r4: method index (low 11 bits of the method word), later the
       //         address of the matching dispatch_table entry
       //    $r5-$r7: table walk scratch
       // Error codes OR-ed into $r2: 1 = ILLEGAL_MTHD, 3 = INVALID_BITFIELD
       // (the "or 2" path falls through into "or 1").  A handler reports its
       // own error by returning with $p1 set.
    285   // read incoming fifo command
    286   mov $r3 0x1900
    287   iord $r2 I[$r3 + 0x100]
    288   iord $r3 I[$r3 + 0x000]
    289   and $r4 $r2 0x7ff
    290   // $r2 will be used to store exception data
    291   shl b32 $r2 0x10
    292
    293   // lookup method in the dispatch table, ILLEGAL_MTHD if not found
    294   mov $r5 #dispatch_table
    295   clear b32 $r6
    296   clear b32 $r7
    297   dispatch_loop:
    298      ld b16 $r6 D[$r5 + 0]  // first method index of this range
    299      ld b16 $r7 D[$r5 + 2]  // number of entries in this range
    300      add b32 $r5 4  // $r5 now points at the range's first entry
    301      cmpu b32 $r4 $r6
    302      bra c #dispatch_illegal_mthd  // index below this range: not in the table
    303      add b32 $r7 $r6
    304      cmpu b32 $r4 $r7
    305      bra c #dispatch_valid_mthd  // index below first + count: inside this range
    306      sub b32 $r7 $r6
    307      shl b32 $r7 3  // skip count * 8 bytes of entries
    308      add b32 $r5 $r7
    309      bra #dispatch_loop
    310
    311   // ensure no bits set in reserved fields, INVALID_BITFIELD
    312   dispatch_valid_mthd:
    313   sub b32 $r4 $r6
    314   shl b32 $r4 3
    315   add b32 $r4 $r5  // $r4 = address of the entry for this method
    316   ld b32 $r5 D[$r4 + 4]
    317   and $r5 $r3
    318   cmpu b32 $r5 0
    319   bra ne #dispatch_invalid_bitfield
    320
    321   // depending on dispatch flags: execute method, or save data as state
    322   ld b16 $r5 D[$r4 + 0]  // data offset or handler address
    323   ld b16 $r6 D[$r4 + 2]  // flags: non-zero selects the handler path
    324   cmpu b32 $r6 0
    325   bra ne #dispatch_cmd
    326      st b32 D[$r5] $r3
    327      bra #dispatch_done
    328   dispatch_cmd:
    329      bclr $flags $p1  // handlers set $p1 to signal an error
    330      call $r5
    331      bra $p1 #dispatch_error
    332      bra #dispatch_done
    333
    334   dispatch_invalid_bitfield:
    335   or $r2 2
    336   dispatch_illegal_mthd:
    337   or $r2 1
    338
    339   // store exception data in SCRATCH0/SCRATCH1, signal hostirq
    340   dispatch_error:
    341   mov $r4 0x1000
    342   iowr I[$r4 + 0x000] $r2
    343   iowr I[$r4 + 0x100] $r3
    344   mov $r2 0x40
    345   iowr I[$r0] $r2
    346   hostirq_wait:
    347      iord $r2 I[$r0 + 0x200]
    348      and $r2 0x40
    349      cmpu b32 $r2 0
    350      bra ne #hostirq_wait  // spin until bit 6 of I[0x200] reads back as zero
    351
    352   dispatch_done:
         // Reached on success and after an error has been reported.
         // Presumably acknowledges the fifo command - TODO confirm.
    353   mov $r2 0x1d00
    354   mov $r3 1
    355   iowr I[$r2] $r3
    356   ret
    357
    358// No-operation
    359//
    360// Returns immediately.  dispatch clears $p1 before the call, so this
       // handler always reports success.
       //
       // Inputs:
    361//    $r1: irqh state
    362//    $r2: hostirq state
    363//    $r3: data
    364//    $r4: dispatch table entry
    365// Outputs:
    366//    $r1: irqh state
    367//    $p1: set on error
    368//       $r2: hostirq state
    369//       $r3: data
    370cmd_nop:
    371   ret
    372
    373// PM_TRIGGER
    374//
    375// Writes 0x00020000 to I[0x2200] (cmd_wrcache_flush writes 0x00010000 to
       // the same register).  Never sets $p1.  Overwrites $r2 and $r3; dispatch
       // reloads both at dispatch_done after a successful return.
       //
       // Inputs:
    376//    $r1: irqh state
    377//    $r2: hostirq state
    378//    $r3: data
    379//    $r4: dispatch table entry
    380// Outputs:
    381//    $r1: irqh state
    382//    $p1: set on error
    383//       $r2: hostirq state
    384//       $r3: data
    385cmd_pm_trigger:
    386   mov $r2 0x2200
    387   clear b32 $r3
    388   sethi $r3 0x20000
    389   iowr I[$r2] $r3
    390   ret
    391
    392#ifdef GT215
    393// SET_DMA_* method handler
    394//
    395// Sets bit 30 in the data word, saves it in the matching ctx_dma_* slot
       // and writes it to the matching I/O register (0x18000 + index * 0x100).
       // The slot index is derived from which of the three dispatch_dma entries
       // $r4 points at.  Never sets $p1.  Modifies $r3 and $r4.
       //
       // Inputs:
    396//    $r1: irqh state
    397//    $r2: hostirq state
    398//    $r3: data
    399//    $r4: dispatch table entry
    400// Outputs:
    401//    $r1: irqh state
    402//    $p1: set on error
    403//       $r2: hostirq state
    404//       $r3: data
    405cmd_dma:
    406   sub b32 $r4 #dispatch_dma  // byte offset into dispatch_dma: 0, 8 or 16
    407   shr b32 $r4 1  // 8-byte table entries -> 4-byte ctx_dma slots: 0, 4 or 8
    408   bset $r3 0x1e
    409   st b32 D[$r4 + #ctx_dma] $r3
    410   add b32 $r4 0x600
    411   shl b32 $r4 6  // (0x600 + slot offset) << 6 = 0x18000, 0x18100 or 0x18200
    412   iowr I[$r4] $r3
    413   ret
    414#endif
    415
    416// Calculates the hw swizzle mask and adjusts the surface's xcnt to match
    417//
       // Reads ctx_format, ctx_xcnt, ctx_swz_const0 and ctx_swz_const1.
       // Writes ctx_src_cpp and ctx_dst_cpp plus the I/O registers at 0x20400,
       // 0x20500, 0x20c00-0x21000 and 0x21100-0x21200.
       // Builds a swizzle of up to 16 bytes in a temporary 16-byte stack area.
       // Leaves $p2 set if at least one selector referenced a source component.
       // Clobbers $r4-$r12 and $p2.
    418cmd_exec_set_format:
    419   // zero out a chunk of the stack to store the swizzle into
    420   add $sp -0x10
    421   st b32 D[$sp + 0x00] $r0
    422   st b32 D[$sp + 0x04] $r0
    423   st b32 D[$sp + 0x08] $r0
    424   st b32 D[$sp + 0x0c] $r0
    425
    426   // extract cpp, src_ncomp and dst_ncomp from FORMAT
    427   ld b32 $r4 D[$r0 + #ctx_format]
    428   extr $r5 $r4 16:17
    429   add b32 $r5 1  // $r5 = FORMAT[17:16] + 1, bytes per component
    430   extr $r6 $r4 20:21
    431   add b32 $r6 1  // $r6 = FORMAT[21:20] + 1, source component count
    432   extr $r7 $r4 24:25
    433   add b32 $r7 1  // $r7 = FORMAT[25:24] + 1, destination component count
    434
    435   // convert FORMAT swizzle mask to hw swizzle mask
    436   bclr $flags $p2
    437   clear b32 $r8  // $r8 = index of the next swizzle byte to write
    438   clear b32 $r9  // $r9 = destination component counter
    439   ncomp_loop:
    440      and $r10 $r4 0xf  // $r10 = next 4-bit selector, taken from the low end of FORMAT
    441      shr b32 $r4 4
    442      clear b32 $r11  // $r11 = byte index within the component
    443      bpc_loop:
    444         cmpu b8 $r10 4
    445         bra nc #cmp_c0
               // selector 0-3: byte ($r10 * $r5 + $r11) of the source pixel
    446            mulu $r12 $r10 $r5
    447            add b32 $r12 $r11
    448            bset $flags $p2  // remember that the source is referenced
    449            bra #bpc_next
    450         cmp_c0:
    451         bra ne #cmp_c1  // still testing the flags of "cmpu b8 $r10 4" above
               // selector 4: hw byte 0x10 + $r11 (presumably SWZ_CONST0 - TODO confirm)
    452            mov $r12 0x10
    453            add b32 $r12 $r11
    454            bra #bpc_next
    455         cmp_c1:
    456         cmpu b8 $r10 6
    457         bra nc #cmp_zero
               // selector 5: hw byte 0x14 + $r11 (presumably SWZ_CONST1 - TODO confirm)
    458            mov $r12 0x14
    459            add b32 $r12 $r11
    460            bra #bpc_next
    461         cmp_zero:
               // selector 6 and above: hw value 0x80
    462            mov $r12 0x80
    463         bpc_next:
    464         st b8 D[$sp + $r8] $r12
    465         add b32 $r8 1
    466         add b32 $r11 1
    467         cmpu b32 $r11 $r5
    468         bra c #bpc_loop  // repeat for each byte of the component
    469      add b32 $r9 1
    470      cmpu b32 $r9 $r7
    471      bra c #ncomp_loop  // repeat for each destination component
    472
    473   // SRC_XCNT = (xcnt * src_cpp), or 0 if no src ref in swz (hw will hang)
    474   mulu $r6 $r5  // src_cpp = source components * bytes per component
    475   st b32 D[$r0 + #ctx_src_cpp] $r6
    476   ld b32 $r8 D[$r0 + #ctx_xcnt]
    477   mulu $r6 $r8
    478   bra $p2 #dst_xcnt
    479   clear b32 $r6
    480
    481   dst_xcnt:
         // DST_XCNT = xcnt * dst_cpp
    482   mulu $r7 $r5  // dst_cpp = destination components * bytes per component
    483   st b32 D[$r0 + #ctx_dst_cpp] $r7
    484   mulu $r7 $r8
    485
    486   mov $r5 0x810
    487   shl b32 $r5 6  // $r5 = 0x20400
    488   iowr I[$r5 + 0x000] $r6
    489   iowr I[$r5 + 0x100] $r7
    490   add b32 $r5 0x800
         // I[0x20c00] = ((dst_cpp - 1) << 8) | (src_cpp - 1)
    491   ld b32 $r6 D[$r0 + #ctx_dst_cpp]
    492   sub b32 $r6 1
    493   shl b32 $r6 8
    494   ld b32 $r7 D[$r0 + #ctx_src_cpp]
    495   sub b32 $r7 1
    496   or $r6 $r7
    497   iowr I[$r5 + 0x000] $r6
    498   add b32 $r5 0x100
         // copy the 16 swizzle bytes from the stack to I[0x20d00]..I[0x21000]
    499   ld b32 $r6 D[$sp + 0x00]
    500   iowr I[$r5 + 0x000] $r6
    501   ld b32 $r6 D[$sp + 0x04]
    502   iowr I[$r5 + 0x100] $r6
    503   ld b32 $r6 D[$sp + 0x08]
    504   iowr I[$r5 + 0x200] $r6
    505   ld b32 $r6 D[$sp + 0x0c]
    506   iowr I[$r5 + 0x300] $r6
    507   add b32 $r5 0x400
         // swizzle constants go to I[0x21100] and I[0x21200]
    508   ld b32 $r6 D[$r0 + #ctx_swz_const0]
    509   iowr I[$r5 + 0x000] $r6
    510   ld b32 $r6 D[$r0 + #ctx_swz_const1]
    511   iowr I[$r5 + 0x100] $r6
    512   add $sp 0x10  // release the temporary swizzle buffer
    513   ret
    514
    515// Setup to handle a tiled surface
    516//
    517// Calculates a number of parameters the hardware requires in order
    518// to correctly handle tiling.
    519//
    520// Offset calculation is performed as follows (Tp/Th/Td from TILE_MODE):
    521//    nTx = round_up(w * cpp, 1 << Tp) >> Tp
    522//    nTy = round_up(h, 1 << Th) >> Th
    523//    Txo = (x * cpp) & ((1 << Tp) - 1)
    524//     Tx = (x * cpp) >> Tp
    525//    Tyo = y & ((1 << Th) - 1)
    526//     Ty = y >> Th
    527//    Tzo = z & ((1 << Td) - 1)
    528//     Tz = z >> Td
    529//
    530//    off  = (Tzo << Tp << Th) + (Tyo << Tp) + Txo
    531//    off += ((Tz * nTy * nTx) + (Ty * nTx) + Tx) << Td << Th << Tp;
    532//
    533// Inputs:
    534//    $r4: hw command (0x104800)
    535//    $r5: ctx offset adjustment for src/dst selection
    536//    $p2: set if dst surface
    537//
       // Outputs:
       //    $r4: bit (17 + $p2) is set when the TILE_MODE x field is not 0xe
       // Clobbers $r6-$r15; pushes and pops two words on the stack.
       // In the code comments below, "Op" is the running offset inside a tile
       // (it starts out as Txo) and "Ot" is the offset of the tile itself.
    538cmd_exec_set_surface_tiled:
    539   // translate TILE_MODE into Tp, Th, Td shift values
    540   ld b32 $r7 D[$r5 + #ctx_src_tile_mode]
    541   extr $r9 $r7 8:11  // $r9 = Td
    542   extr $r8 $r7 4:7  // $r8 = Th (before the per-chipset bias below)
    543#ifdef GT215
    544   add b32 $r8 2
    545#else
    546   add b32 $r8 3
    547#endif
    548   extr $r7 $r7 0:3
    549   cmp b32 $r7 0xe
    550   bra ne #xtile64
    551   mov $r7 4  // x field == 0xe: Tp = 4
    552   bra #xtileok
    553   xtile64:
    554   xbit $r7 $flags $p2
    555   add b32 $r7 17
    556   bset $r4 $r7  // flag this surface in the hw command: bit 17 (src) or 18 (dst)
    557   mov $r7 6  // otherwise: Tp = 6
    558   xtileok:
    559
    560   // Op = (x * cpp) & ((1 << Tp) - 1)
    561   // Tx = (x * cpp) >> Tp
    562   ld b32 $r10 D[$r5 + #ctx_src_xoff]
    563   ld b32 $r11 D[$r5 + #ctx_src_cpp]
    564   mulu $r10 $r11
    565   mov $r11 1
    566   shl b32 $r11 $r7
    567   sub b32 $r11 1
    568   and $r12 $r10 $r11  // $r12 = Op
    569   shr b32 $r10 $r7  // $r10 = Tx
    570
    571   // Tyo = y & ((1 << Th) - 1)
    572   // Ty  = y >> Th
    573   ld b32 $r13 D[$r5 + #ctx_src_yoff]
    574   mov $r14 1
    575   shl b32 $r14 $r8
    576   sub b32 $r14 1
    577   and $r11 $r13 $r14  // $r11 = Tyo
    578   shr b32 $r13 $r8  // $r13 = Ty
    579
    580   // YTILE = ((1 << Th) << 12) | ((1 << Th) - Tyo)
    581   add b32 $r14 1  // $r14 was (1 << Th) - 1; make it 1 << Th again
    582   shl b32 $r15 $r14 12
    583   sub b32 $r14 $r11
    584   or $r15 $r14
    585   xbit $r6 $flags $p2
    586   add b32 $r6 0x208
    587   shl b32 $r6 8  // $r6 = 0x20800 (src) or 0x20900 (dst)
    588   iowr I[$r6 + 0x000] $r15
    589
    590   // Op += Tyo << Tp
    591   shl b32 $r11 $r7
    592   add b32 $r12 $r11
    593
    594   // nTx = ((w * cpp) + ((1 << Tp) - 1)) >> Tp
    595   ld b32 $r15 D[$r5 + #ctx_src_xsize]
    596   ld b32 $r11 D[$r5 + #ctx_src_cpp]
    597   mulu $r15 $r11
    598   mov $r11 1
    599   shl b32 $r11 $r7
    600   sub b32 $r11 1
    601   add b32 $r15 $r11
    602   shr b32 $r15 $r7
    603   push $r15  // save nTx; popped into $r9 below
    604
    605   // nTy = (h + ((1 << Th) - 1)) >> Th
    606   ld b32 $r15 D[$r5 + #ctx_src_ysize]
    607   mov $r11 1
    608   shl b32 $r11 $r8
    609   sub b32 $r11 1
    610   add b32 $r15 $r11
    611   shr b32 $r15 $r8
    612   push $r15  // save nTy; popped into $r15 below
    613
    614   // Tys = Tp + Th
    615   // CFG_YZ_TILE_SIZE = ((1 << Th) >> 2) << Td
    616   add b32 $r7 $r8  // $r7 = Tys
    617   sub b32 $r8 2
    618   mov $r11 1
    619   shl b32 $r11 $r8
    620   shl b32 $r11 $r9  // $r11 = CFG_YZ_TILE_SIZE, OR-ed into the address-high word below
    621
    622   // Tzo = z & ((1 << Td) - 1)
    623   // Tz  = z >> Td
    624   // Op += Tzo << Tys
    625   // Ts  = Tys + Td
    626   ld b32 $r8 D[$r5 + #ctx_src_zoff]
    627   mov $r14 1
    628   shl b32 $r14 $r9
    629   sub b32 $r14 1
    630   and $r15 $r8 $r14
    631   shl b32 $r15 $r7
    632   add b32 $r12 $r15
    633   add b32 $r7 $r9  // $r7 = Ts
    634   shr b32 $r8 $r9  // $r8 = Tz
    635
    636   // Ot = ((Tz * nTy * nTx) + (Ty * nTx) + Tx) << Ts
    637   pop $r15  // nTy (pushed last)
    638   pop $r9  // nTx (pushed first)
    639   mulu $r13 $r9
    640   add b32 $r10 $r13
    641   mulu $r8 $r9
    642   mulu $r8 $r15
    643   add b32 $r10 $r8
    644   shl b32 $r10 $r7
    645
    646   // PITCH = (nTx - 1) << Ts
    647   sub b32 $r9 1
    648   shl b32 $r9 $r7
    649   iowr I[$r6 + 0x200] $r9  // 0x20a00 (src) or 0x20b00 (dst), as in the linear path
    650
    651   // SRC_ADDRESS_LOW   = (Ot + Op) & 0xffffffff
    652   // CFG_ADDRESS_HIGH |= ((Ot + Op) >> 32) << 16
    653   ld b32 $r7 D[$r5 + #ctx_src_address_low]
    654   ld b32 $r8 D[$r5 + #ctx_src_address_high]
    655   add b32 $r10 $r12
    656   add b32 $r7 $r10
    657   adc b32 $r8 0  // propagate the carry of the low-word add into the high word
    658   shl b32 $r8 16
    659   or $r8 $r11
    660   sub b32 $r6 0x600  // back to 0x20200 (src) or 0x20300 (dst)
    661   iowr I[$r6 + 0x000] $r7
    662   add b32 $r6 0x400  // 0x20600 (src) or 0x20700 (dst)
    663   iowr I[$r6 + 0x000] $r8
    664   ret
    665
    666// Setup to handle a linear surface
    667//
    668// Nothing to see here.. Sets ADDRESS and PITCH, pretty non-exciting
    669//
       // Inputs:
       //    $r5: ctx offset adjustment for src/dst selection (0 selects src)
       //    $p2: set if dst surface (selects the dst I/O register of each pair)
       // Clobbers $r6 and $r7; leaves $r4 untouched.
    670cmd_exec_set_surface_linear:
    671   xbit $r6 $flags $p2
    672   add b32 $r6 0x202
    673   shl b32 $r6 8  // $r6 = 0x20200 (src) or 0x20300 (dst): address low
    674   ld b32 $r7 D[$r5 + #ctx_src_address_low]
    675   iowr I[$r6 + 0x000] $r7
    676   add b32 $r6 0x400  // 0x20600 / 0x20700: address high, placed in bits 16 and up
    677   ld b32 $r7 D[$r5 + #ctx_src_address_high]
    678   shl b32 $r7 16
    679   iowr I[$r6 + 0x000] $r7
    680   add b32 $r6 0x400  // 0x20a00 / 0x20b00: pitch
    681   ld b32 $r7 D[$r5 + #ctx_src_pitch]
    682   iowr I[$r6 + 0x000] $r7
    683   ret
    684
    685// wait for regs to be available for use
       //
       // Polls I[0x20000] until bit 0 reads as zero.  Preserves all registers:
       // $r0 and $r1 are saved on the stack and restored, so $r0 is zero again
       // on return even though it is borrowed as the address register here.
    686cmd_exec_wait:
    687   push $r0
    688   push $r1
    689   mov $r0 0x800
    690   shl b32 $r0 6  // $r0 = 0x20000, the register cmd_exec writes the hw command to
    691   loop:
    692      iord $r1 I[$r0]
    693      and $r1 1
    694      bra ne #loop  // bit 0 still set: keep polling
    695   pop $r1
    696   pop $r0
    697   ret
    698
    699cmd_exec_query:
       // Queues the QUERY write-back; called from cmd_exec after the copy.
       // Input: $r3 = EXEC data (bit 13 = QUERY_SHORT).
       // Unless QUERY_SHORT is set, a first operation writes 12 bytes at
       // query address + 4.  A second operation then always writes the 4-byte
       // ctx_query_counter value at the query address.
       // Both operations go through the same I/O registers as an ordinary copy
       // (compare cmd_exec_set_format) and are started by writing 0x00012601 to
       // I[0x20000].  Clobbers $r4 and $r5.
    700   // if QUERY_SHORT not set, write out { -, 0, TIME_LO, TIME_HI }
    701   xbit $r4 $r3 13
    702   bra ne #query_counter
    703      call #cmd_exec_wait
    704      mov $r4 0x80c
    705      shl b32 $r4 6  // $r4 = 0x20300
    706      ld b32 $r5 D[$r0 + #ctx_query_address_low]
    707      add b32 $r5 4
    708      iowr I[$r4 + 0x000] $r5
    709      iowr I[$r4 + 0x100] $r0  // I[0x20400] = 0
    710      mov $r5 0xc
    711      iowr I[$r4 + 0x200] $r5  // I[0x20500] = 12
    712      add b32 $r4 0x400
    713      ld b32 $r5 D[$r0 + #ctx_query_address_high]
    714      shl b32 $r5 16
    715      iowr I[$r4 + 0x000] $r5
    716      add b32 $r4 0x500  // $r4 = 0x20c00
    717      mov $r5 0x00000b00
    718      sethi $r5 0x00010000  // $r5 = 0x00010b00
    719      iowr I[$r4 + 0x000] $r5
    720      mov $r5 0x00004040
    721      shl b32 $r5 1
    722      sethi $r5 0x80800000  // $r5 = 0x80808080
    723      iowr I[$r4 + 0x100] $r5
    724      mov $r5 0x00001110
    725      sethi $r5 0x13120000  // $r5 = 0x13121110
    726      iowr I[$r4 + 0x200] $r5
    727      mov $r5 0x00001514
    728      sethi $r5 0x17160000  // $r5 = 0x17161514
    729      iowr I[$r4 + 0x300] $r5
    730      mov $r5 0x00002601
    731      sethi $r5 0x00010000  // $r5 = 0x00012601
    732      mov $r4 0x800
    733      shl b32 $r4 6  // $r4 = 0x20000
    734      iowr I[$r4 + 0x000] $r5
    735
    736   // write COUNTER
    737   query_counter:
    738   call #cmd_exec_wait
    739   mov $r4 0x80c
    740   shl b32 $r4 6  // $r4 = 0x20300
    741   ld b32 $r5 D[$r0 + #ctx_query_address_low]
    742   iowr I[$r4 + 0x000] $r5
    743   iowr I[$r4 + 0x100] $r0  // I[0x20400] = 0
    744   mov $r5 0x4
    745   iowr I[$r4 + 0x200] $r5  // I[0x20500] = 4
    746   add b32 $r4 0x400
    747   ld b32 $r5 D[$r0 + #ctx_query_address_high]
    748   shl b32 $r5 16
    749   iowr I[$r4 + 0x000] $r5
    750   add b32 $r4 0x500  // $r4 = 0x20c00
    751   mov $r5 0x00000300
    752   iowr I[$r4 + 0x000] $r5
    753   mov $r5 0x00001110
    754   sethi $r5 0x13120000  // $r5 = 0x13121110
    755   iowr I[$r4 + 0x100] $r5
    756   ld b32 $r5 D[$r0 + #ctx_query_counter]
    757   add b32 $r4 0x500  // $r4 = 0x21100, where cmd_exec_set_format puts ctx_swz_const0
    758   iowr I[$r4 + 0x000] $r5
    759   mov $r5 0x00002601
    760   sethi $r5 0x00010000  // $r5 = 0x00012601
    761   mov $r4 0x800
    762   shl b32 $r4 6  // $r4 = 0x20000
    763   iowr I[$r4 + 0x000] $r5
    764   ret
    765
    766// Execute a copy operation
    767//
    768// Waits for the hw to be free, programs format/xcnt, the src surface and
       // the dst surface, then writes the assembled hw command ($r4) to
       // I[0x20000] and optionally queues a QUERY write.
       // $r4 is built from: 0x200 if FORMAT was requested, bit 7 if src is
       // tiled, bit 8 if dst is tiled, any bits added by
       // cmd_exec_set_surface_tiled, and finally 0x44000041.
       // This handler never sets $p1.  Clobbers $r4-$r15 and $p2.
       //
       // Inputs:
    769//    $r1: irqh state
    770//    $r2: hostirq state
    771//    $r3: data
    772//       000002000 QUERY_SHORT
    773//       000001000 QUERY
    774//       000000100 DST_LINEAR
    775//       000000010 SRC_LINEAR
    776//       000000001 FORMAT
    777//    $r4: dispatch table entry
    778// Outputs:
    779//    $r1: irqh state
    780//    $p1: set on error
    781//       $r2: hostirq state
    782//       $r3: data
    783cmd_exec:
    784   call #cmd_exec_wait
    785
    786   // if format requested, call function to calculate it, otherwise
    787   // fill in cpp/xcnt for both surfaces as if (cpp == 1)
    788   xbit $r15 $r3 0  // only the flags are used; $r15 is scratch
    789   bra e #cmd_exec_no_format
    790      call #cmd_exec_set_format
    791      mov $r4 0x200
    792      bra #cmd_exec_init_src_surface
    793   cmd_exec_no_format:
    794      mov $r6 0x810
    795      shl b32 $r6 6  // $r6 = 0x20400
    796      mov $r7 1
    797      st b32 D[$r0 + #ctx_src_cpp] $r7
    798      st b32 D[$r0 + #ctx_dst_cpp] $r7
    799      ld b32 $r7 D[$r0 + #ctx_xcnt]
    800      iowr I[$r6 + 0x000] $r7
    801      iowr I[$r6 + 0x100] $r7
    802      clear b32 $r4
    803
    804   cmd_exec_init_src_surface:
    805   bclr $flags $p2  // $p2 = 0: the helpers operate on the src surface
    806   clear b32 $r5  // ctx offset adjustment 0: ctx_src_* fields
    807   xbit $r15 $r3 4
    808   bra e #src_tiled  // SRC_LINEAR bit clear: surface is tiled
    809      call #cmd_exec_set_surface_linear
    810      bra #cmd_exec_init_dst_surface
    811   src_tiled:
    812      call #cmd_exec_set_surface_tiled
    813      bset $r4 7
    814
    815   cmd_exec_init_dst_surface:
    816   bset $flags $p2  // $p2 = 1: the helpers operate on the dst surface
    817   mov $r5 #ctx_dst_address_high - #ctx_src_address_high
    818   xbit $r15 $r3 8
    819   bra e #dst_tiled  // DST_LINEAR bit clear: surface is tiled
    820      call #cmd_exec_set_surface_linear
    821      bra #cmd_exec_kick
    822   dst_tiled:
    823      call #cmd_exec_set_surface_tiled
    824      bset $r4 8
    825
    826   cmd_exec_kick:
    827   mov $r5 0x800
    828   shl b32 $r5 6  // $r5 = 0x20000
    829   ld b32 $r6 D[$r0 + #ctx_ycnt]
    830   iowr I[$r5 + 0x100] $r6
    831   mov $r6 0x0041
    832   // SRC_TARGET = 1, DST_TARGET = 2
    833   sethi $r6 0x44000000  // $r6 = 0x44000041
    834   or $r4 $r6
    835   iowr I[$r5] $r4  // start the copy
    836
    837   // if requested, queue up a QUERY write after the copy has completed
    838   xbit $r15 $r3 12
    839   bra e #cmd_exec_done
    840      call #cmd_exec_query
    841
    842   cmd_exec_done:
    843   ret
    844
    845// Flush write cache
    846//
    847// Writes 0x00010000 to I[0x2200] (cmd_pm_trigger writes 0x00020000 to the
       // same register).  Never sets $p1.  Overwrites $r2 and $r3; dispatch
       // reloads both at dispatch_done after a successful return.
       //
       // Inputs:
    848//    $r1: irqh state
    849//    $r2: hostirq state
    850//    $r3: data
    851//    $r4: dispatch table entry
    852// Outputs:
    853//    $r1: irqh state
    854//    $p1: set on error
    855//       $r2: hostirq state
    856//       $r3: data
    857cmd_wrcache_flush:
    858   mov $r2 0x2200
    859   clear b32 $r3
    860   sethi $r3 0x10000
    861   iowr I[$r2] $r3
    862   ret
    863
    864.align 0x100