cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

mach64_accel.c (11894B)


      1// SPDX-License-Identifier: GPL-2.0
      2
      3/*
      4 *  ATI Mach64 Hardware Acceleration
      5 */
      6
      7#include <linux/delay.h>
      8#include <asm/unaligned.h>
      9#include <linux/fb.h>
     10#include <video/mach64.h>
     11#include "atyfb.h"
     12
     13    /*
     14     *  Generic Mach64 routines
     15     */
     16
     17/* this is for DMA GUI engine! work in progress */
     18typedef struct {
     19	u32 frame_buf_offset;
     20	u32 system_mem_addr;
     21	u32 command;
     22	u32 reserved;
     23} BM_DESCRIPTOR_ENTRY;
     24
     25#define LAST_DESCRIPTOR (1 << 31)
     26#define SYSTEM_TO_FRAME_BUFFER 0
     27
     28static u32 rotation24bpp(u32 dx, u32 direction)
     29{
     30	u32 rotation;
     31	if (direction & DST_X_LEFT_TO_RIGHT) {
     32		rotation = (dx / 4) % 6;
     33	} else {
     34		rotation = ((dx + 2) / 4) % 6;
     35	}
     36
     37	return ((rotation << 8) | DST_24_ROTATION_ENABLE);
     38}
     39
     40void aty_reset_engine(struct atyfb_par *par)
     41{
     42	/* reset engine */
     43	aty_st_le32(GEN_TEST_CNTL,
     44		aty_ld_le32(GEN_TEST_CNTL, par) &
     45		~(GUI_ENGINE_ENABLE | HWCURSOR_ENABLE), par);
     46	/* enable engine */
     47	aty_st_le32(GEN_TEST_CNTL,
     48		aty_ld_le32(GEN_TEST_CNTL, par) | GUI_ENGINE_ENABLE, par);
     49	/* ensure engine is not locked up by clearing any FIFO or */
     50	/* HOST errors */
     51	aty_st_le32(BUS_CNTL,
     52		aty_ld_le32(BUS_CNTL, par) | BUS_HOST_ERR_ACK | BUS_FIFO_ERR_ACK, par);
     53
     54	par->fifo_space = 0;
     55}
     56
     57static void reset_GTC_3D_engine(const struct atyfb_par *par)
     58{
     59	aty_st_le32(SCALE_3D_CNTL, 0xc0, par);
     60	mdelay(GTC_3D_RESET_DELAY);
     61	aty_st_le32(SETUP_CNTL, 0x00, par);
     62	mdelay(GTC_3D_RESET_DELAY);
     63	aty_st_le32(SCALE_3D_CNTL, 0x00, par);
     64	mdelay(GTC_3D_RESET_DELAY);
     65}
     66
     67void aty_init_engine(struct atyfb_par *par, struct fb_info *info)
     68{
     69	u32 pitch_value;
     70	u32 vxres;
     71
     72	/* determine modal information from global mode structure */
     73	pitch_value = info->fix.line_length / (info->var.bits_per_pixel / 8);
     74	vxres = info->var.xres_virtual;
     75
     76	if (info->var.bits_per_pixel == 24) {
     77		/* In 24 bpp, the engine is in 8 bpp - this requires that all */
     78		/* horizontal coordinates and widths must be adjusted */
     79		pitch_value *= 3;
     80		vxres *= 3;
     81	}
     82
     83	/* On GTC (RagePro), we need to reset the 3D engine before */
     84	if (M64_HAS(RESET_3D))
     85		reset_GTC_3D_engine(par);
     86
     87	/* Reset engine, enable, and clear any engine errors */
     88	aty_reset_engine(par);
     89	/* Ensure that vga page pointers are set to zero - the upper */
     90	/* page pointers are set to 1 to handle overflows in the */
     91	/* lower page */
     92	aty_st_le32(MEM_VGA_WP_SEL, 0x00010000, par);
     93	aty_st_le32(MEM_VGA_RP_SEL, 0x00010000, par);
     94
     95	/* ---- Setup standard engine context ---- */
     96
     97	/* All GUI registers here are FIFOed - therefore, wait for */
     98	/* the appropriate number of empty FIFO entries */
     99	wait_for_fifo(14, par);
    100
    101	/* enable all registers to be loaded for context loads */
    102	aty_st_le32(CONTEXT_MASK, 0xFFFFFFFF, par);
    103
    104	/* set destination pitch to modal pitch, set offset to zero */
    105	aty_st_le32(DST_OFF_PITCH, (pitch_value / 8) << 22, par);
    106
    107	/* zero these registers (set them to a known state) */
    108	aty_st_le32(DST_Y_X, 0, par);
    109	aty_st_le32(DST_HEIGHT, 0, par);
    110	aty_st_le32(DST_BRES_ERR, 0, par);
    111	aty_st_le32(DST_BRES_INC, 0, par);
    112	aty_st_le32(DST_BRES_DEC, 0, par);
    113
    114	/* set destination drawing attributes */
    115	aty_st_le32(DST_CNTL, DST_LAST_PEL | DST_Y_TOP_TO_BOTTOM |
    116		    DST_X_LEFT_TO_RIGHT, par);
    117
    118	/* set source pitch to modal pitch, set offset to zero */
    119	aty_st_le32(SRC_OFF_PITCH, (pitch_value / 8) << 22, par);
    120
    121	/* set these registers to a known state */
    122	aty_st_le32(SRC_Y_X, 0, par);
    123	aty_st_le32(SRC_HEIGHT1_WIDTH1, 1, par);
    124	aty_st_le32(SRC_Y_X_START, 0, par);
    125	aty_st_le32(SRC_HEIGHT2_WIDTH2, 1, par);
    126
    127	/* set source pixel retrieving attributes */
    128	aty_st_le32(SRC_CNTL, SRC_LINE_X_LEFT_TO_RIGHT, par);
    129
    130	/* set host attributes */
    131	wait_for_fifo(13, par);
    132	aty_st_le32(HOST_CNTL, HOST_BYTE_ALIGN, par);
    133
    134	/* set pattern attributes */
    135	aty_st_le32(PAT_REG0, 0, par);
    136	aty_st_le32(PAT_REG1, 0, par);
    137	aty_st_le32(PAT_CNTL, 0, par);
    138
    139	/* set scissors to modal size */
    140	aty_st_le32(SC_LEFT, 0, par);
    141	aty_st_le32(SC_TOP, 0, par);
    142	aty_st_le32(SC_BOTTOM, par->crtc.vyres - 1, par);
    143	aty_st_le32(SC_RIGHT, vxres - 1, par);
    144
    145	/* set background color to minimum value (usually BLACK) */
    146	aty_st_le32(DP_BKGD_CLR, 0, par);
    147
    148	/* set foreground color to maximum value (usually WHITE) */
    149	aty_st_le32(DP_FRGD_CLR, 0xFFFFFFFF, par);
    150
    151	/* set write mask to effect all pixel bits */
    152	aty_st_le32(DP_WRITE_MASK, 0xFFFFFFFF, par);
    153
    154	/* set foreground mix to overpaint and background mix to */
    155	/* no-effect */
    156	aty_st_le32(DP_MIX, FRGD_MIX_S | BKGD_MIX_D, par);
    157
    158	/* set primary source pixel channel to foreground color */
    159	/* register */
    160	aty_st_le32(DP_SRC, FRGD_SRC_FRGD_CLR, par);
    161
    162	/* set compare functionality to false (no-effect on */
    163	/* destination) */
    164	wait_for_fifo(3, par);
    165	aty_st_le32(CLR_CMP_CLR, 0, par);
    166	aty_st_le32(CLR_CMP_MASK, 0xFFFFFFFF, par);
    167	aty_st_le32(CLR_CMP_CNTL, 0, par);
    168
    169	/* set pixel depth */
    170	wait_for_fifo(2, par);
    171	aty_st_le32(DP_PIX_WIDTH, par->crtc.dp_pix_width, par);
    172	aty_st_le32(DP_CHAIN_MASK, par->crtc.dp_chain_mask, par);
    173
    174	wait_for_fifo(5, par);
    175 	aty_st_le32(SCALE_3D_CNTL, 0, par);
    176	aty_st_le32(Z_CNTL, 0, par);
    177	aty_st_le32(CRTC_INT_CNTL, aty_ld_le32(CRTC_INT_CNTL, par) & ~0x20,
    178		    par);
    179	aty_st_le32(GUI_TRAJ_CNTL, 0x100023, par);
    180
    181	/* insure engine is idle before leaving */
    182	wait_for_idle(par);
    183}
    184
    185    /*
    186     *  Accelerated functions
    187     */
    188
    189static inline void draw_rect(s16 x, s16 y, u16 width, u16 height,
    190			     struct atyfb_par *par)
    191{
    192	/* perform rectangle fill */
    193	wait_for_fifo(2, par);
    194	aty_st_le32(DST_Y_X, (x << 16) | y, par);
    195	aty_st_le32(DST_HEIGHT_WIDTH, (width << 16) | height, par);
    196	par->blitter_may_be_busy = 1;
    197}
    198
    199void atyfb_copyarea(struct fb_info *info, const struct fb_copyarea *area)
    200{
    201	struct atyfb_par *par = (struct atyfb_par *) info->par;
    202	u32 dy = area->dy, sy = area->sy, direction = DST_LAST_PEL;
    203	u32 sx = area->sx, dx = area->dx, width = area->width, rotation = 0;
    204
    205	if (par->asleep)
    206		return;
    207	if (!area->width || !area->height)
    208		return;
    209	if (!par->accel_flags) {
    210		cfb_copyarea(info, area);
    211		return;
    212	}
    213
    214	if (info->var.bits_per_pixel == 24) {
    215		/* In 24 bpp, the engine is in 8 bpp - this requires that all */
    216		/* horizontal coordinates and widths must be adjusted */
    217		sx *= 3;
    218		dx *= 3;
    219		width *= 3;
    220	}
    221
    222	if (area->sy < area->dy) {
    223		dy += area->height - 1;
    224		sy += area->height - 1;
    225	} else
    226		direction |= DST_Y_TOP_TO_BOTTOM;
    227
    228	if (sx < dx) {
    229		dx += width - 1;
    230		sx += width - 1;
    231	} else
    232		direction |= DST_X_LEFT_TO_RIGHT;
    233
    234	if (info->var.bits_per_pixel == 24) {
    235		rotation = rotation24bpp(dx, direction);
    236	}
    237
    238	wait_for_fifo(5, par);
    239	aty_st_le32(DP_PIX_WIDTH, par->crtc.dp_pix_width, par);
    240	aty_st_le32(DP_SRC, FRGD_SRC_BLIT, par);
    241	aty_st_le32(SRC_Y_X, (sx << 16) | sy, par);
    242	aty_st_le32(SRC_HEIGHT1_WIDTH1, (width << 16) | area->height, par);
    243	aty_st_le32(DST_CNTL, direction | rotation, par);
    244	draw_rect(dx, dy, width, area->height, par);
    245}
    246
    247void atyfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
    248{
    249	struct atyfb_par *par = (struct atyfb_par *) info->par;
    250	u32 color, dx = rect->dx, width = rect->width, rotation = 0;
    251
    252	if (par->asleep)
    253		return;
    254	if (!rect->width || !rect->height)
    255		return;
    256	if (!par->accel_flags) {
    257		cfb_fillrect(info, rect);
    258		return;
    259	}
    260
    261	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
    262	    info->fix.visual == FB_VISUAL_DIRECTCOLOR)
    263		color = ((u32 *)(info->pseudo_palette))[rect->color];
    264	else
    265		color = rect->color;
    266
    267	if (info->var.bits_per_pixel == 24) {
    268		/* In 24 bpp, the engine is in 8 bpp - this requires that all */
    269		/* horizontal coordinates and widths must be adjusted */
    270		dx *= 3;
    271		width *= 3;
    272		rotation = rotation24bpp(dx, DST_X_LEFT_TO_RIGHT);
    273	}
    274
    275	wait_for_fifo(4, par);
    276	aty_st_le32(DP_PIX_WIDTH, par->crtc.dp_pix_width, par);
    277	aty_st_le32(DP_FRGD_CLR, color, par);
    278	aty_st_le32(DP_SRC,
    279		    BKGD_SRC_BKGD_CLR | FRGD_SRC_FRGD_CLR | MONO_SRC_ONE,
    280		    par);
    281	aty_st_le32(DST_CNTL,
    282		    DST_LAST_PEL | DST_Y_TOP_TO_BOTTOM |
    283		    DST_X_LEFT_TO_RIGHT | rotation, par);
    284	draw_rect(dx, rect->dy, width, rect->height, par);
    285}
    286
    287void atyfb_imageblit(struct fb_info *info, const struct fb_image *image)
    288{
    289	struct atyfb_par *par = (struct atyfb_par *) info->par;
    290	u32 src_bytes, dx = image->dx, dy = image->dy, width = image->width;
    291	u32 pix_width, rotation = 0, src, mix;
    292
    293	if (par->asleep)
    294		return;
    295	if (!image->width || !image->height)
    296		return;
    297	if (!par->accel_flags ||
    298	    (image->depth != 1 && info->var.bits_per_pixel != image->depth)) {
    299		cfb_imageblit(info, image);
    300		return;
    301	}
    302
    303	pix_width = par->crtc.dp_pix_width;
    304
    305	switch (image->depth) {
    306	case 1:
    307	    pix_width &= ~(BYTE_ORDER_MASK | HOST_MASK);
    308	    pix_width |= (BYTE_ORDER_MSB_TO_LSB | HOST_1BPP);
    309	    break;
    310	case 4:
    311	    pix_width &= ~(BYTE_ORDER_MASK | HOST_MASK);
    312	    pix_width |= (BYTE_ORDER_MSB_TO_LSB | HOST_4BPP);
    313	    break;
    314	case 8:
    315	    pix_width &= ~HOST_MASK;
    316	    pix_width |= HOST_8BPP;
    317	    break;
    318	case 15:
    319	    pix_width &= ~HOST_MASK;
    320	    pix_width |= HOST_15BPP;
    321	    break;
    322	case 16:
    323	    pix_width &= ~HOST_MASK;
    324	    pix_width |= HOST_16BPP;
    325	    break;
    326	case 24:
    327	    pix_width &= ~HOST_MASK;
    328	    pix_width |= HOST_24BPP;
    329	    break;
    330	case 32:
    331	    pix_width &= ~HOST_MASK;
    332	    pix_width |= HOST_32BPP;
    333	    break;
    334	}
    335
    336	if (info->var.bits_per_pixel == 24) {
    337		/* In 24 bpp, the engine is in 8 bpp - this requires that all */
    338		/* horizontal coordinates and widths must be adjusted */
    339		dx *= 3;
    340		width *= 3;
    341
    342		rotation = rotation24bpp(dx, DST_X_LEFT_TO_RIGHT);
    343
    344		pix_width &= ~DST_MASK;
    345		pix_width |= DST_8BPP;
    346
    347		/*
    348		 * since Rage 3D IIc we have DP_HOST_TRIPLE_EN bit
    349		 * this hwaccelerated triple has an issue with not aligned data
    350		 */
    351		if (image->depth == 1 && M64_HAS(HW_TRIPLE) && image->width % 8 == 0)
    352			pix_width |= DP_HOST_TRIPLE_EN;
    353	}
    354
    355	if (image->depth == 1) {
    356		u32 fg, bg;
    357		if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
    358		    info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
    359			fg = ((u32*)(info->pseudo_palette))[image->fg_color];
    360			bg = ((u32*)(info->pseudo_palette))[image->bg_color];
    361		} else {
    362			fg = image->fg_color;
    363			bg = image->bg_color;
    364		}
    365
    366		wait_for_fifo(2, par);
    367		aty_st_le32(DP_BKGD_CLR, bg, par);
    368		aty_st_le32(DP_FRGD_CLR, fg, par);
    369		src = MONO_SRC_HOST | FRGD_SRC_FRGD_CLR | BKGD_SRC_BKGD_CLR;
    370		mix = FRGD_MIX_S | BKGD_MIX_S;
    371	} else {
    372		src = MONO_SRC_ONE | FRGD_SRC_HOST;
    373		mix = FRGD_MIX_D_XOR_S | BKGD_MIX_D;
    374	}
    375
    376	wait_for_fifo(5, par);
    377	aty_st_le32(DP_PIX_WIDTH, pix_width, par);
    378	aty_st_le32(DP_MIX, mix, par);
    379	aty_st_le32(DP_SRC, src, par);
    380	aty_st_le32(HOST_CNTL, HOST_BYTE_ALIGN, par);
    381	aty_st_le32(DST_CNTL, DST_Y_TOP_TO_BOTTOM | DST_X_LEFT_TO_RIGHT | rotation, par);
    382
    383	draw_rect(dx, dy, width, image->height, par);
    384	src_bytes = (((image->width * image->depth) + 7) / 8) * image->height;
    385
    386	/* manual triple each pixel */
    387	if (image->depth == 1 && info->var.bits_per_pixel == 24 && !(pix_width & DP_HOST_TRIPLE_EN)) {
    388		int inbit, outbit, mult24, byte_id_in_dword, width;
    389		u8 *pbitmapin = (u8*)image->data, *pbitmapout;
    390		u32 hostdword;
    391
    392		for (width = image->width, inbit = 7, mult24 = 0; src_bytes; ) {
    393			for (hostdword = 0, pbitmapout = (u8*)&hostdword, byte_id_in_dword = 0;
    394				byte_id_in_dword < 4 && src_bytes;
    395				byte_id_in_dword++, pbitmapout++) {
    396				for (outbit = 7; outbit >= 0; outbit--) {
    397					*pbitmapout |= (((*pbitmapin >> inbit) & 1) << outbit);
    398					mult24++;
    399					/* next bit */
    400					if (mult24 == 3) {
    401						mult24 = 0;
    402						inbit--;
    403						width--;
    404					}
    405
    406					/* next byte */
    407					if (inbit < 0 || width == 0) {
    408						src_bytes--;
    409						pbitmapin++;
    410						inbit = 7;
    411
    412						if (width == 0) {
    413						    width = image->width;
    414						    outbit = 0;
    415						}
    416					}
    417				}
    418			}
    419			wait_for_fifo(1, par);
    420			aty_st_le32(HOST_DATA0, le32_to_cpu(hostdword), par);
    421		}
    422	} else {
    423		u32 *pbitmap, dwords = (src_bytes + 3) / 4;
    424		for (pbitmap = (u32*)(image->data); dwords; dwords--, pbitmap++) {
    425			wait_for_fifo(1, par);
    426			aty_st_le32(HOST_DATA0, get_unaligned_le32(pbitmap), par);
    427		}
    428	}
    429}