cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

r100.c (119613B)


      1/*
      2 * Copyright 2008 Advanced Micro Devices, Inc.
      3 * Copyright 2008 Red Hat Inc.
      4 * Copyright 2009 Jerome Glisse.
      5 *
      6 * Permission is hereby granted, free of charge, to any person obtaining a
      7 * copy of this software and associated documentation files (the "Software"),
      8 * to deal in the Software without restriction, including without limitation
      9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10 * and/or sell copies of the Software, and to permit persons to whom the
     11 * Software is furnished to do so, subject to the following conditions:
     12 *
     13 * The above copyright notice and this permission notice shall be included in
     14 * all copies or substantial portions of the Software.
     15 *
     16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     22 * OTHER DEALINGS IN THE SOFTWARE.
     23 *
     24 * Authors: Dave Airlie
     25 *          Alex Deucher
     26 *          Jerome Glisse
     27 */
     28
     29#include <linux/firmware.h>
     30#include <linux/module.h>
     31#include <linux/pci.h>
     32#include <linux/seq_file.h>
     33#include <linux/slab.h>
     34
     35#include <drm/drm_device.h>
     36#include <drm/drm_file.h>
     37#include <drm/drm_fourcc.h>
     38#include <drm/drm_vblank.h>
     39#include <drm/radeon_drm.h>
     40
     41#include "atom.h"
     42#include "r100_reg_safe.h"
     43#include "r100d.h"
     44#include "radeon.h"
     45#include "radeon_asic.h"
     46#include "radeon_reg.h"
     47#include "rn50_reg_safe.h"
     48#include "rs100d.h"
     49#include "rv200d.h"
     50#include "rv250d.h"
     51
     52/* Firmware Names */
     53#define FIRMWARE_R100		"radeon/R100_cp.bin"
     54#define FIRMWARE_R200		"radeon/R200_cp.bin"
     55#define FIRMWARE_R300		"radeon/R300_cp.bin"
     56#define FIRMWARE_R420		"radeon/R420_cp.bin"
     57#define FIRMWARE_RS690		"radeon/RS690_cp.bin"
     58#define FIRMWARE_RS600		"radeon/RS600_cp.bin"
     59#define FIRMWARE_R520		"radeon/R520_cp.bin"
     60
     61MODULE_FIRMWARE(FIRMWARE_R100);
     62MODULE_FIRMWARE(FIRMWARE_R200);
     63MODULE_FIRMWARE(FIRMWARE_R300);
     64MODULE_FIRMWARE(FIRMWARE_R420);
     65MODULE_FIRMWARE(FIRMWARE_RS690);
     66MODULE_FIRMWARE(FIRMWARE_RS600);
     67MODULE_FIRMWARE(FIRMWARE_R520);
     68
     69#include "r100_track.h"
     70
      71/* This file gathers functions specific to:
      72 * r100, rv100, rs100, rv200, rs200, r200, rv250, rs300, rv280
     73 * and others in some cases.
     74 */
     75
     76static bool r100_is_in_vblank(struct radeon_device *rdev, int crtc)
     77{
     78	if (crtc == 0) {
     79		if (RREG32(RADEON_CRTC_STATUS) & RADEON_CRTC_VBLANK_CUR)
     80			return true;
     81		else
     82			return false;
     83	} else {
     84		if (RREG32(RADEON_CRTC2_STATUS) & RADEON_CRTC2_VBLANK_CUR)
     85			return true;
     86		else
     87			return false;
     88	}
     89}
     90
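        /* Sample the current vline counter twice: if the two reads differ,
         * the counter is still advancing, i.e. the CRTC is actively
         * scanning out. */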
     91static bool r100_is_counter_moving(struct radeon_device *rdev, int crtc)
     92{
     93	u32 vline1, vline2;
     94
     95	if (crtc == 0) {
     96		vline1 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
     97		vline2 = (RREG32(RADEON_CRTC_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
     98	} else {
     99		vline1 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
    100		vline2 = (RREG32(RADEON_CRTC2_VLINE_CRNT_VLINE) >> 16) & RADEON_CRTC_V_TOTAL;
    101	}
    102	if (vline1 != vline2)
    103		return true;
    104	else
    105		return false;
    106}
    107
    108/**
    109 * r100_wait_for_vblank - vblank wait asic callback.
    110 *
    111 * @rdev: radeon_device pointer
    112 * @crtc: crtc to wait for vblank on
    113 *
    114 * Wait for vblank on the requested crtc (r1xx-r4xx).
    115 */
    116void r100_wait_for_vblank(struct radeon_device *rdev, int crtc)
    117{
    118	unsigned i = 0;
    119
    120	if (crtc >= rdev->num_crtc)
    121		return;
    122
    123	if (crtc == 0) {
    124		if (!(RREG32(RADEON_CRTC_GEN_CNTL) & RADEON_CRTC_EN))
    125			return;
    126	} else {
    127		if (!(RREG32(RADEON_CRTC2_GEN_CNTL) & RADEON_CRTC2_EN))
    128			return;
    129	}
    130
    131	/* depending on when we hit vblank, we may be close to active; if so,
    132	 * wait for another frame.
    133	 */
    134	while (r100_is_in_vblank(rdev, crtc)) {
    135		if (i++ % 100 == 0) {
    136			if (!r100_is_counter_moving(rdev, crtc))
    137				break;
    138		}
    139	}
    140
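        	/* now wait for the next vblank to actually begin; again bail
        	 * out if the vline counter has stopped moving (e.g. the CRTC
        	 * was disabled while we waited) */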
    141	while (!r100_is_in_vblank(rdev, crtc)) {
    142		if (i++ % 100 == 0) {
    143			if (!r100_is_counter_moving(rdev, crtc))
    144				break;
    145		}
    146	}
    147}
    148
    149/**
    150 * r100_page_flip - pageflip callback.
    151 *
    152 * @rdev: radeon_device pointer
    153 * @crtc_id: crtc to cleanup pageflip on
    154 * @crtc_base: new address of the crtc (GPU MC address)
    155 * @async: asynchronous flip
    156 *
    157 * Does the actual pageflip (r1xx-r4xx).
    158 * During vblank we take the crtc lock and wait for the update_pending
     159 * bit to go high; when it does, we release the lock and allow the
    160 * double buffered update to take place.
    161 */
    162void r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base, bool async)
    163{
    164	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
    165	uint32_t crtc_pitch, pitch_pixels;
    166	struct drm_framebuffer *fb = radeon_crtc->base.primary->fb;
    167	u32 tmp = ((u32)crtc_base) | RADEON_CRTC_OFFSET__OFFSET_LOCK;
    168	int i;
    169
    170	/* Lock the graphics update lock */
    171	/* update the scanout addresses */
    172	WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);
    173
    174	/* update pitch */
    175	pitch_pixels = fb->pitches[0] / fb->format->cpp[0];
    176	crtc_pitch = DIV_ROUND_UP(pitch_pixels * fb->format->cpp[0] * 8,
    177				  fb->format->cpp[0] * 8 * 8);
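        	/* the expression above reduces to DIV_ROUND_UP(pitch_pixels, 8):
        	 * the CRTC pitch is programmed in units of 8 pixels */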
    178	crtc_pitch |= crtc_pitch << 16;
    179	WREG32(RADEON_CRTC_PITCH + radeon_crtc->crtc_offset, crtc_pitch);
    180
    181	/* Wait for update_pending to go high. */
    182	for (i = 0; i < rdev->usec_timeout; i++) {
    183		if (RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET)
    184			break;
    185		udelay(1);
    186	}
    187	DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
    188
    189	/* Unlock the lock, so double-buffering can take place inside vblank */
    190	tmp &= ~RADEON_CRTC_OFFSET__OFFSET_LOCK;
    191	WREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset, tmp);
    192
    193}
    194
    195/**
    196 * r100_page_flip_pending - check if page flip is still pending
    197 *
    198 * @rdev: radeon_device pointer
    199 * @crtc_id: crtc to check
    200 *
     201 * Check if the last pageflip is still pending (r1xx-r4xx).
    202 * Returns the current update pending status.
    203 */
    204bool r100_page_flip_pending(struct radeon_device *rdev, int crtc_id)
    205{
    206	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
    207
    208	/* Return current update_pending status: */
    209	return !!(RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) &
    210		RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET);
    211}
    212
    213/**
    214 * r100_pm_get_dynpm_state - look up dynpm power state callback.
    215 *
    216 * @rdev: radeon_device pointer
    217 *
    218 * Look up the optimal power state based on the
    219 * current state of the GPU (r1xx-r5xx).
    220 * Used for dynpm only.
    221 */
    222void r100_pm_get_dynpm_state(struct radeon_device *rdev)
    223{
    224	int i;
    225	rdev->pm.dynpm_can_upclock = true;
    226	rdev->pm.dynpm_can_downclock = true;
    227
    228	switch (rdev->pm.dynpm_planned_action) {
    229	case DYNPM_ACTION_MINIMUM:
    230		rdev->pm.requested_power_state_index = 0;
    231		rdev->pm.dynpm_can_downclock = false;
    232		break;
    233	case DYNPM_ACTION_DOWNCLOCK:
    234		if (rdev->pm.current_power_state_index == 0) {
    235			rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
    236			rdev->pm.dynpm_can_downclock = false;
    237		} else {
    238			if (rdev->pm.active_crtc_count > 1) {
    239				for (i = 0; i < rdev->pm.num_power_states; i++) {
    240					if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
    241						continue;
    242					else if (i >= rdev->pm.current_power_state_index) {
    243						rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
    244						break;
    245					} else {
    246						rdev->pm.requested_power_state_index = i;
    247						break;
    248					}
    249				}
    250			} else
    251				rdev->pm.requested_power_state_index =
    252					rdev->pm.current_power_state_index - 1;
    253		}
    254		/* don't use the power state if crtcs are active and no display flag is set */
    255		if ((rdev->pm.active_crtc_count > 0) &&
    256		    (rdev->pm.power_state[rdev->pm.requested_power_state_index].clock_info[0].flags &
    257		     RADEON_PM_MODE_NO_DISPLAY)) {
    258			rdev->pm.requested_power_state_index++;
    259		}
    260		break;
    261	case DYNPM_ACTION_UPCLOCK:
    262		if (rdev->pm.current_power_state_index == (rdev->pm.num_power_states - 1)) {
    263			rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
    264			rdev->pm.dynpm_can_upclock = false;
    265		} else {
    266			if (rdev->pm.active_crtc_count > 1) {
    267				for (i = (rdev->pm.num_power_states - 1); i >= 0; i--) {
    268					if (rdev->pm.power_state[i].flags & RADEON_PM_STATE_SINGLE_DISPLAY_ONLY)
    269						continue;
    270					else if (i <= rdev->pm.current_power_state_index) {
    271						rdev->pm.requested_power_state_index = rdev->pm.current_power_state_index;
    272						break;
    273					} else {
    274						rdev->pm.requested_power_state_index = i;
    275						break;
    276					}
    277				}
    278			} else
    279				rdev->pm.requested_power_state_index =
    280					rdev->pm.current_power_state_index + 1;
    281		}
    282		break;
    283	case DYNPM_ACTION_DEFAULT:
    284		rdev->pm.requested_power_state_index = rdev->pm.default_power_state_index;
    285		rdev->pm.dynpm_can_upclock = false;
    286		break;
    287	case DYNPM_ACTION_NONE:
    288	default:
     289		DRM_ERROR("Requested mode for undefined action\n");
    290		return;
    291	}
    292	/* only one clock mode per power state */
    293	rdev->pm.requested_clock_mode_index = 0;
    294
    295	DRM_DEBUG_DRIVER("Requested: e: %d m: %d p: %d\n",
    296		  rdev->pm.power_state[rdev->pm.requested_power_state_index].
    297		  clock_info[rdev->pm.requested_clock_mode_index].sclk,
    298		  rdev->pm.power_state[rdev->pm.requested_power_state_index].
    299		  clock_info[rdev->pm.requested_clock_mode_index].mclk,
    300		  rdev->pm.power_state[rdev->pm.requested_power_state_index].
    301		  pcie_lanes);
    302}
    303
    304/**
    305 * r100_pm_init_profile - Initialize power profiles callback.
    306 *
    307 * @rdev: radeon_device pointer
    308 *
    309 * Initialize the power states used in profile mode
    310 * (r1xx-r3xx).
    311 * Used for profile mode only.
    312 */
    313void r100_pm_init_profile(struct radeon_device *rdev)
    314{
    315	/* default */
    316	rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_ps_idx = rdev->pm.default_power_state_index;
    317	rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
    318	rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_off_cm_idx = 0;
    319	rdev->pm.profiles[PM_PROFILE_DEFAULT_IDX].dpms_on_cm_idx = 0;
    320	/* low sh */
    321	rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_ps_idx = 0;
    322	rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_ps_idx = 0;
    323	rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_off_cm_idx = 0;
    324	rdev->pm.profiles[PM_PROFILE_LOW_SH_IDX].dpms_on_cm_idx = 0;
    325	/* mid sh */
    326	rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_ps_idx = 0;
    327	rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_ps_idx = 0;
    328	rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_off_cm_idx = 0;
    329	rdev->pm.profiles[PM_PROFILE_MID_SH_IDX].dpms_on_cm_idx = 0;
    330	/* high sh */
    331	rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_ps_idx = 0;
    332	rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
    333	rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_off_cm_idx = 0;
    334	rdev->pm.profiles[PM_PROFILE_HIGH_SH_IDX].dpms_on_cm_idx = 0;
    335	/* low mh */
    336	rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_ps_idx = 0;
    337	rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
    338	rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_off_cm_idx = 0;
    339	rdev->pm.profiles[PM_PROFILE_LOW_MH_IDX].dpms_on_cm_idx = 0;
    340	/* mid mh */
    341	rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_ps_idx = 0;
    342	rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
    343	rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_off_cm_idx = 0;
    344	rdev->pm.profiles[PM_PROFILE_MID_MH_IDX].dpms_on_cm_idx = 0;
    345	/* high mh */
    346	rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_ps_idx = 0;
    347	rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_ps_idx = rdev->pm.default_power_state_index;
    348	rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_off_cm_idx = 0;
    349	rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_cm_idx = 0;
    350}
    351
    352/**
    353 * r100_pm_misc - set additional pm hw parameters callback.
    354 *
    355 * @rdev: radeon_device pointer
    356 *
    357 * Set non-clock parameters associated with a power state
    358 * (voltage, pcie lanes, etc.) (r1xx-r4xx).
    359 */
    360void r100_pm_misc(struct radeon_device *rdev)
    361{
    362	int requested_index = rdev->pm.requested_power_state_index;
    363	struct radeon_power_state *ps = &rdev->pm.power_state[requested_index];
    364	struct radeon_voltage *voltage = &ps->clock_info[0].voltage;
    365	u32 tmp, sclk_cntl, sclk_cntl2, sclk_more_cntl;
    366
    367	if ((voltage->type == VOLTAGE_GPIO) && (voltage->gpio.valid)) {
    368		if (ps->misc & ATOM_PM_MISCINFO_VOLTAGE_DROP_SUPPORT) {
    369			tmp = RREG32(voltage->gpio.reg);
    370			if (voltage->active_high)
    371				tmp |= voltage->gpio.mask;
    372			else
    373				tmp &= ~(voltage->gpio.mask);
    374			WREG32(voltage->gpio.reg, tmp);
    375			if (voltage->delay)
    376				udelay(voltage->delay);
    377		} else {
    378			tmp = RREG32(voltage->gpio.reg);
    379			if (voltage->active_high)
    380				tmp &= ~voltage->gpio.mask;
    381			else
    382				tmp |= voltage->gpio.mask;
    383			WREG32(voltage->gpio.reg, tmp);
    384			if (voltage->delay)
    385				udelay(voltage->delay);
    386		}
    387	}
    388
    389	sclk_cntl = RREG32_PLL(SCLK_CNTL);
    390	sclk_cntl2 = RREG32_PLL(SCLK_CNTL2);
    391	sclk_cntl2 &= ~REDUCED_SPEED_SCLK_SEL(3);
    392	sclk_more_cntl = RREG32_PLL(SCLK_MORE_CNTL);
    393	sclk_more_cntl &= ~VOLTAGE_DELAY_SEL(3);
    394	if (ps->misc & ATOM_PM_MISCINFO_ASIC_REDUCED_SPEED_SCLK_EN) {
    395		sclk_more_cntl |= REDUCED_SPEED_SCLK_EN;
    396		if (ps->misc & ATOM_PM_MISCINFO_DYN_CLK_3D_IDLE)
    397			sclk_cntl2 |= REDUCED_SPEED_SCLK_MODE;
    398		else
    399			sclk_cntl2 &= ~REDUCED_SPEED_SCLK_MODE;
    400		if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_2)
    401			sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(0);
    402		else if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_CLOCK_DIVIDER_BY_4)
    403			sclk_cntl2 |= REDUCED_SPEED_SCLK_SEL(2);
    404	} else
    405		sclk_more_cntl &= ~REDUCED_SPEED_SCLK_EN;
    406
    407	if (ps->misc & ATOM_PM_MISCINFO_ASIC_DYNAMIC_VOLTAGE_EN) {
    408		sclk_more_cntl |= IO_CG_VOLTAGE_DROP;
    409		if (voltage->delay) {
    410			sclk_more_cntl |= VOLTAGE_DROP_SYNC;
    411			switch (voltage->delay) {
    412			case 33:
    413				sclk_more_cntl |= VOLTAGE_DELAY_SEL(0);
    414				break;
    415			case 66:
    416				sclk_more_cntl |= VOLTAGE_DELAY_SEL(1);
    417				break;
    418			case 99:
    419				sclk_more_cntl |= VOLTAGE_DELAY_SEL(2);
    420				break;
    421			case 132:
    422				sclk_more_cntl |= VOLTAGE_DELAY_SEL(3);
    423				break;
    424			}
    425		} else
    426			sclk_more_cntl &= ~VOLTAGE_DROP_SYNC;
    427	} else
    428		sclk_more_cntl &= ~IO_CG_VOLTAGE_DROP;
    429
    430	if (ps->misc & ATOM_PM_MISCINFO_DYNAMIC_HDP_BLOCK_EN)
    431		sclk_cntl &= ~FORCE_HDP;
    432	else
    433		sclk_cntl |= FORCE_HDP;
    434
    435	WREG32_PLL(SCLK_CNTL, sclk_cntl);
    436	WREG32_PLL(SCLK_CNTL2, sclk_cntl2);
    437	WREG32_PLL(SCLK_MORE_CNTL, sclk_more_cntl);
    438
    439	/* set pcie lanes */
    440	if ((rdev->flags & RADEON_IS_PCIE) &&
    441	    !(rdev->flags & RADEON_IS_IGP) &&
    442	    rdev->asic->pm.set_pcie_lanes &&
    443	    (ps->pcie_lanes !=
    444	     rdev->pm.power_state[rdev->pm.current_power_state_index].pcie_lanes)) {
    445		radeon_set_pcie_lanes(rdev,
    446				      ps->pcie_lanes);
    447		DRM_DEBUG_DRIVER("Setting: p: %d\n", ps->pcie_lanes);
    448	}
    449}
    450
    451/**
    452 * r100_pm_prepare - pre-power state change callback.
    453 *
    454 * @rdev: radeon_device pointer
    455 *
    456 * Prepare for a power state change (r1xx-r4xx).
    457 */
    458void r100_pm_prepare(struct radeon_device *rdev)
    459{
    460	struct drm_device *ddev = rdev->ddev;
    461	struct drm_crtc *crtc;
    462	struct radeon_crtc *radeon_crtc;
    463	u32 tmp;
    464
    465	/* disable any active CRTCs */
    466	list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
    467		radeon_crtc = to_radeon_crtc(crtc);
    468		if (radeon_crtc->enabled) {
    469			if (radeon_crtc->crtc_id) {
    470				tmp = RREG32(RADEON_CRTC2_GEN_CNTL);
    471				tmp |= RADEON_CRTC2_DISP_REQ_EN_B;
    472				WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
    473			} else {
    474				tmp = RREG32(RADEON_CRTC_GEN_CNTL);
    475				tmp |= RADEON_CRTC_DISP_REQ_EN_B;
    476				WREG32(RADEON_CRTC_GEN_CNTL, tmp);
    477			}
    478		}
    479	}
    480}
    481
    482/**
    483 * r100_pm_finish - post-power state change callback.
    484 *
    485 * @rdev: radeon_device pointer
    486 *
    487 * Clean up after a power state change (r1xx-r4xx).
    488 */
    489void r100_pm_finish(struct radeon_device *rdev)
    490{
    491	struct drm_device *ddev = rdev->ddev;
    492	struct drm_crtc *crtc;
    493	struct radeon_crtc *radeon_crtc;
    494	u32 tmp;
    495
    496	/* enable any active CRTCs */
    497	list_for_each_entry(crtc, &ddev->mode_config.crtc_list, head) {
    498		radeon_crtc = to_radeon_crtc(crtc);
    499		if (radeon_crtc->enabled) {
    500			if (radeon_crtc->crtc_id) {
    501				tmp = RREG32(RADEON_CRTC2_GEN_CNTL);
    502				tmp &= ~RADEON_CRTC2_DISP_REQ_EN_B;
    503				WREG32(RADEON_CRTC2_GEN_CNTL, tmp);
    504			} else {
    505				tmp = RREG32(RADEON_CRTC_GEN_CNTL);
    506				tmp &= ~RADEON_CRTC_DISP_REQ_EN_B;
    507				WREG32(RADEON_CRTC_GEN_CNTL, tmp);
    508			}
    509		}
    510	}
    511}
    512
    513/**
    514 * r100_gui_idle - gui idle callback.
    515 *
    516 * @rdev: radeon_device pointer
    517 *
     518 * Check if the GUI (2D/3D engines) are idle (r1xx-r5xx).
    519 * Returns true if idle, false if not.
    520 */
    521bool r100_gui_idle(struct radeon_device *rdev)
    522{
    523	if (RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)
    524		return false;
    525	else
    526		return true;
    527}
    528
    529/* hpd for digital panel detect/disconnect */
    530/**
    531 * r100_hpd_sense - hpd sense callback.
    532 *
    533 * @rdev: radeon_device pointer
    534 * @hpd: hpd (hotplug detect) pin
    535 *
    536 * Checks if a digital monitor is connected (r1xx-r4xx).
    537 * Returns true if connected, false if not connected.
    538 */
    539bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd)
    540{
    541	bool connected = false;
    542
    543	switch (hpd) {
    544	case RADEON_HPD_1:
    545		if (RREG32(RADEON_FP_GEN_CNTL) & RADEON_FP_DETECT_SENSE)
    546			connected = true;
    547		break;
    548	case RADEON_HPD_2:
    549		if (RREG32(RADEON_FP2_GEN_CNTL) & RADEON_FP2_DETECT_SENSE)
    550			connected = true;
    551		break;
    552	default:
    553		break;
    554	}
    555	return connected;
    556}
    557
    558/**
    559 * r100_hpd_set_polarity - hpd set polarity callback.
    560 *
    561 * @rdev: radeon_device pointer
    562 * @hpd: hpd (hotplug detect) pin
    563 *
    564 * Set the polarity of the hpd pin (r1xx-r4xx).
    565 */
    566void r100_hpd_set_polarity(struct radeon_device *rdev,
    567			   enum radeon_hpd_id hpd)
    568{
    569	u32 tmp;
    570	bool connected = r100_hpd_sense(rdev, hpd);
    571
    572	switch (hpd) {
    573	case RADEON_HPD_1:
    574		tmp = RREG32(RADEON_FP_GEN_CNTL);
    575		if (connected)
    576			tmp &= ~RADEON_FP_DETECT_INT_POL;
    577		else
    578			tmp |= RADEON_FP_DETECT_INT_POL;
    579		WREG32(RADEON_FP_GEN_CNTL, tmp);
    580		break;
    581	case RADEON_HPD_2:
    582		tmp = RREG32(RADEON_FP2_GEN_CNTL);
    583		if (connected)
    584			tmp &= ~RADEON_FP2_DETECT_INT_POL;
    585		else
    586			tmp |= RADEON_FP2_DETECT_INT_POL;
    587		WREG32(RADEON_FP2_GEN_CNTL, tmp);
    588		break;
    589	default:
    590		break;
    591	}
    592}
    593
    594/**
    595 * r100_hpd_init - hpd setup callback.
    596 *
    597 * @rdev: radeon_device pointer
    598 *
     599 * Set up the hpd pins used by the card (r1xx-r4xx).
    600 * Set the polarity, and enable the hpd interrupts.
    601 */
    602void r100_hpd_init(struct radeon_device *rdev)
    603{
    604	struct drm_device *dev = rdev->ddev;
    605	struct drm_connector *connector;
    606	unsigned enable = 0;
    607
    608	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
    609		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
    610		if (radeon_connector->hpd.hpd != RADEON_HPD_NONE)
    611			enable |= 1 << radeon_connector->hpd.hpd;
    612		radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
    613	}
    614	radeon_irq_kms_enable_hpd(rdev, enable);
    615}
    616
    617/**
    618 * r100_hpd_fini - hpd tear down callback.
    619 *
    620 * @rdev: radeon_device pointer
    621 *
    622 * Tear down the hpd pins used by the card (r1xx-r4xx).
    623 * Disable the hpd interrupts.
    624 */
    625void r100_hpd_fini(struct radeon_device *rdev)
    626{
    627	struct drm_device *dev = rdev->ddev;
    628	struct drm_connector *connector;
    629	unsigned disable = 0;
    630
    631	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
    632		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
    633		if (radeon_connector->hpd.hpd != RADEON_HPD_NONE)
    634			disable |= 1 << radeon_connector->hpd.hpd;
    635	}
    636	radeon_irq_kms_disable_hpd(rdev, disable);
    637}
    638
    639/*
    640 * PCI GART
    641 */
    642void r100_pci_gart_tlb_flush(struct radeon_device *rdev)
    643{
     644	/* TODO: can we do something here? */
     645	/* The hw seems to cache only one entry, so we should discard this
     646	 * entry; otherwise, if the first GPU GART read hits this entry, it
     647	 * could end up at the wrong address. */
    648}
    649
    650int r100_pci_gart_init(struct radeon_device *rdev)
    651{
    652	int r;
    653
    654	if (rdev->gart.ptr) {
    655		WARN(1, "R100 PCI GART already initialized\n");
    656		return 0;
    657	}
    658	/* Initialize common gart structure */
    659	r = radeon_gart_init(rdev);
    660	if (r)
    661		return r;
    662	rdev->gart.table_size = rdev->gart.num_gpu_pages * 4;
    663	rdev->asic->gart.tlb_flush = &r100_pci_gart_tlb_flush;
    664	rdev->asic->gart.get_page_entry = &r100_pci_gart_get_page_entry;
    665	rdev->asic->gart.set_page = &r100_pci_gart_set_page;
    666	return radeon_gart_table_ram_alloc(rdev);
    667}
    668
    669int r100_pci_gart_enable(struct radeon_device *rdev)
    670{
    671	uint32_t tmp;
    672
    673	/* discard memory request outside of configured range */
    674	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
    675	WREG32(RADEON_AIC_CNTL, tmp);
    676	/* set address range for PCI address translate */
    677	WREG32(RADEON_AIC_LO_ADDR, rdev->mc.gtt_start);
    678	WREG32(RADEON_AIC_HI_ADDR, rdev->mc.gtt_end);
    679	/* set PCI GART page-table base address */
    680	WREG32(RADEON_AIC_PT_BASE, rdev->gart.table_addr);
    681	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
    682	WREG32(RADEON_AIC_CNTL, tmp);
    683	r100_pci_gart_tlb_flush(rdev);
    684	DRM_INFO("PCI GART of %uM enabled (table at 0x%016llX).\n",
    685		 (unsigned)(rdev->mc.gtt_size >> 20),
    686		 (unsigned long long)rdev->gart.table_addr);
    687	rdev->gart.ready = true;
    688	return 0;
    689}
    690
    691void r100_pci_gart_disable(struct radeon_device *rdev)
    692{
    693	uint32_t tmp;
    694
    695	/* discard memory request outside of configured range */
    696	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
    697	WREG32(RADEON_AIC_CNTL, tmp & ~RADEON_PCIGART_TRANSLATE_EN);
    698	WREG32(RADEON_AIC_LO_ADDR, 0);
    699	WREG32(RADEON_AIC_HI_ADDR, 0);
    700}
    701
    702uint64_t r100_pci_gart_get_page_entry(uint64_t addr, uint32_t flags)
    703{
    704	return addr;
    705}
    706
    707void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
    708			    uint64_t entry)
    709{
    710	u32 *gtt = rdev->gart.ptr;
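        	/* each PCI GART entry is a single little-endian dword holding
        	 * the DMA address of the page */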
    711	gtt[i] = cpu_to_le32(lower_32_bits(entry));
    712}
    713
    714void r100_pci_gart_fini(struct radeon_device *rdev)
    715{
    716	radeon_gart_fini(rdev);
    717	r100_pci_gart_disable(rdev);
    718	radeon_gart_table_ram_free(rdev);
    719}
    720
    721int r100_irq_set(struct radeon_device *rdev)
    722{
    723	uint32_t tmp = 0;
    724
    725	if (!rdev->irq.installed) {
    726		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
    727		WREG32(R_000040_GEN_INT_CNTL, 0);
    728		return -EINVAL;
    729	}
    730	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
    731		tmp |= RADEON_SW_INT_ENABLE;
    732	}
    733	if (rdev->irq.crtc_vblank_int[0] ||
    734	    atomic_read(&rdev->irq.pflip[0])) {
    735		tmp |= RADEON_CRTC_VBLANK_MASK;
    736	}
    737	if (rdev->irq.crtc_vblank_int[1] ||
    738	    atomic_read(&rdev->irq.pflip[1])) {
    739		tmp |= RADEON_CRTC2_VBLANK_MASK;
    740	}
    741	if (rdev->irq.hpd[0]) {
    742		tmp |= RADEON_FP_DETECT_MASK;
    743	}
    744	if (rdev->irq.hpd[1]) {
    745		tmp |= RADEON_FP2_DETECT_MASK;
    746	}
    747	WREG32(RADEON_GEN_INT_CNTL, tmp);
    748
    749	/* read back to post the write */
    750	RREG32(RADEON_GEN_INT_CNTL);
    751
    752	return 0;
    753}
    754
    755void r100_irq_disable(struct radeon_device *rdev)
    756{
    757	u32 tmp;
    758
    759	WREG32(R_000040_GEN_INT_CNTL, 0);
    760	/* Wait and acknowledge irq */
    761	mdelay(1);
    762	tmp = RREG32(R_000044_GEN_INT_STATUS);
    763	WREG32(R_000044_GEN_INT_STATUS, tmp);
    764}
    765
    766static uint32_t r100_irq_ack(struct radeon_device *rdev)
    767{
    768	uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS);
    769	uint32_t irq_mask = RADEON_SW_INT_TEST |
    770		RADEON_CRTC_VBLANK_STAT | RADEON_CRTC2_VBLANK_STAT |
    771		RADEON_FP_DETECT_STAT | RADEON_FP2_DETECT_STAT;
    772
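        	/* writing the set bits back to GEN_INT_STATUS acknowledges
        	 * them (cf. the "Wait and acknowledge irq" step in
        	 * r100_irq_disable) */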
    773	if (irqs) {
    774		WREG32(RADEON_GEN_INT_STATUS, irqs);
    775	}
    776	return irqs & irq_mask;
    777}
    778
    779int r100_irq_process(struct radeon_device *rdev)
    780{
    781	uint32_t status, msi_rearm;
    782	bool queue_hotplug = false;
    783
    784	status = r100_irq_ack(rdev);
    785	if (!status) {
    786		return IRQ_NONE;
    787	}
    788	if (rdev->shutdown) {
    789		return IRQ_NONE;
    790	}
    791	while (status) {
    792		/* SW interrupt */
    793		if (status & RADEON_SW_INT_TEST) {
    794			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
    795		}
    796		/* Vertical blank interrupts */
    797		if (status & RADEON_CRTC_VBLANK_STAT) {
    798			if (rdev->irq.crtc_vblank_int[0]) {
    799				drm_handle_vblank(rdev->ddev, 0);
    800				rdev->pm.vblank_sync = true;
    801				wake_up(&rdev->irq.vblank_queue);
    802			}
    803			if (atomic_read(&rdev->irq.pflip[0]))
    804				radeon_crtc_handle_vblank(rdev, 0);
    805		}
    806		if (status & RADEON_CRTC2_VBLANK_STAT) {
    807			if (rdev->irq.crtc_vblank_int[1]) {
    808				drm_handle_vblank(rdev->ddev, 1);
    809				rdev->pm.vblank_sync = true;
    810				wake_up(&rdev->irq.vblank_queue);
    811			}
    812			if (atomic_read(&rdev->irq.pflip[1]))
    813				radeon_crtc_handle_vblank(rdev, 1);
    814		}
    815		if (status & RADEON_FP_DETECT_STAT) {
    816			queue_hotplug = true;
    817			DRM_DEBUG("HPD1\n");
    818		}
    819		if (status & RADEON_FP2_DETECT_STAT) {
    820			queue_hotplug = true;
    821			DRM_DEBUG("HPD2\n");
    822		}
    823		status = r100_irq_ack(rdev);
    824	}
    825	if (queue_hotplug)
    826		schedule_delayed_work(&rdev->hotplug_work, 0);
    827	if (rdev->msi_enabled) {
    828		switch (rdev->family) {
    829		case CHIP_RS400:
    830		case CHIP_RS480:
    831			msi_rearm = RREG32(RADEON_AIC_CNTL) & ~RS400_MSI_REARM;
    832			WREG32(RADEON_AIC_CNTL, msi_rearm);
    833			WREG32(RADEON_AIC_CNTL, msi_rearm | RS400_MSI_REARM);
    834			break;
    835		default:
    836			WREG32(RADEON_MSI_REARM_EN, RV370_MSI_REARM_EN);
    837			break;
    838		}
    839	}
    840	return IRQ_HANDLED;
    841}
    842
    843u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc)
    844{
    845	if (crtc == 0)
    846		return RREG32(RADEON_CRTC_CRNT_FRAME);
    847	else
    848		return RREG32(RADEON_CRTC2_CRNT_FRAME);
    849}
    850
    851/**
    852 * r100_ring_hdp_flush - flush Host Data Path via the ring buffer
    853 * @rdev: radeon device structure
    854 * @ring: ring buffer struct for emitting packets
    855 */
    856static void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring)
    857{
    858	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
    859	radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
    860				RADEON_HDP_READ_BUFFER_INVALIDATE);
    861	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
    862	radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
    863}
    864
     865/* Whoever calls radeon_fence_emit should call ring_lock and ask
     866 * for enough space (today the callers are ib schedule and buffer move) */
    867void r100_fence_ring_emit(struct radeon_device *rdev,
    868			  struct radeon_fence *fence)
    869{
    870	struct radeon_ring *ring = &rdev->ring[fence->ring];
    871
    872	/* We have to make sure that caches are flushed before
     873	 * the CPU might read something from VRAM. */
    874	radeon_ring_write(ring, PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0));
    875	radeon_ring_write(ring, RADEON_RB3D_DC_FLUSH_ALL);
    876	radeon_ring_write(ring, PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0));
    877	radeon_ring_write(ring, RADEON_RB3D_ZC_FLUSH_ALL);
    878	/* Wait until IDLE & CLEAN */
    879	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
    880	radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
    881	r100_ring_hdp_flush(rdev, ring);
    882	/* Emit fence sequence & fire IRQ */
    883	radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0));
    884	radeon_ring_write(ring, fence->seq);
    885	radeon_ring_write(ring, PACKET0(RADEON_GEN_INT_STATUS, 0));
    886	radeon_ring_write(ring, RADEON_SW_INT_FIRE);
    887}
    888
    889bool r100_semaphore_ring_emit(struct radeon_device *rdev,
    890			      struct radeon_ring *ring,
    891			      struct radeon_semaphore *semaphore,
    892			      bool emit_wait)
    893{
    894	/* Unused on older asics, since we don't have semaphores or multiple rings */
    895	BUG();
    896	return false;
    897}
    898
    899struct radeon_fence *r100_copy_blit(struct radeon_device *rdev,
    900				    uint64_t src_offset,
    901				    uint64_t dst_offset,
    902				    unsigned num_gpu_pages,
    903				    struct dma_resv *resv)
    904{
    905	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
    906	struct radeon_fence *fence;
    907	uint32_t cur_pages;
    908	uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
    909	uint32_t pitch;
    910	uint32_t stride_pixels;
    911	unsigned ndw;
    912	int num_loops;
    913	int r = 0;
    914
    915	/* radeon limited to 16k stride */
    916	stride_bytes &= 0x3fff;
    917	/* radeon pitch is /64 */
    918	pitch = stride_bytes / 64;
    919	stride_pixels = stride_bytes / 4;
    920	num_loops = DIV_ROUND_UP(num_gpu_pages, 8191);
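        	/* a single BITBLT_MULTI packet moves at most 8191 rows, one
        	 * GPU page per row, presumably because the blit height field
        	 * is 13 bits wide */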
    921
    922	/* Ask for enough room for blit + flush + fence */
    923	ndw = 64 + (10 * num_loops);
    924	r = radeon_ring_lock(rdev, ring, ndw);
    925	if (r) {
    926		DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
    927		return ERR_PTR(-EINVAL);
    928	}
    929	while (num_gpu_pages > 0) {
    930		cur_pages = num_gpu_pages;
    931		if (cur_pages > 8191) {
    932			cur_pages = 8191;
    933		}
    934		num_gpu_pages -= cur_pages;
    935
     936		/* pages are stacked in the Y direction (height);
     937		   page width runs in the X direction (width) */
    938		radeon_ring_write(ring, PACKET3(PACKET3_BITBLT_MULTI, 8));
    939		radeon_ring_write(ring,
    940				  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
    941				  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
    942				  RADEON_GMC_SRC_CLIPPING |
    943				  RADEON_GMC_DST_CLIPPING |
    944				  RADEON_GMC_BRUSH_NONE |
    945				  (RADEON_COLOR_FORMAT_ARGB8888 << 8) |
    946				  RADEON_GMC_SRC_DATATYPE_COLOR |
    947				  RADEON_ROP3_S |
    948				  RADEON_DP_SRC_SOURCE_MEMORY |
    949				  RADEON_GMC_CLR_CMP_CNTL_DIS |
    950				  RADEON_GMC_WR_MSK_DIS);
    951		radeon_ring_write(ring, (pitch << 22) | (src_offset >> 10));
    952		radeon_ring_write(ring, (pitch << 22) | (dst_offset >> 10));
    953		radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
    954		radeon_ring_write(ring, 0);
    955		radeon_ring_write(ring, (0x1fff) | (0x1fff << 16));
    956		radeon_ring_write(ring, num_gpu_pages);
    957		radeon_ring_write(ring, num_gpu_pages);
    958		radeon_ring_write(ring, cur_pages | (stride_pixels << 16));
    959	}
    960	radeon_ring_write(ring, PACKET0(RADEON_DSTCACHE_CTLSTAT, 0));
    961	radeon_ring_write(ring, RADEON_RB2D_DC_FLUSH_ALL);
    962	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
    963	radeon_ring_write(ring,
    964			  RADEON_WAIT_2D_IDLECLEAN |
    965			  RADEON_WAIT_HOST_IDLECLEAN |
    966			  RADEON_WAIT_DMA_GUI_IDLE);
    967	r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
    968	if (r) {
    969		radeon_ring_unlock_undo(rdev, ring);
    970		return ERR_PTR(r);
    971	}
    972	radeon_ring_unlock_commit(rdev, ring, false);
    973	return fence;
    974}
    975
    976static int r100_cp_wait_for_idle(struct radeon_device *rdev)
    977{
    978	unsigned i;
    979	u32 tmp;
    980
    981	for (i = 0; i < rdev->usec_timeout; i++) {
    982		tmp = RREG32(R_000E40_RBBM_STATUS);
    983		if (!G_000E40_CP_CMDSTRM_BUSY(tmp)) {
    984			return 0;
    985		}
    986		udelay(1);
    987	}
    988	return -1;
    989}
    990
    991void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring)
    992{
    993	int r;
    994
    995	r = radeon_ring_lock(rdev, ring, 2);
    996	if (r) {
    997		return;
    998	}
    999	radeon_ring_write(ring, PACKET0(RADEON_ISYNC_CNTL, 0));
   1000	radeon_ring_write(ring,
   1001			  RADEON_ISYNC_ANY2D_IDLE3D |
   1002			  RADEON_ISYNC_ANY3D_IDLE2D |
   1003			  RADEON_ISYNC_WAIT_IDLEGUI |
   1004			  RADEON_ISYNC_CPSCRATCH_IDLEGUI);
   1005	radeon_ring_unlock_commit(rdev, ring, false);
   1006}
   1007
   1008
   1009/* Load the microcode for the CP */
   1010static int r100_cp_init_microcode(struct radeon_device *rdev)
   1011{
   1012	const char *fw_name = NULL;
   1013	int err;
   1014
   1015	DRM_DEBUG_KMS("\n");
   1016
   1017	if ((rdev->family == CHIP_R100) || (rdev->family == CHIP_RV100) ||
   1018	    (rdev->family == CHIP_RV200) || (rdev->family == CHIP_RS100) ||
   1019	    (rdev->family == CHIP_RS200)) {
   1020		DRM_INFO("Loading R100 Microcode\n");
   1021		fw_name = FIRMWARE_R100;
   1022	} else if ((rdev->family == CHIP_R200) ||
   1023		   (rdev->family == CHIP_RV250) ||
   1024		   (rdev->family == CHIP_RV280) ||
   1025		   (rdev->family == CHIP_RS300)) {
   1026		DRM_INFO("Loading R200 Microcode\n");
   1027		fw_name = FIRMWARE_R200;
   1028	} else if ((rdev->family == CHIP_R300) ||
   1029		   (rdev->family == CHIP_R350) ||
   1030		   (rdev->family == CHIP_RV350) ||
   1031		   (rdev->family == CHIP_RV380) ||
   1032		   (rdev->family == CHIP_RS400) ||
   1033		   (rdev->family == CHIP_RS480)) {
   1034		DRM_INFO("Loading R300 Microcode\n");
   1035		fw_name = FIRMWARE_R300;
   1036	} else if ((rdev->family == CHIP_R420) ||
   1037		   (rdev->family == CHIP_R423) ||
   1038		   (rdev->family == CHIP_RV410)) {
   1039		DRM_INFO("Loading R400 Microcode\n");
   1040		fw_name = FIRMWARE_R420;
   1041	} else if ((rdev->family == CHIP_RS690) ||
   1042		   (rdev->family == CHIP_RS740)) {
   1043		DRM_INFO("Loading RS690/RS740 Microcode\n");
   1044		fw_name = FIRMWARE_RS690;
   1045	} else if (rdev->family == CHIP_RS600) {
   1046		DRM_INFO("Loading RS600 Microcode\n");
   1047		fw_name = FIRMWARE_RS600;
   1048	} else if ((rdev->family == CHIP_RV515) ||
   1049		   (rdev->family == CHIP_R520) ||
   1050		   (rdev->family == CHIP_RV530) ||
   1051		   (rdev->family == CHIP_R580) ||
   1052		   (rdev->family == CHIP_RV560) ||
   1053		   (rdev->family == CHIP_RV570)) {
   1054		DRM_INFO("Loading R500 Microcode\n");
   1055		fw_name = FIRMWARE_R520;
   1056	}
   1057
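        	/* every family routed to this function matches one of the
        	 * branches above, so fw_name should never be NULL here */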
   1058	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
   1059	if (err) {
   1060		pr_err("radeon_cp: Failed to load firmware \"%s\"\n", fw_name);
   1061	} else if (rdev->me_fw->size % 8) {
   1062		pr_err("radeon_cp: Bogus length %zu in firmware \"%s\"\n",
   1063		       rdev->me_fw->size, fw_name);
   1064		err = -EINVAL;
   1065		release_firmware(rdev->me_fw);
   1066		rdev->me_fw = NULL;
   1067	}
   1068	return err;
   1069}
   1070
   1071u32 r100_gfx_get_rptr(struct radeon_device *rdev,
   1072		      struct radeon_ring *ring)
   1073{
   1074	u32 rptr;
   1075
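        	/* prefer the rptr copy the CP writes back to system memory;
        	 * fall back to reading the MMIO register */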
   1076	if (rdev->wb.enabled)
   1077		rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
   1078	else
   1079		rptr = RREG32(RADEON_CP_RB_RPTR);
   1080
   1081	return rptr;
   1082}
   1083
   1084u32 r100_gfx_get_wptr(struct radeon_device *rdev,
   1085		      struct radeon_ring *ring)
   1086{
   1087	return RREG32(RADEON_CP_RB_WPTR);
   1088}
   1089
   1090void r100_gfx_set_wptr(struct radeon_device *rdev,
   1091		       struct radeon_ring *ring)
   1092{
   1093	WREG32(RADEON_CP_RB_WPTR, ring->wptr);
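        	/* read back to post the write, as in r100_irq_set() */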
   1094	(void)RREG32(RADEON_CP_RB_WPTR);
   1095}
   1096
   1097static void r100_cp_load_microcode(struct radeon_device *rdev)
   1098{
   1099	const __be32 *fw_data;
   1100	int i, size;
   1101
   1102	if (r100_gui_wait_for_idle(rdev)) {
   1103		pr_warn("Failed to wait GUI idle while programming pipes. Bad things might happen.\n");
   1104	}
   1105
   1106	if (rdev->me_fw) {
   1107		size = rdev->me_fw->size / 4;
   1108		fw_data = (const __be32 *)&rdev->me_fw->data[0];
   1109		WREG32(RADEON_CP_ME_RAM_ADDR, 0);
   1110		for (i = 0; i < size; i += 2) {
   1111			WREG32(RADEON_CP_ME_RAM_DATAH,
   1112			       be32_to_cpup(&fw_data[i]));
   1113			WREG32(RADEON_CP_ME_RAM_DATAL,
   1114			       be32_to_cpup(&fw_data[i + 1]));
   1115		}
   1116	}
   1117}
   1118
   1119int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
   1120{
   1121	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
   1122	unsigned rb_bufsz;
   1123	unsigned rb_blksz;
   1124	unsigned max_fetch;
   1125	unsigned pre_write_timer;
   1126	unsigned pre_write_limit;
   1127	unsigned indirect2_start;
   1128	unsigned indirect1_start;
   1129	uint32_t tmp;
   1130	int r;
   1131
   1132	r100_debugfs_cp_init(rdev);
   1133	if (!rdev->me_fw) {
   1134		r = r100_cp_init_microcode(rdev);
   1135		if (r) {
   1136			DRM_ERROR("Failed to load firmware!\n");
   1137			return r;
   1138		}
   1139	}
   1140
   1141	/* Align ring size */
   1142	rb_bufsz = order_base_2(ring_size / 8);
   1143	ring_size = (1 << (rb_bufsz + 1)) * 4;
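        	/* e.g. a requested 1 MiB ring gives rb_bufsz =
        	 * order_base_2(131072) = 17 and a final ring_size of
        	 * (1 << 18) * 4 = 1 MiB again */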
   1144	r100_cp_load_microcode(rdev);
   1145	r = radeon_ring_init(rdev, ring, ring_size, RADEON_WB_CP_RPTR_OFFSET,
   1146			     RADEON_CP_PACKET2);
   1147	if (r) {
   1148		return r;
   1149	}
    1150	/* Each time the cp reads 1024 bytes (16 dword/quadword), update
   1151	 * the rptr copy in system ram */
   1152	rb_blksz = 9;
    1153	/* cp will read 128 bytes at a time (4 dwords) */
   1154	max_fetch = 1;
   1155	ring->align_mask = 16 - 1;
    1156	/* Writes to CP_RB_WPTR are delayed for pre_write_timer clocks */
   1157	pre_write_timer = 64;
   1158	/* Force CP_RB_WPTR write if written more than one time before the
    1159	 * delay expires
   1160	 */
   1161	pre_write_limit = 0;
    1162	/* Set up the cp cache like this (cache size is 96 dwords):
    1163	 *	RING		0  to 15
    1164	 *	INDIRECT1	16 to 79
    1165	 *	INDIRECT2	80 to 95
    1166	 * So the ring cache size is 16 dwords (> (2 * max_fetch = 2 * 4 dwords)),
    1167	 *    the indirect1 cache size is 64 dwords (> (2 * max_fetch = 2 * 4 dwords)),
    1168	 *    the indirect2 cache size is 16 dwords (> (2 * max_fetch = 2 * 4 dwords)).
    1169	 * The idea is that most of the gpu commands will go through the
    1170	 * indirect1 buffer, so it gets the bigger cache.
    1171	 */
   1172	indirect2_start = 80;
   1173	indirect1_start = 16;
   1174	/* cp setup */
   1175	WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
   1176	tmp = (REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
   1177	       REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
   1178	       REG_SET(RADEON_MAX_FETCH, max_fetch));
   1179#ifdef __BIG_ENDIAN
   1180	tmp |= RADEON_BUF_SWAP_32BIT;
   1181#endif
   1182	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_NO_UPDATE);
   1183
   1184	/* Set ring address */
   1185	DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)ring->gpu_addr);
   1186	WREG32(RADEON_CP_RB_BASE, ring->gpu_addr);
   1187	/* Force read & write ptr to 0 */
   1188	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA | RADEON_RB_NO_UPDATE);
   1189	WREG32(RADEON_CP_RB_RPTR_WR, 0);
   1190	ring->wptr = 0;
   1191	WREG32(RADEON_CP_RB_WPTR, ring->wptr);
   1192
   1193	/* set the wb address whether it's enabled or not */
   1194	WREG32(R_00070C_CP_RB_RPTR_ADDR,
   1195		S_00070C_RB_RPTR_ADDR((rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) >> 2));
   1196	WREG32(R_000774_SCRATCH_ADDR, rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET);
   1197
   1198	if (rdev->wb.enabled)
   1199		WREG32(R_000770_SCRATCH_UMSK, 0xff);
   1200	else {
   1201		tmp |= RADEON_RB_NO_UPDATE;
   1202		WREG32(R_000770_SCRATCH_UMSK, 0);
   1203	}
   1204
   1205	WREG32(RADEON_CP_RB_CNTL, tmp);
   1206	udelay(10);
    1207	/* Set cp mode to bus mastering & enable cp */
   1208	WREG32(RADEON_CP_CSQ_MODE,
   1209	       REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
   1210	       REG_SET(RADEON_INDIRECT1_START, indirect1_start));
   1211	WREG32(RADEON_CP_RB_WPTR_DELAY, 0);
   1212	WREG32(RADEON_CP_CSQ_MODE, 0x00004D4D);
   1213	WREG32(RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM);
   1214
    1215	/* at this point everything should be set up correctly to enable master */
   1216	pci_set_master(rdev->pdev);
   1217
   1218	radeon_ring_start(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
   1219	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, ring);
   1220	if (r) {
   1221		DRM_ERROR("radeon: cp isn't working (%d).\n", r);
   1222		return r;
   1223	}
   1224	ring->ready = true;
   1225	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
   1226
   1227	if (!ring->rptr_save_reg /* not resuming from suspend */
   1228	    && radeon_ring_supports_scratch_reg(rdev, ring)) {
   1229		r = radeon_scratch_get(rdev, &ring->rptr_save_reg);
   1230		if (r) {
   1231			DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
   1232			ring->rptr_save_reg = 0;
   1233		}
   1234	}
   1235	return 0;
   1236}
   1237
   1238void r100_cp_fini(struct radeon_device *rdev)
   1239{
   1240	if (r100_cp_wait_for_idle(rdev)) {
   1241		DRM_ERROR("Wait for CP idle timeout, shutting down CP.\n");
   1242	}
   1243	/* Disable ring */
   1244	r100_cp_disable(rdev);
   1245	radeon_scratch_free(rdev, rdev->ring[RADEON_RING_TYPE_GFX_INDEX].rptr_save_reg);
   1246	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
   1247	DRM_INFO("radeon: cp finalized\n");
   1248}
   1249
   1250void r100_cp_disable(struct radeon_device *rdev)
   1251{
   1252	/* Disable ring */
   1253	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
   1254	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
   1255	WREG32(RADEON_CP_CSQ_MODE, 0);
   1256	WREG32(RADEON_CP_CSQ_CNTL, 0);
   1257	WREG32(R_000770_SCRATCH_UMSK, 0);
   1258	if (r100_gui_wait_for_idle(rdev)) {
   1259		pr_warn("Failed to wait GUI idle while programming pipes. Bad things might happen.\n");
   1260	}
   1261}
   1262
   1263/*
   1264 * CS functions
   1265 */
   1266int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
   1267			    struct radeon_cs_packet *pkt,
   1268			    unsigned idx,
   1269			    unsigned reg)
   1270{
   1271	int r;
   1272	u32 tile_flags = 0;
   1273	u32 tmp;
   1274	struct radeon_bo_list *reloc;
   1275	u32 value;
   1276
   1277	r = radeon_cs_packet_next_reloc(p, &reloc, 0);
   1278	if (r) {
   1279		DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
   1280			  idx, reg);
   1281		radeon_cs_dump_packet(p, pkt);
   1282		return r;
   1283	}
   1284
   1285	value = radeon_get_ib_value(p, idx);
   1286	tmp = value & 0x003fffff;
   1287	tmp += (((u32)reloc->gpu_offset) >> 10);
   1288
   1289	if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
   1290		if (reloc->tiling_flags & RADEON_TILING_MACRO)
   1291			tile_flags |= RADEON_DST_TILE_MACRO;
   1292		if (reloc->tiling_flags & RADEON_TILING_MICRO) {
   1293			if (reg == RADEON_SRC_PITCH_OFFSET) {
   1294				DRM_ERROR("Cannot src blit from microtiled surface\n");
   1295				radeon_cs_dump_packet(p, pkt);
   1296				return -EINVAL;
   1297			}
   1298			tile_flags |= RADEON_DST_TILE_MICRO;
   1299		}
   1300
   1301		tmp |= tile_flags;
   1302		p->ib.ptr[idx] = (value & 0x3fc00000) | tmp;
   1303	} else
   1304		p->ib.ptr[idx] = (value & 0xffc00000) | tmp;
   1305	return 0;
   1306}
   1307
   1308int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
   1309			     struct radeon_cs_packet *pkt,
   1310			     int idx)
   1311{
   1312	unsigned c, i;
   1313	struct radeon_bo_list *reloc;
   1314	struct r100_cs_track *track;
   1315	int r = 0;
   1316	volatile uint32_t *ib;
   1317	u32 idx_value;
   1318
   1319	ib = p->ib.ptr;
   1320	track = (struct r100_cs_track *)p->track;
   1321	c = radeon_get_ib_value(p, idx++) & 0x1F;
   1322	if (c > 16) {
    1323		DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
    1324			  pkt->opcode);
    1325		radeon_cs_dump_packet(p, pkt);
    1326		return -EINVAL;
   1327	}
   1328	track->num_arrays = c;
   1329	for (i = 0; i < (c - 1); i+=2, idx+=3) {
   1330		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
   1331		if (r) {
   1332			DRM_ERROR("No reloc for packet3 %d\n",
   1333				  pkt->opcode);
   1334			radeon_cs_dump_packet(p, pkt);
   1335			return r;
   1336		}
   1337		idx_value = radeon_get_ib_value(p, idx);
   1338		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset);
   1339
   1340		track->arrays[i + 0].esize = idx_value >> 8;
   1341		track->arrays[i + 0].robj = reloc->robj;
   1342		track->arrays[i + 0].esize &= 0x7F;
   1343		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
   1344		if (r) {
   1345			DRM_ERROR("No reloc for packet3 %d\n",
   1346				  pkt->opcode);
   1347			radeon_cs_dump_packet(p, pkt);
   1348			return r;
   1349		}
   1350		ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->gpu_offset);
   1351		track->arrays[i + 1].robj = reloc->robj;
   1352		track->arrays[i + 1].esize = idx_value >> 24;
   1353		track->arrays[i + 1].esize &= 0x7F;
   1354	}
   1355	if (c & 1) {
   1356		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
   1357		if (r) {
   1358			DRM_ERROR("No reloc for packet3 %d\n",
   1359					  pkt->opcode);
   1360			radeon_cs_dump_packet(p, pkt);
   1361			return r;
   1362		}
   1363		idx_value = radeon_get_ib_value(p, idx);
   1364		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset);
   1365		track->arrays[i + 0].robj = reloc->robj;
   1366		track->arrays[i + 0].esize = idx_value >> 8;
   1367		track->arrays[i + 0].esize &= 0x7F;
   1368	}
   1369	return r;
   1370}
   1371
   1372int r100_cs_parse_packet0(struct radeon_cs_parser *p,
   1373			  struct radeon_cs_packet *pkt,
   1374			  const unsigned *auth, unsigned n,
   1375			  radeon_packet0_check_t check)
   1376{
   1377	unsigned reg;
   1378	unsigned i, j, m;
   1379	unsigned idx;
   1380	int r;
   1381
   1382	idx = pkt->idx + 1;
   1383	reg = pkt->reg;
    1384	/* Check that the register falls into the register range
    1385	 * determined by the number of entries (n) in the
   1386	 * safe register bitmap.
   1387	 */
   1388	if (pkt->one_reg_wr) {
   1389		if ((reg >> 7) > n) {
   1390			return -EINVAL;
   1391		}
   1392	} else {
   1393		if (((reg + (pkt->count << 2)) >> 7) > n) {
   1394			return -EINVAL;
   1395		}
   1396	}
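        	/* each dword of the auth bitmap covers 32 registers (128 bytes
        	 * of register space): word index is reg >> 7, bit index is
        	 * (reg >> 2) & 31 */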
   1397	for (i = 0; i <= pkt->count; i++, idx++) {
   1398		j = (reg >> 7);
   1399		m = 1 << ((reg >> 2) & 31);
   1400		if (auth[j] & m) {
   1401			r = check(p, pkt, idx, reg);
   1402			if (r) {
   1403				return r;
   1404			}
   1405		}
   1406		if (pkt->one_reg_wr) {
   1407			if (!(auth[j] & m)) {
   1408				break;
   1409			}
   1410		} else {
   1411			reg += 4;
   1412		}
   1413	}
   1414	return 0;
   1415}
   1416
   1417/**
   1418 * r100_cs_packet_parse_vline() - parse userspace VLINE packet
   1419 * @p:		parser structure holding parsing context.
   1420 *
   1421 * Userspace sends a special sequence for VLINE waits.
   1422 * PACKET0 - VLINE_START_END + value
    1423 * PACKET0 - WAIT_UNTIL + value
   1424 * RELOC (P3) - crtc_id in reloc.
   1425 *
   1426 * This function parses this and relocates the VLINE START END
   1427 * and WAIT UNTIL packets to the correct crtc.
   1428 * It also detects a switched off crtc and nulls out the
   1429 * wait in that case.
   1430 */
   1431int r100_cs_packet_parse_vline(struct radeon_cs_parser *p)
   1432{
   1433	struct drm_crtc *crtc;
   1434	struct radeon_crtc *radeon_crtc;
   1435	struct radeon_cs_packet p3reloc, waitreloc;
   1436	int crtc_id;
   1437	int r;
   1438	uint32_t header, h_idx, reg;
   1439	volatile uint32_t *ib;
   1440
   1441	ib = p->ib.ptr;
   1442
   1443	/* parse the wait until */
   1444	r = radeon_cs_packet_parse(p, &waitreloc, p->idx);
   1445	if (r)
   1446		return r;
   1447
    1448	/* check it's a wait until and only 1 count */
   1449	if (waitreloc.reg != RADEON_WAIT_UNTIL ||
   1450	    waitreloc.count != 0) {
   1451		DRM_ERROR("vline wait had illegal wait until segment\n");
   1452		return -EINVAL;
   1453	}
   1454
   1455	if (radeon_get_ib_value(p, waitreloc.idx + 1) != RADEON_WAIT_CRTC_VLINE) {
   1456		DRM_ERROR("vline wait had illegal wait until\n");
   1457		return -EINVAL;
   1458	}
   1459
   1460	/* jump over the NOP */
   1461	r = radeon_cs_packet_parse(p, &p3reloc, p->idx + waitreloc.count + 2);
   1462	if (r)
   1463		return r;
   1464
   1465	h_idx = p->idx - 2;
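        	/* p->idx was pointing at the WAIT_UNTIL packet, so the
        	 * two-dword PACKET0 that wrote the vline register starts two
        	 * dwords earlier */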
   1466	p->idx += waitreloc.count + 2;
   1467	p->idx += p3reloc.count + 2;
   1468
   1469	header = radeon_get_ib_value(p, h_idx);
   1470	crtc_id = radeon_get_ib_value(p, h_idx + 5);
   1471	reg = R100_CP_PACKET0_GET_REG(header);
   1472	crtc = drm_crtc_find(p->rdev->ddev, p->filp, crtc_id);
   1473	if (!crtc) {
   1474		DRM_ERROR("cannot find crtc %d\n", crtc_id);
   1475		return -ENOENT;
   1476	}
   1477	radeon_crtc = to_radeon_crtc(crtc);
   1478	crtc_id = radeon_crtc->crtc_id;
   1479
   1480	if (!crtc->enabled) {
    1481		/* if the CRTC isn't enabled, we need to nop out the wait until */
   1482		ib[h_idx + 2] = PACKET2(0);
   1483		ib[h_idx + 3] = PACKET2(0);
   1484	} else if (crtc_id == 1) {
   1485		switch (reg) {
   1486		case AVIVO_D1MODE_VLINE_START_END:
   1487			header &= ~R300_CP_PACKET0_REG_MASK;
   1488			header |= AVIVO_D2MODE_VLINE_START_END >> 2;
   1489			break;
   1490		case RADEON_CRTC_GUI_TRIG_VLINE:
   1491			header &= ~R300_CP_PACKET0_REG_MASK;
   1492			header |= RADEON_CRTC2_GUI_TRIG_VLINE >> 2;
   1493			break;
   1494		default:
   1495			DRM_ERROR("unknown crtc reloc\n");
   1496			return -EINVAL;
   1497		}
   1498		ib[h_idx] = header;
   1499		ib[h_idx + 3] |= RADEON_ENG_DISPLAY_SELECT_CRTC1;
   1500	}
   1501
   1502	return 0;
   1503}
   1504
   1505static int r100_get_vtx_size(uint32_t vtx_fmt)
   1506{
   1507	int vtx_size;
   1508	vtx_size = 2;
   1509	/* ordered according to bits in spec */
   1510	if (vtx_fmt & RADEON_SE_VTX_FMT_W0)
   1511		vtx_size++;
   1512	if (vtx_fmt & RADEON_SE_VTX_FMT_FPCOLOR)
   1513		vtx_size += 3;
   1514	if (vtx_fmt & RADEON_SE_VTX_FMT_FPALPHA)
   1515		vtx_size++;
   1516	if (vtx_fmt & RADEON_SE_VTX_FMT_PKCOLOR)
   1517		vtx_size++;
   1518	if (vtx_fmt & RADEON_SE_VTX_FMT_FPSPEC)
   1519		vtx_size += 3;
   1520	if (vtx_fmt & RADEON_SE_VTX_FMT_FPFOG)
   1521		vtx_size++;
   1522	if (vtx_fmt & RADEON_SE_VTX_FMT_PKSPEC)
   1523		vtx_size++;
   1524	if (vtx_fmt & RADEON_SE_VTX_FMT_ST0)
   1525		vtx_size += 2;
   1526	if (vtx_fmt & RADEON_SE_VTX_FMT_ST1)
   1527		vtx_size += 2;
   1528	if (vtx_fmt & RADEON_SE_VTX_FMT_Q1)
   1529		vtx_size++;
   1530	if (vtx_fmt & RADEON_SE_VTX_FMT_ST2)
   1531		vtx_size += 2;
   1532	if (vtx_fmt & RADEON_SE_VTX_FMT_Q2)
   1533		vtx_size++;
   1534	if (vtx_fmt & RADEON_SE_VTX_FMT_ST3)
   1535		vtx_size += 2;
   1536	if (vtx_fmt & RADEON_SE_VTX_FMT_Q3)
   1537		vtx_size++;
   1538	if (vtx_fmt & RADEON_SE_VTX_FMT_Q0)
   1539		vtx_size++;
   1540	/* blend weight */
   1541	if (vtx_fmt & (0x7 << 15))
   1542		vtx_size += (vtx_fmt >> 15) & 0x7;
   1543	if (vtx_fmt & RADEON_SE_VTX_FMT_N0)
   1544		vtx_size += 3;
   1545	if (vtx_fmt & RADEON_SE_VTX_FMT_XY1)
   1546		vtx_size += 2;
   1547	if (vtx_fmt & RADEON_SE_VTX_FMT_Z1)
   1548		vtx_size++;
   1549	if (vtx_fmt & RADEON_SE_VTX_FMT_W1)
   1550		vtx_size++;
   1551	if (vtx_fmt & RADEON_SE_VTX_FMT_N1)
   1552		vtx_size++;
   1553	if (vtx_fmt & RADEON_SE_VTX_FMT_Z)
   1554		vtx_size++;
   1555	return vtx_size;
   1556}
   1557
   1558static int r100_packet0_check(struct radeon_cs_parser *p,
   1559			      struct radeon_cs_packet *pkt,
   1560			      unsigned idx, unsigned reg)
   1561{
   1562	struct radeon_bo_list *reloc;
   1563	struct r100_cs_track *track;
   1564	volatile uint32_t *ib;
   1565	uint32_t tmp;
   1566	int r;
   1567	int i, face;
   1568	u32 tile_flags = 0;
   1569	u32 idx_value;
   1570
   1571	ib = p->ib.ptr;
   1572	track = (struct r100_cs_track *)p->track;
   1573
   1574	idx_value = radeon_get_ib_value(p, idx);
   1575
   1576	switch (reg) {
   1577	case RADEON_CRTC_GUI_TRIG_VLINE:
   1578		r = r100_cs_packet_parse_vline(p);
   1579		if (r) {
   1580			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
   1581				  idx, reg);
   1582			radeon_cs_dump_packet(p, pkt);
   1583			return r;
   1584		}
   1585		break;
   1586		/* FIXME: only allow PACKET3 blit? easier to check for out of
   1587		 * range access */
   1588	case RADEON_DST_PITCH_OFFSET:
   1589	case RADEON_SRC_PITCH_OFFSET:
   1590		r = r100_reloc_pitch_offset(p, pkt, idx, reg);
   1591		if (r)
   1592			return r;
   1593		break;
   1594	case RADEON_RB3D_DEPTHOFFSET:
   1595		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
   1596		if (r) {
   1597			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
   1598				  idx, reg);
   1599			radeon_cs_dump_packet(p, pkt);
   1600			return r;
   1601		}
   1602		track->zb.robj = reloc->robj;
   1603		track->zb.offset = idx_value;
   1604		track->zb_dirty = true;
   1605		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
   1606		break;
   1607	case RADEON_RB3D_COLOROFFSET:
   1608		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
   1609		if (r) {
   1610			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
   1611				  idx, reg);
   1612			radeon_cs_dump_packet(p, pkt);
   1613			return r;
   1614		}
   1615		track->cb[0].robj = reloc->robj;
   1616		track->cb[0].offset = idx_value;
   1617		track->cb_dirty = true;
   1618		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
   1619		break;
   1620	case RADEON_PP_TXOFFSET_0:
   1621	case RADEON_PP_TXOFFSET_1:
   1622	case RADEON_PP_TXOFFSET_2:
   1623		i = (reg - RADEON_PP_TXOFFSET_0) / 24;
   1624		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
   1625		if (r) {
   1626			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
   1627				  idx, reg);
   1628			radeon_cs_dump_packet(p, pkt);
   1629			return r;
   1630		}
   1631		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
   1632			if (reloc->tiling_flags & RADEON_TILING_MACRO)
   1633				tile_flags |= RADEON_TXO_MACRO_TILE;
   1634			if (reloc->tiling_flags & RADEON_TILING_MICRO)
   1635				tile_flags |= RADEON_TXO_MICRO_TILE_X2;
   1636
   1637			tmp = idx_value & ~(0x7 << 2);
   1638			tmp |= tile_flags;
   1639			ib[idx] = tmp + ((u32)reloc->gpu_offset);
   1640		} else
   1641			ib[idx] = idx_value + ((u32)reloc->gpu_offset);
   1642		track->textures[i].robj = reloc->robj;
   1643		track->tex_dirty = true;
   1644		break;
   1645	case RADEON_PP_CUBIC_OFFSET_T0_0:
   1646	case RADEON_PP_CUBIC_OFFSET_T0_1:
   1647	case RADEON_PP_CUBIC_OFFSET_T0_2:
   1648	case RADEON_PP_CUBIC_OFFSET_T0_3:
   1649	case RADEON_PP_CUBIC_OFFSET_T0_4:
   1650		i = (reg - RADEON_PP_CUBIC_OFFSET_T0_0) / 4;
   1651		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
   1652		if (r) {
   1653			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
   1654				  idx, reg);
   1655			radeon_cs_dump_packet(p, pkt);
   1656			return r;
   1657		}
   1658		track->textures[0].cube_info[i].offset = idx_value;
   1659		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
   1660		track->textures[0].cube_info[i].robj = reloc->robj;
   1661		track->tex_dirty = true;
   1662		break;
   1663	case RADEON_PP_CUBIC_OFFSET_T1_0:
   1664	case RADEON_PP_CUBIC_OFFSET_T1_1:
   1665	case RADEON_PP_CUBIC_OFFSET_T1_2:
   1666	case RADEON_PP_CUBIC_OFFSET_T1_3:
   1667	case RADEON_PP_CUBIC_OFFSET_T1_4:
   1668		i = (reg - RADEON_PP_CUBIC_OFFSET_T1_0) / 4;
   1669		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
   1670		if (r) {
   1671			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
   1672				  idx, reg);
   1673			radeon_cs_dump_packet(p, pkt);
   1674			return r;
   1675		}
   1676		track->textures[1].cube_info[i].offset = idx_value;
   1677		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
   1678		track->textures[1].cube_info[i].robj = reloc->robj;
   1679		track->tex_dirty = true;
   1680		break;
   1681	case RADEON_PP_CUBIC_OFFSET_T2_0:
   1682	case RADEON_PP_CUBIC_OFFSET_T2_1:
   1683	case RADEON_PP_CUBIC_OFFSET_T2_2:
   1684	case RADEON_PP_CUBIC_OFFSET_T2_3:
   1685	case RADEON_PP_CUBIC_OFFSET_T2_4:
   1686		i = (reg - RADEON_PP_CUBIC_OFFSET_T2_0) / 4;
   1687		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
   1688		if (r) {
   1689			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
   1690				  idx, reg);
   1691			radeon_cs_dump_packet(p, pkt);
   1692			return r;
   1693		}
   1694		track->textures[2].cube_info[i].offset = idx_value;
   1695		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
   1696		track->textures[2].cube_info[i].robj = reloc->robj;
   1697		track->tex_dirty = true;
   1698		break;
   1699	case RADEON_RE_WIDTH_HEIGHT:
   1700		track->maxy = ((idx_value >> 16) & 0x7FF);
   1701		track->cb_dirty = true;
   1702		track->zb_dirty = true;
   1703		break;
   1704	case RADEON_RB3D_COLORPITCH:
   1705		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
   1706		if (r) {
   1707			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
   1708				  idx, reg);
   1709			radeon_cs_dump_packet(p, pkt);
   1710			return r;
   1711		}
   1712		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
   1713			if (reloc->tiling_flags & RADEON_TILING_MACRO)
   1714				tile_flags |= RADEON_COLOR_TILE_ENABLE;
   1715			if (reloc->tiling_flags & RADEON_TILING_MICRO)
   1716				tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;
   1717
   1718			tmp = idx_value & ~(0x7 << 16);
   1719			tmp |= tile_flags;
   1720			ib[idx] = tmp;
   1721		} else
   1722			ib[idx] = idx_value;
   1723
   1724		track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
   1725		track->cb_dirty = true;
   1726		break;
   1727	case RADEON_RB3D_DEPTHPITCH:
   1728		track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
   1729		track->zb_dirty = true;
   1730		break;
   1731	case RADEON_RB3D_CNTL:
   1732		switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
   1733		case 7:
   1734		case 8:
   1735		case 9:
   1736		case 11:
   1737		case 12:
   1738			track->cb[0].cpp = 1;
   1739			break;
   1740		case 3:
   1741		case 4:
   1742		case 15:
   1743			track->cb[0].cpp = 2;
   1744			break;
   1745		case 6:
   1746			track->cb[0].cpp = 4;
   1747			break;
   1748		default:
   1749			DRM_ERROR("Invalid color buffer format (%d) !\n",
   1750				  ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f));
   1751			return -EINVAL;
   1752		}
   1753		track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
   1754		track->cb_dirty = true;
   1755		track->zb_dirty = true;
   1756		break;
   1757	case RADEON_RB3D_ZSTENCILCNTL:
   1758		switch (idx_value & 0xf) {
   1759		case 0:
   1760			track->zb.cpp = 2;
   1761			break;
   1762		case 2:
   1763		case 3:
   1764		case 4:
   1765		case 5:
   1766		case 9:
   1767		case 11:
   1768			track->zb.cpp = 4;
   1769			break;
   1770		default:
   1771			break;
   1772		}
   1773		track->zb_dirty = true;
   1774		break;
   1775	case RADEON_RB3D_ZPASS_ADDR:
   1776		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
   1777		if (r) {
   1778			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
   1779				  idx, reg);
   1780			radeon_cs_dump_packet(p, pkt);
   1781			return r;
   1782		}
   1783		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
   1784		break;
   1785	case RADEON_PP_CNTL:
   1786		{
   1787			uint32_t temp = idx_value >> 4;
   1788			for (i = 0; i < track->num_texture; i++)
   1789				track->textures[i].enabled = !!(temp & (1 << i));
   1790			track->tex_dirty = true;
   1791		}
   1792		break;
   1793	case RADEON_SE_VF_CNTL:
   1794		track->vap_vf_cntl = idx_value;
   1795		break;
   1796	case RADEON_SE_VTX_FMT:
   1797		track->vtx_size = r100_get_vtx_size(idx_value);
   1798		break;
   1799	case RADEON_PP_TEX_SIZE_0:
   1800	case RADEON_PP_TEX_SIZE_1:
   1801	case RADEON_PP_TEX_SIZE_2:
   1802		i = (reg - RADEON_PP_TEX_SIZE_0) / 8;
   1803		track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
   1804		track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
   1805		track->tex_dirty = true;
   1806		break;
   1807	case RADEON_PP_TEX_PITCH_0:
   1808	case RADEON_PP_TEX_PITCH_1:
   1809	case RADEON_PP_TEX_PITCH_2:
   1810		i = (reg - RADEON_PP_TEX_PITCH_0) / 8;
   1811		track->textures[i].pitch = idx_value + 32;
   1812		track->tex_dirty = true;
   1813		break;
   1814	case RADEON_PP_TXFILTER_0:
   1815	case RADEON_PP_TXFILTER_1:
   1816	case RADEON_PP_TXFILTER_2:
   1817		i = (reg - RADEON_PP_TXFILTER_0) / 24;
   1818		track->textures[i].num_levels = ((idx_value & RADEON_MAX_MIP_LEVEL_MASK)
   1819						 >> RADEON_MAX_MIP_LEVEL_SHIFT);
   1820		tmp = (idx_value >> 23) & 0x7;
   1821		if (tmp == 2 || tmp == 6)
   1822			track->textures[i].roundup_w = false;
   1823		tmp = (idx_value >> 27) & 0x7;
   1824		if (tmp == 2 || tmp == 6)
   1825			track->textures[i].roundup_h = false;
   1826		track->tex_dirty = true;
   1827		break;
   1828	case RADEON_PP_TXFORMAT_0:
   1829	case RADEON_PP_TXFORMAT_1:
   1830	case RADEON_PP_TXFORMAT_2:
   1831		i = (reg - RADEON_PP_TXFORMAT_0) / 24;
   1832		if (idx_value & RADEON_TXFORMAT_NON_POWER2) {
   1833			track->textures[i].use_pitch = true;
   1834		} else {
   1835			track->textures[i].use_pitch = false;
   1836			track->textures[i].width = 1 << ((idx_value & RADEON_TXFORMAT_WIDTH_MASK) >> RADEON_TXFORMAT_WIDTH_SHIFT);
   1837			track->textures[i].height = 1 << ((idx_value & RADEON_TXFORMAT_HEIGHT_MASK) >> RADEON_TXFORMAT_HEIGHT_SHIFT);
   1838		}
   1839		if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE)
   1840			track->textures[i].tex_coord_type = 2;
   1841		switch ((idx_value & RADEON_TXFORMAT_FORMAT_MASK)) {
   1842		case RADEON_TXFORMAT_I8:
   1843		case RADEON_TXFORMAT_RGB332:
   1844		case RADEON_TXFORMAT_Y8:
   1845			track->textures[i].cpp = 1;
   1846			track->textures[i].compress_format = R100_TRACK_COMP_NONE;
   1847			break;
   1848		case RADEON_TXFORMAT_AI88:
   1849		case RADEON_TXFORMAT_ARGB1555:
   1850		case RADEON_TXFORMAT_RGB565:
   1851		case RADEON_TXFORMAT_ARGB4444:
   1852		case RADEON_TXFORMAT_VYUY422:
   1853		case RADEON_TXFORMAT_YVYU422:
   1854		case RADEON_TXFORMAT_SHADOW16:
   1855		case RADEON_TXFORMAT_LDUDV655:
   1856		case RADEON_TXFORMAT_DUDV88:
   1857			track->textures[i].cpp = 2;
   1858			track->textures[i].compress_format = R100_TRACK_COMP_NONE;
   1859			break;
   1860		case RADEON_TXFORMAT_ARGB8888:
   1861		case RADEON_TXFORMAT_RGBA8888:
   1862		case RADEON_TXFORMAT_SHADOW32:
   1863		case RADEON_TXFORMAT_LDUDUV8888:
   1864			track->textures[i].cpp = 4;
   1865			track->textures[i].compress_format = R100_TRACK_COMP_NONE;
   1866			break;
   1867		case RADEON_TXFORMAT_DXT1:
   1868			track->textures[i].cpp = 1;
   1869			track->textures[i].compress_format = R100_TRACK_COMP_DXT1;
   1870			break;
   1871		case RADEON_TXFORMAT_DXT23:
   1872		case RADEON_TXFORMAT_DXT45:
   1873			track->textures[i].cpp = 1;
   1874			track->textures[i].compress_format = R100_TRACK_COMP_DXT35;
   1875			break;
   1876		}
   1877		track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
   1878		track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
   1879		track->tex_dirty = true;
   1880		break;
   1881	case RADEON_PP_CUBIC_FACES_0:
   1882	case RADEON_PP_CUBIC_FACES_1:
   1883	case RADEON_PP_CUBIC_FACES_2:
   1884		tmp = idx_value;
   1885		i = (reg - RADEON_PP_CUBIC_FACES_0) / 4;
   1886		for (face = 0; face < 4; face++) {
   1887			track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
   1888			track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
   1889		}
   1890		track->tex_dirty = true;
   1891		break;
   1892	default:
   1893		pr_err("Forbidden register 0x%04X in cs at %d\n", reg, idx);
   1894		return -EINVAL;
   1895	}
   1896	return 0;
   1897}
   1898
   1899int r100_cs_track_check_pkt3_indx_buffer(struct radeon_cs_parser *p,
   1900					 struct radeon_cs_packet *pkt,
   1901					 struct radeon_bo *robj)
   1902{
   1903	unsigned idx;
   1904	u32 value;
   1905	idx = pkt->idx + 1;
   1906	value = radeon_get_ib_value(p, idx + 2);
   1907	if ((value + 1) > radeon_bo_size(robj)) {
   1908		DRM_ERROR("[drm] Buffer too small for PACKET3 INDX_BUFFER "
   1909			  "(need %u have %lu) !\n",
   1910			  value + 1,
   1911			  radeon_bo_size(robj));
   1912		return -EINVAL;
   1913	}
   1914	return 0;
   1915}
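
/*
 * Note (assumption): dword idx + 2 of an INDX_BUFFER packet is treated
 * here as the offset of the last byte the draw may touch, so the bound
 * BO must be at least value + 1 bytes long.
 */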
   1916
   1917static int r100_packet3_check(struct radeon_cs_parser *p,
   1918			      struct radeon_cs_packet *pkt)
   1919{
   1920	struct radeon_bo_list *reloc;
   1921	struct r100_cs_track *track;
   1922	unsigned idx;
   1923	volatile uint32_t *ib;
   1924	int r;
   1925
   1926	ib = p->ib.ptr;
   1927	idx = pkt->idx + 1;
   1928	track = (struct r100_cs_track *)p->track;
   1929	switch (pkt->opcode) {
   1930	case PACKET3_3D_LOAD_VBPNTR:
   1931		r = r100_packet3_load_vbpntr(p, pkt, idx);
   1932		if (r)
   1933			return r;
   1934		break;
   1935	case PACKET3_INDX_BUFFER:
   1936		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
   1937		if (r) {
   1938			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
   1939			radeon_cs_dump_packet(p, pkt);
   1940			return r;
   1941		}
   1942		ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->gpu_offset);
   1943		r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
   1944		if (r) {
   1945			return r;
   1946		}
   1947		break;
   1948	case 0x23:
   1949		/* 3D_RNDR_GEN_INDX_PRIM on r100/r200 */
   1950		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
   1951		if (r) {
   1952			DRM_ERROR("No reloc for packet3 %d\n", pkt->opcode);
   1953			radeon_cs_dump_packet(p, pkt);
   1954			return r;
   1955		}
   1956		ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->gpu_offset);
   1957		track->num_arrays = 1;
   1958		track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 2));
   1959
   1960		track->arrays[0].robj = reloc->robj;
   1961		track->arrays[0].esize = track->vtx_size;
   1962
   1963		track->max_indx = radeon_get_ib_value(p, idx+1);
   1964
   1965		track->vap_vf_cntl = radeon_get_ib_value(p, idx+3);
   1966		track->immd_dwords = pkt->count - 1;
   1967		r = r100_cs_track_check(p->rdev, track);
   1968		if (r)
   1969			return r;
   1970		break;
    1971	case PACKET3_3D_DRAW_IMMD:
    1972		/* triggers drawing using in-packet vertex data */
    1973		if (((radeon_get_ib_value(p, idx + 1) >> 4) & 0x3) != 3) {
    1974			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
    1975			return -EINVAL;
    1976		}
    1977		track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 0));
    1978		track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
    1979		track->immd_dwords = pkt->count - 1;
    1980		r = r100_cs_track_check(p->rdev, track);
    1981		if (r)
    1982			return r;
    1983		break;
    1984	case PACKET3_3D_DRAW_IMMD_2:
    1985		/* triggers drawing using in-packet vertex data */
    1986		if (((radeon_get_ib_value(p, idx) >> 4) & 0x3) != 3) {
    1987			DRM_ERROR("PRIM_WALK must be 3 for IMMD draw\n");
    1988			return -EINVAL;
    1989		}
    1990		track->vap_vf_cntl = radeon_get_ib_value(p, idx);
    1991		track->immd_dwords = pkt->count;
    1992		r = r100_cs_track_check(p->rdev, track);
    1993		if (r)
    1994			return r;
    1995		break;
    1996	case PACKET3_3D_DRAW_VBUF_2:
    1997		/* triggers drawing of vertex buffers setup elsewhere */
    1998		track->vap_vf_cntl = radeon_get_ib_value(p, idx);
    1999		r = r100_cs_track_check(p->rdev, track);
    2000		if (r)
    2001			return r;
    2002		break;
    2003	case PACKET3_3D_DRAW_INDX_2:
    2004		/* triggers drawing using indices to vertex buffer */
    2005		track->vap_vf_cntl = radeon_get_ib_value(p, idx);
    2006		r = r100_cs_track_check(p->rdev, track);
    2007		if (r)
    2008			return r;
    2009		break;
    2010	case PACKET3_3D_DRAW_VBUF:
    2011		/* triggers drawing of vertex buffers setup elsewhere */
    2012		track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
    2013		r = r100_cs_track_check(p->rdev, track);
    2014		if (r)
    2015			return r;
    2016		break;
    2017	case PACKET3_3D_DRAW_INDX:
    2018		/* triggers drawing using indices to vertex buffer */
    2019		track->vap_vf_cntl = radeon_get_ib_value(p, idx + 1);
    2020		r = r100_cs_track_check(p->rdev, track);
    2021		if (r)
    2022			return r;
    2023		break;
    2024	case PACKET3_3D_CLEAR_HIZ:
    2025	case PACKET3_3D_CLEAR_ZMASK:
    2026		if (p->rdev->hyperz_filp != p->filp)
    2027			return -EINVAL;
    2028		break;
   2029	case PACKET3_NOP:
   2030		break;
   2031	default:
   2032		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
   2033		return -EINVAL;
   2034	}
   2035	return 0;
   2036}
   2037
   2038int r100_cs_parse(struct radeon_cs_parser *p)
   2039{
   2040	struct radeon_cs_packet pkt;
   2041	struct r100_cs_track *track;
   2042	int r;
   2043
   2044	track = kzalloc(sizeof(*track), GFP_KERNEL);
   2045	if (!track)
   2046		return -ENOMEM;
   2047	r100_cs_track_clear(p->rdev, track);
   2048	p->track = track;
   2049	do {
   2050		r = radeon_cs_packet_parse(p, &pkt, p->idx);
   2051		if (r) {
   2052			return r;
   2053		}
   2054		p->idx += pkt.count + 2;
   2055		switch (pkt.type) {
   2056		case RADEON_PACKET_TYPE0:
   2057			if (p->rdev->family >= CHIP_R200)
   2058				r = r100_cs_parse_packet0(p, &pkt,
   2059					p->rdev->config.r100.reg_safe_bm,
   2060					p->rdev->config.r100.reg_safe_bm_size,
   2061					&r200_packet0_check);
   2062			else
   2063				r = r100_cs_parse_packet0(p, &pkt,
   2064					p->rdev->config.r100.reg_safe_bm,
   2065					p->rdev->config.r100.reg_safe_bm_size,
   2066					&r100_packet0_check);
   2067			break;
   2068		case RADEON_PACKET_TYPE2:
   2069			break;
   2070		case RADEON_PACKET_TYPE3:
   2071			r = r100_packet3_check(p, &pkt);
   2072			break;
   2073		default:
   2074			DRM_ERROR("Unknown packet type %d !\n",
   2075				  pkt.type);
   2076			return -EINVAL;
   2077		}
   2078		if (r)
   2079			return r;
   2080	} while (p->idx < p->chunk_ib->length_dw);
   2081	return 0;
   2082}
   2083
   2084static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
   2085{
   2086	DRM_ERROR("pitch                      %d\n", t->pitch);
   2087	DRM_ERROR("use_pitch                  %d\n", t->use_pitch);
   2088	DRM_ERROR("width                      %d\n", t->width);
   2089	DRM_ERROR("width_11                   %d\n", t->width_11);
   2090	DRM_ERROR("height                     %d\n", t->height);
   2091	DRM_ERROR("height_11                  %d\n", t->height_11);
   2092	DRM_ERROR("num levels                 %d\n", t->num_levels);
   2093	DRM_ERROR("depth                      %d\n", t->txdepth);
   2094	DRM_ERROR("bpp                        %d\n", t->cpp);
   2095	DRM_ERROR("coordinate type            %d\n", t->tex_coord_type);
   2096	DRM_ERROR("width round to power of 2  %d\n", t->roundup_w);
   2097	DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
   2098	DRM_ERROR("compress format            %d\n", t->compress_format);
   2099}
   2100
   2101static int r100_track_compress_size(int compress_format, int w, int h)
   2102{
   2103	int block_width, block_height, block_bytes;
   2104	int wblocks, hblocks;
   2105	int min_wblocks;
   2106	int sz;
   2107
   2108	block_width = 4;
   2109	block_height = 4;
   2110
   2111	switch (compress_format) {
   2112	case R100_TRACK_COMP_DXT1:
   2113		block_bytes = 8;
   2114		min_wblocks = 4;
   2115		break;
   2116	default:
   2117	case R100_TRACK_COMP_DXT35:
   2118		block_bytes = 16;
   2119		min_wblocks = 2;
   2120		break;
   2121	}
   2122
   2123	hblocks = (h + block_height - 1) / block_height;
   2124	wblocks = (w + block_width - 1) / block_width;
   2125	if (wblocks < min_wblocks)
   2126		wblocks = min_wblocks;
   2127	sz = wblocks * hblocks * block_bytes;
   2128	return sz;
   2129}
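
/*
 * Worked example (illustrative): a 64x64 DXT1 texture is 16x16 blocks of
 * 8 bytes = 2048 bytes; a 4x4 DXT1 texture is a single block, but the
 * min_wblocks clamp pads the row to 4 blocks, giving 32 bytes instead
 * of 8.
 */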
   2130
   2131static int r100_cs_track_cube(struct radeon_device *rdev,
   2132			      struct r100_cs_track *track, unsigned idx)
   2133{
   2134	unsigned face, w, h;
   2135	struct radeon_bo *cube_robj;
   2136	unsigned long size;
   2137	unsigned compress_format = track->textures[idx].compress_format;
   2138
   2139	for (face = 0; face < 5; face++) {
   2140		cube_robj = track->textures[idx].cube_info[face].robj;
   2141		w = track->textures[idx].cube_info[face].width;
   2142		h = track->textures[idx].cube_info[face].height;
   2143
   2144		if (compress_format) {
   2145			size = r100_track_compress_size(compress_format, w, h);
   2146		} else
   2147			size = w * h;
   2148		size *= track->textures[idx].cpp;
   2149
   2150		size += track->textures[idx].cube_info[face].offset;
   2151
   2152		if (size > radeon_bo_size(cube_robj)) {
   2153			DRM_ERROR("Cube texture offset greater than object size %lu %lu\n",
   2154				  size, radeon_bo_size(cube_robj));
   2155			r100_cs_track_texture_print(&track->textures[idx]);
   2156			return -1;
   2157		}
   2158	}
   2159	return 0;
   2160}
   2161
   2162static int r100_cs_track_texture_check(struct radeon_device *rdev,
   2163				       struct r100_cs_track *track)
   2164{
   2165	struct radeon_bo *robj;
   2166	unsigned long size;
   2167	unsigned u, i, w, h, d;
   2168	int ret;
   2169
   2170	for (u = 0; u < track->num_texture; u++) {
   2171		if (!track->textures[u].enabled)
   2172			continue;
   2173		if (track->textures[u].lookup_disable)
   2174			continue;
   2175		robj = track->textures[u].robj;
   2176		if (robj == NULL) {
   2177			DRM_ERROR("No texture bound to unit %u\n", u);
   2178			return -EINVAL;
   2179		}
   2180		size = 0;
   2181		for (i = 0; i <= track->textures[u].num_levels; i++) {
   2182			if (track->textures[u].use_pitch) {
   2183				if (rdev->family < CHIP_R300)
   2184					w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i);
   2185				else
   2186					w = track->textures[u].pitch / (1 << i);
   2187			} else {
   2188				w = track->textures[u].width;
   2189				if (rdev->family >= CHIP_RV515)
   2190					w |= track->textures[u].width_11;
   2191				w = w / (1 << i);
   2192				if (track->textures[u].roundup_w)
   2193					w = roundup_pow_of_two(w);
   2194			}
   2195			h = track->textures[u].height;
   2196			if (rdev->family >= CHIP_RV515)
   2197				h |= track->textures[u].height_11;
   2198			h = h / (1 << i);
   2199			if (track->textures[u].roundup_h)
   2200				h = roundup_pow_of_two(h);
   2201			if (track->textures[u].tex_coord_type == 1) {
   2202				d = (1 << track->textures[u].txdepth) / (1 << i);
   2203				if (!d)
   2204					d = 1;
   2205			} else {
   2206				d = 1;
   2207			}
    2208			if (track->textures[u].compress_format) {
    2209				/* compressed textures are block based */
    2210				size += r100_track_compress_size(track->textures[u].compress_format,
    2211								 w, h) * d;
    2212			} else
    2213				size += w * h * d;
   2214		}
   2215		size *= track->textures[u].cpp;
   2216
   2217		switch (track->textures[u].tex_coord_type) {
   2218		case 0:
   2219		case 1:
   2220			break;
   2221		case 2:
   2222			if (track->separate_cube) {
   2223				ret = r100_cs_track_cube(rdev, track, u);
   2224				if (ret)
   2225					return ret;
   2226			} else
   2227				size *= 6;
   2228			break;
   2229		default:
   2230			DRM_ERROR("Invalid texture coordinate type %u for unit "
   2231				  "%u\n", track->textures[u].tex_coord_type, u);
   2232			return -EINVAL;
   2233		}
   2234		if (size > radeon_bo_size(robj)) {
   2235			DRM_ERROR("Texture of unit %u needs %lu bytes but is "
   2236				  "%lu\n", u, size, radeon_bo_size(robj));
   2237			r100_cs_track_texture_print(&track->textures[u]);
   2238			return -EINVAL;
   2239		}
   2240	}
   2241	return 0;
   2242}
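
/*
 * Worked example (illustrative): a 16x16 2D texture with cpp = 2 and
 * num_levels = 2 accumulates 16*16 + 8*8 + 4*4 = 336 texels over the
 * mip loop above, i.e. 672 bytes that must fit in the bound BO.
 */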
   2243
   2244int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
   2245{
   2246	unsigned i;
   2247	unsigned long size;
   2248	unsigned prim_walk;
   2249	unsigned nverts;
   2250	unsigned num_cb = track->cb_dirty ? track->num_cb : 0;
   2251
   2252	if (num_cb && !track->zb_cb_clear && !track->color_channel_mask &&
   2253	    !track->blend_read_enable)
   2254		num_cb = 0;
   2255
   2256	for (i = 0; i < num_cb; i++) {
   2257		if (track->cb[i].robj == NULL) {
   2258			DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
   2259			return -EINVAL;
   2260		}
   2261		size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
   2262		size += track->cb[i].offset;
   2263		if (size > radeon_bo_size(track->cb[i].robj)) {
   2264			DRM_ERROR("[drm] Buffer too small for color buffer %d "
   2265				  "(need %lu have %lu) !\n", i, size,
   2266				  radeon_bo_size(track->cb[i].robj));
   2267			DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
   2268				  i, track->cb[i].pitch, track->cb[i].cpp,
   2269				  track->cb[i].offset, track->maxy);
   2270			return -EINVAL;
   2271		}
   2272	}
   2273	track->cb_dirty = false;
   2274
   2275	if (track->zb_dirty && track->z_enabled) {
   2276		if (track->zb.robj == NULL) {
   2277			DRM_ERROR("[drm] No buffer for z buffer !\n");
   2278			return -EINVAL;
   2279		}
   2280		size = track->zb.pitch * track->zb.cpp * track->maxy;
   2281		size += track->zb.offset;
   2282		if (size > radeon_bo_size(track->zb.robj)) {
   2283			DRM_ERROR("[drm] Buffer too small for z buffer "
   2284				  "(need %lu have %lu) !\n", size,
   2285				  radeon_bo_size(track->zb.robj));
   2286			DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n",
   2287				  track->zb.pitch, track->zb.cpp,
   2288				  track->zb.offset, track->maxy);
   2289			return -EINVAL;
   2290		}
   2291	}
   2292	track->zb_dirty = false;
   2293
   2294	if (track->aa_dirty && track->aaresolve) {
   2295		if (track->aa.robj == NULL) {
    2296			DRM_ERROR("[drm] No buffer for AA resolve buffer !\n");
   2297			return -EINVAL;
   2298		}
   2299		/* I believe the format comes from colorbuffer0. */
   2300		size = track->aa.pitch * track->cb[0].cpp * track->maxy;
   2301		size += track->aa.offset;
   2302		if (size > radeon_bo_size(track->aa.robj)) {
    2303			DRM_ERROR("[drm] Buffer too small for AA resolve buffer "
    2304				  "(need %lu have %lu) !\n", size,
    2305				  radeon_bo_size(track->aa.robj));
    2306			DRM_ERROR("[drm] AA resolve buffer (%u %u %u %u)\n",
    2307				  track->aa.pitch, track->cb[0].cpp,
    2308				  track->aa.offset, track->maxy);
   2309			return -EINVAL;
   2310		}
   2311	}
   2312	track->aa_dirty = false;
   2313
   2314	prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
   2315	if (track->vap_vf_cntl & (1 << 14)) {
   2316		nverts = track->vap_alt_nverts;
   2317	} else {
   2318		nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
   2319	}
   2320	switch (prim_walk) {
   2321	case 1:
   2322		for (i = 0; i < track->num_arrays; i++) {
   2323			size = track->arrays[i].esize * track->max_indx * 4;
   2324			if (track->arrays[i].robj == NULL) {
   2325				DRM_ERROR("(PW %u) Vertex array %u no buffer "
   2326					  "bound\n", prim_walk, i);
   2327				return -EINVAL;
   2328			}
   2329			if (size > radeon_bo_size(track->arrays[i].robj)) {
   2330				dev_err(rdev->dev, "(PW %u) Vertex array %u "
   2331					"need %lu dwords have %lu dwords\n",
   2332					prim_walk, i, size >> 2,
   2333					radeon_bo_size(track->arrays[i].robj)
   2334					>> 2);
   2335				DRM_ERROR("Max indices %u\n", track->max_indx);
   2336				return -EINVAL;
   2337			}
   2338		}
   2339		break;
   2340	case 2:
   2341		for (i = 0; i < track->num_arrays; i++) {
   2342			size = track->arrays[i].esize * (nverts - 1) * 4;
   2343			if (track->arrays[i].robj == NULL) {
   2344				DRM_ERROR("(PW %u) Vertex array %u no buffer "
   2345					  "bound\n", prim_walk, i);
   2346				return -EINVAL;
   2347			}
   2348			if (size > radeon_bo_size(track->arrays[i].robj)) {
   2349				dev_err(rdev->dev, "(PW %u) Vertex array %u "
   2350					"need %lu dwords have %lu dwords\n",
   2351					prim_walk, i, size >> 2,
   2352					radeon_bo_size(track->arrays[i].robj)
   2353					>> 2);
   2354				return -EINVAL;
   2355			}
   2356		}
   2357		break;
   2358	case 3:
   2359		size = track->vtx_size * nverts;
   2360		if (size != track->immd_dwords) {
    2361			DRM_ERROR("IMMD draw %u dwords but needs %lu dwords\n",
   2362				  track->immd_dwords, size);
   2363			DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
   2364				  nverts, track->vtx_size);
   2365			return -EINVAL;
   2366		}
   2367		break;
   2368	default:
   2369		DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
   2370			  prim_walk);
   2371		return -EINVAL;
   2372	}
   2373
   2374	if (track->tex_dirty) {
   2375		track->tex_dirty = false;
   2376		return r100_cs_track_texture_check(rdev, track);
   2377	}
   2378	return 0;
   2379}
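
/*
 * Worked example (illustrative) for the PRIM_WALK == 3 branch above: an
 * immediate-mode draw of 3 vertices with vtx_size = 7 must carry exactly
 * 3 * 7 = 21 dwords of vertex data, or the command stream is rejected.
 */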
   2380
   2381void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
   2382{
   2383	unsigned i, face;
   2384
   2385	track->cb_dirty = true;
   2386	track->zb_dirty = true;
   2387	track->tex_dirty = true;
   2388	track->aa_dirty = true;
   2389
   2390	if (rdev->family < CHIP_R300) {
   2391		track->num_cb = 1;
   2392		if (rdev->family <= CHIP_RS200)
   2393			track->num_texture = 3;
   2394		else
   2395			track->num_texture = 6;
   2396		track->maxy = 2048;
   2397		track->separate_cube = true;
   2398	} else {
   2399		track->num_cb = 4;
   2400		track->num_texture = 16;
   2401		track->maxy = 4096;
   2402		track->separate_cube = false;
   2403		track->aaresolve = false;
   2404		track->aa.robj = NULL;
   2405	}
   2406
   2407	for (i = 0; i < track->num_cb; i++) {
   2408		track->cb[i].robj = NULL;
   2409		track->cb[i].pitch = 8192;
   2410		track->cb[i].cpp = 16;
   2411		track->cb[i].offset = 0;
   2412	}
   2413	track->z_enabled = true;
   2414	track->zb.robj = NULL;
   2415	track->zb.pitch = 8192;
   2416	track->zb.cpp = 4;
   2417	track->zb.offset = 0;
   2418	track->vtx_size = 0x7F;
   2419	track->immd_dwords = 0xFFFFFFFFUL;
   2420	track->num_arrays = 11;
   2421	track->max_indx = 0x00FFFFFFUL;
   2422	for (i = 0; i < track->num_arrays; i++) {
   2423		track->arrays[i].robj = NULL;
   2424		track->arrays[i].esize = 0x7F;
   2425	}
   2426	for (i = 0; i < track->num_texture; i++) {
   2427		track->textures[i].compress_format = R100_TRACK_COMP_NONE;
   2428		track->textures[i].pitch = 16536;
   2429		track->textures[i].width = 16536;
   2430		track->textures[i].height = 16536;
   2431		track->textures[i].width_11 = 1 << 11;
   2432		track->textures[i].height_11 = 1 << 11;
   2433		track->textures[i].num_levels = 12;
   2434		if (rdev->family <= CHIP_RS200) {
   2435			track->textures[i].tex_coord_type = 0;
   2436			track->textures[i].txdepth = 0;
   2437		} else {
   2438			track->textures[i].txdepth = 16;
   2439			track->textures[i].tex_coord_type = 1;
   2440		}
   2441		track->textures[i].cpp = 64;
   2442		track->textures[i].robj = NULL;
    2443		/* CS IB emission code makes sure texture units are disabled */
   2444		track->textures[i].enabled = false;
   2445		track->textures[i].lookup_disable = false;
   2446		track->textures[i].roundup_w = true;
   2447		track->textures[i].roundup_h = true;
   2448		if (track->separate_cube)
   2449			for (face = 0; face < 5; face++) {
   2450				track->textures[i].cube_info[face].robj = NULL;
   2451				track->textures[i].cube_info[face].width = 16536;
   2452				track->textures[i].cube_info[face].height = 16536;
   2453				track->textures[i].cube_info[face].offset = 0;
   2454			}
   2455	}
   2456}
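
/*
 * Note (assumption): the defaults above are deliberately worst-case
 * (pitch 8192, cpp 16, huge vtx_size/immd_dwords) so that a draw issued
 * before the relevant state is programmed fails the buffer-size checks
 * rather than slipping through.
 */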
   2457
   2458/*
   2459 * Global GPU functions
   2460 */
   2461static void r100_errata(struct radeon_device *rdev)
   2462{
   2463	rdev->pll_errata = 0;
   2464
   2465	if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) {
   2466		rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS;
   2467	}
   2468
   2469	if (rdev->family == CHIP_RV100 ||
   2470	    rdev->family == CHIP_RS100 ||
   2471	    rdev->family == CHIP_RS200) {
   2472		rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY;
   2473	}
   2474}
   2475
   2476static int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n)
   2477{
   2478	unsigned i;
   2479	uint32_t tmp;
   2480
   2481	for (i = 0; i < rdev->usec_timeout; i++) {
   2482		tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK;
   2483		if (tmp >= n) {
   2484			return 0;
   2485		}
   2486		udelay(1);
   2487	}
   2488	return -1;
   2489}
   2490
   2491int r100_gui_wait_for_idle(struct radeon_device *rdev)
   2492{
   2493	unsigned i;
   2494	uint32_t tmp;
   2495
   2496	if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) {
   2497		pr_warn("radeon: wait for empty RBBM fifo failed! Bad things might happen.\n");
   2498	}
   2499	for (i = 0; i < rdev->usec_timeout; i++) {
   2500		tmp = RREG32(RADEON_RBBM_STATUS);
   2501		if (!(tmp & RADEON_RBBM_ACTIVE)) {
   2502			return 0;
   2503		}
   2504		udelay(1);
   2505	}
   2506	return -1;
   2507}
   2508
   2509int r100_mc_wait_for_idle(struct radeon_device *rdev)
   2510{
   2511	unsigned i;
   2512	uint32_t tmp;
   2513
   2514	for (i = 0; i < rdev->usec_timeout; i++) {
   2515		/* read MC_STATUS */
   2516		tmp = RREG32(RADEON_MC_STATUS);
   2517		if (tmp & RADEON_MC_IDLE) {
   2518			return 0;
   2519		}
   2520		udelay(1);
   2521	}
   2522	return -1;
   2523}
   2524
   2525bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
   2526{
   2527	u32 rbbm_status;
   2528
   2529	rbbm_status = RREG32(R_000E40_RBBM_STATUS);
   2530	if (!G_000E40_GUI_ACTIVE(rbbm_status)) {
   2531		radeon_ring_lockup_update(rdev, ring);
   2532		return false;
   2533	}
   2534	return radeon_ring_test_lockup(rdev, ring);
   2535}
   2536
   2537/* required on r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */
   2538void r100_enable_bm(struct radeon_device *rdev)
   2539{
   2540	uint32_t tmp;
   2541	/* Enable bus mastering */
   2542	tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
   2543	WREG32(RADEON_BUS_CNTL, tmp);
   2544}
   2545
   2546void r100_bm_disable(struct radeon_device *rdev)
   2547{
   2548	u32 tmp;
   2549
   2550	/* disable bus mastering */
   2551	tmp = RREG32(R_000030_BUS_CNTL);
   2552	WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000044);
   2553	mdelay(1);
   2554	WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000042);
   2555	mdelay(1);
   2556	WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000040);
   2557	tmp = RREG32(RADEON_BUS_CNTL);
   2558	mdelay(1);
   2559	pci_clear_master(rdev->pdev);
   2560	mdelay(1);
   2561}
   2562
   2563int r100_asic_reset(struct radeon_device *rdev, bool hard)
   2564{
   2565	struct r100_mc_save save;
   2566	u32 status, tmp;
   2567	int ret = 0;
   2568
   2569	status = RREG32(R_000E40_RBBM_STATUS);
   2570	if (!G_000E40_GUI_ACTIVE(status)) {
   2571		return 0;
   2572	}
   2573	r100_mc_stop(rdev, &save);
   2574	status = RREG32(R_000E40_RBBM_STATUS);
   2575	dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
   2576	/* stop CP */
   2577	WREG32(RADEON_CP_CSQ_CNTL, 0);
   2578	tmp = RREG32(RADEON_CP_RB_CNTL);
   2579	WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
   2580	WREG32(RADEON_CP_RB_RPTR_WR, 0);
   2581	WREG32(RADEON_CP_RB_WPTR, 0);
   2582	WREG32(RADEON_CP_RB_CNTL, tmp);
   2583	/* save PCI state */
   2584	pci_save_state(rdev->pdev);
   2585	/* disable bus mastering */
   2586	r100_bm_disable(rdev);
   2587	WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_SE(1) |
   2588					S_0000F0_SOFT_RESET_RE(1) |
   2589					S_0000F0_SOFT_RESET_PP(1) |
   2590					S_0000F0_SOFT_RESET_RB(1));
   2591	RREG32(R_0000F0_RBBM_SOFT_RESET);
   2592	mdelay(500);
   2593	WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
   2594	mdelay(1);
   2595	status = RREG32(R_000E40_RBBM_STATUS);
   2596	dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
   2597	/* reset CP */
   2598	WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_CP(1));
   2599	RREG32(R_0000F0_RBBM_SOFT_RESET);
   2600	mdelay(500);
   2601	WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
   2602	mdelay(1);
   2603	status = RREG32(R_000E40_RBBM_STATUS);
   2604	dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
   2605	/* restore PCI & busmastering */
   2606	pci_restore_state(rdev->pdev);
   2607	r100_enable_bm(rdev);
   2608	/* Check if GPU is idle */
   2609	if (G_000E40_SE_BUSY(status) || G_000E40_RE_BUSY(status) ||
   2610		G_000E40_TAM_BUSY(status) || G_000E40_PB_BUSY(status)) {
   2611		dev_err(rdev->dev, "failed to reset GPU\n");
   2612		ret = -1;
   2613	} else
    2614		dev_info(rdev->dev, "GPU reset succeeded\n");
   2615	r100_mc_resume(rdev, &save);
   2616	return ret;
   2617}
   2618
   2619void r100_set_common_regs(struct radeon_device *rdev)
   2620{
   2621	bool force_dac2 = false;
   2622	u32 tmp;
   2623
   2624	/* set these so they don't interfere with anything */
   2625	WREG32(RADEON_OV0_SCALE_CNTL, 0);
   2626	WREG32(RADEON_SUBPIC_CNTL, 0);
   2627	WREG32(RADEON_VIPH_CONTROL, 0);
   2628	WREG32(RADEON_I2C_CNTL_1, 0);
   2629	WREG32(RADEON_DVI_I2C_CNTL_1, 0);
   2630	WREG32(RADEON_CAP0_TRIG_CNTL, 0);
   2631	WREG32(RADEON_CAP1_TRIG_CNTL, 0);
   2632
   2633	/* always set up dac2 on rn50 and some rv100 as lots
   2634	 * of servers seem to wire it up to a VGA port but
   2635	 * don't report it in the bios connector
   2636	 * table.
   2637	 */
   2638	switch (rdev->pdev->device) {
   2639		/* RN50 */
   2640	case 0x515e:
   2641	case 0x5969:
   2642		force_dac2 = true;
   2643		break;
   2644		/* RV100*/
   2645	case 0x5159:
   2646	case 0x515a:
   2647		/* DELL triple head servers */
   2648		if ((rdev->pdev->subsystem_vendor == 0x1028 /* DELL */) &&
   2649		    ((rdev->pdev->subsystem_device == 0x016c) ||
   2650		     (rdev->pdev->subsystem_device == 0x016d) ||
   2651		     (rdev->pdev->subsystem_device == 0x016e) ||
   2652		     (rdev->pdev->subsystem_device == 0x016f) ||
   2653		     (rdev->pdev->subsystem_device == 0x0170) ||
   2654		     (rdev->pdev->subsystem_device == 0x017d) ||
   2655		     (rdev->pdev->subsystem_device == 0x017e) ||
   2656		     (rdev->pdev->subsystem_device == 0x0183) ||
   2657		     (rdev->pdev->subsystem_device == 0x018a) ||
   2658		     (rdev->pdev->subsystem_device == 0x019a)))
   2659			force_dac2 = true;
   2660		break;
   2661	}
   2662
   2663	if (force_dac2) {
   2664		u32 disp_hw_debug = RREG32(RADEON_DISP_HW_DEBUG);
   2665		u32 tv_dac_cntl = RREG32(RADEON_TV_DAC_CNTL);
   2666		u32 dac2_cntl = RREG32(RADEON_DAC_CNTL2);
   2667
   2668		/* For CRT on DAC2, don't turn it on if BIOS didn't
    2669		   enable it, even if it's detected.
   2670		*/
   2671
   2672		/* force it to crtc0 */
   2673		dac2_cntl &= ~RADEON_DAC2_DAC_CLK_SEL;
   2674		dac2_cntl |= RADEON_DAC2_DAC2_CLK_SEL;
   2675		disp_hw_debug |= RADEON_CRT2_DISP1_SEL;
   2676
   2677		/* set up the TV DAC */
   2678		tv_dac_cntl &= ~(RADEON_TV_DAC_PEDESTAL |
   2679				 RADEON_TV_DAC_STD_MASK |
   2680				 RADEON_TV_DAC_RDACPD |
   2681				 RADEON_TV_DAC_GDACPD |
   2682				 RADEON_TV_DAC_BDACPD |
   2683				 RADEON_TV_DAC_BGADJ_MASK |
   2684				 RADEON_TV_DAC_DACADJ_MASK);
   2685		tv_dac_cntl |= (RADEON_TV_DAC_NBLANK |
   2686				RADEON_TV_DAC_NHOLD |
   2687				RADEON_TV_DAC_STD_PS2 |
   2688				(0x58 << 16));
   2689
   2690		WREG32(RADEON_TV_DAC_CNTL, tv_dac_cntl);
   2691		WREG32(RADEON_DISP_HW_DEBUG, disp_hw_debug);
   2692		WREG32(RADEON_DAC_CNTL2, dac2_cntl);
   2693	}
   2694
   2695	/* switch PM block to ACPI mode */
   2696	tmp = RREG32_PLL(RADEON_PLL_PWRMGT_CNTL);
   2697	tmp &= ~RADEON_PM_MODE_SEL;
   2698	WREG32_PLL(RADEON_PLL_PWRMGT_CNTL, tmp);
   2699
   2700}
   2701
   2702/*
   2703 * VRAM info
   2704 */
   2705static void r100_vram_get_type(struct radeon_device *rdev)
   2706{
   2707	uint32_t tmp;
   2708
   2709	rdev->mc.vram_is_ddr = false;
   2710	if (rdev->flags & RADEON_IS_IGP)
   2711		rdev->mc.vram_is_ddr = true;
   2712	else if (RREG32(RADEON_MEM_SDRAM_MODE_REG) & RADEON_MEM_CFG_TYPE_DDR)
   2713		rdev->mc.vram_is_ddr = true;
   2714	if ((rdev->family == CHIP_RV100) ||
   2715	    (rdev->family == CHIP_RS100) ||
   2716	    (rdev->family == CHIP_RS200)) {
   2717		tmp = RREG32(RADEON_MEM_CNTL);
   2718		if (tmp & RV100_HALF_MODE) {
   2719			rdev->mc.vram_width = 32;
   2720		} else {
   2721			rdev->mc.vram_width = 64;
   2722		}
   2723		if (rdev->flags & RADEON_SINGLE_CRTC) {
   2724			rdev->mc.vram_width /= 4;
   2725			rdev->mc.vram_is_ddr = true;
   2726		}
   2727	} else if (rdev->family <= CHIP_RV280) {
   2728		tmp = RREG32(RADEON_MEM_CNTL);
   2729		if (tmp & RADEON_MEM_NUM_CHANNELS_MASK) {
   2730			rdev->mc.vram_width = 128;
   2731		} else {
   2732			rdev->mc.vram_width = 64;
   2733		}
   2734	} else {
   2735		/* newer IGPs */
   2736		rdev->mc.vram_width = 128;
   2737	}
   2738}
   2739
   2740static u32 r100_get_accessible_vram(struct radeon_device *rdev)
   2741{
   2742	u32 aper_size;
   2743	u8 byte;
   2744
   2745	aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
   2746
   2747	/* Set HDP_APER_CNTL only on cards that are known not to be broken,
    2748	 * that is, ones that have the 2nd generation multifunction PCI interface
   2749	 */
   2750	if (rdev->family == CHIP_RV280 ||
   2751	    rdev->family >= CHIP_RV350) {
   2752		WREG32_P(RADEON_HOST_PATH_CNTL, RADEON_HDP_APER_CNTL,
   2753		       ~RADEON_HDP_APER_CNTL);
   2754		DRM_INFO("Generation 2 PCI interface, using max accessible memory\n");
   2755		return aper_size * 2;
   2756	}
   2757
   2758	/* Older cards have all sorts of funny issues to deal with. First
   2759	 * check if it's a multifunction card by reading the PCI config
   2760	 * header type... Limit those to one aperture size
   2761	 */
   2762	pci_read_config_byte(rdev->pdev, 0xe, &byte);
   2763	if (byte & 0x80) {
   2764		DRM_INFO("Generation 1 PCI interface in multifunction mode\n");
   2765		DRM_INFO("Limiting VRAM to one aperture\n");
   2766		return aper_size;
   2767	}
   2768
   2769	/* Single function older card. We read HDP_APER_CNTL to see how the BIOS
    2770	 * has set it up. We don't write it back, as that's broken on some ASICs, but
   2771	 * we expect the BIOS to have done the right thing (might be too optimistic...)
   2772	 */
   2773	if (RREG32(RADEON_HOST_PATH_CNTL) & RADEON_HDP_APER_CNTL)
   2774		return aper_size * 2;
   2775	return aper_size;
   2776}
   2777
   2778void r100_vram_init_sizes(struct radeon_device *rdev)
   2779{
   2780	u64 config_aper_size;
   2781
   2782	/* work out accessible VRAM */
   2783	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
   2784	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
   2785	rdev->mc.visible_vram_size = r100_get_accessible_vram(rdev);
   2786	/* FIXME we don't use the second aperture yet when we could use it */
   2787	if (rdev->mc.visible_vram_size > rdev->mc.aper_size)
   2788		rdev->mc.visible_vram_size = rdev->mc.aper_size;
   2789	config_aper_size = RREG32(RADEON_CONFIG_APER_SIZE);
   2790	if (rdev->flags & RADEON_IS_IGP) {
   2791		uint32_t tom;
   2792		/* read NB_TOM to get the amount of ram stolen for the GPU */
   2793		tom = RREG32(RADEON_NB_TOM);
   2794		rdev->mc.real_vram_size = (((tom >> 16) - (tom & 0xffff) + 1) << 16);
   2795		WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
   2796		rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
   2797	} else {
   2798		rdev->mc.real_vram_size = RREG32(RADEON_CONFIG_MEMSIZE);
    2799		/* Some production M6 boards report 0
    2800		 * when they actually have 8 MB
   2801		 */
   2802		if (rdev->mc.real_vram_size == 0) {
   2803			rdev->mc.real_vram_size = 8192 * 1024;
   2804			WREG32(RADEON_CONFIG_MEMSIZE, rdev->mc.real_vram_size);
   2805		}
   2806		/* Fix for RN50, M6, M7 with 8/16/32(??) MBs of VRAM -
    2807		 * Novell bug 204882, along with lots of Ubuntu ones
   2808		 */
   2809		if (rdev->mc.aper_size > config_aper_size)
   2810			config_aper_size = rdev->mc.aper_size;
   2811
   2812		if (config_aper_size > rdev->mc.real_vram_size)
   2813			rdev->mc.mc_vram_size = config_aper_size;
   2814		else
   2815			rdev->mc.mc_vram_size = rdev->mc.real_vram_size;
   2816	}
   2817}
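
/*
 * Worked example (illustrative): NB_TOM packs the top and bottom of the
 * stolen-memory range in 64K units, so tom = 0x03ff0000 decodes to
 * ((0x03ff - 0x0000 + 1) << 16) = 64 MB of carved-out system RAM.
 */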
   2818
   2819void r100_vga_set_state(struct radeon_device *rdev, bool state)
   2820{
   2821	uint32_t temp;
   2822
   2823	temp = RREG32(RADEON_CONFIG_CNTL);
   2824	if (!state) {
   2825		temp &= ~RADEON_CFG_VGA_RAM_EN;
   2826		temp |= RADEON_CFG_VGA_IO_DIS;
   2827	} else {
   2828		temp &= ~RADEON_CFG_VGA_IO_DIS;
   2829	}
   2830	WREG32(RADEON_CONFIG_CNTL, temp);
   2831}
   2832
   2833static void r100_mc_init(struct radeon_device *rdev)
   2834{
   2835	u64 base;
   2836
   2837	r100_vram_get_type(rdev);
   2838	r100_vram_init_sizes(rdev);
   2839	base = rdev->mc.aper_base;
   2840	if (rdev->flags & RADEON_IS_IGP)
   2841		base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16;
   2842	radeon_vram_location(rdev, &rdev->mc, base);
   2843	rdev->mc.gtt_base_align = 0;
   2844	if (!(rdev->flags & RADEON_IS_AGP))
   2845		radeon_gtt_location(rdev, &rdev->mc);
   2846	radeon_update_bandwidth_info(rdev);
   2847}
   2848
   2849
   2850/*
   2851 * Indirect registers accessor
   2852 */
   2853void r100_pll_errata_after_index(struct radeon_device *rdev)
   2854{
   2855	if (rdev->pll_errata & CHIP_ERRATA_PLL_DUMMYREADS) {
   2856		(void)RREG32(RADEON_CLOCK_CNTL_DATA);
   2857		(void)RREG32(RADEON_CRTC_GEN_CNTL);
   2858	}
   2859}
   2860
   2861static void r100_pll_errata_after_data(struct radeon_device *rdev)
   2862{
    2863	/* This workaround is necessary on RV100, RS100 and RS200 chips
   2864	 * or the chip could hang on a subsequent access
   2865	 */
   2866	if (rdev->pll_errata & CHIP_ERRATA_PLL_DELAY) {
   2867		mdelay(5);
   2868	}
   2869
    2870	/* This function is required to work around a hardware bug in some (all?)
   2871	 * revisions of the R300.  This workaround should be called after every
   2872	 * CLOCK_CNTL_INDEX register access.  If not, register reads afterward
   2873	 * may not be correct.
   2874	 */
   2875	if (rdev->pll_errata & CHIP_ERRATA_R300_CG) {
   2876		uint32_t save, tmp;
   2877
   2878		save = RREG32(RADEON_CLOCK_CNTL_INDEX);
   2879		tmp = save & ~(0x3f | RADEON_PLL_WR_EN);
   2880		WREG32(RADEON_CLOCK_CNTL_INDEX, tmp);
   2881		tmp = RREG32(RADEON_CLOCK_CNTL_DATA);
   2882		WREG32(RADEON_CLOCK_CNTL_INDEX, save);
   2883	}
   2884}
   2885
   2886uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg)
   2887{
   2888	unsigned long flags;
   2889	uint32_t data;
   2890
   2891	spin_lock_irqsave(&rdev->pll_idx_lock, flags);
   2892	WREG8(RADEON_CLOCK_CNTL_INDEX, reg & 0x3f);
   2893	r100_pll_errata_after_index(rdev);
   2894	data = RREG32(RADEON_CLOCK_CNTL_DATA);
   2895	r100_pll_errata_after_data(rdev);
   2896	spin_unlock_irqrestore(&rdev->pll_idx_lock, flags);
   2897	return data;
   2898}
   2899
   2900void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
   2901{
   2902	unsigned long flags;
   2903
   2904	spin_lock_irqsave(&rdev->pll_idx_lock, flags);
   2905	WREG8(RADEON_CLOCK_CNTL_INDEX, ((reg & 0x3f) | RADEON_PLL_WR_EN));
   2906	r100_pll_errata_after_index(rdev);
   2907	WREG32(RADEON_CLOCK_CNTL_DATA, v);
   2908	r100_pll_errata_after_data(rdev);
   2909	spin_unlock_irqrestore(&rdev->pll_idx_lock, flags);
   2910}
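
/*
 * Illustrative note: PLL registers sit behind an index/data pair, e.g.
 * reading PLL register 0x0d amounts to WREG8(RADEON_CLOCK_CNTL_INDEX,
 * 0x0d) followed by RREG32(RADEON_CLOCK_CNTL_DATA), with the errata
 * hooks above run between the index write and the data access on
 * affected chips.
 */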
   2911
   2912static void r100_set_safe_registers(struct radeon_device *rdev)
   2913{
   2914	if (ASIC_IS_RN50(rdev)) {
   2915		rdev->config.r100.reg_safe_bm = rn50_reg_safe_bm;
   2916		rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(rn50_reg_safe_bm);
   2917	} else if (rdev->family < CHIP_R200) {
   2918		rdev->config.r100.reg_safe_bm = r100_reg_safe_bm;
   2919		rdev->config.r100.reg_safe_bm_size = ARRAY_SIZE(r100_reg_safe_bm);
   2920	} else {
   2921		r200_set_safe_registers(rdev);
   2922	}
   2923}
   2924
   2925/*
   2926 * Debugfs info
   2927 */
   2928#if defined(CONFIG_DEBUG_FS)
   2929static int r100_debugfs_rbbm_info_show(struct seq_file *m, void *unused)
   2930{
   2931	struct radeon_device *rdev = (struct radeon_device *)m->private;
   2932	uint32_t reg, value;
   2933	unsigned i;
   2934
   2935	seq_printf(m, "RBBM_STATUS 0x%08x\n", RREG32(RADEON_RBBM_STATUS));
   2936	seq_printf(m, "RBBM_CMDFIFO_STAT 0x%08x\n", RREG32(0xE7C));
   2937	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
   2938	for (i = 0; i < 64; i++) {
   2939		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i | 0x100);
   2940		reg = (RREG32(RADEON_RBBM_CMDFIFO_DATA) - 1) >> 2;
   2941		WREG32(RADEON_RBBM_CMDFIFO_ADDR, i);
   2942		value = RREG32(RADEON_RBBM_CMDFIFO_DATA);
   2943		seq_printf(m, "[0x%03X] 0x%04X=0x%08X\n", i, reg, value);
   2944	}
   2945	return 0;
   2946}
   2947
   2948static int r100_debugfs_cp_ring_info_show(struct seq_file *m, void *unused)
   2949{
   2950	struct radeon_device *rdev = (struct radeon_device *)m->private;
   2951	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
   2952	uint32_t rdp, wdp;
   2953	unsigned count, i, j;
   2954
   2955	radeon_ring_free_size(rdev, ring);
   2956	rdp = RREG32(RADEON_CP_RB_RPTR);
   2957	wdp = RREG32(RADEON_CP_RB_WPTR);
   2958	count = (rdp + ring->ring_size - wdp) & ring->ptr_mask;
   2959	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
   2960	seq_printf(m, "CP_RB_WPTR 0x%08x\n", wdp);
   2961	seq_printf(m, "CP_RB_RPTR 0x%08x\n", rdp);
   2962	seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
   2963	seq_printf(m, "%u dwords in ring\n", count);
   2964	if (ring->ready) {
   2965		for (j = 0; j <= count; j++) {
   2966			i = (rdp + j) & ring->ptr_mask;
   2967			seq_printf(m, "r[%04d]=0x%08x\n", i, ring->ring[i]);
   2968		}
   2969	}
   2970	return 0;
   2971}
   2972
   2973
   2974static int r100_debugfs_cp_csq_fifo_show(struct seq_file *m, void *unused)
   2975{
   2976	struct radeon_device *rdev = (struct radeon_device *)m->private;
   2977	uint32_t csq_stat, csq2_stat, tmp;
   2978	unsigned r_rptr, r_wptr, ib1_rptr, ib1_wptr, ib2_rptr, ib2_wptr;
   2979	unsigned i;
   2980
   2981	seq_printf(m, "CP_STAT 0x%08x\n", RREG32(RADEON_CP_STAT));
   2982	seq_printf(m, "CP_CSQ_MODE 0x%08x\n", RREG32(RADEON_CP_CSQ_MODE));
   2983	csq_stat = RREG32(RADEON_CP_CSQ_STAT);
   2984	csq2_stat = RREG32(RADEON_CP_CSQ2_STAT);
   2985	r_rptr = (csq_stat >> 0) & 0x3ff;
   2986	r_wptr = (csq_stat >> 10) & 0x3ff;
   2987	ib1_rptr = (csq_stat >> 20) & 0x3ff;
   2988	ib1_wptr = (csq2_stat >> 0) & 0x3ff;
   2989	ib2_rptr = (csq2_stat >> 10) & 0x3ff;
   2990	ib2_wptr = (csq2_stat >> 20) & 0x3ff;
   2991	seq_printf(m, "CP_CSQ_STAT 0x%08x\n", csq_stat);
   2992	seq_printf(m, "CP_CSQ2_STAT 0x%08x\n", csq2_stat);
   2993	seq_printf(m, "Ring rptr %u\n", r_rptr);
   2994	seq_printf(m, "Ring wptr %u\n", r_wptr);
   2995	seq_printf(m, "Indirect1 rptr %u\n", ib1_rptr);
   2996	seq_printf(m, "Indirect1 wptr %u\n", ib1_wptr);
   2997	seq_printf(m, "Indirect2 rptr %u\n", ib2_rptr);
   2998	seq_printf(m, "Indirect2 wptr %u\n", ib2_wptr);
   2999	/* FIXME: 0, 128, 640 depends on fifo setup see cp_init_kms
   3000	 * 128 = indirect1_start * 8 & 640 = indirect2_start * 8 */
   3001	seq_printf(m, "Ring fifo:\n");
   3002	for (i = 0; i < 256; i++) {
   3003		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
   3004		tmp = RREG32(RADEON_CP_CSQ_DATA);
   3005		seq_printf(m, "rfifo[%04d]=0x%08X\n", i, tmp);
   3006	}
   3007	seq_printf(m, "Indirect1 fifo:\n");
   3008	for (i = 256; i <= 512; i++) {
   3009		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
   3010		tmp = RREG32(RADEON_CP_CSQ_DATA);
   3011		seq_printf(m, "ib1fifo[%04d]=0x%08X\n", i, tmp);
   3012	}
   3013	seq_printf(m, "Indirect2 fifo:\n");
    3014	for (i = 640; i < ib2_wptr; i++) {
   3015		WREG32(RADEON_CP_CSQ_ADDR, i << 2);
   3016		tmp = RREG32(RADEON_CP_CSQ_DATA);
   3017		seq_printf(m, "ib2fifo[%04d]=0x%08X\n", i, tmp);
   3018	}
   3019	return 0;
   3020}
   3021
   3022static int r100_debugfs_mc_info_show(struct seq_file *m, void *unused)
   3023{
   3024	struct radeon_device *rdev = (struct radeon_device *)m->private;
   3025	uint32_t tmp;
   3026
   3027	tmp = RREG32(RADEON_CONFIG_MEMSIZE);
   3028	seq_printf(m, "CONFIG_MEMSIZE 0x%08x\n", tmp);
   3029	tmp = RREG32(RADEON_MC_FB_LOCATION);
   3030	seq_printf(m, "MC_FB_LOCATION 0x%08x\n", tmp);
   3031	tmp = RREG32(RADEON_BUS_CNTL);
   3032	seq_printf(m, "BUS_CNTL 0x%08x\n", tmp);
   3033	tmp = RREG32(RADEON_MC_AGP_LOCATION);
   3034	seq_printf(m, "MC_AGP_LOCATION 0x%08x\n", tmp);
   3035	tmp = RREG32(RADEON_AGP_BASE);
   3036	seq_printf(m, "AGP_BASE 0x%08x\n", tmp);
   3037	tmp = RREG32(RADEON_HOST_PATH_CNTL);
   3038	seq_printf(m, "HOST_PATH_CNTL 0x%08x\n", tmp);
   3039	tmp = RREG32(0x01D0);
   3040	seq_printf(m, "AIC_CTRL 0x%08x\n", tmp);
   3041	tmp = RREG32(RADEON_AIC_LO_ADDR);
   3042	seq_printf(m, "AIC_LO_ADDR 0x%08x\n", tmp);
   3043	tmp = RREG32(RADEON_AIC_HI_ADDR);
   3044	seq_printf(m, "AIC_HI_ADDR 0x%08x\n", tmp);
   3045	tmp = RREG32(0x01E4);
   3046	seq_printf(m, "AIC_TLB_ADDR 0x%08x\n", tmp);
   3047	return 0;
   3048}
   3049
   3050DEFINE_SHOW_ATTRIBUTE(r100_debugfs_rbbm_info);
   3051DEFINE_SHOW_ATTRIBUTE(r100_debugfs_cp_ring_info);
   3052DEFINE_SHOW_ATTRIBUTE(r100_debugfs_cp_csq_fifo);
   3053DEFINE_SHOW_ATTRIBUTE(r100_debugfs_mc_info);
   3054
   3055#endif
   3056
   3057void  r100_debugfs_rbbm_init(struct radeon_device *rdev)
   3058{
   3059#if defined(CONFIG_DEBUG_FS)
   3060	struct dentry *root = rdev->ddev->primary->debugfs_root;
   3061
   3062	debugfs_create_file("r100_rbbm_info", 0444, root, rdev,
   3063			    &r100_debugfs_rbbm_info_fops);
   3064#endif
   3065}
   3066
   3067void r100_debugfs_cp_init(struct radeon_device *rdev)
   3068{
   3069#if defined(CONFIG_DEBUG_FS)
   3070	struct dentry *root = rdev->ddev->primary->debugfs_root;
   3071
   3072	debugfs_create_file("r100_cp_ring_info", 0444, root, rdev,
   3073			    &r100_debugfs_cp_ring_info_fops);
   3074	debugfs_create_file("r100_cp_csq_fifo", 0444, root, rdev,
   3075			    &r100_debugfs_cp_csq_fifo_fops);
   3076#endif
   3077}
   3078
   3079void  r100_debugfs_mc_info_init(struct radeon_device *rdev)
   3080{
   3081#if defined(CONFIG_DEBUG_FS)
   3082	struct dentry *root = rdev->ddev->primary->debugfs_root;
   3083
   3084	debugfs_create_file("r100_mc_info", 0444, root, rdev,
   3085			    &r100_debugfs_mc_info_fops);
   3086#endif
   3087}
   3088
   3089int r100_set_surface_reg(struct radeon_device *rdev, int reg,
   3090			 uint32_t tiling_flags, uint32_t pitch,
   3091			 uint32_t offset, uint32_t obj_size)
   3092{
   3093	int surf_index = reg * 16;
   3094	int flags = 0;
   3095
   3096	if (rdev->family <= CHIP_RS200) {
   3097		if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
   3098				 == (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
   3099			flags |= RADEON_SURF_TILE_COLOR_BOTH;
   3100		if (tiling_flags & RADEON_TILING_MACRO)
   3101			flags |= RADEON_SURF_TILE_COLOR_MACRO;
   3102		/* setting pitch to 0 disables tiling */
   3103		if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO))
   3104				== 0)
   3105			pitch = 0;
   3106	} else if (rdev->family <= CHIP_RV280) {
   3107		if (tiling_flags & (RADEON_TILING_MACRO))
   3108			flags |= R200_SURF_TILE_COLOR_MACRO;
   3109		if (tiling_flags & RADEON_TILING_MICRO)
   3110			flags |= R200_SURF_TILE_COLOR_MICRO;
   3111	} else {
   3112		if (tiling_flags & RADEON_TILING_MACRO)
   3113			flags |= R300_SURF_TILE_MACRO;
   3114		if (tiling_flags & RADEON_TILING_MICRO)
   3115			flags |= R300_SURF_TILE_MICRO;
   3116	}
   3117
   3118	if (tiling_flags & RADEON_TILING_SWAP_16BIT)
   3119		flags |= RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP;
   3120	if (tiling_flags & RADEON_TILING_SWAP_32BIT)
   3121		flags |= RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP;
   3122
    3123	/* pitch is encoded in units of 16 bytes on r100/r200, 8 bytes on r300+ */
   3124	if (rdev->family < CHIP_R300)
   3125		flags |= pitch / 16;
   3126	else
   3127		flags |= pitch / 8;
   3128
   3129
   3130	DRM_DEBUG_KMS("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1);
   3131	WREG32(RADEON_SURFACE0_INFO + surf_index, flags);
   3132	WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset);
   3133	WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1);
   3134	return 0;
   3135}
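
/*
 * Worked example (illustrative): a 1024-byte pitch is encoded into the
 * surface flags as 1024 / 16 = 64 on r100/r200 and as 1024 / 8 = 128 on
 * r300 and newer.
 */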
   3136
   3137void r100_clear_surface_reg(struct radeon_device *rdev, int reg)
   3138{
   3139	int surf_index = reg * 16;
   3140	WREG32(RADEON_SURFACE0_INFO + surf_index, 0);
   3141}
   3142
   3143void r100_bandwidth_update(struct radeon_device *rdev)
   3144{
   3145	fixed20_12 trcd_ff, trp_ff, tras_ff, trbs_ff, tcas_ff;
   3146	fixed20_12 sclk_ff, mclk_ff, sclk_eff_ff, sclk_delay_ff;
   3147	fixed20_12 peak_disp_bw, mem_bw, pix_clk, pix_clk2, temp_ff;
   3148	fixed20_12 crit_point_ff = {0};
   3149	uint32_t temp, data, mem_trcd, mem_trp, mem_tras;
   3150	fixed20_12 memtcas_ff[8] = {
   3151		dfixed_init(1),
   3152		dfixed_init(2),
   3153		dfixed_init(3),
   3154		dfixed_init(0),
   3155		dfixed_init_half(1),
   3156		dfixed_init_half(2),
   3157		dfixed_init(0),
   3158	};
   3159	fixed20_12 memtcas_rs480_ff[8] = {
   3160		dfixed_init(0),
   3161		dfixed_init(1),
   3162		dfixed_init(2),
   3163		dfixed_init(3),
   3164		dfixed_init(0),
   3165		dfixed_init_half(1),
   3166		dfixed_init_half(2),
   3167		dfixed_init_half(3),
   3168	};
   3169	fixed20_12 memtcas2_ff[8] = {
   3170		dfixed_init(0),
   3171		dfixed_init(1),
   3172		dfixed_init(2),
   3173		dfixed_init(3),
   3174		dfixed_init(4),
   3175		dfixed_init(5),
   3176		dfixed_init(6),
   3177		dfixed_init(7),
   3178	};
   3179	fixed20_12 memtrbs[8] = {
   3180		dfixed_init(1),
   3181		dfixed_init_half(1),
   3182		dfixed_init(2),
   3183		dfixed_init_half(2),
   3184		dfixed_init(3),
   3185		dfixed_init_half(3),
   3186		dfixed_init(4),
   3187		dfixed_init_half(4)
   3188	};
   3189	fixed20_12 memtrbs_r4xx[8] = {
   3190		dfixed_init(4),
   3191		dfixed_init(5),
   3192		dfixed_init(6),
   3193		dfixed_init(7),
   3194		dfixed_init(8),
   3195		dfixed_init(9),
   3196		dfixed_init(10),
   3197		dfixed_init(11)
   3198	};
   3199	fixed20_12 min_mem_eff;
   3200	fixed20_12 mc_latency_sclk, mc_latency_mclk, k1;
   3201	fixed20_12 cur_latency_mclk, cur_latency_sclk;
   3202	fixed20_12 disp_latency, disp_latency_overhead, disp_drain_rate = {0},
   3203		disp_drain_rate2, read_return_rate;
   3204	fixed20_12 time_disp1_drop_priority;
   3205	int c;
   3206	int cur_size = 16;       /* in octawords */
   3207	int critical_point = 0, critical_point2;
   3209	int stop_req, max_stop_req;
   3210	struct drm_display_mode *mode1 = NULL;
   3211	struct drm_display_mode *mode2 = NULL;
   3212	uint32_t pixel_bytes1 = 0;
   3213	uint32_t pixel_bytes2 = 0;
   3214
   3215	/* Guess line buffer size to be 8192 pixels */
   3216	u32 lb_size = 8192;
   3217
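        	/*
        	 * The math below uses drm's 20.12 fixed-point type (see
        	 * drm_fixed.h): dfixed_const(x) is x << 12, dfixed_init_half(x)
        	 * adds an extra 0.5 (e.g. dfixed_init_half(1) is 1.5), and
        	 * dfixed_trunc() drops the fractional bits again.
        	 */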
   3218	if (!rdev->mode_info.mode_config_initialized)
   3219		return;
   3220
   3221	radeon_update_display_priority(rdev);
   3222
   3223	if (rdev->mode_info.crtcs[0]->base.enabled) {
   3224		const struct drm_framebuffer *fb =
   3225			rdev->mode_info.crtcs[0]->base.primary->fb;
   3226
   3227		mode1 = &rdev->mode_info.crtcs[0]->base.mode;
   3228		pixel_bytes1 = fb->format->cpp[0];
   3229	}
   3230	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
   3231		if (rdev->mode_info.crtcs[1]->base.enabled) {
   3232			const struct drm_framebuffer *fb =
   3233				rdev->mode_info.crtcs[1]->base.primary->fb;
   3234
   3235			mode2 = &rdev->mode_info.crtcs[1]->base.mode;
   3236			pixel_bytes2 = fb->format->cpp[0];
   3237		}
   3238	}
   3239
    3240	min_mem_eff.full = dfixed_const_8(0); /* 0.8: assume ~80% memory efficiency */
    3241	/* bump MC latency timers when display priority is forced high */
   3242	if ((rdev->disp_priority == 2) && ASIC_IS_R300(rdev)) {
   3243		uint32_t mc_init_misc_lat_timer = RREG32(R300_MC_INIT_MISC_LAT_TIMER);
   3244		mc_init_misc_lat_timer &= ~(R300_MC_DISP1R_INIT_LAT_MASK << R300_MC_DISP1R_INIT_LAT_SHIFT);
   3245		mc_init_misc_lat_timer &= ~(R300_MC_DISP0R_INIT_LAT_MASK << R300_MC_DISP0R_INIT_LAT_SHIFT);
   3246		/* check crtc enables */
   3247		if (mode2)
   3248			mc_init_misc_lat_timer |= (1 << R300_MC_DISP1R_INIT_LAT_SHIFT);
   3249		if (mode1)
   3250			mc_init_misc_lat_timer |= (1 << R300_MC_DISP0R_INIT_LAT_SHIFT);
   3251		WREG32(R300_MC_INIT_MISC_LAT_TIMER, mc_init_misc_lat_timer);
   3252	}
   3253
    3254	/*
    3255	 * determine if there is enough bw for the current mode
    3256	 */
   3257	sclk_ff = rdev->pm.sclk;
   3258	mclk_ff = rdev->pm.mclk;
   3259
   3260	temp = (rdev->mc.vram_width / 8) * (rdev->mc.vram_is_ddr ? 2 : 1);
   3261	temp_ff.full = dfixed_const(temp);
   3262	mem_bw.full = dfixed_mul(mclk_ff, temp_ff);
   3263
   3264	pix_clk.full = 0;
   3265	pix_clk2.full = 0;
   3266	peak_disp_bw.full = 0;
   3267	if (mode1) {
   3268		temp_ff.full = dfixed_const(1000);
   3269		pix_clk.full = dfixed_const(mode1->clock); /* convert to fixed point */
   3270		pix_clk.full = dfixed_div(pix_clk, temp_ff);
   3271		temp_ff.full = dfixed_const(pixel_bytes1);
   3272		peak_disp_bw.full += dfixed_mul(pix_clk, temp_ff);
   3273	}
   3274	if (mode2) {
   3275		temp_ff.full = dfixed_const(1000);
   3276		pix_clk2.full = dfixed_const(mode2->clock); /* convert to fixed point */
   3277		pix_clk2.full = dfixed_div(pix_clk2, temp_ff);
   3278		temp_ff.full = dfixed_const(pixel_bytes2);
   3279		peak_disp_bw.full += dfixed_mul(pix_clk2, temp_ff);
   3280	}
   3281
   3282	mem_bw.full = dfixed_mul(mem_bw, min_mem_eff);
   3283	if (peak_disp_bw.full >= mem_bw.full) {
    3284		DRM_ERROR("You may not have enough display bandwidth for the current mode\n"
    3285			  "If you see flickering, try lowering the resolution, refresh rate, or color depth\n");
   3286	}
   3287
    3288	/* Get values from the MEM_TIMING_CNTL register...converting its contents. */
   3289	temp = RREG32(RADEON_MEM_TIMING_CNTL);
   3290	if ((rdev->family == CHIP_RV100) || (rdev->flags & RADEON_IS_IGP)) { /* RV100, M6, IGPs */
   3291		mem_trcd = ((temp >> 2) & 0x3) + 1;
   3292		mem_trp  = ((temp & 0x3)) + 1;
   3293		mem_tras = ((temp & 0x70) >> 4) + 1;
   3294	} else if (rdev->family == CHIP_R300 ||
   3295		   rdev->family == CHIP_R350) { /* r300, r350 */
   3296		mem_trcd = (temp & 0x7) + 1;
   3297		mem_trp = ((temp >> 8) & 0x7) + 1;
   3298		mem_tras = ((temp >> 11) & 0xf) + 4;
   3299	} else if (rdev->family == CHIP_RV350 ||
   3300		   rdev->family == CHIP_RV380) {
   3301		/* rv3x0 */
   3302		mem_trcd = (temp & 0x7) + 3;
   3303		mem_trp = ((temp >> 8) & 0x7) + 3;
   3304		mem_tras = ((temp >> 11) & 0xf) + 6;
   3305	} else if (rdev->family == CHIP_R420 ||
   3306		   rdev->family == CHIP_R423 ||
   3307		   rdev->family == CHIP_RV410) {
   3308		/* r4xx */
   3309		mem_trcd = (temp & 0xf) + 3;
   3310		if (mem_trcd > 15)
   3311			mem_trcd = 15;
   3312		mem_trp = ((temp >> 8) & 0xf) + 3;
   3313		if (mem_trp > 15)
   3314			mem_trp = 15;
   3315		mem_tras = ((temp >> 12) & 0x1f) + 6;
   3316		if (mem_tras > 31)
   3317			mem_tras = 31;
   3318	} else { /* RV200, R200 */
   3319		mem_trcd = (temp & 0x7) + 1;
   3320		mem_trp = ((temp >> 8) & 0x7) + 1;
   3321		mem_tras = ((temp >> 12) & 0xf) + 4;
   3322	}
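        	/*
        	 * Sample decode for illustration: temp = 0x36 on an RV100/IGP
        	 * gives mem_trcd = ((0x36 >> 2) & 0x3) + 1 = 2,
        	 * mem_trp = (0x36 & 0x3) + 1 = 3 and
        	 * mem_tras = ((0x36 & 0x70) >> 4) + 1 = 4 memory clocks.
        	 */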
   3323	/* convert to FF */
   3324	trcd_ff.full = dfixed_const(mem_trcd);
   3325	trp_ff.full = dfixed_const(mem_trp);
   3326	tras_ff.full = dfixed_const(mem_tras);
   3327
    3328	/* Get values from the MEM_SDRAM_MODE_REG register...converting its contents. */
   3329	temp = RREG32(RADEON_MEM_SDRAM_MODE_REG);
   3330	data = (temp & (7 << 20)) >> 20;
   3331	if ((rdev->family == CHIP_RV100) || rdev->flags & RADEON_IS_IGP) {
    3332		if (rdev->family == CHIP_RS480) /* apparently not rs400 */
   3333			tcas_ff = memtcas_rs480_ff[data];
   3334		else
   3335			tcas_ff = memtcas_ff[data];
   3336	} else
   3337		tcas_ff = memtcas2_ff[data];
   3338
   3339	if (rdev->family == CHIP_RS400 ||
   3340	    rdev->family == CHIP_RS480) {
    3341		/* extra CAS latency stored in bits 23-25, 0-4 clocks */
   3342		data = (temp >> 23) & 0x7;
   3343		if (data < 5)
   3344			tcas_ff.full += dfixed_const(data);
   3345	}
   3346
   3347	if (ASIC_IS_R300(rdev) && !(rdev->flags & RADEON_IS_IGP)) {
    3348		/* on the R300, Tcas is included in Trbs */
   3350		temp = RREG32(RADEON_MEM_CNTL);
   3351		data = (R300_MEM_NUM_CHANNELS_MASK & temp);
   3352		if (data == 1) {
   3353			if (R300_MEM_USE_CD_CH_ONLY & temp) {
   3354				temp = RREG32(R300_MC_IND_INDEX);
   3355				temp &= ~R300_MC_IND_ADDR_MASK;
   3356				temp |= R300_MC_READ_CNTL_CD_mcind;
   3357				WREG32(R300_MC_IND_INDEX, temp);
   3358				temp = RREG32(R300_MC_IND_DATA);
   3359				data = (R300_MEM_RBS_POSITION_C_MASK & temp);
   3360			} else {
   3361				temp = RREG32(R300_MC_READ_CNTL_AB);
   3362				data = (R300_MEM_RBS_POSITION_A_MASK & temp);
   3363			}
   3364		} else {
   3365			temp = RREG32(R300_MC_READ_CNTL_AB);
   3366			data = (R300_MEM_RBS_POSITION_A_MASK & temp);
   3367		}
   3368		if (rdev->family == CHIP_RV410 ||
   3369		    rdev->family == CHIP_R420 ||
   3370		    rdev->family == CHIP_R423)
   3371			trbs_ff = memtrbs_r4xx[data];
   3372		else
   3373			trbs_ff = memtrbs[data];
   3374		tcas_ff.full += trbs_ff.full;
   3375	}
   3376
   3377	sclk_eff_ff.full = sclk_ff.full;
   3378
   3379	if (rdev->flags & RADEON_IS_AGP) {
   3380		fixed20_12 agpmode_ff;
   3381		agpmode_ff.full = dfixed_const(radeon_agpmode);
   3382		temp_ff.full = dfixed_const_666(16);
   3383		sclk_eff_ff.full -= dfixed_mul(agpmode_ff, temp_ff);
   3384	}
   3385	/* TODO PCIE lanes may affect this - agpmode == 16?? */
   3386
   3387	if (ASIC_IS_R300(rdev)) {
   3388		sclk_delay_ff.full = dfixed_const(250);
   3389	} else {
   3390		if ((rdev->family == CHIP_RV100) ||
   3391		    rdev->flags & RADEON_IS_IGP) {
   3392			if (rdev->mc.vram_is_ddr)
   3393				sclk_delay_ff.full = dfixed_const(41);
   3394			else
   3395				sclk_delay_ff.full = dfixed_const(33);
   3396		} else {
   3397			if (rdev->mc.vram_width == 128)
   3398				sclk_delay_ff.full = dfixed_const(57);
   3399			else
   3400				sclk_delay_ff.full = dfixed_const(41);
   3401		}
   3402	}
   3403
   3404	mc_latency_sclk.full = dfixed_div(sclk_delay_ff, sclk_eff_ff);
   3405
   3406	if (rdev->mc.vram_is_ddr) {
   3407		if (rdev->mc.vram_width == 32) {
   3408			k1.full = dfixed_const(40);
   3409			c  = 3;
   3410		} else {
   3411			k1.full = dfixed_const(20);
   3412			c  = 1;
   3413		}
   3414	} else {
   3415		k1.full = dfixed_const(40);
   3416		c  = 3;
   3417	}
   3418
   3419	temp_ff.full = dfixed_const(2);
   3420	mc_latency_mclk.full = dfixed_mul(trcd_ff, temp_ff);
   3421	temp_ff.full = dfixed_const(c);
   3422	mc_latency_mclk.full += dfixed_mul(tcas_ff, temp_ff);
   3423	temp_ff.full = dfixed_const(4);
   3424	mc_latency_mclk.full += dfixed_mul(tras_ff, temp_ff);
   3425	mc_latency_mclk.full += dfixed_mul(trp_ff, temp_ff);
   3426	mc_latency_mclk.full += k1.full;
   3427
   3428	mc_latency_mclk.full = dfixed_div(mc_latency_mclk, mclk_ff);
   3429	mc_latency_mclk.full += dfixed_div(temp_ff, sclk_eff_ff);
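        	/*
        	 * In closed form the above is roughly:
        	 *   mc_latency_mclk = (2*trcd + c*tcas + 4*(tras + trp) + k1) / mclk
        	 *                     + 4 / sclk_eff
        	 * (temp_ff still holds dfixed_const(4) at this point).
        	 */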
   3430
   3431	/*
   3432	  HW cursor time assuming worst case of full size colour cursor.
   3433	*/
   3434	temp_ff.full = dfixed_const((2 * (cur_size - (rdev->mc.vram_is_ddr + 1))));
   3435	temp_ff.full += trcd_ff.full;
   3436	if (temp_ff.full < tras_ff.full)
   3437		temp_ff.full = tras_ff.full;
   3438	cur_latency_mclk.full = dfixed_div(temp_ff, mclk_ff);
   3439
   3440	temp_ff.full = dfixed_const(cur_size);
   3441	cur_latency_sclk.full = dfixed_div(temp_ff, sclk_eff_ff);
   3442	/*
   3443	  Find the total latency for the display data.
   3444	*/
   3445	disp_latency_overhead.full = dfixed_const(8);
   3446	disp_latency_overhead.full = dfixed_div(disp_latency_overhead, sclk_ff);
   3447	mc_latency_mclk.full += disp_latency_overhead.full + cur_latency_mclk.full;
   3448	mc_latency_sclk.full += disp_latency_overhead.full + cur_latency_sclk.full;
   3449
   3450	if (mc_latency_mclk.full > mc_latency_sclk.full)
   3451		disp_latency.full = mc_latency_mclk.full;
   3452	else
   3453		disp_latency.full = mc_latency_sclk.full;
   3454
   3455	/* setup Max GRPH_STOP_REQ default value */
   3456	if (ASIC_IS_RV100(rdev))
   3457		max_stop_req = 0x5c;
   3458	else
   3459		max_stop_req = 0x7c;
   3460
   3461	if (mode1) {
   3462		/*  CRTC1
   3463		    Set GRPH_BUFFER_CNTL register using h/w defined optimal values.
   3464		    GRPH_STOP_REQ <= MIN[ 0x7C, (CRTC_H_DISP + 1) * (bit depth) / 0x10 ]
   3465		*/
   3466		stop_req = mode1->hdisplay * pixel_bytes1 / 16;
   3467
   3468		if (stop_req > max_stop_req)
   3469			stop_req = max_stop_req;
   3470
   3471		/*
   3472		  Find the drain rate of the display buffer.
   3473		*/
   3474		temp_ff.full = dfixed_const((16/pixel_bytes1));
   3475		disp_drain_rate.full = dfixed_div(pix_clk, temp_ff);
   3476
   3477		/*
   3478		  Find the critical point of the display buffer.
   3479		*/
   3480		crit_point_ff.full = dfixed_mul(disp_drain_rate, disp_latency);
   3481		crit_point_ff.full += dfixed_const_half(0);
   3482
   3483		critical_point = dfixed_trunc(crit_point_ff);
   3484
   3485		if (rdev->disp_priority == 2) {
   3486			critical_point = 0;
   3487		}
   3488
   3489		/*
   3490		  The critical point should never be above max_stop_req-4.  Setting
   3491		  GRPH_CRITICAL_CNTL = 0 will thus force high priority all the time.
   3492		*/
   3493		if (max_stop_req - critical_point < 4)
   3494			critical_point = 0;
   3495
   3496		if (critical_point == 0 && mode2 && rdev->family == CHIP_R300) {
    3497			/* some R300 cards have a problem with this set to 0 when CRTC2 is enabled */
   3498			critical_point = 0x10;
   3499		}
   3500
   3501		temp = RREG32(RADEON_GRPH_BUFFER_CNTL);
   3502		temp &= ~(RADEON_GRPH_STOP_REQ_MASK);
   3503		temp |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
   3504		temp &= ~(RADEON_GRPH_START_REQ_MASK);
   3505		if ((rdev->family == CHIP_R350) &&
   3506		    (stop_req > 0x15)) {
   3507			stop_req -= 0x10;
   3508		}
   3509		temp |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
   3510		temp |= RADEON_GRPH_BUFFER_SIZE;
   3511		temp &= ~(RADEON_GRPH_CRITICAL_CNTL   |
   3512			  RADEON_GRPH_CRITICAL_AT_SOF |
   3513			  RADEON_GRPH_STOP_CNTL);
   3514		/*
   3515		  Write the result into the register.
   3516		*/
   3517		WREG32(RADEON_GRPH_BUFFER_CNTL, ((temp & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
   3518						       (critical_point << RADEON_GRPH_CRITICAL_POINT_SHIFT)));
   3519
   3520#if 0
   3521		if ((rdev->family == CHIP_RS400) ||
   3522		    (rdev->family == CHIP_RS480)) {
   3523			/* attempt to program RS400 disp regs correctly ??? */
   3524			temp = RREG32(RS400_DISP1_REG_CNTL);
   3525			temp &= ~(RS400_DISP1_START_REQ_LEVEL_MASK |
   3526				  RS400_DISP1_STOP_REQ_LEVEL_MASK);
   3527			WREG32(RS400_DISP1_REQ_CNTL1, (temp |
   3528						       (critical_point << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
   3529						       (critical_point << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
   3530			temp = RREG32(RS400_DMIF_MEM_CNTL1);
   3531			temp &= ~(RS400_DISP1_CRITICAL_POINT_START_MASK |
   3532				  RS400_DISP1_CRITICAL_POINT_STOP_MASK);
   3533			WREG32(RS400_DMIF_MEM_CNTL1, (temp |
   3534						      (critical_point << RS400_DISP1_CRITICAL_POINT_START_SHIFT) |
   3535						      (critical_point << RS400_DISP1_CRITICAL_POINT_STOP_SHIFT)));
   3536		}
   3537#endif
   3538
    3539		DRM_DEBUG_KMS("GRPH_BUFFER_CNTL set to %x\n",
    3541			  (unsigned int)RREG32(RADEON_GRPH_BUFFER_CNTL));
   3542	}
   3543
   3544	if (mode2) {
   3545		u32 grph2_cntl;
   3546		stop_req = mode2->hdisplay * pixel_bytes2 / 16;
   3547
   3548		if (stop_req > max_stop_req)
   3549			stop_req = max_stop_req;
   3550
   3551		/*
   3552		  Find the drain rate of the display buffer.
   3553		*/
   3554		temp_ff.full = dfixed_const((16/pixel_bytes2));
   3555		disp_drain_rate2.full = dfixed_div(pix_clk2, temp_ff);
   3556
   3557		grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL);
   3558		grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK);
   3559		grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT);
   3560		grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK);
   3561		if ((rdev->family == CHIP_R350) &&
   3562		    (stop_req > 0x15)) {
   3563			stop_req -= 0x10;
   3564		}
   3565		grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT);
   3566		grph2_cntl |= RADEON_GRPH_BUFFER_SIZE;
   3567		grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL   |
   3568			  RADEON_GRPH_CRITICAL_AT_SOF |
   3569			  RADEON_GRPH_STOP_CNTL);
   3570
   3571		if ((rdev->family == CHIP_RS100) ||
   3572		    (rdev->family == CHIP_RS200))
   3573			critical_point2 = 0;
   3574		else {
    3575			temp = (rdev->mc.vram_width * (rdev->mc.vram_is_ddr + 1)) / 128;
   3576			temp_ff.full = dfixed_const(temp);
   3577			temp_ff.full = dfixed_mul(mclk_ff, temp_ff);
   3578			if (sclk_ff.full < temp_ff.full)
   3579				temp_ff.full = sclk_ff.full;
   3580
   3581			read_return_rate.full = temp_ff.full;
   3582
   3583			if (mode1) {
   3584				temp_ff.full = read_return_rate.full - disp_drain_rate.full;
   3585				time_disp1_drop_priority.full = dfixed_div(crit_point_ff, temp_ff);
   3586			} else {
   3587				time_disp1_drop_priority.full = 0;
   3588			}
   3589			crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full;
   3590			crit_point_ff.full = dfixed_mul(crit_point_ff, disp_drain_rate2);
   3591			crit_point_ff.full += dfixed_const_half(0);
   3592
   3593			critical_point2 = dfixed_trunc(crit_point_ff);
   3594
   3595			if (rdev->disp_priority == 2) {
   3596				critical_point2 = 0;
   3597			}
   3598
   3599			if (max_stop_req - critical_point2 < 4)
   3600				critical_point2 = 0;
   3601
   3602		}
   3603
   3604		if (critical_point2 == 0 && rdev->family == CHIP_R300) {
    3605			/* some R300 cards have a problem with this set to 0 */
   3606			critical_point2 = 0x10;
   3607		}
   3608
   3609		WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) |
   3610						  (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT)));
   3611
   3612		if ((rdev->family == CHIP_RS400) ||
   3613		    (rdev->family == CHIP_RS480)) {
   3614#if 0
   3615			/* attempt to program RS400 disp2 regs correctly ??? */
   3616			temp = RREG32(RS400_DISP2_REQ_CNTL1);
   3617			temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK |
   3618				  RS400_DISP2_STOP_REQ_LEVEL_MASK);
   3619			WREG32(RS400_DISP2_REQ_CNTL1, (temp |
   3620						       (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) |
   3621						       (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT)));
   3622			temp = RREG32(RS400_DISP2_REQ_CNTL2);
   3623			temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK |
   3624				  RS400_DISP2_CRITICAL_POINT_STOP_MASK);
   3625			WREG32(RS400_DISP2_REQ_CNTL2, (temp |
   3626						       (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) |
   3627						       (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT)));
   3628#endif
   3629			WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC);
   3630			WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000);
   3631			WREG32(RS400_DMIF_MEM_CNTL1,  0x29CA71DC);
   3632			WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC);
   3633		}
   3634
    3635		DRM_DEBUG_KMS("GRPH2_BUFFER_CNTL set to %x\n",
   3636			  (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL));
   3637	}
   3638
    3639	/* Save the number of lines by which the line buffer leads the scanout */
    3640	if (mode1)
    3641		rdev->mode_info.crtcs[0]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode1->crtc_hdisplay);
    3642
    3643	if (mode2)
    3644		rdev->mode_info.crtcs[1]->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode2->crtc_hdisplay);
   3645}
   3646
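        /*
         * The ring test below is the usual scratch-register handshake: seed a
         * scratch register with a sentinel (0xCAFEDEAD), emit a type-0 packet
         * that writes 0xDEADBEEF to it, then poll until the CP has consumed
         * the packet or the timeout expires.
         */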
   3647int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
   3648{
   3649	uint32_t scratch;
   3650	uint32_t tmp = 0;
   3651	unsigned i;
   3652	int r;
   3653
   3654	r = radeon_scratch_get(rdev, &scratch);
   3655	if (r) {
   3656		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
   3657		return r;
   3658	}
   3659	WREG32(scratch, 0xCAFEDEAD);
   3660	r = radeon_ring_lock(rdev, ring, 2);
   3661	if (r) {
   3662		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
   3663		radeon_scratch_free(rdev, scratch);
   3664		return r;
   3665	}
   3666	radeon_ring_write(ring, PACKET0(scratch, 0));
   3667	radeon_ring_write(ring, 0xDEADBEEF);
   3668	radeon_ring_unlock_commit(rdev, ring, false);
   3669	for (i = 0; i < rdev->usec_timeout; i++) {
   3670		tmp = RREG32(scratch);
   3671		if (tmp == 0xDEADBEEF) {
   3672			break;
   3673		}
   3674		udelay(1);
   3675	}
   3676	if (i < rdev->usec_timeout) {
   3677		DRM_INFO("ring test succeeded in %d usecs\n", i);
   3678	} else {
   3679		DRM_ERROR("radeon: ring test failed (scratch(0x%04X)=0x%08X)\n",
   3680			  scratch, tmp);
   3681		r = -EINVAL;
   3682	}
   3683	radeon_scratch_free(rdev, scratch);
   3684	return r;
   3685}
   3686
   3687void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
   3688{
   3689	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
   3690
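        	/*
        	 * The saved read pointer must point past this submission: the
        	 * PACKET0 write below takes 2 dwords and the IB dispatch that
        	 * follows takes 3 more, hence wptr + 2 + 3.
        	 */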
   3691	if (ring->rptr_save_reg) {
   3692		u32 next_rptr = ring->wptr + 2 + 3;
   3693		radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
   3694		radeon_ring_write(ring, next_rptr);
   3695	}
   3696
   3697	radeon_ring_write(ring, PACKET0(RADEON_CP_IB_BASE, 1));
   3698	radeon_ring_write(ring, ib->gpu_addr);
   3699	radeon_ring_write(ring, ib->length_dw);
   3700}
   3701
   3702int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
   3703{
   3704	struct radeon_ib ib;
   3705	uint32_t scratch;
   3706	uint32_t tmp = 0;
   3707	unsigned i;
   3708	int r;
   3709
   3710	r = radeon_scratch_get(rdev, &scratch);
   3711	if (r) {
   3712		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
   3713		return r;
   3714	}
   3715	WREG32(scratch, 0xCAFEDEAD);
   3716	r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib, NULL, 256);
   3717	if (r) {
   3718		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
   3719		goto free_scratch;
   3720	}
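        	/*
        	 * The IB writes the sentinel to the scratch register and is
        	 * padded to 8 dwords with type-2 (PACKET2) no-op packets.
        	 */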
   3721	ib.ptr[0] = PACKET0(scratch, 0);
   3722	ib.ptr[1] = 0xDEADBEEF;
   3723	ib.ptr[2] = PACKET2(0);
   3724	ib.ptr[3] = PACKET2(0);
   3725	ib.ptr[4] = PACKET2(0);
   3726	ib.ptr[5] = PACKET2(0);
   3727	ib.ptr[6] = PACKET2(0);
   3728	ib.ptr[7] = PACKET2(0);
   3729	ib.length_dw = 8;
   3730	r = radeon_ib_schedule(rdev, &ib, NULL, false);
   3731	if (r) {
   3732		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
   3733		goto free_ib;
   3734	}
   3735	r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
   3736		RADEON_USEC_IB_TEST_TIMEOUT));
   3737	if (r < 0) {
   3738		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
   3739		goto free_ib;
   3740	} else if (r == 0) {
   3741		DRM_ERROR("radeon: fence wait timed out.\n");
   3742		r = -ETIMEDOUT;
   3743		goto free_ib;
   3744	}
   3745	r = 0;
   3746	for (i = 0; i < rdev->usec_timeout; i++) {
   3747		tmp = RREG32(scratch);
   3748		if (tmp == 0xDEADBEEF) {
   3749			break;
   3750		}
   3751		udelay(1);
   3752	}
   3753	if (i < rdev->usec_timeout) {
   3754		DRM_INFO("ib test succeeded in %u usecs\n", i);
   3755	} else {
   3756		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
   3757			  scratch, tmp);
   3758		r = -EINVAL;
   3759	}
   3760free_ib:
   3761	radeon_ib_free(rdev, &ib);
   3762free_scratch:
   3763	radeon_scratch_free(rdev, scratch);
   3764	return r;
   3765}
   3766
   3767void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save)
   3768{
    3769	/* Shut down the CP; we shouldn't need to, but better safe than sorry */
   3772	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
   3773	WREG32(R_000740_CP_CSQ_CNTL, 0);
   3774
    3775	/* Save a few CRTC registers */
   3776	save->GENMO_WT = RREG8(R_0003C2_GENMO_WT);
   3777	save->CRTC_EXT_CNTL = RREG32(R_000054_CRTC_EXT_CNTL);
   3778	save->CRTC_GEN_CNTL = RREG32(R_000050_CRTC_GEN_CNTL);
   3779	save->CUR_OFFSET = RREG32(R_000260_CUR_OFFSET);
   3780	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
   3781		save->CRTC2_GEN_CNTL = RREG32(R_0003F8_CRTC2_GEN_CNTL);
   3782		save->CUR2_OFFSET = RREG32(R_000360_CUR2_OFFSET);
   3783	}
   3784
   3785	/* Disable VGA aperture access */
   3786	WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & save->GENMO_WT);
   3787	/* Disable cursor, overlay, crtc */
   3788	WREG32(R_000260_CUR_OFFSET, save->CUR_OFFSET | S_000260_CUR_LOCK(1));
   3789	WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL |
   3790					S_000054_CRTC_DISPLAY_DIS(1));
   3791	WREG32(R_000050_CRTC_GEN_CNTL,
   3792			(C_000050_CRTC_CUR_EN & save->CRTC_GEN_CNTL) |
   3793			S_000050_CRTC_DISP_REQ_EN_B(1));
   3794	WREG32(R_000420_OV0_SCALE_CNTL,
   3795		C_000420_OV0_OVERLAY_EN & RREG32(R_000420_OV0_SCALE_CNTL));
   3796	WREG32(R_000260_CUR_OFFSET, C_000260_CUR_LOCK & save->CUR_OFFSET);
   3797	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
   3798		WREG32(R_000360_CUR2_OFFSET, save->CUR2_OFFSET |
   3799						S_000360_CUR2_LOCK(1));
   3800		WREG32(R_0003F8_CRTC2_GEN_CNTL,
   3801			(C_0003F8_CRTC2_CUR_EN & save->CRTC2_GEN_CNTL) |
   3802			S_0003F8_CRTC2_DISPLAY_DIS(1) |
   3803			S_0003F8_CRTC2_DISP_REQ_EN_B(1));
   3804		WREG32(R_000360_CUR2_OFFSET,
   3805			C_000360_CUR2_LOCK & save->CUR2_OFFSET);
   3806	}
   3807}
   3808
   3809void r100_mc_resume(struct radeon_device *rdev, struct r100_mc_save *save)
   3810{
   3811	/* Update base address for crtc */
   3812	WREG32(R_00023C_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
   3813	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
   3814		WREG32(R_00033C_CRTC2_DISPLAY_BASE_ADDR, rdev->mc.vram_start);
   3815	}
   3816	/* Restore CRTC registers */
   3817	WREG8(R_0003C2_GENMO_WT, save->GENMO_WT);
   3818	WREG32(R_000054_CRTC_EXT_CNTL, save->CRTC_EXT_CNTL);
   3819	WREG32(R_000050_CRTC_GEN_CNTL, save->CRTC_GEN_CNTL);
   3820	if (!(rdev->flags & RADEON_SINGLE_CRTC)) {
   3821		WREG32(R_0003F8_CRTC2_GEN_CNTL, save->CRTC2_GEN_CNTL);
   3822	}
   3823}
   3824
   3825void r100_vga_render_disable(struct radeon_device *rdev)
   3826{
   3827	u32 tmp;
   3828
   3829	tmp = RREG8(R_0003C2_GENMO_WT);
   3830	WREG8(R_0003C2_GENMO_WT, C_0003C2_VGA_RAM_EN & tmp);
   3831}
   3832
   3833static void r100_mc_program(struct radeon_device *rdev)
   3834{
   3835	struct r100_mc_save save;
   3836
    3837	/* Stop all MC clients */
   3838	r100_mc_stop(rdev, &save);
   3839	if (rdev->flags & RADEON_IS_AGP) {
   3840		WREG32(R_00014C_MC_AGP_LOCATION,
   3841			S_00014C_MC_AGP_START(rdev->mc.gtt_start >> 16) |
   3842			S_00014C_MC_AGP_TOP(rdev->mc.gtt_end >> 16));
   3843		WREG32(R_000170_AGP_BASE, lower_32_bits(rdev->mc.agp_base));
   3844		if (rdev->family > CHIP_RV200)
   3845			WREG32(R_00015C_AGP_BASE_2,
   3846				upper_32_bits(rdev->mc.agp_base) & 0xff);
   3847	} else {
   3848		WREG32(R_00014C_MC_AGP_LOCATION, 0x0FFFFFFF);
   3849		WREG32(R_000170_AGP_BASE, 0);
   3850		if (rdev->family > CHIP_RV200)
   3851			WREG32(R_00015C_AGP_BASE_2, 0);
   3852	}
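        	/*
        	 * The LOCATION registers pack an aperture's start and top
        	 * addresses in 64KB units, hence the >> 16 shifts; e.g. a
        	 * 128MB FB aperture starting at 0 encodes as start 0x0000,
        	 * top 0x07FF.
        	 */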
   3853	/* Wait for mc idle */
   3854	if (r100_mc_wait_for_idle(rdev))
   3855		dev_warn(rdev->dev, "Wait for MC idle timeout.\n");
    3856	/* Program the MC; this should be a 32-bit limited address space */
   3857	WREG32(R_000148_MC_FB_LOCATION,
   3858		S_000148_MC_FB_START(rdev->mc.vram_start >> 16) |
   3859		S_000148_MC_FB_TOP(rdev->mc.vram_end >> 16));
   3860	r100_mc_resume(rdev, &save);
   3861}
   3862
   3863static void r100_clock_startup(struct radeon_device *rdev)
   3864{
   3865	u32 tmp;
   3866
   3867	if (radeon_dynclks != -1 && radeon_dynclks)
   3868		radeon_legacy_set_clock_gating(rdev, 1);
    3869	/* We need to force some of the blocks on */
   3870	tmp = RREG32_PLL(R_00000D_SCLK_CNTL);
   3871	tmp |= S_00000D_FORCE_CP(1) | S_00000D_FORCE_VIP(1);
   3872	if ((rdev->family == CHIP_RV250) || (rdev->family == CHIP_RV280))
   3873		tmp |= S_00000D_FORCE_DISP1(1) | S_00000D_FORCE_DISP2(1);
   3874	WREG32_PLL(R_00000D_SCLK_CNTL, tmp);
   3875}
   3876
   3877static int r100_startup(struct radeon_device *rdev)
   3878{
   3879	int r;
   3880
   3881	/* set common regs */
   3882	r100_set_common_regs(rdev);
   3883	/* program mc */
   3884	r100_mc_program(rdev);
   3885	/* Resume clock */
   3886	r100_clock_startup(rdev);
    3887	/* Initialize GART (after TTM init so we can allocate memory through TTM) */
   3889	r100_enable_bm(rdev);
   3890	if (rdev->flags & RADEON_IS_PCI) {
   3891		r = r100_pci_gart_enable(rdev);
   3892		if (r)
   3893			return r;
   3894	}
   3895
   3896	/* allocate wb buffer */
   3897	r = radeon_wb_init(rdev);
   3898	if (r)
   3899		return r;
   3900
   3901	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
   3902	if (r) {
   3903		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
   3904		return r;
   3905	}
   3906
   3907	/* Enable IRQ */
   3908	if (!rdev->irq.installed) {
   3909		r = radeon_irq_kms_init(rdev);
   3910		if (r)
   3911			return r;
   3912	}
   3913
   3914	r100_irq_set(rdev);
   3915	rdev->config.r100.hdp_cntl = RREG32(RADEON_HOST_PATH_CNTL);
   3916	/* 1M ring buffer */
   3917	r = r100_cp_init(rdev, 1024 * 1024);
   3918	if (r) {
   3919		dev_err(rdev->dev, "failed initializing CP (%d).\n", r);
   3920		return r;
   3921	}
   3922
   3923	r = radeon_ib_pool_init(rdev);
   3924	if (r) {
   3925		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
   3926		return r;
   3927	}
   3928
   3929	return 0;
   3930}
   3931
   3932int r100_resume(struct radeon_device *rdev)
   3933{
   3934	int r;
   3935
    3936	/* Make sure the GART is disabled */
   3937	if (rdev->flags & RADEON_IS_PCI)
   3938		r100_pci_gart_disable(rdev);
   3939	/* Resume clock before doing reset */
   3940	r100_clock_startup(rdev);
    3941	/* Reset the GPU before posting, otherwise ATOM will enter an infinite loop */
   3942	if (radeon_asic_reset(rdev)) {
   3943		dev_warn(rdev->dev, "GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
   3944			RREG32(R_000E40_RBBM_STATUS),
   3945			RREG32(R_0007C0_CP_STAT));
   3946	}
   3947	/* post */
   3948	radeon_combios_asic_init(rdev->ddev);
   3949	/* Resume clock after posting */
   3950	r100_clock_startup(rdev);
   3951	/* Initialize surface registers */
   3952	radeon_surface_init(rdev);
   3953
   3954	rdev->accel_working = true;
   3955	r = r100_startup(rdev);
   3956	if (r) {
   3957		rdev->accel_working = false;
   3958	}
   3959	return r;
   3960}
   3961
   3962int r100_suspend(struct radeon_device *rdev)
   3963{
   3964	radeon_pm_suspend(rdev);
   3965	r100_cp_disable(rdev);
   3966	radeon_wb_disable(rdev);
   3967	r100_irq_disable(rdev);
   3968	if (rdev->flags & RADEON_IS_PCI)
   3969		r100_pci_gart_disable(rdev);
   3970	return 0;
   3971}
   3972
   3973void r100_fini(struct radeon_device *rdev)
   3974{
   3975	radeon_pm_fini(rdev);
   3976	r100_cp_fini(rdev);
   3977	radeon_wb_fini(rdev);
   3978	radeon_ib_pool_fini(rdev);
   3979	radeon_gem_fini(rdev);
   3980	if (rdev->flags & RADEON_IS_PCI)
   3981		r100_pci_gart_fini(rdev);
   3982	radeon_agp_fini(rdev);
   3983	radeon_irq_kms_fini(rdev);
   3984	radeon_fence_driver_fini(rdev);
   3985	radeon_bo_fini(rdev);
   3986	radeon_atombios_fini(rdev);
   3987	kfree(rdev->bios);
   3988	rdev->bios = NULL;
   3989}
   3990
   3991/*
   3992 * Due to how kexec works, it can leave the hw fully initialised when it
    3993 * boots the new kernel. However, doing our init sequence with the CP and
    3994 * WB stuff set up causes GPU hangs, on the RN50 at least. So at startup
   3995 * do some quick sanity checks and restore sane values to avoid this
   3996 * problem.
   3997 */
   3998void r100_restore_sanity(struct radeon_device *rdev)
   3999{
   4000	u32 tmp;
   4001
   4002	tmp = RREG32(RADEON_CP_CSQ_CNTL);
   4003	if (tmp) {
   4004		WREG32(RADEON_CP_CSQ_CNTL, 0);
   4005	}
   4006	tmp = RREG32(RADEON_CP_RB_CNTL);
   4007	if (tmp) {
   4008		WREG32(RADEON_CP_RB_CNTL, 0);
   4009	}
   4010	tmp = RREG32(RADEON_SCRATCH_UMSK);
   4011	if (tmp) {
   4012		WREG32(RADEON_SCRATCH_UMSK, 0);
   4013	}
   4014}
   4015
   4016int r100_init(struct radeon_device *rdev)
   4017{
   4018	int r;
   4019
   4020	/* Register debugfs file specific to this group of asics */
   4021	r100_debugfs_mc_info_init(rdev);
   4022	/* Disable VGA */
   4023	r100_vga_render_disable(rdev);
   4024	/* Initialize scratch registers */
   4025	radeon_scratch_init(rdev);
   4026	/* Initialize surface registers */
   4027	radeon_surface_init(rdev);
    4028	/* sanity check some registers to avoid hangs, e.g. after kexec */
   4029	r100_restore_sanity(rdev);
    4030	/* TODO: disabling VGA needs to use a VGA request */
    4031	/* BIOS */
   4032	if (!radeon_get_bios(rdev)) {
   4033		if (ASIC_IS_AVIVO(rdev))
   4034			return -EINVAL;
   4035	}
   4036	if (rdev->is_atom_bios) {
    4037		dev_err(rdev->dev, "Expecting combios for R100-family GPU\n");
   4038		return -EINVAL;
   4039	} else {
   4040		r = radeon_combios_init(rdev);
   4041		if (r)
   4042			return r;
   4043	}
    4044	/* Reset the GPU before posting, otherwise ATOM will enter an infinite loop */
   4045	if (radeon_asic_reset(rdev)) {
   4046		dev_warn(rdev->dev,
   4047			"GPU reset failed ! (0xE40=0x%08X, 0x7C0=0x%08X)\n",
   4048			RREG32(R_000E40_RBBM_STATUS),
   4049			RREG32(R_0007C0_CP_STAT));
   4050	}
   4051	/* check if cards are posted or not */
    4052	if (!radeon_boot_test_post_card(rdev))
   4053		return -EINVAL;
   4054	/* Set asic errata */
   4055	r100_errata(rdev);
   4056	/* Initialize clocks */
   4057	radeon_get_clock_info(rdev->ddev);
   4058	/* initialize AGP */
   4059	if (rdev->flags & RADEON_IS_AGP) {
   4060		r = radeon_agp_init(rdev);
   4061		if (r) {
   4062			radeon_agp_disable(rdev);
   4063		}
   4064	}
   4065	/* initialize VRAM */
   4066	r100_mc_init(rdev);
   4067	/* Fence driver */
   4068	radeon_fence_driver_init(rdev);
   4069	/* Memory manager */
   4070	r = radeon_bo_init(rdev);
   4071	if (r)
   4072		return r;
   4073	if (rdev->flags & RADEON_IS_PCI) {
   4074		r = r100_pci_gart_init(rdev);
   4075		if (r)
   4076			return r;
   4077	}
   4078	r100_set_safe_registers(rdev);
   4079
   4080	/* Initialize power management */
   4081	radeon_pm_init(rdev);
   4082
   4083	rdev->accel_working = true;
   4084	r = r100_startup(rdev);
   4085	if (r) {
    4086		/* Something went wrong with the accel init; stop accel */
   4087		dev_err(rdev->dev, "Disabling GPU acceleration\n");
   4088		r100_cp_fini(rdev);
   4089		radeon_wb_fini(rdev);
   4090		radeon_ib_pool_fini(rdev);
   4091		radeon_irq_kms_fini(rdev);
   4092		if (rdev->flags & RADEON_IS_PCI)
   4093			r100_pci_gart_fini(rdev);
   4094		rdev->accel_working = false;
   4095	}
   4096	return 0;
   4097}
   4098
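        /*
         * Registers beyond the directly-mapped MMIO window are reached via
         * the classic index/data pair: write the register offset to
         * RADEON_MM_INDEX, then access RADEON_MM_DATA. The spinlock keeps
         * the two-step sequence atomic against concurrent users.
         */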
   4099uint32_t r100_mm_rreg_slow(struct radeon_device *rdev, uint32_t reg)
   4100{
   4101	unsigned long flags;
   4102	uint32_t ret;
   4103
   4104	spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
   4105	writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
   4106	ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
   4107	spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
   4108	return ret;
   4109}
   4110
   4111void r100_mm_wreg_slow(struct radeon_device *rdev, uint32_t reg, uint32_t v)
   4112{
   4113	unsigned long flags;
   4114
   4115	spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
   4116	writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
   4117	writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
   4118	spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
   4119}
   4120
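        /*
         * Same index/data scheme, but through the PCI I/O BAR: offsets that
         * fit inside the I/O window are accessed directly, everything else
         * goes through RADEON_MM_INDEX/RADEON_MM_DATA.
         */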
   4121u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
   4122{
   4123	if (reg < rdev->rio_mem_size)
   4124		return ioread32(rdev->rio_mem + reg);
   4125	else {
   4126		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
   4127		return ioread32(rdev->rio_mem + RADEON_MM_DATA);
   4128	}
   4129}
   4130
   4131void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
   4132{
   4133	if (reg < rdev->rio_mem_size)
   4134		iowrite32(v, rdev->rio_mem + reg);
   4135	else {
   4136		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
   4137		iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
   4138	}
   4139}