cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

gf100.c (66777B)


      1/*
      2 * Copyright 2012 Red Hat Inc.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice shall be included in
     12 * all copies or substantial portions of the Software.
     13 *
     14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     20 * OTHER DEALINGS IN THE SOFTWARE.
     21 *
     22 * Authors: Ben Skeggs
     23 */
     24#include "gf100.h"
     25#include "ctxgf100.h"
     26#include "fuc/os.h"
     27
     28#include <core/client.h>
     29#include <core/firmware.h>
     30#include <core/option.h>
     31#include <subdev/acr.h>
     32#include <subdev/fb.h>
     33#include <subdev/mc.h>
     34#include <subdev/pmu.h>
     35#include <subdev/therm.h>
     36#include <subdev/timer.h>
     37#include <engine/fifo.h>
     38
     39#include <nvif/class.h>
     40#include <nvif/cl9097.h>
     41#include <nvif/if900d.h>
     42#include <nvif/unpack.h>
     43
     44/*******************************************************************************
     45 * Zero Bandwidth Clear
     46 ******************************************************************************/
     47
     48static void
     49gf100_gr_zbc_clear_color(struct gf100_gr *gr, int zbc)
     50{
     51	struct nvkm_device *device = gr->base.engine.subdev.device;
     52	if (gr->zbc_color[zbc].format) {
     53		nvkm_wr32(device, 0x405804, gr->zbc_color[zbc].ds[0]);
     54		nvkm_wr32(device, 0x405808, gr->zbc_color[zbc].ds[1]);
     55		nvkm_wr32(device, 0x40580c, gr->zbc_color[zbc].ds[2]);
     56		nvkm_wr32(device, 0x405810, gr->zbc_color[zbc].ds[3]);
     57	}
     58	nvkm_wr32(device, 0x405814, gr->zbc_color[zbc].format);
     59	nvkm_wr32(device, 0x405820, zbc);
     60	nvkm_wr32(device, 0x405824, 0x00000004); /* TRIGGER | WRITE | COLOR */
     61}
     62
     63static int
     64gf100_gr_zbc_color_get(struct gf100_gr *gr, int format,
     65		       const u32 ds[4], const u32 l2[4])
     66{
     67	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
     68	int zbc = -ENOSPC, i;
     69
     70	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
     71		if (gr->zbc_color[i].format) {
     72			if (gr->zbc_color[i].format != format)
     73				continue;
     74			if (memcmp(gr->zbc_color[i].ds, ds, sizeof(
     75				   gr->zbc_color[i].ds)))
     76				continue;
     77			if (memcmp(gr->zbc_color[i].l2, l2, sizeof(
     78				   gr->zbc_color[i].l2))) {
     79				WARN_ON(1);
     80				return -EINVAL;
     81			}
     82			return i;
     83		} else {
     84			zbc = (zbc < 0) ? i : zbc;
     85		}
     86	}
     87
     88	if (zbc < 0)
     89		return zbc;
     90
     91	memcpy(gr->zbc_color[zbc].ds, ds, sizeof(gr->zbc_color[zbc].ds));
     92	memcpy(gr->zbc_color[zbc].l2, l2, sizeof(gr->zbc_color[zbc].l2));
     93	gr->zbc_color[zbc].format = format;
     94	nvkm_ltc_zbc_color_get(ltc, zbc, l2);
     95	gr->func->zbc->clear_color(gr, zbc);
     96	return zbc;
     97}
     98
     99static void
    100gf100_gr_zbc_clear_depth(struct gf100_gr *gr, int zbc)
    101{
    102	struct nvkm_device *device = gr->base.engine.subdev.device;
    103	if (gr->zbc_depth[zbc].format)
    104		nvkm_wr32(device, 0x405818, gr->zbc_depth[zbc].ds);
    105	nvkm_wr32(device, 0x40581c, gr->zbc_depth[zbc].format);
    106	nvkm_wr32(device, 0x405820, zbc);
    107	nvkm_wr32(device, 0x405824, 0x00000005); /* TRIGGER | WRITE | DEPTH */
    108}
    109
    110static int
    111gf100_gr_zbc_depth_get(struct gf100_gr *gr, int format,
    112		       const u32 ds, const u32 l2)
    113{
    114	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
    115	int zbc = -ENOSPC, i;
    116
    117	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
    118		if (gr->zbc_depth[i].format) {
    119			if (gr->zbc_depth[i].format != format)
    120				continue;
    121			if (gr->zbc_depth[i].ds != ds)
    122				continue;
    123			if (gr->zbc_depth[i].l2 != l2) {
    124				WARN_ON(1);
    125				return -EINVAL;
    126			}
    127			return i;
    128		} else {
    129			zbc = (zbc < 0) ? i : zbc;
    130		}
    131	}
    132
    133	if (zbc < 0)
    134		return zbc;
    135
    136	gr->zbc_depth[zbc].format = format;
    137	gr->zbc_depth[zbc].ds = ds;
    138	gr->zbc_depth[zbc].l2 = l2;
    139	nvkm_ltc_zbc_depth_get(ltc, zbc, l2);
    140	gr->func->zbc->clear_depth(gr, zbc);
    141	return zbc;
    142}
    143
    144const struct gf100_gr_func_zbc
    145gf100_gr_zbc = {
    146	.clear_color = gf100_gr_zbc_clear_color,
    147	.clear_depth = gf100_gr_zbc_clear_depth,
    148};
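
       /*
        * Note on the ZBC tables above (a summary of the code, not new
        * behaviour): each slot in [ltc->zbc_min, ltc->zbc_max] holds a
        * (format, ds, l2) tuple.  The *_get() helpers first try to reuse a
        * slot whose format and ds values already match (an l2 mismatch on an
        * otherwise identical entry is flagged as a bug), fall back to the
        * first free slot, and return -ENOSPC once the table is full.
        */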
    149
    150/*******************************************************************************
    151 * Graphics object classes
    152 ******************************************************************************/
    153#define gf100_gr_object(p) container_of((p), struct gf100_gr_object, object)
    154
    155struct gf100_gr_object {
    156	struct nvkm_object object;
    157	struct gf100_gr_chan *chan;
    158};
    159
    160static int
    161gf100_fermi_mthd_zbc_color(struct nvkm_object *object, void *data, u32 size)
    162{
    163	struct gf100_gr *gr = gf100_gr(nvkm_gr(object->engine));
    164	union {
    165		struct fermi_a_zbc_color_v0 v0;
    166	} *args = data;
    167	int ret = -ENOSYS;
    168
    169	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
    170		switch (args->v0.format) {
    171		case FERMI_A_ZBC_COLOR_V0_FMT_ZERO:
    172		case FERMI_A_ZBC_COLOR_V0_FMT_UNORM_ONE:
    173		case FERMI_A_ZBC_COLOR_V0_FMT_RF32_GF32_BF32_AF32:
    174		case FERMI_A_ZBC_COLOR_V0_FMT_R16_G16_B16_A16:
    175		case FERMI_A_ZBC_COLOR_V0_FMT_RN16_GN16_BN16_AN16:
    176		case FERMI_A_ZBC_COLOR_V0_FMT_RS16_GS16_BS16_AS16:
    177		case FERMI_A_ZBC_COLOR_V0_FMT_RU16_GU16_BU16_AU16:
    178		case FERMI_A_ZBC_COLOR_V0_FMT_RF16_GF16_BF16_AF16:
    179		case FERMI_A_ZBC_COLOR_V0_FMT_A8R8G8B8:
    180		case FERMI_A_ZBC_COLOR_V0_FMT_A8RL8GL8BL8:
    181		case FERMI_A_ZBC_COLOR_V0_FMT_A2B10G10R10:
    182		case FERMI_A_ZBC_COLOR_V0_FMT_AU2BU10GU10RU10:
    183		case FERMI_A_ZBC_COLOR_V0_FMT_A8B8G8R8:
    184		case FERMI_A_ZBC_COLOR_V0_FMT_A8BL8GL8RL8:
    185		case FERMI_A_ZBC_COLOR_V0_FMT_AN8BN8GN8RN8:
    186		case FERMI_A_ZBC_COLOR_V0_FMT_AS8BS8GS8RS8:
    187		case FERMI_A_ZBC_COLOR_V0_FMT_AU8BU8GU8RU8:
    188		case FERMI_A_ZBC_COLOR_V0_FMT_A2R10G10B10:
    189		case FERMI_A_ZBC_COLOR_V0_FMT_BF10GF11RF11:
    190			ret = gf100_gr_zbc_color_get(gr, args->v0.format,
    191							   args->v0.ds,
    192							   args->v0.l2);
    193			if (ret >= 0) {
    194				args->v0.index = ret;
    195				return 0;
    196			}
    197			break;
    198		default:
    199			return -EINVAL;
    200		}
    201	}
    202
    203	return ret;
    204}
    205
    206static int
    207gf100_fermi_mthd_zbc_depth(struct nvkm_object *object, void *data, u32 size)
    208{
    209	struct gf100_gr *gr = gf100_gr(nvkm_gr(object->engine));
    210	union {
    211		struct fermi_a_zbc_depth_v0 v0;
    212	} *args = data;
    213	int ret = -ENOSYS;
    214
    215	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
    216		switch (args->v0.format) {
    217		case FERMI_A_ZBC_DEPTH_V0_FMT_FP32:
    218			ret = gf100_gr_zbc_depth_get(gr, args->v0.format,
    219							   args->v0.ds,
    220							   args->v0.l2);
    221			return (ret >= 0) ? 0 : -ENOSPC;
    222		default:
    223			return -EINVAL;
    224		}
    225	}
    226
    227	return ret;
    228}
    229
    230static int
    231gf100_fermi_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size)
    232{
    233	nvif_ioctl(object, "fermi mthd %08x\n", mthd);
    234	switch (mthd) {
    235	case FERMI_A_ZBC_COLOR:
    236		return gf100_fermi_mthd_zbc_color(object, data, size);
    237	case FERMI_A_ZBC_DEPTH:
    238		return gf100_fermi_mthd_zbc_depth(object, data, size);
    239	default:
    240		break;
    241	}
    242	return -EINVAL;
    243}
    244
    245const struct nvkm_object_func
    246gf100_fermi = {
    247	.mthd = gf100_fermi_mthd,
    248};
    249
    250static void
    251gf100_gr_mthd_set_shader_exceptions(struct nvkm_device *device, u32 data)
    252{
    253	nvkm_wr32(device, 0x419e44, data ? 0xffffffff : 0x00000000);
    254	nvkm_wr32(device, 0x419e4c, data ? 0xffffffff : 0x00000000);
    255}
    256
    257static bool
    258gf100_gr_mthd_sw(struct nvkm_device *device, u16 class, u32 mthd, u32 data)
    259{
    260	switch (class & 0x00ff) {
    261	case 0x97:
    262	case 0xc0:
    263		switch (mthd) {
    264		case 0x1528:
    265			gf100_gr_mthd_set_shader_exceptions(device, data);
    266			return true;
    267		default:
    268			break;
    269		}
    270		break;
    271	default:
    272		break;
    273	}
    274	return false;
    275}
    276
    277static const struct nvkm_object_func
    278gf100_gr_object_func = {
    279};
    280
    281static int
    282gf100_gr_object_new(const struct nvkm_oclass *oclass, void *data, u32 size,
    283		    struct nvkm_object **pobject)
    284{
    285	struct gf100_gr_chan *chan = gf100_gr_chan(oclass->parent);
    286	struct gf100_gr_object *object;
    287
    288	if (!(object = kzalloc(sizeof(*object), GFP_KERNEL)))
    289		return -ENOMEM;
    290	*pobject = &object->object;
    291
    292	nvkm_object_ctor(oclass->base.func ? oclass->base.func :
    293			 &gf100_gr_object_func, oclass, &object->object);
    294	object->chan = chan;
    295	return 0;
    296}
    297
    298static int
    299gf100_gr_object_get(struct nvkm_gr *base, int index, struct nvkm_sclass *sclass)
    300{
    301	struct gf100_gr *gr = gf100_gr(base);
    302	int c = 0;
    303
    304	while (gr->func->sclass[c].oclass) {
    305		if (c++ == index) {
    306			*sclass = gr->func->sclass[index];
    307			sclass->ctor = gf100_gr_object_new;
    308			return index;
    309		}
    310	}
    311
    312	return c;
    313}
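
       /*
        * Enumeration contract, as implemented above: for a valid index the
        * matching sclass is copied out with its constructor pointed at
        * gf100_gr_object_new() and the index is returned; once the index
        * runs past the end of the list, the total class count is returned
        * instead.
        */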
    314
    315/*******************************************************************************
    316 * PGRAPH context
    317 ******************************************************************************/
    318
    319static int
    320gf100_gr_chan_bind(struct nvkm_object *object, struct nvkm_gpuobj *parent,
    321		   int align, struct nvkm_gpuobj **pgpuobj)
    322{
    323	struct gf100_gr_chan *chan = gf100_gr_chan(object);
    324	struct gf100_gr *gr = chan->gr;
    325	int ret, i;
    326
    327	ret = nvkm_gpuobj_new(gr->base.engine.subdev.device, gr->size,
    328			      align, false, parent, pgpuobj);
    329	if (ret)
    330		return ret;
    331
    332	nvkm_kmap(*pgpuobj);
    333	for (i = 0; i < gr->size; i += 4)
    334		nvkm_wo32(*pgpuobj, i, gr->data[i / 4]);
    335
    336	if (!gr->firmware) {
    337		nvkm_wo32(*pgpuobj, 0x00, chan->mmio_nr / 2);
    338		nvkm_wo32(*pgpuobj, 0x04, chan->mmio_vma->addr >> 8);
    339	} else {
    340		nvkm_wo32(*pgpuobj, 0xf4, 0);
    341		nvkm_wo32(*pgpuobj, 0xf8, 0);
    342		nvkm_wo32(*pgpuobj, 0x10, chan->mmio_nr / 2);
    343		nvkm_wo32(*pgpuobj, 0x14, lower_32_bits(chan->mmio_vma->addr));
    344		nvkm_wo32(*pgpuobj, 0x18, upper_32_bits(chan->mmio_vma->addr));
    345		nvkm_wo32(*pgpuobj, 0x1c, 1);
    346		nvkm_wo32(*pgpuobj, 0x20, 0);
    347		nvkm_wo32(*pgpuobj, 0x28, 0);
    348		nvkm_wo32(*pgpuobj, 0x2c, 0);
    349	}
    350	nvkm_done(*pgpuobj);
    351	return 0;
    352}
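
       /*
        * Context-image words written above, for reference: without external
        * firmware, the mmio-list pair count lives at 0x00 and the list's
        * VMM address (shifted right by 8) at 0x04.  With firmware, the
        * count lives at 0x10 and the unshifted address is split across
        * 0x14/0x18 (lo/hi), with several fuc-private words zeroed.
        */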
    353
    354static void *
    355gf100_gr_chan_dtor(struct nvkm_object *object)
    356{
    357	struct gf100_gr_chan *chan = gf100_gr_chan(object);
    358	int i;
    359
    360	for (i = 0; i < ARRAY_SIZE(chan->data); i++) {
    361		nvkm_vmm_put(chan->vmm, &chan->data[i].vma);
    362		nvkm_memory_unref(&chan->data[i].mem);
    363	}
    364
    365	nvkm_vmm_put(chan->vmm, &chan->mmio_vma);
    366	nvkm_memory_unref(&chan->mmio);
    367	nvkm_vmm_unref(&chan->vmm);
    368	return chan;
    369}
    370
    371static const struct nvkm_object_func
    372gf100_gr_chan = {
    373	.dtor = gf100_gr_chan_dtor,
    374	.bind = gf100_gr_chan_bind,
    375};
    376
    377static int
    378gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
    379		  const struct nvkm_oclass *oclass,
    380		  struct nvkm_object **pobject)
    381{
    382	struct gf100_gr *gr = gf100_gr(base);
    383	struct gf100_gr_data *data = gr->mmio_data;
    384	struct gf100_gr_mmio *mmio = gr->mmio_list;
    385	struct gf100_gr_chan *chan;
    386	struct gf100_vmm_map_v0 args = { .priv = 1 };
    387	struct nvkm_device *device = gr->base.engine.subdev.device;
    388	int ret, i;
    389
    390	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
    391		return -ENOMEM;
    392	nvkm_object_ctor(&gf100_gr_chan, oclass, &chan->object);
    393	chan->gr = gr;
    394	chan->vmm = nvkm_vmm_ref(fifoch->vmm);
    395	*pobject = &chan->object;
    396
    397	/* allocate memory for a "mmio list" buffer that's used by the HUB
    398	 * fuc to modify some per-context register settings on first load
    399	 * of the context.
    400	 */
    401	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x100,
    402			      false, &chan->mmio);
    403	if (ret)
    404		return ret;
    405
    406	ret = nvkm_vmm_get(fifoch->vmm, 12, 0x1000, &chan->mmio_vma);
    407	if (ret)
    408		return ret;
    409
    410	ret = nvkm_memory_map(chan->mmio, 0, fifoch->vmm,
    411			      chan->mmio_vma, &args, sizeof(args));
    412	if (ret)
    413		return ret;
    414
    415	/* allocate buffers referenced by mmio list */
    416	for (i = 0; data->size && i < ARRAY_SIZE(gr->mmio_data); i++) {
    417		ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
    418				      data->size, data->align, false,
    419				      &chan->data[i].mem);
    420		if (ret)
    421			return ret;
    422
    423		ret = nvkm_vmm_get(fifoch->vmm, 12,
    424				   nvkm_memory_size(chan->data[i].mem),
    425				   &chan->data[i].vma);
    426		if (ret)
    427			return ret;
    428
    429		args.priv = data->priv;
    430
    431		ret = nvkm_memory_map(chan->data[i].mem, 0, chan->vmm,
    432				      chan->data[i].vma, &args, sizeof(args));
    433		if (ret)
    434			return ret;
    435
    436		data++;
    437	}
    438
    439	/* finally, fill in the mmio list and point the context at it */
    440	nvkm_kmap(chan->mmio);
    441	for (i = 0; mmio->addr && i < ARRAY_SIZE(gr->mmio_list); i++) {
    442		u32 addr = mmio->addr;
    443		u32 data = mmio->data;
    444
    445		if (mmio->buffer >= 0) {
    446			u64 info = chan->data[mmio->buffer].vma->addr;
    447			data |= info >> mmio->shift;
    448		}
    449
    450		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, addr);
    451		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data);
    452		mmio++;
    453	}
    454	nvkm_done(chan->mmio);
    455	return 0;
    456}
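
       /*
        * Minimal sketch, for illustration only: the list filled above is a
        * flat array of { register, value } u32 pairs that the HUB fuc
        * replays on first context load.  A debug dump of it might look like
        * the helper below (hypothetical; not part of the driver).
        */
       static void __maybe_unused
       gf100_gr_chan_mmio_dump(struct gf100_gr_chan *chan)
       {
       	struct nvkm_subdev *subdev = &chan->gr->base.engine.subdev;
       	int i;
       
       	nvkm_kmap(chan->mmio);
       	for (i = 0; i < chan->mmio_nr; i += 2) {
       		u32 addr = nvkm_ro32(chan->mmio, (i + 0) * 4);
       		u32 data = nvkm_ro32(chan->mmio, (i + 1) * 4);
       
       		nvkm_debug(subdev, "mmio[%d]: %06x <- %08x\n",
       			   i / 2, addr, data);
       	}
       	nvkm_done(chan->mmio);
       }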
    457
    458/*******************************************************************************
    459 * PGRAPH register lists
    460 ******************************************************************************/
    461
    462const struct gf100_gr_init
    463gf100_gr_init_main_0[] = {
    464	{ 0x400080,   1, 0x04, 0x003083c2 },
    465	{ 0x400088,   1, 0x04, 0x00006fe7 },
    466	{ 0x40008c,   1, 0x04, 0x00000000 },
    467	{ 0x400090,   1, 0x04, 0x00000030 },
    468	{ 0x40013c,   1, 0x04, 0x013901f7 },
    469	{ 0x400140,   1, 0x04, 0x00000100 },
    470	{ 0x400144,   1, 0x04, 0x00000000 },
    471	{ 0x400148,   1, 0x04, 0x00000110 },
    472	{ 0x400138,   1, 0x04, 0x00000000 },
    473	{ 0x400130,   2, 0x04, 0x00000000 },
    474	{ 0x400124,   1, 0x04, 0x00000002 },
    475	{}
    476};
    477
    478const struct gf100_gr_init
    479gf100_gr_init_fe_0[] = {
    480	{ 0x40415c,   1, 0x04, 0x00000000 },
    481	{ 0x404170,   1, 0x04, 0x00000000 },
    482	{}
    483};
    484
    485const struct gf100_gr_init
    486gf100_gr_init_pri_0[] = {
    487	{ 0x404488,   2, 0x04, 0x00000000 },
    488	{}
    489};
    490
    491const struct gf100_gr_init
    492gf100_gr_init_rstr2d_0[] = {
    493	{ 0x407808,   1, 0x04, 0x00000000 },
    494	{}
    495};
    496
    497const struct gf100_gr_init
    498gf100_gr_init_pd_0[] = {
    499	{ 0x406024,   1, 0x04, 0x00000000 },
    500	{}
    501};
    502
    503const struct gf100_gr_init
    504gf100_gr_init_ds_0[] = {
    505	{ 0x405844,   1, 0x04, 0x00ffffff },
    506	{ 0x405850,   1, 0x04, 0x00000000 },
    507	{ 0x405908,   1, 0x04, 0x00000000 },
    508	{}
    509};
    510
    511const struct gf100_gr_init
    512gf100_gr_init_scc_0[] = {
    513	{ 0x40803c,   1, 0x04, 0x00000000 },
    514	{}
    515};
    516
    517const struct gf100_gr_init
    518gf100_gr_init_prop_0[] = {
    519	{ 0x4184a0,   1, 0x04, 0x00000000 },
    520	{}
    521};
    522
    523const struct gf100_gr_init
    524gf100_gr_init_gpc_unk_0[] = {
    525	{ 0x418604,   1, 0x04, 0x00000000 },
    526	{ 0x418680,   1, 0x04, 0x00000000 },
    527	{ 0x418714,   1, 0x04, 0x80000000 },
    528	{ 0x418384,   1, 0x04, 0x00000000 },
    529	{}
    530};
    531
    532const struct gf100_gr_init
    533gf100_gr_init_setup_0[] = {
    534	{ 0x418814,   3, 0x04, 0x00000000 },
    535	{}
    536};
    537
    538const struct gf100_gr_init
    539gf100_gr_init_crstr_0[] = {
    540	{ 0x418b04,   1, 0x04, 0x00000000 },
    541	{}
    542};
    543
    544const struct gf100_gr_init
    545gf100_gr_init_setup_1[] = {
    546	{ 0x4188c8,   1, 0x04, 0x80000000 },
    547	{ 0x4188cc,   1, 0x04, 0x00000000 },
    548	{ 0x4188d0,   1, 0x04, 0x00010000 },
    549	{ 0x4188d4,   1, 0x04, 0x00000001 },
    550	{}
    551};
    552
    553const struct gf100_gr_init
    554gf100_gr_init_zcull_0[] = {
    555	{ 0x418910,   1, 0x04, 0x00010001 },
    556	{ 0x418914,   1, 0x04, 0x00000301 },
    557	{ 0x418918,   1, 0x04, 0x00800000 },
    558	{ 0x418980,   1, 0x04, 0x77777770 },
    559	{ 0x418984,   3, 0x04, 0x77777777 },
    560	{}
    561};
    562
    563const struct gf100_gr_init
    564gf100_gr_init_gpm_0[] = {
    565	{ 0x418c04,   1, 0x04, 0x00000000 },
    566	{ 0x418c88,   1, 0x04, 0x00000000 },
    567	{}
    568};
    569
    570const struct gf100_gr_init
    571gf100_gr_init_gpc_unk_1[] = {
    572	{ 0x418d00,   1, 0x04, 0x00000000 },
    573	{ 0x418f08,   1, 0x04, 0x00000000 },
    574	{ 0x418e00,   1, 0x04, 0x00000050 },
    575	{ 0x418e08,   1, 0x04, 0x00000000 },
    576	{}
    577};
    578
    579const struct gf100_gr_init
    580gf100_gr_init_gcc_0[] = {
    581	{ 0x41900c,   1, 0x04, 0x00000000 },
    582	{ 0x419018,   1, 0x04, 0x00000000 },
    583	{}
    584};
    585
    586const struct gf100_gr_init
    587gf100_gr_init_tpccs_0[] = {
    588	{ 0x419d08,   2, 0x04, 0x00000000 },
    589	{ 0x419d10,   1, 0x04, 0x00000014 },
    590	{}
    591};
    592
    593const struct gf100_gr_init
    594gf100_gr_init_tex_0[] = {
    595	{ 0x419ab0,   1, 0x04, 0x00000000 },
    596	{ 0x419ab8,   1, 0x04, 0x000000e7 },
    597	{ 0x419abc,   2, 0x04, 0x00000000 },
    598	{}
    599};
    600
    601const struct gf100_gr_init
    602gf100_gr_init_pe_0[] = {
    603	{ 0x41980c,   3, 0x04, 0x00000000 },
    604	{ 0x419844,   1, 0x04, 0x00000000 },
    605	{ 0x41984c,   1, 0x04, 0x00005bc5 },
    606	{ 0x419850,   4, 0x04, 0x00000000 },
    607	{}
    608};
    609
    610const struct gf100_gr_init
    611gf100_gr_init_l1c_0[] = {
    612	{ 0x419c98,   1, 0x04, 0x00000000 },
    613	{ 0x419ca8,   1, 0x04, 0x80000000 },
    614	{ 0x419cb4,   1, 0x04, 0x00000000 },
    615	{ 0x419cb8,   1, 0x04, 0x00008bf4 },
    616	{ 0x419cbc,   1, 0x04, 0x28137606 },
    617	{ 0x419cc0,   2, 0x04, 0x00000000 },
    618	{}
    619};
    620
    621const struct gf100_gr_init
    622gf100_gr_init_wwdx_0[] = {
    623	{ 0x419bd4,   1, 0x04, 0x00800000 },
    624	{ 0x419bdc,   1, 0x04, 0x00000000 },
    625	{}
    626};
    627
    628const struct gf100_gr_init
    629gf100_gr_init_tpccs_1[] = {
    630	{ 0x419d2c,   1, 0x04, 0x00000000 },
    631	{}
    632};
    633
    634const struct gf100_gr_init
    635gf100_gr_init_mpc_0[] = {
    636	{ 0x419c0c,   1, 0x04, 0x00000000 },
    637	{}
    638};
    639
    640static const struct gf100_gr_init
    641gf100_gr_init_sm_0[] = {
    642	{ 0x419e00,   1, 0x04, 0x00000000 },
    643	{ 0x419ea0,   1, 0x04, 0x00000000 },
    644	{ 0x419ea4,   1, 0x04, 0x00000100 },
    645	{ 0x419ea8,   1, 0x04, 0x00001100 },
    646	{ 0x419eac,   1, 0x04, 0x11100702 },
    647	{ 0x419eb0,   1, 0x04, 0x00000003 },
    648	{ 0x419eb4,   4, 0x04, 0x00000000 },
    649	{ 0x419ec8,   1, 0x04, 0x06060618 },
    650	{ 0x419ed0,   1, 0x04, 0x0eff0e38 },
    651	{ 0x419ed4,   1, 0x04, 0x011104f1 },
    652	{ 0x419edc,   1, 0x04, 0x00000000 },
    653	{ 0x419f00,   1, 0x04, 0x00000000 },
    654	{ 0x419f2c,   1, 0x04, 0x00000000 },
    655	{}
    656};
    657
    658const struct gf100_gr_init
    659gf100_gr_init_be_0[] = {
    660	{ 0x40880c,   1, 0x04, 0x00000000 },
    661	{ 0x408910,   9, 0x04, 0x00000000 },
    662	{ 0x408950,   1, 0x04, 0x00000000 },
    663	{ 0x408954,   1, 0x04, 0x0000ffff },
    664	{ 0x408984,   1, 0x04, 0x00000000 },
    665	{ 0x408988,   1, 0x04, 0x08040201 },
    666	{ 0x40898c,   1, 0x04, 0x80402010 },
    667	{}
    668};
    669
    670const struct gf100_gr_init
    671gf100_gr_init_fe_1[] = {
    672	{ 0x4040f0,   1, 0x04, 0x00000000 },
    673	{}
    674};
    675
    676const struct gf100_gr_init
    677gf100_gr_init_pe_1[] = {
    678	{ 0x419880,   1, 0x04, 0x00000002 },
    679	{}
    680};
    681
    682static const struct gf100_gr_pack
    683gf100_gr_pack_mmio[] = {
    684	{ gf100_gr_init_main_0 },
    685	{ gf100_gr_init_fe_0 },
    686	{ gf100_gr_init_pri_0 },
    687	{ gf100_gr_init_rstr2d_0 },
    688	{ gf100_gr_init_pd_0 },
    689	{ gf100_gr_init_ds_0 },
    690	{ gf100_gr_init_scc_0 },
    691	{ gf100_gr_init_prop_0 },
    692	{ gf100_gr_init_gpc_unk_0 },
    693	{ gf100_gr_init_setup_0 },
    694	{ gf100_gr_init_crstr_0 },
    695	{ gf100_gr_init_setup_1 },
    696	{ gf100_gr_init_zcull_0 },
    697	{ gf100_gr_init_gpm_0 },
    698	{ gf100_gr_init_gpc_unk_1 },
    699	{ gf100_gr_init_gcc_0 },
    700	{ gf100_gr_init_tpccs_0 },
    701	{ gf100_gr_init_tex_0 },
    702	{ gf100_gr_init_pe_0 },
    703	{ gf100_gr_init_l1c_0 },
    704	{ gf100_gr_init_wwdx_0 },
    705	{ gf100_gr_init_tpccs_1 },
    706	{ gf100_gr_init_mpc_0 },
    707	{ gf100_gr_init_sm_0 },
    708	{ gf100_gr_init_be_0 },
    709	{ gf100_gr_init_fe_1 },
    710	{ gf100_gr_init_pe_1 },
    711	{}
    712};
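
       /*
        * How these tables are consumed (worked example, derived from the
        * loop in gf100_gr_mmio() below): an init entry is { addr, count,
        * pitch, data }, so { 0x418984, 3, 0x04, 0x77777777 } expands into
        * three writes of 0x77777777, at 0x418984, 0x418988 and 0x41898c.
        */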
    713
    714/*******************************************************************************
    715 * PGRAPH engine/subdev functions
    716 ******************************************************************************/
    717
    718static u32
    719gf100_gr_ctxsw_inst(struct nvkm_gr *gr)
    720{
    721	return nvkm_rd32(gr->engine.subdev.device, 0x409b00);
    722}
    723
    724static int
    725gf100_gr_fecs_ctrl_ctxsw(struct gf100_gr *gr, u32 mthd)
    726{
    727	struct nvkm_device *device = gr->base.engine.subdev.device;
    728
    729	nvkm_wr32(device, 0x409804, 0xffffffff);
    730	nvkm_wr32(device, 0x409840, 0xffffffff);
    731	nvkm_wr32(device, 0x409500, 0xffffffff);
    732	nvkm_wr32(device, 0x409504, mthd);
    733	nvkm_msec(device, 2000,
    734		u32 stat = nvkm_rd32(device, 0x409804);
    735		if (stat == 0x00000002)
    736			return -EIO;
    737		if (stat == 0x00000001)
    738			return 0;
    739	);
    740
    741	return -ETIMEDOUT;
    742}
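
       /*
        * FECS method protocol, as used above and by the helpers below (a
        * description of this code, not of documented hardware): the
        * argument is written to 0x409500 and the method id to 0x409504,
        * then a mailbox (0x409804 here, 0x409800 for most other methods)
        * is polled for up to 2ms, with 0x00000001 signalling success and
        * 0x00000002 failure.
        */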
    743
    744static int
    745gf100_gr_fecs_start_ctxsw(struct nvkm_gr *base)
    746{
    747	struct gf100_gr *gr = gf100_gr(base);
    748	int ret = 0;
    749
    750	mutex_lock(&gr->fecs.mutex);
    751	if (!--gr->fecs.disable) {
    752		if (WARN_ON(ret = gf100_gr_fecs_ctrl_ctxsw(gr, 0x39)))
    753			gr->fecs.disable++;
    754	}
    755	mutex_unlock(&gr->fecs.mutex);
    756	return ret;
    757}
    758
    759static int
    760gf100_gr_fecs_stop_ctxsw(struct nvkm_gr *base)
    761{
    762	struct gf100_gr *gr = gf100_gr(base);
    763	int ret = 0;
    764
    765	mutex_lock(&gr->fecs.mutex);
    766	if (!gr->fecs.disable++) {
    767		if (WARN_ON(ret = gf100_gr_fecs_ctrl_ctxsw(gr, 0x38)))
    768			gr->fecs.disable--;
    769	}
    770	mutex_unlock(&gr->fecs.mutex);
    771	return ret;
    772}
    773
    774int
    775gf100_gr_fecs_bind_pointer(struct gf100_gr *gr, u32 inst)
    776{
    777	struct nvkm_device *device = gr->base.engine.subdev.device;
    778
    779	nvkm_wr32(device, 0x409840, 0x00000030);
    780	nvkm_wr32(device, 0x409500, inst);
    781	nvkm_wr32(device, 0x409504, 0x00000003);
    782	nvkm_msec(device, 2000,
    783		u32 stat = nvkm_rd32(device, 0x409800);
    784		if (stat & 0x00000020)
    785			return -EIO;
    786		if (stat & 0x00000010)
    787			return 0;
    788	);
    789
    790	return -ETIMEDOUT;
    791}
    792
    793static int
    794gf100_gr_fecs_set_reglist_virtual_address(struct gf100_gr *gr, u64 addr)
    795{
    796	struct nvkm_device *device = gr->base.engine.subdev.device;
    797
    798	nvkm_wr32(device, 0x409810, addr >> 8);
    799	nvkm_wr32(device, 0x409800, 0x00000000);
    800	nvkm_wr32(device, 0x409500, 0x00000001);
    801	nvkm_wr32(device, 0x409504, 0x00000032);
    802	nvkm_msec(device, 2000,
    803		if (nvkm_rd32(device, 0x409800) == 0x00000001)
    804			return 0;
    805	);
    806
    807	return -ETIMEDOUT;
    808}
    809
    810static int
    811gf100_gr_fecs_set_reglist_bind_instance(struct gf100_gr *gr, u32 inst)
    812{
    813	struct nvkm_device *device = gr->base.engine.subdev.device;
    814
    815	nvkm_wr32(device, 0x409810, inst);
    816	nvkm_wr32(device, 0x409800, 0x00000000);
    817	nvkm_wr32(device, 0x409500, 0x00000001);
    818	nvkm_wr32(device, 0x409504, 0x00000031);
    819	nvkm_msec(device, 2000,
    820		if (nvkm_rd32(device, 0x409800) == 0x00000001)
    821			return 0;
    822	);
    823
    824	return -ETIMEDOUT;
    825}
    826
    827static int
    828gf100_gr_fecs_discover_reglist_image_size(struct gf100_gr *gr, u32 *psize)
    829{
    830	struct nvkm_device *device = gr->base.engine.subdev.device;
    831
    832	nvkm_wr32(device, 0x409800, 0x00000000);
    833	nvkm_wr32(device, 0x409500, 0x00000001);
    834	nvkm_wr32(device, 0x409504, 0x00000030);
    835	nvkm_msec(device, 2000,
    836		if ((*psize = nvkm_rd32(device, 0x409800)))
    837			return 0;
    838	);
    839
    840	return -ETIMEDOUT;
    841}
    842
    843static int
    844gf100_gr_fecs_elpg_bind(struct gf100_gr *gr)
    845{
    846	u32 size;
    847	int ret;
    848
    849	ret = gf100_gr_fecs_discover_reglist_image_size(gr, &size);
    850	if (ret)
    851		return ret;
    852
    853	/*XXX: We need to allocate + map the above into PMU's inst block,
    854	 *     which means we probably need a proper PMU before we even
    855	 *     bother.
    856	 */
    857
    858	ret = gf100_gr_fecs_set_reglist_bind_instance(gr, 0);
    859	if (ret)
    860		return ret;
    861
    862	return gf100_gr_fecs_set_reglist_virtual_address(gr, 0);
    863}
    864
    865static int
    866gf100_gr_fecs_discover_pm_image_size(struct gf100_gr *gr, u32 *psize)
    867{
    868	struct nvkm_device *device = gr->base.engine.subdev.device;
    869
    870	nvkm_wr32(device, 0x409840, 0xffffffff);
    871	nvkm_wr32(device, 0x409500, 0x00000000);
    872	nvkm_wr32(device, 0x409504, 0x00000025);
    873	nvkm_msec(device, 2000,
    874		if ((*psize = nvkm_rd32(device, 0x409800)))
    875			return 0;
    876	);
    877
    878	return -ETIMEDOUT;
    879}
    880
    881static int
    882gf100_gr_fecs_discover_zcull_image_size(struct gf100_gr *gr, u32 *psize)
    883{
    884	struct nvkm_device *device = gr->base.engine.subdev.device;
    885
    886	nvkm_wr32(device, 0x409840, 0xffffffff);
    887	nvkm_wr32(device, 0x409500, 0x00000000);
    888	nvkm_wr32(device, 0x409504, 0x00000016);
    889	nvkm_msec(device, 2000,
    890		if ((*psize = nvkm_rd32(device, 0x409800)))
    891			return 0;
    892	);
    893
    894	return -ETIMEDOUT;
    895}
    896
    897static int
    898gf100_gr_fecs_discover_image_size(struct gf100_gr *gr, u32 *psize)
    899{
    900	struct nvkm_device *device = gr->base.engine.subdev.device;
    901
    902	nvkm_wr32(device, 0x409840, 0xffffffff);
    903	nvkm_wr32(device, 0x409500, 0x00000000);
    904	nvkm_wr32(device, 0x409504, 0x00000010);
    905	nvkm_msec(device, 2000,
    906		if ((*psize = nvkm_rd32(device, 0x409800)))
    907			return 0;
    908	);
    909
    910	return -ETIMEDOUT;
    911}
    912
    913static void
    914gf100_gr_fecs_set_watchdog_timeout(struct gf100_gr *gr, u32 timeout)
    915{
    916	struct nvkm_device *device = gr->base.engine.subdev.device;
    917
    918	nvkm_wr32(device, 0x409840, 0xffffffff);
    919	nvkm_wr32(device, 0x409500, timeout);
    920	nvkm_wr32(device, 0x409504, 0x00000021);
    921}
    922
    923static bool
    924gf100_gr_chsw_load(struct nvkm_gr *base)
    925{
    926	struct gf100_gr *gr = gf100_gr(base);
    927	if (!gr->firmware) {
    928		u32 trace = nvkm_rd32(gr->base.engine.subdev.device, 0x40981c);
    929		if (trace & 0x00000040)
    930			return true;
    931	} else {
    932		u32 mthd = nvkm_rd32(gr->base.engine.subdev.device, 0x409808);
    933		if (mthd & 0x00080000)
    934			return true;
    935	}
    936	return false;
    937}
    938
    939int
    940gf100_gr_rops(struct gf100_gr *gr)
    941{
    942	struct nvkm_device *device = gr->base.engine.subdev.device;
    943	return (nvkm_rd32(device, 0x409604) & 0x001f0000) >> 16;
    944}
    945
    946void
    947gf100_gr_zbc_init(struct gf100_gr *gr)
    948{
    949	const u32  zero[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    950			      0x00000000, 0x00000000, 0x00000000, 0x00000000 };
    951	const u32   one[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
    952			      0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff };
    953	const u32 f32_0[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    954			      0x00000000, 0x00000000, 0x00000000, 0x00000000 };
    955	const u32 f32_1[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
    956			      0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 };
    957	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
    958	int index, c = ltc->zbc_min, d = ltc->zbc_min, s = ltc->zbc_min;
    959
    960	if (!gr->zbc_color[0].format) {
    961		gf100_gr_zbc_color_get(gr, 1,  & zero[0],   &zero[4]); c++;
    962		gf100_gr_zbc_color_get(gr, 2,  &  one[0],    &one[4]); c++;
    963		gf100_gr_zbc_color_get(gr, 4,  &f32_0[0],  &f32_0[4]); c++;
    964		gf100_gr_zbc_color_get(gr, 4,  &f32_1[0],  &f32_1[4]); c++;
    965		gf100_gr_zbc_depth_get(gr, 1, 0x00000000, 0x00000000); d++;
    966		gf100_gr_zbc_depth_get(gr, 1, 0x3f800000, 0x3f800000); d++;
    967		if (gr->func->zbc->stencil_get) {
    968			gr->func->zbc->stencil_get(gr, 1, 0x00, 0x00); s++;
    969			gr->func->zbc->stencil_get(gr, 1, 0x01, 0x01); s++;
    970			gr->func->zbc->stencil_get(gr, 1, 0xff, 0xff); s++;
    971		}
    972	}
    973
    974	for (index = c; index <= ltc->zbc_max; index++)
    975		gr->func->zbc->clear_color(gr, index);
    976	for (index = d; index <= ltc->zbc_max; index++)
    977		gr->func->zbc->clear_depth(gr, index);
    978
    979	if (gr->func->zbc->clear_stencil) {
    980		for (index = s; index <= ltc->zbc_max; index++)
    981			gr->func->zbc->clear_stencil(gr, index);
    982	}
    983}
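
       /*
        * For reference: 0x3f800000 is the IEEE-754 encoding of 1.0f, so the
        * tables above seed the usual defaults (an assumption from the raw
        * values): transparent black and opaque white for colour, plus 0.0
        * and 1.0 for depth.
        */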
    984
    985/**
    986 * gf100_gr_wait_idle - wait until GR goes idle.  GR is considered idle
    987 * if it is disabled by the MC (0x200) register, or if GR is not busy
    988 * and a context switch is not in progress.
    989 */
    990int
    991gf100_gr_wait_idle(struct gf100_gr *gr)
    992{
    993	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
    994	struct nvkm_device *device = subdev->device;
    995	unsigned long end_jiffies = jiffies + msecs_to_jiffies(2000);
    996	bool gr_enabled, ctxsw_active, gr_busy;
    997
    998	do {
    999		/*
   1000		 * required to make sure FIFO_ENGINE_STATUS (0x2640) is
   1001		 * up-to-date
   1002		 */
   1003		nvkm_rd32(device, 0x400700);
   1004
   1005		gr_enabled = nvkm_rd32(device, 0x200) & 0x1000;
   1006		ctxsw_active = nvkm_rd32(device, 0x2640) & 0x8000;
   1007		gr_busy = nvkm_rd32(device, 0x40060c) & 0x1;
   1008
   1009		if (!gr_enabled || (!gr_busy && !ctxsw_active))
   1010			return 0;
   1011	} while (time_before(jiffies, end_jiffies));
   1012
   1013	nvkm_error(subdev,
   1014		   "wait for idle timeout (en: %d, ctxsw: %d, busy: %d)\n",
   1015		   gr_enabled, ctxsw_active, gr_busy);
   1016	return -EAGAIN;
   1017}
   1018
   1019void
   1020gf100_gr_mmio(struct gf100_gr *gr, const struct gf100_gr_pack *p)
   1021{
   1022	struct nvkm_device *device = gr->base.engine.subdev.device;
   1023	const struct gf100_gr_pack *pack;
   1024	const struct gf100_gr_init *init;
   1025
   1026	pack_for_each_init(init, pack, p) {
   1027		u32 next = init->addr + init->count * init->pitch;
   1028		u32 addr = init->addr;
   1029		while (addr < next) {
   1030			nvkm_wr32(device, addr, init->data);
   1031			addr += init->pitch;
   1032		}
   1033	}
   1034}
   1035
   1036void
   1037gf100_gr_icmd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
   1038{
   1039	struct nvkm_device *device = gr->base.engine.subdev.device;
   1040	const struct gf100_gr_pack *pack;
   1041	const struct gf100_gr_init *init;
   1042	u32 data = 0;
   1043
   1044	nvkm_wr32(device, 0x400208, 0x80000000);
   1045
   1046	pack_for_each_init(init, pack, p) {
   1047		u32 next = init->addr + init->count * init->pitch;
   1048		u32 addr = init->addr;
   1049
   1050		if ((pack == p && init == p->init) || data != init->data) {
   1051			nvkm_wr32(device, 0x400204, init->data);
   1052			data = init->data;
   1053		}
   1054
   1055		while (addr < next) {
   1056			nvkm_wr32(device, 0x400200, addr);
   1057			/*
   1058			 * Wait for GR to go idle after submitting a
   1059			 * GO_IDLE bundle.
   1060			 */
   1061			if ((addr & 0xffff) == 0xe100)
   1062				gf100_gr_wait_idle(gr);
   1063			nvkm_msec(device, 2000,
   1064				if (!(nvkm_rd32(device, 0x400700) & 0x00000004))
   1065					break;
   1066			);
   1067			addr += init->pitch;
   1068		}
   1069	}
   1070
   1071	nvkm_wr32(device, 0x400208, 0x00000000);
   1072}
   1073
   1074void
   1075gf100_gr_mthd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
   1076{
   1077	struct nvkm_device *device = gr->base.engine.subdev.device;
   1078	const struct gf100_gr_pack *pack;
   1079	const struct gf100_gr_init *init;
   1080	u32 data = 0;
   1081
   1082	pack_for_each_init(init, pack, p) {
   1083		u32 ctrl = 0x80000000 | pack->type;
   1084		u32 next = init->addr + init->count * init->pitch;
   1085		u32 addr = init->addr;
   1086
   1087		if ((pack == p && init == p->init) || data != init->data) {
   1088			nvkm_wr32(device, 0x40448c, init->data);
   1089			data = init->data;
   1090		}
   1091
   1092		while (addr < next) {
   1093			nvkm_wr32(device, 0x404488, ctrl | (addr << 14));
   1094			addr += init->pitch;
   1095		}
   1096	}
   1097}
   1098
   1099u64
   1100gf100_gr_units(struct nvkm_gr *base)
   1101{
   1102	struct gf100_gr *gr = gf100_gr(base);
   1103	u64 cfg;
   1104
   1105	cfg  = (u32)gr->gpc_nr;
   1106	cfg |= (u32)gr->tpc_total << 8;
   1107	cfg |= (u64)gr->rop_nr << 32;
   1108
   1109	return cfg;
   1110}
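
       /*
        * Sketch only: the packing is defined by gf100_gr_units() above;
        * this hypothetical helper (not part of the driver) shows how the
        * value decodes, assuming each field fits the slot it is given.
        */
       static inline void __maybe_unused
       gf100_gr_units_unpack(u64 cfg, u32 *gpcs, u32 *tpcs, u32 *rops)
       {
       	*gpcs = cfg & 0x000000ff;		/* gr->gpc_nr */
       	*tpcs = (cfg >> 8) & 0x00ffffff;	/* gr->tpc_total */
       	*rops = cfg >> 32;			/* gr->rop_nr */
       }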
   1111
   1112static const struct nvkm_bitfield gf100_dispatch_error[] = {
   1113	{ 0x00000001, "INJECTED_BUNDLE_ERROR" },
   1114	{ 0x00000002, "CLASS_SUBCH_MISMATCH" },
   1115	{ 0x00000004, "SUBCHSW_DURING_NOTIFY" },
   1116	{}
   1117};
   1118
   1119static const struct nvkm_bitfield gf100_m2mf_error[] = {
   1120	{ 0x00000001, "PUSH_TOO_MUCH_DATA" },
   1121	{ 0x00000002, "PUSH_NOT_ENOUGH_DATA" },
   1122	{}
   1123};
   1124
   1125static const struct nvkm_bitfield gf100_unk6_error[] = {
   1126	{ 0x00000001, "TEMP_TOO_SMALL" },
   1127	{}
   1128};
   1129
   1130static const struct nvkm_bitfield gf100_ccache_error[] = {
   1131	{ 0x00000001, "INTR" },
   1132	{ 0x00000002, "LDCONST_OOB" },
   1133	{}
   1134};
   1135
   1136static const struct nvkm_bitfield gf100_macro_error[] = {
   1137	{ 0x00000001, "TOO_FEW_PARAMS" },
   1138	{ 0x00000002, "TOO_MANY_PARAMS" },
   1139	{ 0x00000004, "ILLEGAL_OPCODE" },
   1140	{ 0x00000008, "DOUBLE_BRANCH" },
   1141	{ 0x00000010, "WATCHDOG" },
   1142	{}
   1143};
   1144
   1145static const struct nvkm_bitfield gk104_sked_error[] = {
   1146	{ 0x00000040, "CTA_RESUME" },
   1147	{ 0x00000080, "CONSTANT_BUFFER_SIZE" },
   1148	{ 0x00000200, "LOCAL_MEMORY_SIZE_POS" },
   1149	{ 0x00000400, "LOCAL_MEMORY_SIZE_NEG" },
   1150	{ 0x00000800, "WARP_CSTACK_SIZE" },
   1151	{ 0x00001000, "TOTAL_TEMP_SIZE" },
   1152	{ 0x00002000, "REGISTER_COUNT" },
   1153	{ 0x00040000, "TOTAL_THREADS" },
   1154	{ 0x00100000, "PROGRAM_OFFSET" },
   1155	{ 0x00200000, "SHARED_MEMORY_SIZE" },
   1156	{ 0x00800000, "CTA_THREAD_DIMENSION_ZERO" },
   1157	{ 0x01000000, "MEMORY_WINDOW_OVERLAP" },
   1158	{ 0x02000000, "SHARED_CONFIG_TOO_SMALL" },
   1159	{ 0x04000000, "TOTAL_REGISTER_COUNT" },
   1160	{}
   1161};
   1162
   1163static const struct nvkm_bitfield gf100_gpc_rop_error[] = {
   1164	{ 0x00000002, "RT_PITCH_OVERRUN" },
   1165	{ 0x00000010, "RT_WIDTH_OVERRUN" },
   1166	{ 0x00000020, "RT_HEIGHT_OVERRUN" },
   1167	{ 0x00000080, "ZETA_STORAGE_TYPE_MISMATCH" },
   1168	{ 0x00000100, "RT_STORAGE_TYPE_MISMATCH" },
   1169	{ 0x00000400, "RT_LINEAR_MISMATCH" },
   1170	{}
   1171};
   1172
   1173static void
   1174gf100_gr_trap_gpc_rop(struct gf100_gr *gr, int gpc)
   1175{
   1176	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
   1177	struct nvkm_device *device = subdev->device;
   1178	char error[128];
   1179	u32 trap[4];
   1180
   1181	trap[0] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0420)) & 0x3fffffff;
   1182	trap[1] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0434));
   1183	trap[2] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0438));
   1184	trap[3] = nvkm_rd32(device, GPC_UNIT(gpc, 0x043c));
   1185
   1186	nvkm_snprintbf(error, sizeof(error), gf100_gpc_rop_error, trap[0]);
   1187
   1188	nvkm_error(subdev, "GPC%d/PROP trap: %08x [%s] x = %u, y = %u, "
   1189			   "format = %x, storage type = %x\n",
   1190		   gpc, trap[0], error, trap[1] & 0xffff, trap[1] >> 16,
   1191		   (trap[2] >> 8) & 0x3f, trap[3] & 0xff);
   1192	nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
   1193}
   1194
   1195const struct nvkm_enum gf100_mp_warp_error[] = {
   1196	{ 0x01, "STACK_ERROR" },
   1197	{ 0x02, "API_STACK_ERROR" },
   1198	{ 0x03, "RET_EMPTY_STACK_ERROR" },
   1199	{ 0x04, "PC_WRAP" },
   1200	{ 0x05, "MISALIGNED_PC" },
   1201	{ 0x06, "PC_OVERFLOW" },
   1202	{ 0x07, "MISALIGNED_IMMC_ADDR" },
   1203	{ 0x08, "MISALIGNED_REG" },
   1204	{ 0x09, "ILLEGAL_INSTR_ENCODING" },
   1205	{ 0x0a, "ILLEGAL_SPH_INSTR_COMBO" },
   1206	{ 0x0b, "ILLEGAL_INSTR_PARAM" },
   1207	{ 0x0c, "INVALID_CONST_ADDR" },
   1208	{ 0x0d, "OOR_REG" },
   1209	{ 0x0e, "OOR_ADDR" },
   1210	{ 0x0f, "MISALIGNED_ADDR" },
   1211	{ 0x10, "INVALID_ADDR_SPACE" },
   1212	{ 0x11, "ILLEGAL_INSTR_PARAM2" },
   1213	{ 0x12, "INVALID_CONST_ADDR_LDC" },
   1214	{ 0x13, "GEOMETRY_SM_ERROR" },
   1215	{ 0x14, "DIVERGENT" },
   1216	{ 0x15, "WARP_EXIT" },
   1217	{}
   1218};
   1219
   1220const struct nvkm_bitfield gf100_mp_global_error[] = {
   1221	{ 0x00000001, "SM_TO_SM_FAULT" },
   1222	{ 0x00000002, "L1_ERROR" },
   1223	{ 0x00000004, "MULTIPLE_WARP_ERRORS" },
   1224	{ 0x00000008, "PHYSICAL_STACK_OVERFLOW" },
   1225	{ 0x00000010, "BPT_INT" },
   1226	{ 0x00000020, "BPT_PAUSE" },
   1227	{ 0x00000040, "SINGLE_STEP_COMPLETE" },
   1228	{ 0x20000000, "ECC_SEC_ERROR" },
   1229	{ 0x40000000, "ECC_DED_ERROR" },
   1230	{ 0x80000000, "TIMEOUT" },
   1231	{}
   1232};
   1233
   1234void
   1235gf100_gr_trap_mp(struct gf100_gr *gr, int gpc, int tpc)
   1236{
   1237	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
   1238	struct nvkm_device *device = subdev->device;
   1239	u32 werr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x648));
   1240	u32 gerr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x650));
   1241	const struct nvkm_enum *warp;
   1242	char glob[128];
   1243
   1244	nvkm_snprintbf(glob, sizeof(glob), gf100_mp_global_error, gerr);
   1245	warp = nvkm_enum_find(gf100_mp_warp_error, werr & 0xffff);
   1246
   1247	nvkm_error(subdev, "GPC%i/TPC%i/MP trap: "
   1248			   "global %08x [%s] warp %04x [%s]\n",
   1249		   gpc, tpc, gerr, glob, werr, warp ? warp->name : "");
   1250
   1251	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x648), 0x00000000);
   1252	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x650), gerr);
   1253}
   1254
   1255static void
   1256gf100_gr_trap_tpc(struct gf100_gr *gr, int gpc, int tpc)
   1257{
   1258	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
   1259	struct nvkm_device *device = subdev->device;
   1260	u32 stat = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0508));
   1261
   1262	if (stat & 0x00000001) {
   1263		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0224));
   1264		nvkm_error(subdev, "GPC%d/TPC%d/TEX: %08x\n", gpc, tpc, trap);
   1265		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0224), 0xc0000000);
   1266		stat &= ~0x00000001;
   1267	}
   1268
   1269	if (stat & 0x00000002) {
   1270		gr->func->trap_mp(gr, gpc, tpc);
   1271		stat &= ~0x00000002;
   1272	}
   1273
   1274	if (stat & 0x00000004) {
   1275		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0084));
   1276		nvkm_error(subdev, "GPC%d/TPC%d/POLY: %08x\n", gpc, tpc, trap);
   1277		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0084), 0xc0000000);
   1278		stat &= ~0x00000004;
   1279	}
   1280
   1281	if (stat & 0x00000008) {
   1282		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x048c));
   1283		nvkm_error(subdev, "GPC%d/TPC%d/L1C: %08x\n", gpc, tpc, trap);
   1284		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x048c), 0xc0000000);
   1285		stat &= ~0x00000008;
   1286	}
   1287
   1288	if (stat & 0x00000010) {
   1289		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0430));
   1290		nvkm_error(subdev, "GPC%d/TPC%d/MPC: %08x\n", gpc, tpc, trap);
   1291		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0430), 0xc0000000);
   1292		stat &= ~0x00000010;
   1293	}
   1294
   1295	if (stat) {
   1296		nvkm_error(subdev, "GPC%d/TPC%d/%08x: unknown\n", gpc, tpc, stat);
   1297	}
   1298}
   1299
   1300static void
   1301gf100_gr_trap_gpc(struct gf100_gr *gr, int gpc)
   1302{
   1303	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
   1304	struct nvkm_device *device = subdev->device;
   1305	u32 stat = nvkm_rd32(device, GPC_UNIT(gpc, 0x2c90));
   1306	int tpc;
   1307
   1308	if (stat & 0x00000001) {
   1309		gf100_gr_trap_gpc_rop(gr, gpc);
   1310		stat &= ~0x00000001;
   1311	}
   1312
   1313	if (stat & 0x00000002) {
   1314		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0900));
   1315		nvkm_error(subdev, "GPC%d/ZCULL: %08x\n", gpc, trap);
   1316		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
   1317		stat &= ~0x00000002;
   1318	}
   1319
   1320	if (stat & 0x00000004) {
   1321		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x1028));
   1322		nvkm_error(subdev, "GPC%d/CCACHE: %08x\n", gpc, trap);
   1323		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
   1324		stat &= ~0x00000004;
   1325	}
   1326
   1327	if (stat & 0x00000008) {
   1328		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0824));
   1329		nvkm_error(subdev, "GPC%d/ESETUP: %08x\n", gpc, trap);
   1330		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
   1331		stat &= ~0x00000008;
   1332	}
   1333
   1334	for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
   1335		u32 mask = 0x00010000 << tpc;
   1336		if (stat & mask) {
   1337			gf100_gr_trap_tpc(gr, gpc, tpc);
   1338			nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), mask);
   1339			stat &= ~mask;
   1340		}
   1341	}
   1342
   1343	if (stat) {
   1344		nvkm_error(subdev, "GPC%d/%08x: unknown\n", gpc, stat);
   1345	}
   1346}
   1347
   1348static void
   1349gf100_gr_trap_intr(struct gf100_gr *gr)
   1350{
   1351	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
   1352	struct nvkm_device *device = subdev->device;
   1353	char error[128];
   1354	u32 trap = nvkm_rd32(device, 0x400108);
   1355	int rop, gpc;
   1356
   1357	if (trap & 0x00000001) {
   1358		u32 stat = nvkm_rd32(device, 0x404000);
   1359
   1360		nvkm_snprintbf(error, sizeof(error), gf100_dispatch_error,
   1361			       stat & 0x3fffffff);
   1362		nvkm_error(subdev, "DISPATCH %08x [%s]\n", stat, error);
   1363		nvkm_wr32(device, 0x404000, 0xc0000000);
   1364		nvkm_wr32(device, 0x400108, 0x00000001);
   1365		trap &= ~0x00000001;
   1366	}
   1367
   1368	if (trap & 0x00000002) {
   1369		u32 stat = nvkm_rd32(device, 0x404600);
   1370
   1371		nvkm_snprintbf(error, sizeof(error), gf100_m2mf_error,
   1372			       stat & 0x3fffffff);
   1373		nvkm_error(subdev, "M2MF %08x [%s]\n", stat, error);
   1374
   1375		nvkm_wr32(device, 0x404600, 0xc0000000);
   1376		nvkm_wr32(device, 0x400108, 0x00000002);
   1377		trap &= ~0x00000002;
   1378	}
   1379
   1380	if (trap & 0x00000008) {
   1381		u32 stat = nvkm_rd32(device, 0x408030);
   1382
   1383		nvkm_snprintbf(error, sizeof(error), gf100_ccache_error,
   1384			       stat & 0x3fffffff);
   1385		nvkm_error(subdev, "CCACHE %08x [%s]\n", stat, error);
   1386		nvkm_wr32(device, 0x408030, 0xc0000000);
   1387		nvkm_wr32(device, 0x400108, 0x00000008);
   1388		trap &= ~0x00000008;
   1389	}
   1390
   1391	if (trap & 0x00000010) {
   1392		u32 stat = nvkm_rd32(device, 0x405840);
   1393		nvkm_error(subdev, "SHADER %08x, sph: 0x%06x, stage: 0x%02x\n",
   1394			   stat, stat & 0xffffff, (stat >> 24) & 0x3f);
   1395		nvkm_wr32(device, 0x405840, 0xc0000000);
   1396		nvkm_wr32(device, 0x400108, 0x00000010);
   1397		trap &= ~0x00000010;
   1398	}
   1399
   1400	if (trap & 0x00000040) {
   1401		u32 stat = nvkm_rd32(device, 0x40601c);
   1402
   1403		nvkm_snprintbf(error, sizeof(error), gf100_unk6_error,
   1404			       stat & 0x3fffffff);
   1405		nvkm_error(subdev, "UNK6 %08x [%s]\n", stat, error);
   1406
   1407		nvkm_wr32(device, 0x40601c, 0xc0000000);
   1408		nvkm_wr32(device, 0x400108, 0x00000040);
   1409		trap &= ~0x00000040;
   1410	}
   1411
   1412	if (trap & 0x00000080) {
   1413		u32 stat = nvkm_rd32(device, 0x404490);
   1414		u32 pc = nvkm_rd32(device, 0x404494);
   1415		u32 op = nvkm_rd32(device, 0x40449c);
   1416
   1417		nvkm_snprintbf(error, sizeof(error), gf100_macro_error,
   1418			       stat & 0x1fffffff);
   1419		nvkm_error(subdev, "MACRO %08x [%s], pc: 0x%03x%s, op: 0x%08x\n",
   1420			   stat, error, pc & 0x7ff,
   1421			   (pc & 0x10000000) ? "" : " (invalid)",
   1422			   op);
   1423
   1424		nvkm_wr32(device, 0x404490, 0xc0000000);
   1425		nvkm_wr32(device, 0x400108, 0x00000080);
   1426		trap &= ~0x00000080;
   1427	}
   1428
   1429	if (trap & 0x00000100) {
   1430		u32 stat = nvkm_rd32(device, 0x407020) & 0x3fffffff;
   1431
   1432		nvkm_snprintbf(error, sizeof(error), gk104_sked_error, stat);
   1433		nvkm_error(subdev, "SKED: %08x [%s]\n", stat, error);
   1434
   1435		if (stat)
   1436			nvkm_wr32(device, 0x407020, 0x40000000);
   1437		nvkm_wr32(device, 0x400108, 0x00000100);
   1438		trap &= ~0x00000100;
   1439	}
   1440
   1441	if (trap & 0x01000000) {
   1442		u32 stat = nvkm_rd32(device, 0x400118);
   1443		for (gpc = 0; stat && gpc < gr->gpc_nr; gpc++) {
   1444			u32 mask = 0x00000001 << gpc;
   1445			if (stat & mask) {
   1446				gf100_gr_trap_gpc(gr, gpc);
   1447				nvkm_wr32(device, 0x400118, mask);
   1448				stat &= ~mask;
   1449			}
   1450		}
   1451		nvkm_wr32(device, 0x400108, 0x01000000);
   1452		trap &= ~0x01000000;
   1453	}
   1454
   1455	if (trap & 0x02000000) {
   1456		for (rop = 0; rop < gr->rop_nr; rop++) {
   1457			u32 statz = nvkm_rd32(device, ROP_UNIT(rop, 0x070));
   1458			u32 statc = nvkm_rd32(device, ROP_UNIT(rop, 0x144));
   1459			nvkm_error(subdev, "ROP%d %08x %08x\n",
   1460				 rop, statz, statc);
   1461			nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
   1462			nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
   1463		}
   1464		nvkm_wr32(device, 0x400108, 0x02000000);
   1465		trap &= ~0x02000000;
   1466	}
   1467
   1468	if (trap) {
   1469		nvkm_error(subdev, "TRAP UNHANDLED %08x\n", trap);
   1470		nvkm_wr32(device, 0x400108, trap);
   1471	}
   1472}
   1473
   1474static void
   1475gf100_gr_ctxctl_debug_unit(struct gf100_gr *gr, u32 base)
   1476{
   1477	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
   1478	struct nvkm_device *device = subdev->device;
   1479	nvkm_error(subdev, "%06x - done %08x\n", base,
   1480		   nvkm_rd32(device, base + 0x400));
   1481	nvkm_error(subdev, "%06x - stat %08x %08x %08x %08x\n", base,
   1482		   nvkm_rd32(device, base + 0x800),
   1483		   nvkm_rd32(device, base + 0x804),
   1484		   nvkm_rd32(device, base + 0x808),
   1485		   nvkm_rd32(device, base + 0x80c));
   1486	nvkm_error(subdev, "%06x - stat %08x %08x %08x %08x\n", base,
   1487		   nvkm_rd32(device, base + 0x810),
   1488		   nvkm_rd32(device, base + 0x814),
   1489		   nvkm_rd32(device, base + 0x818),
   1490		   nvkm_rd32(device, base + 0x81c));
   1491}
   1492
   1493void
   1494gf100_gr_ctxctl_debug(struct gf100_gr *gr)
   1495{
   1496	struct nvkm_device *device = gr->base.engine.subdev.device;
   1497	u32 gpcnr = nvkm_rd32(device, 0x409604) & 0xffff;
   1498	u32 gpc;
   1499
   1500	gf100_gr_ctxctl_debug_unit(gr, 0x409000);
   1501	for (gpc = 0; gpc < gpcnr; gpc++)
   1502		gf100_gr_ctxctl_debug_unit(gr, 0x502000 + (gpc * 0x8000));
   1503}
   1504
   1505static void
   1506gf100_gr_ctxctl_isr(struct gf100_gr *gr)
   1507{
   1508	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
   1509	struct nvkm_device *device = subdev->device;
   1510	u32 stat = nvkm_rd32(device, 0x409c18);
   1511
   1512	if (!gr->firmware && (stat & 0x00000001)) {
   1513		u32 code = nvkm_rd32(device, 0x409814);
   1514		if (code == E_BAD_FWMTHD) {
   1515			u32 class = nvkm_rd32(device, 0x409808);
   1516			u32  addr = nvkm_rd32(device, 0x40980c);
   1517			u32  subc = (addr & 0x00070000) >> 16;
   1518			u32  mthd = (addr & 0x00003ffc);
   1519			u32  data = nvkm_rd32(device, 0x409810);
   1520
   1521			nvkm_error(subdev, "FECS MTHD subc %d class %04x "
   1522					   "mthd %04x data %08x\n",
   1523				   subc, class, mthd, data);
   1524		} else {
   1525			nvkm_error(subdev, "FECS ucode error %d\n", code);
   1526		}
   1527		nvkm_wr32(device, 0x409c20, 0x00000001);
   1528		stat &= ~0x00000001;
   1529	}
   1530
   1531	if (!gr->firmware && (stat & 0x00080000)) {
   1532		nvkm_error(subdev, "FECS watchdog timeout\n");
   1533		gf100_gr_ctxctl_debug(gr);
   1534		nvkm_wr32(device, 0x409c20, 0x00080000);
   1535		stat &= ~0x00080000;
   1536	}
   1537
   1538	if (stat) {
   1539		nvkm_error(subdev, "FECS %08x\n", stat);
   1540		gf100_gr_ctxctl_debug(gr);
   1541		nvkm_wr32(device, 0x409c20, stat);
   1542	}
   1543}
   1544
   1545static void
   1546gf100_gr_intr(struct nvkm_gr *base)
   1547{
   1548	struct gf100_gr *gr = gf100_gr(base);
   1549	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
   1550	struct nvkm_device *device = subdev->device;
   1551	struct nvkm_fifo_chan *chan;
   1552	unsigned long flags;
   1553	u64 inst = nvkm_rd32(device, 0x409b00) & 0x0fffffff;
   1554	u32 stat = nvkm_rd32(device, 0x400100);
   1555	u32 addr = nvkm_rd32(device, 0x400704);
   1556	u32 mthd = (addr & 0x00003ffc);
   1557	u32 subc = (addr & 0x00070000) >> 16;
   1558	u32 data = nvkm_rd32(device, 0x400708);
   1559	u32 code = nvkm_rd32(device, 0x400110);
   1560	u32 class;
   1561	const char *name = "unknown";
   1562	int chid = -1;
   1563
   1564	chan = nvkm_fifo_chan_inst(device->fifo, (u64)inst << 12, &flags);
   1565	if (chan) {
   1566		name = chan->object.client->name;
   1567		chid = chan->chid;
   1568	}
   1569
   1570	if (device->card_type < NV_E0 || subc < 4)
   1571		class = nvkm_rd32(device, 0x404200 + (subc * 4));
   1572	else
   1573		class = 0x0000;
   1574
   1575	if (stat & 0x00000001) {
   1576		/*
   1577		 * notifier interrupt, only needed for cyclestats
   1578		 * can be safely ignored
   1579		 */
   1580		nvkm_wr32(device, 0x400100, 0x00000001);
   1581		stat &= ~0x00000001;
   1582	}
   1583
   1584	if (stat & 0x00000010) {
   1585		if (!gf100_gr_mthd_sw(device, class, mthd, data)) {
   1586			nvkm_error(subdev, "ILLEGAL_MTHD ch %d [%010llx %s] "
   1587				   "subc %d class %04x mthd %04x data %08x\n",
   1588				   chid, inst << 12, name, subc,
   1589				   class, mthd, data);
   1590		}
   1591		nvkm_wr32(device, 0x400100, 0x00000010);
   1592		stat &= ~0x00000010;
   1593	}
   1594
   1595	if (stat & 0x00000020) {
   1596		nvkm_error(subdev, "ILLEGAL_CLASS ch %d [%010llx %s] "
   1597			   "subc %d class %04x mthd %04x data %08x\n",
   1598			   chid, inst << 12, name, subc, class, mthd, data);
   1599		nvkm_wr32(device, 0x400100, 0x00000020);
   1600		stat &= ~0x00000020;
   1601	}
   1602
   1603	if (stat & 0x00100000) {
   1604		const struct nvkm_enum *en =
   1605			nvkm_enum_find(nv50_data_error_names, code);
   1606		nvkm_error(subdev, "DATA_ERROR %08x [%s] ch %d [%010llx %s] "
   1607				   "subc %d class %04x mthd %04x data %08x\n",
   1608			   code, en ? en->name : "", chid, inst << 12,
   1609			   name, subc, class, mthd, data);
   1610		nvkm_wr32(device, 0x400100, 0x00100000);
   1611		stat &= ~0x00100000;
   1612	}
   1613
   1614	if (stat & 0x00200000) {
   1615		nvkm_error(subdev, "TRAP ch %d [%010llx %s]\n",
   1616			   chid, inst << 12, name);
   1617		gf100_gr_trap_intr(gr);
   1618		nvkm_wr32(device, 0x400100, 0x00200000);
   1619		stat &= ~0x00200000;
   1620	}
   1621
   1622	if (stat & 0x00080000) {
   1623		gf100_gr_ctxctl_isr(gr);
   1624		nvkm_wr32(device, 0x400100, 0x00080000);
   1625		stat &= ~0x00080000;
   1626	}
   1627
   1628	if (stat) {
   1629		nvkm_error(subdev, "intr %08x\n", stat);
   1630		nvkm_wr32(device, 0x400100, stat);
   1631	}
   1632
   1633	nvkm_wr32(device, 0x400500, 0x00010001);
   1634	nvkm_fifo_chan_put(device->fifo, flags, &chan);
   1635}
   1636
   1637static void
   1638gf100_gr_init_fw(struct nvkm_falcon *falcon,
   1639		 struct nvkm_blob *code, struct nvkm_blob *data)
   1640{
   1641	nvkm_falcon_load_dmem(falcon, data->data, 0x0, data->size, 0);
   1642	nvkm_falcon_load_imem(falcon, code->data, 0x0, code->size, 0, 0, false);
   1643}
   1644
   1645static void
   1646gf100_gr_init_csdata(struct gf100_gr *gr,
   1647		     const struct gf100_gr_pack *pack,
   1648		     u32 falcon, u32 starstar, u32 base)
   1649{
   1650	struct nvkm_device *device = gr->base.engine.subdev.device;
   1651	const struct gf100_gr_pack *iter;
   1652	const struct gf100_gr_init *init;
   1653	u32 addr = ~0, prev = ~0, xfer = 0;
   1654	u32 star, temp;
   1655
   1656	nvkm_wr32(device, falcon + 0x01c0, 0x02000000 + starstar);
   1657	star = nvkm_rd32(device, falcon + 0x01c4);
   1658	temp = nvkm_rd32(device, falcon + 0x01c4);
   1659	if (temp > star)
   1660		star = temp;
   1661	nvkm_wr32(device, falcon + 0x01c0, 0x01000000 + star);
   1662
   1663	pack_for_each_init(init, iter, pack) {
   1664		u32 head = init->addr - base;
   1665		u32 tail = head + init->count * init->pitch;
   1666		while (head < tail) {
   1667			if (head != prev + 4 || xfer >= 32) {
   1668				if (xfer) {
   1669					u32 data = ((--xfer << 26) | addr);
   1670					nvkm_wr32(device, falcon + 0x01c4, data);
   1671					star += 4;
   1672				}
   1673				addr = head;
   1674				xfer = 0;
   1675			}
   1676			prev = head;
   1677			xfer = xfer + 1;
   1678			head = head + init->pitch;
   1679		}
   1680	}
   1681
   1682	nvkm_wr32(device, falcon + 0x01c4, (--xfer << 26) | addr);
   1683	nvkm_wr32(device, falcon + 0x01c0, 0x01000004 + starstar);
   1684	nvkm_wr32(device, falcon + 0x01c4, star + 4);
   1685}
   1686
   1687/* Initialize context from external (secure or not) firmware */
   1688static int
   1689gf100_gr_init_ctxctl_ext(struct gf100_gr *gr)
   1690{
   1691	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
   1692	struct nvkm_device *device = subdev->device;
   1693	u32 lsf_mask = 0;
   1694	int ret;
   1695
   1696	/* load fuc microcode */
   1697	nvkm_mc_unk260(device, 0);
   1698
   1699	/* securely-managed falcons must be reset using secure boot */
   1700
   1701	if (!nvkm_acr_managed_falcon(device, NVKM_ACR_LSF_FECS)) {
   1702		gf100_gr_init_fw(&gr->fecs.falcon, &gr->fecs.inst,
   1703						   &gr->fecs.data);
   1704	} else {
   1705		lsf_mask |= BIT(NVKM_ACR_LSF_FECS);
   1706	}
   1707
   1708	if (!nvkm_acr_managed_falcon(device, NVKM_ACR_LSF_GPCCS)) {
   1709		gf100_gr_init_fw(&gr->gpccs.falcon, &gr->gpccs.inst,
   1710						    &gr->gpccs.data);
   1711	} else {
   1712		lsf_mask |= BIT(NVKM_ACR_LSF_GPCCS);
   1713	}
   1714
   1715	if (lsf_mask) {
   1716		ret = nvkm_acr_bootstrap_falcons(device, lsf_mask);
   1717		if (ret)
   1718			return ret;
   1719	}
   1720
   1721	nvkm_mc_unk260(device, 1);
   1722
   1723	/* start both of them running */
   1724	nvkm_wr32(device, 0x409840, 0xffffffff);
   1725	nvkm_wr32(device, 0x41a10c, 0x00000000);
   1726	nvkm_wr32(device, 0x40910c, 0x00000000);
   1727
   1728	nvkm_falcon_start(&gr->gpccs.falcon);
   1729	nvkm_falcon_start(&gr->fecs.falcon);
   1730
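	/* Wait up to two seconds for the FECS falcon to set bit 0 of the
	 * status register at 0x409800, signalling that initialisation
	 * completed.
	 */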
   1731	if (nvkm_msec(device, 2000,
   1732		if (nvkm_rd32(device, 0x409800) & 0x00000001)
   1733			break;
   1734	) < 0)
   1735		return -EBUSY;
   1736
   1737	gf100_gr_fecs_set_watchdog_timeout(gr, 0x7fffffff);
   1738
   1739	/* Determine how much memory is required to store the main context image. */
   1740	ret = gf100_gr_fecs_discover_image_size(gr, &gr->size);
   1741	if (ret)
   1742		return ret;
   1743
   1744	/* Determine how much memory is required to store the ZCULL image. */
   1745	ret = gf100_gr_fecs_discover_zcull_image_size(gr, &gr->size_zcull);
   1746	if (ret)
   1747		return ret;
   1748
   1749	/* Determine how much memory is required to store the PerfMon image. */
   1750	ret = gf100_gr_fecs_discover_pm_image_size(gr, &gr->size_pm);
   1751	if (ret)
   1752		return ret;
   1753
   1754	/*XXX: We (likely) require PMU support to even bother with this.
   1755	 *
   1756	 *     Also, it seems like not all GPUs support ELPG.  Traces I
   1757	 *     have here show RM enabling it on Kepler/Turing, but on none
   1758	 *     of the GPUs between those.  NVGPU decides this by PCI ID.
   1759	 */
   1760	if (0) {
   1761		ret = gf100_gr_fecs_elpg_bind(gr);
   1762		if (ret)
   1763			return ret;
   1764	}
   1765
   1766	/* Generate golden context image. */
   1767	if (gr->data == NULL) {
   1768		int ret = gf100_grctx_generate(gr);
   1769		if (ret) {
   1770			nvkm_error(subdev, "failed to construct context\n");
   1771			return ret;
   1772		}
   1773	}
   1774
   1775	return 0;
   1776}
   1777
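/* Initialize context from the built-in (non-firmware) microcode */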
   1778static int
   1779gf100_gr_init_ctxctl_int(struct gf100_gr *gr)
   1780{
   1781	const struct gf100_grctx_func *grctx = gr->func->grctx;
   1782	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
   1783	struct nvkm_device *device = subdev->device;
   1784
   1785	if (!gr->func->fecs.ucode)
   1786		return -ENOSYS;
   1788
   1789	/* load HUB microcode */
   1790	nvkm_mc_unk260(device, 0);
   1791	nvkm_falcon_load_dmem(&gr->fecs.falcon,
   1792			      gr->func->fecs.ucode->data.data, 0x0,
   1793			      gr->func->fecs.ucode->data.size, 0);
   1794	nvkm_falcon_load_imem(&gr->fecs.falcon,
   1795			      gr->func->fecs.ucode->code.data, 0x0,
   1796			      gr->func->fecs.ucode->code.size, 0, 0, false);
   1797
   1798	/* load GPC microcode */
   1799	nvkm_falcon_load_dmem(&gr->gpccs.falcon,
   1800			      gr->func->gpccs.ucode->data.data, 0x0,
   1801			      gr->func->gpccs.ucode->data.size, 0);
   1802	nvkm_falcon_load_imem(&gr->gpccs.falcon,
   1803			      gr->func->gpccs.ucode->code.data, 0x0,
   1804			      gr->func->gpccs.ucode->code.size, 0, 0, false);
   1805	nvkm_mc_unk260(device, 1);
   1806
   1807	/* load register lists */
   1808	gf100_gr_init_csdata(gr, grctx->hub, 0x409000, 0x000, 0x000000);
   1809	gf100_gr_init_csdata(gr, grctx->gpc_0, 0x41a000, 0x000, 0x418000);
   1810	gf100_gr_init_csdata(gr, grctx->gpc_1, 0x41a000, 0x000, 0x418000);
   1811	gf100_gr_init_csdata(gr, grctx->tpc, 0x41a000, 0x004, 0x419800);
   1812	gf100_gr_init_csdata(gr, grctx->ppc, 0x41a000, 0x008, 0x41be00);
   1813
   1814	/* start HUB ucode running; it'll init the GPCs */
   1815	nvkm_wr32(device, 0x40910c, 0x00000000);
   1816	nvkm_wr32(device, 0x409100, 0x00000002);
   1817	if (nvkm_msec(device, 2000,
   1818		if (nvkm_rd32(device, 0x409800) & 0x80000000)
   1819			break;
   1820	) < 0) {
   1821		gf100_gr_ctxctl_debug(gr);
   1822		return -EBUSY;
   1823	}
   1824
   1825	gr->size = nvkm_rd32(device, 0x409804);
   1826	if (gr->data == NULL) {
   1827		int ret = gf100_grctx_generate(gr);
   1828		if (ret) {
   1829			nvkm_error(subdev, "failed to construct context\n");
   1830			return ret;
   1831		}
   1832	}
   1833
   1834	return 0;
   1835}
   1836
   1837int
   1838gf100_gr_init_ctxctl(struct gf100_gr *gr)
   1839{
   1840	int ret;
   1841
   1842	if (gr->firmware)
   1843		ret = gf100_gr_init_ctxctl_ext(gr);
   1844	else
   1845		ret = gf100_gr_init_ctxctl_int(gr);
   1846
   1847	return ret;
   1848}
   1849
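/* Enumerate SMs by walking TPC index first and GPC second, so that
 * consecutive SM IDs land on different GPCs wherever possible.
 */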
   1850void
   1851gf100_gr_oneinit_sm_id(struct gf100_gr *gr)
   1852{
   1853	int tpc, gpc;
   1854	for (tpc = 0; tpc < gr->tpc_max; tpc++) {
   1855		for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
   1856			if (tpc < gr->tpc_nr[gpc]) {
   1857				gr->sm[gr->sm_nr].gpc = gpc;
   1858				gr->sm[gr->sm_nr].tpc = tpc;
   1859				gr->sm_nr++;
   1860			}
   1861		}
   1862	}
   1863}
   1864
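/* Build the screen-tile -> GPC assignment.  GPCs are sorted by TPC
 * count (highest first), then tiles are dealt out using a
 * Bresenham-style error accumulator so each GPC receives tiles in
 * proportion to its TPC count.  As a worked example, tpc_nr = {4, 3, 1}
 * (tpc_total = 8) yields the interleaved tile[] sequence
 * 0,1,0,2,0,1,0,1 rather than a contiguous 0,0,0,0,1,1,1,2.  In the
 * default case, screen_tile_row_offset is the first small prime that
 * does not evenly divide tpc_total, presumably so that successive
 * tile rows start on different GPCs.
 */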
   1865void
   1866gf100_gr_oneinit_tiles(struct gf100_gr *gr)
   1867{
   1868	static const u8 primes[] = {
   1869		3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61
   1870	};
   1871	int init_frac[GPC_MAX], init_err[GPC_MAX], run_err[GPC_MAX], i, j;
   1872	u32 mul_factor, comm_denom;
   1873	u8  gpc_map[GPC_MAX];
   1874	bool sorted;
   1875
   1876	switch (gr->tpc_total) {
   1877	case 15: gr->screen_tile_row_offset = 0x06; break;
   1878	case 14: gr->screen_tile_row_offset = 0x05; break;
   1879	case 13: gr->screen_tile_row_offset = 0x02; break;
   1880	case 11: gr->screen_tile_row_offset = 0x07; break;
   1881	case 10: gr->screen_tile_row_offset = 0x06; break;
   1882	case  7:
   1883	case  5: gr->screen_tile_row_offset = 0x01; break;
   1884	case  3: gr->screen_tile_row_offset = 0x02; break;
   1885	case  2:
   1886	case  1: gr->screen_tile_row_offset = 0x01; break;
   1887	default: gr->screen_tile_row_offset = 0x03;
   1888		for (i = 0; i < ARRAY_SIZE(primes); i++) {
   1889			if (gr->tpc_total % primes[i]) {
   1890				gr->screen_tile_row_offset = primes[i];
   1891				break;
   1892			}
   1893		}
   1894		break;
   1895	}
   1896
   1897	/* Sort GPCs by TPC count, highest-to-lowest. */
   1898	for (i = 0; i < gr->gpc_nr; i++)
   1899		gpc_map[i] = i;
   1900	sorted = false;
   1901
   1902	while (!sorted) {
   1903		for (sorted = true, i = 0; i < gr->gpc_nr - 1; i++) {
   1904			if (gr->tpc_nr[gpc_map[i + 1]] >
   1905			    gr->tpc_nr[gpc_map[i + 0]]) {
   1906				u8 swap = gpc_map[i];
   1907				gpc_map[i + 0] = gpc_map[i + 1];
   1908				gpc_map[i + 1] = swap;
   1909				sorted = false;
   1910			}
   1911		}
   1912	}
   1913
   1914	/* Determine tile->GPC mapping */
   1915	mul_factor = gr->gpc_nr * gr->tpc_max;
   1916	if (mul_factor & 1)
   1917		mul_factor = 2;
   1918	else
   1919		mul_factor = 1;
   1920
   1921	comm_denom = gr->gpc_nr * gr->tpc_max * mul_factor;
   1922
   1923	for (i = 0; i < gr->gpc_nr; i++) {
   1924		init_frac[i] = gr->tpc_nr[gpc_map[i]] * gr->gpc_nr * mul_factor;
   1925		 init_err[i] = i * gr->tpc_max * mul_factor - comm_denom/2;
   1926		  run_err[i] = init_frac[i] + init_err[i];
   1927	}
   1928
   1929	for (i = 0; i < gr->tpc_total;) {
   1930		for (j = 0; j < gr->gpc_nr; j++) {
   1931			if ((run_err[j] * 2) >= comm_denom) {
   1932				gr->tile[i++] = gpc_map[j];
   1933				run_err[j] += init_frac[j] - comm_denom;
   1934			} else {
   1935				run_err[j] += init_frac[j];
   1936			}
   1937		}
   1938	}
   1939}
   1940
   1941static int
   1942gf100_gr_oneinit(struct nvkm_gr *base)
   1943{
   1944	struct gf100_gr *gr = gf100_gr(base);
   1945	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
   1946	struct nvkm_device *device = subdev->device;
   1947	int i, j;
   1948
   1949	nvkm_pmu_pgob(device->pmu, false);
   1950
   1951	gr->rop_nr = gr->func->rops(gr);
   1952	gr->gpc_nr = nvkm_rd32(device, 0x409604) & 0x0000001f;
   1953	for (i = 0; i < gr->gpc_nr; i++) {
   1954		gr->tpc_nr[i]  = nvkm_rd32(device, GPC_UNIT(i, 0x2608));
   1955		gr->tpc_max = max(gr->tpc_max, gr->tpc_nr[i]);
   1956		gr->tpc_total += gr->tpc_nr[i];
   1957		gr->ppc_nr[i]  = gr->func->ppc_nr;
   1958		for (j = 0; j < gr->ppc_nr[i]; j++) {
   1959			gr->ppc_tpc_mask[i][j] =
   1960				nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4)));
   1961			if (gr->ppc_tpc_mask[i][j] == 0)
   1962				continue;
   1963			gr->ppc_mask[i] |= (1 << j);
   1964			gr->ppc_tpc_nr[i][j] = hweight8(gr->ppc_tpc_mask[i][j]);
   1965			if (gr->ppc_tpc_min == 0 ||
   1966			    gr->ppc_tpc_min > gr->ppc_tpc_nr[i][j])
   1967				gr->ppc_tpc_min = gr->ppc_tpc_nr[i][j];
   1968			if (gr->ppc_tpc_max < gr->ppc_tpc_nr[i][j])
   1969				gr->ppc_tpc_max = gr->ppc_tpc_nr[i][j];
   1970		}
   1971	}
   1972
   1973	memset(gr->tile, 0xff, sizeof(gr->tile));
   1974	gr->func->oneinit_tiles(gr);
   1975	gr->func->oneinit_sm_id(gr);
   1976	return 0;
   1977}
   1978
   1979static int
   1980gf100_gr_init_(struct nvkm_gr *base)
   1981{
   1982	struct gf100_gr *gr = gf100_gr(base);
   1983	struct nvkm_subdev *subdev = &base->engine.subdev;
   1984	struct nvkm_device *device = subdev->device;
   1985	bool reset = device->chipset == 0x137 || device->chipset == 0x138;
   1986	int ret;
   1987
   1988	/* On certain GP107/GP108 boards, we trigger a weird issue where
   1989	 * GR will stop responding to PRI accesses after we've asked the
   1990	 * SEC2 RTOS to boot the GR falcons.  This happens far more
   1991	 * frequently when cold-booting a board (i.e. returning from D3).
   1992	 *
   1993	 * The root cause for this is not known and has proven difficult
   1994	 * to isolate, with many avenues being dead-ends.
   1995	 *
   1996	 * A workaround was discovered by Karol, whereby putting GR into
   1997	 * reset for an extended period right before initialisation
   1998	 * prevents the problem from occurring.
   1999	 *
   2000	 * XXX: As RM does not require any such workaround, this is more
   2001	 *      of a hack than a true fix.
   2002	 */
   2003	reset = nvkm_boolopt(device->cfgopt, "NvGrResetWar", reset);
   2004	if (reset) {
   2005		nvkm_mask(device, 0x000200, 0x00001000, 0x00000000);
   2006		nvkm_rd32(device, 0x000200);
   2007		msleep(50);
   2008		nvkm_mask(device, 0x000200, 0x00001000, 0x00001000);
   2009		nvkm_rd32(device, 0x000200);
   2010	}
   2011
   2012	nvkm_pmu_pgob(device->pmu, false);
   2013
   2014	ret = nvkm_falcon_get(&gr->fecs.falcon, subdev);
   2015	if (ret)
   2016		return ret;
   2017
   2018	ret = nvkm_falcon_get(&gr->gpccs.falcon, subdev);
   2019	if (ret)
   2020		return ret;
   2021
   2022	return gr->func->init(gr);
   2023}
   2024
   2025static int
   2026gf100_gr_fini(struct nvkm_gr *base, bool suspend)
   2027{
   2028	struct gf100_gr *gr = gf100_gr(base);
   2029	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
   2030	nvkm_falcon_put(&gr->gpccs.falcon, subdev);
   2031	nvkm_falcon_put(&gr->fecs.falcon, subdev);
   2032	return 0;
   2033}
   2034
   2035static void *
   2036gf100_gr_dtor(struct nvkm_gr *base)
   2037{
   2038	struct gf100_gr *gr = gf100_gr(base);
   2039
   2040	kfree(gr->data);
   2041
   2042	nvkm_falcon_dtor(&gr->gpccs.falcon);
   2043	nvkm_falcon_dtor(&gr->fecs.falcon);
   2044
   2045	nvkm_blob_dtor(&gr->fecs.inst);
   2046	nvkm_blob_dtor(&gr->fecs.data);
   2047	nvkm_blob_dtor(&gr->gpccs.inst);
   2048	nvkm_blob_dtor(&gr->gpccs.data);
   2049
   2050	vfree(gr->bundle);
   2051	vfree(gr->method);
   2052	vfree(gr->sw_ctx);
   2053	vfree(gr->sw_nonctx);
   2054
   2055	return gr;
   2056}
   2057
   2058static const struct nvkm_gr_func
   2059gf100_gr_ = {
   2060	.dtor = gf100_gr_dtor,
   2061	.oneinit = gf100_gr_oneinit,
   2062	.init = gf100_gr_init_,
   2063	.fini = gf100_gr_fini,
   2064	.intr = gf100_gr_intr,
   2065	.units = gf100_gr_units,
   2066	.chan_new = gf100_gr_chan_new,
   2067	.object_get = gf100_gr_object_get,
   2068	.chsw_load = gf100_gr_chsw_load,
   2069	.ctxsw.pause = gf100_gr_fecs_stop_ctxsw,
   2070	.ctxsw.resume = gf100_gr_fecs_start_ctxsw,
   2071	.ctxsw.inst = gf100_gr_ctxsw_inst,
   2072};
   2073
   2074static const struct nvkm_falcon_func
   2075gf100_gr_flcn = {
   2076	.fbif = 0x600,
   2077	.load_imem = nvkm_falcon_v1_load_imem,
   2078	.load_dmem = nvkm_falcon_v1_load_dmem,
   2079	.read_dmem = nvkm_falcon_v1_read_dmem,
   2080	.bind_context = nvkm_falcon_v1_bind_context,
   2081	.wait_for_halt = nvkm_falcon_v1_wait_for_halt,
   2082	.clear_interrupt = nvkm_falcon_v1_clear_interrupt,
   2083	.set_start_addr = nvkm_falcon_v1_set_start_addr,
   2084	.start = nvkm_falcon_v1_start,
   2085	.enable = nvkm_falcon_v1_enable,
   2086	.disable = nvkm_falcon_v1_disable,
   2087};
   2088
   2089int
   2090gf100_gr_new_(const struct gf100_gr_fwif *fwif, struct nvkm_device *device,
   2091	      enum nvkm_subdev_type type, int inst, struct nvkm_gr **pgr)
   2092{
   2093	struct gf100_gr *gr;
   2094	int ret;
   2095
   2096	if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL)))
   2097		return -ENOMEM;
   2098	*pgr = &gr->base;
   2099
   2100	ret = nvkm_gr_ctor(&gf100_gr_, device, type, inst, true, &gr->base);
   2101	if (ret)
   2102		return ret;
   2103
   2104	fwif = nvkm_firmware_load(&gr->base.engine.subdev, fwif, "Gr", gr);
   2105	if (IS_ERR(fwif))
   2106		return PTR_ERR(fwif);
   2107
   2108	gr->func = fwif->func;
   2109
   2110	ret = nvkm_falcon_ctor(&gf100_gr_flcn, &gr->base.engine.subdev,
   2111			       "fecs", 0x409000, &gr->fecs.falcon);
   2112	if (ret)
   2113		return ret;
   2114
   2115	mutex_init(&gr->fecs.mutex);
   2116
   2117	ret = nvkm_falcon_ctor(&gf100_gr_flcn, &gr->base.engine.subdev,
   2118			       "gpccs", 0x41a000, &gr->gpccs.falcon);
   2119	if (ret)
   2120		return ret;
   2121
   2122	return 0;
   2123}
   2124
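/* Broadcast the per-GPC TPC counts as packed nibbles: four bits per
 * GPC, eight GPCs per 32-bit register, four registers per target
 * (0x406028+ when pd is set, 0x405870+ when ds is set).  For example,
 * tpc_nr = {4, 4, 3} packs into 0x00000344 in the first register.
 */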
   2125void
   2126gf100_gr_init_num_tpc_per_gpc(struct gf100_gr *gr, bool pd, bool ds)
   2127{
   2128	struct nvkm_device *device = gr->base.engine.subdev.device;
   2129	int gpc, i, j;
   2130	u32 data;
   2131
   2132	for (gpc = 0, i = 0; i < 4; i++) {
   2133		for (data = 0, j = 0; j < 8 && gpc < gr->gpc_nr; j++, gpc++)
   2134			data |= gr->tpc_nr[gpc] << (j * 4);
   2135		if (pd)
   2136			nvkm_wr32(device, 0x406028 + (i * 4), data);
   2137		if (ds)
   2138			nvkm_wr32(device, 0x405870 + (i * 4), data);
   2139	}
   2140}
   2141
   2142void
   2143gf100_gr_init_400054(struct gf100_gr *gr)
   2144{
   2145	nvkm_wr32(gr->base.engine.subdev.device, 0x400054, 0x34ce3464);
   2146}
   2147
   2148void
   2149gf100_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc)
   2150{
   2151	struct nvkm_device *device = gr->base.engine.subdev.device;
   2152	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe);
   2153	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f);
   2154}
   2155
   2156void
   2157gf100_gr_init_tex_hww_esr(struct gf100_gr *gr, int gpc, int tpc)
   2158{
   2159	struct nvkm_device *device = gr->base.engine.subdev.device;
   2160	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
   2161}
   2162
   2163void
   2164gf100_gr_init_419eb4(struct gf100_gr *gr)
   2165{
   2166	struct nvkm_device *device = gr->base.engine.subdev.device;
   2167	nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000);
   2168}
   2169
   2170void
   2171gf100_gr_init_419cc0(struct gf100_gr *gr)
   2172{
   2173	struct nvkm_device *device = gr->base.engine.subdev.device;
   2174	int gpc, tpc;
   2175
   2176	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
   2177
   2178	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
   2179		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++)
   2180			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
   2181	}
   2182}
   2183
   2184void
   2185gf100_gr_init_40601c(struct gf100_gr *gr)
   2186{
   2187	nvkm_wr32(gr->base.engine.subdev.device, 0x40601c, 0xc0000000);
   2188}
   2189
   2190void
   2191gf100_gr_init_fecs_exceptions(struct gf100_gr *gr)
   2192{
   2193	const u32 data = gr->firmware ? 0x000e0000 : 0x000e0001;
   2194	nvkm_wr32(gr->base.engine.subdev.device, 0x409c24, data);
   2195}
   2196
   2197void
   2198gf100_gr_init_gpc_mmu(struct gf100_gr *gr)
   2199{
   2200	struct nvkm_device *device = gr->base.engine.subdev.device;
   2201	struct nvkm_fb *fb = device->fb;
   2202
   2203	nvkm_wr32(device, 0x418880, nvkm_rd32(device, 0x100c80) & 0x00000001);
   2204	nvkm_wr32(device, 0x4188a4, 0x03000000);
   2205	nvkm_wr32(device, 0x418888, 0x00000000);
   2206	nvkm_wr32(device, 0x41888c, 0x00000000);
   2207	nvkm_wr32(device, 0x418890, 0x00000000);
   2208	nvkm_wr32(device, 0x418894, 0x00000000);
   2209	nvkm_wr32(device, 0x4188b4, nvkm_memory_addr(fb->mmu_wr) >> 8);
   2210	nvkm_wr32(device, 0x4188b8, nvkm_memory_addr(fb->mmu_rd) >> 8);
   2211}
   2212
   2213void
   2214gf100_gr_init_num_active_ltcs(struct gf100_gr *gr)
   2215{
   2216	struct nvkm_device *device = gr->base.engine.subdev.device;
   2217	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
   2218}
   2219
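/* Program ZCULL tiling.  bank[] assigns each tile its per-GPC
 * sequence number, packed eight tiles per register at
 * GPC_BCAST(0x0980).  magicgpc918 is a rounded-up fixed-point
 * reciprocal of the TPC count, DIV_ROUND_UP(0x00800000, tpc_total);
 * e.g. 8 TPCs -> 0x00100000.
 */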
   2220void
   2221gf100_gr_init_zcull(struct gf100_gr *gr)
   2222{
   2223	struct nvkm_device *device = gr->base.engine.subdev.device;
   2224	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
   2225	const u8 tile_nr = ALIGN(gr->tpc_total, 32);
   2226	u8 bank[GPC_MAX] = {}, gpc, i, j;
   2227	u32 data;
   2228
   2229	for (i = 0; i < tile_nr; i += 8) {
   2230		for (data = 0, j = 0; j < 8 && i + j < gr->tpc_total; j++) {
   2231			data |= bank[gr->tile[i + j]] << (j * 4);
   2232			bank[gr->tile[i + j]]++;
   2233		}
   2234		nvkm_wr32(device, GPC_BCAST(0x0980 + ((i / 8) * 4)), data);
   2235	}
   2236
   2237	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
   2238		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
   2239			  gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]);
   2240		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
   2241							 gr->tpc_total);
   2242		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
   2243	}
   2244
   2245	nvkm_wr32(device, GPC_BCAST(0x1bd4), magicgpc918);
   2246}
   2247
   2248void
   2249gf100_gr_init_vsc_stream_master(struct gf100_gr *gr)
   2250{
   2251	struct nvkm_device *device = gr->base.engine.subdev.device;
   2252	nvkm_mask(device, TPC_UNIT(0, 0, 0x05c), 0x00000001, 0x00000001);
   2253}
   2254
   2255int
   2256gf100_gr_init(struct gf100_gr *gr)
   2257{
   2258	struct nvkm_device *device = gr->base.engine.subdev.device;
   2259	int gpc, tpc, rop;
   2260
   2261	if (gr->func->init_419bd8)
   2262		gr->func->init_419bd8(gr);
   2263
   2264	gr->func->init_gpc_mmu(gr);
   2265
   2266	if (gr->sw_nonctx)
   2267		gf100_gr_mmio(gr, gr->sw_nonctx);
   2268	else
   2269		gf100_gr_mmio(gr, gr->func->mmio);
   2270
   2271	gf100_gr_wait_idle(gr);
   2272
   2273	if (gr->func->init_r405a14)
   2274		gr->func->init_r405a14(gr);
   2275
   2276	if (gr->func->clkgate_pack)
   2277		nvkm_therm_clkgate_init(device->therm, gr->func->clkgate_pack);
   2278
   2279	if (gr->func->init_bios)
   2280		gr->func->init_bios(gr);
   2281
   2282	gr->func->init_vsc_stream_master(gr);
   2283	gr->func->init_zcull(gr);
   2284	gr->func->init_num_active_ltcs(gr);
   2285	if (gr->func->init_rop_active_fbps)
   2286		gr->func->init_rop_active_fbps(gr);
   2287	if (gr->func->init_bios_2)
   2288		gr->func->init_bios_2(gr);
   2289	if (gr->func->init_swdx_pes_mask)
   2290		gr->func->init_swdx_pes_mask(gr);
   2291	if (gr->func->init_fs)
   2292		gr->func->init_fs(gr);
   2293
   2294	nvkm_wr32(device, 0x400500, 0x00010001);
   2295
   2296	nvkm_wr32(device, 0x400100, 0xffffffff);
   2297	nvkm_wr32(device, 0x40013c, 0xffffffff);
   2298	nvkm_wr32(device, 0x400124, 0x00000002);
   2299
   2300	gr->func->init_fecs_exceptions(gr);
   2301	if (gr->func->init_ds_hww_esr_2)
   2302		gr->func->init_ds_hww_esr_2(gr);
   2303
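	/* The 0xc0000000 writes below appear to follow the usual nouveau
	 * HWW_ESR pattern: acknowledge any stale error state and enable
	 * exception reporting for the unit.
	 */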
   2304	nvkm_wr32(device, 0x404000, 0xc0000000);
   2305	nvkm_wr32(device, 0x404600, 0xc0000000);
   2306	nvkm_wr32(device, 0x408030, 0xc0000000);
   2307
   2308	if (gr->func->init_40601c)
   2309		gr->func->init_40601c(gr);
   2310
   2311	nvkm_wr32(device, 0x406018, 0xc0000000);
   2312	nvkm_wr32(device, 0x404490, 0xc0000000);
   2313
   2314	if (gr->func->init_sked_hww_esr)
   2315		gr->func->init_sked_hww_esr(gr);
   2316
   2317	nvkm_wr32(device, 0x405840, 0xc0000000);
   2318	nvkm_wr32(device, 0x405844, 0x00ffffff);
   2319
   2320	if (gr->func->init_419cc0)
   2321		gr->func->init_419cc0(gr);
   2322	if (gr->func->init_419eb4)
   2323		gr->func->init_419eb4(gr);
   2324	if (gr->func->init_419c9c)
   2325		gr->func->init_419c9c(gr);
   2326
   2327	if (gr->func->init_ppc_exceptions)
   2328		gr->func->init_ppc_exceptions(gr);
   2329
   2330	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
   2331		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
   2332		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
   2333		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
   2334		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
   2335		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
   2336			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
   2337			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
   2338			if (gr->func->init_tex_hww_esr)
   2339				gr->func->init_tex_hww_esr(gr, gpc, tpc);
   2340			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
   2341			if (gr->func->init_504430)
   2342				gr->func->init_504430(gr, gpc, tpc);
   2343			gr->func->init_shader_exceptions(gr, gpc, tpc);
   2344		}
   2345		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
   2346		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
   2347	}
   2348
   2349	for (rop = 0; rop < gr->rop_nr; rop++) {
   2350		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000);
   2351		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000);
   2352		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
   2353		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
   2354	}
   2355
   2356	nvkm_wr32(device, 0x400108, 0xffffffff);
   2357	nvkm_wr32(device, 0x400138, 0xffffffff);
   2358	nvkm_wr32(device, 0x400118, 0xffffffff);
   2359	nvkm_wr32(device, 0x400130, 0xffffffff);
   2360	nvkm_wr32(device, 0x40011c, 0xffffffff);
   2361	nvkm_wr32(device, 0x400134, 0xffffffff);
   2362
   2363	if (gr->func->init_400054)
   2364		gr->func->init_400054(gr);
   2365
   2366	gf100_gr_zbc_init(gr);
   2367
   2368	if (gr->func->init_4188a4)
   2369		gr->func->init_4188a4(gr);
   2370
   2371	return gf100_gr_init_ctxctl(gr);
   2372}
   2373
   2374#include "fuc/hubgf100.fuc3.h"
   2375
   2376struct gf100_gr_ucode
   2377gf100_gr_fecs_ucode = {
   2378	.code.data = gf100_grhub_code,
   2379	.code.size = sizeof(gf100_grhub_code),
   2380	.data.data = gf100_grhub_data,
   2381	.data.size = sizeof(gf100_grhub_data),
   2382};
   2383
   2384#include "fuc/gpcgf100.fuc3.h"
   2385
   2386struct gf100_gr_ucode
   2387gf100_gr_gpccs_ucode = {
   2388	.code.data = gf100_grgpc_code,
   2389	.code.size = sizeof(gf100_grgpc_code),
   2390	.data.data = gf100_grgpc_data,
   2391	.data.size = sizeof(gf100_grgpc_data),
   2392};
   2393
   2394static const struct gf100_gr_func
   2395gf100_gr = {
   2396	.oneinit_tiles = gf100_gr_oneinit_tiles,
   2397	.oneinit_sm_id = gf100_gr_oneinit_sm_id,
   2398	.init = gf100_gr_init,
   2399	.init_gpc_mmu = gf100_gr_init_gpc_mmu,
   2400	.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
   2401	.init_zcull = gf100_gr_init_zcull,
   2402	.init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
   2403	.init_fecs_exceptions = gf100_gr_init_fecs_exceptions,
   2404	.init_40601c = gf100_gr_init_40601c,
   2405	.init_419cc0 = gf100_gr_init_419cc0,
   2406	.init_419eb4 = gf100_gr_init_419eb4,
   2407	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
   2408	.init_shader_exceptions = gf100_gr_init_shader_exceptions,
   2409	.init_400054 = gf100_gr_init_400054,
   2410	.trap_mp = gf100_gr_trap_mp,
   2411	.mmio = gf100_gr_pack_mmio,
   2412	.fecs.ucode = &gf100_gr_fecs_ucode,
   2413	.gpccs.ucode = &gf100_gr_gpccs_ucode,
   2414	.rops = gf100_gr_rops,
   2415	.grctx = &gf100_grctx,
   2416	.zbc = &gf100_gr_zbc,
   2417	.sclass = {
   2418		{ -1, -1, FERMI_TWOD_A },
   2419		{ -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A },
   2420		{ -1, -1, FERMI_A, &gf100_fermi },
   2421		{ -1, -1, FERMI_COMPUTE_A },
   2422		{}
   2423	}
   2424};
   2425
   2426int
   2427gf100_gr_nofw(struct gf100_gr *gr, int ver, const struct gf100_gr_fwif *fwif)
   2428{
   2429	gr->firmware = false;
   2430	return 0;
   2431}
   2432
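/* Fetch one external microcode blob via request_firmware(), trying
 * the chipset-qualified "nouveau/nvXX_<name>" path first and falling
 * back to the generic "nouveau/<name>" before giving up.
 */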
   2433static int
   2434gf100_gr_load_fw(struct gf100_gr *gr, const char *name,
   2435		 struct nvkm_blob *blob)
   2436{
   2437	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
   2438	struct nvkm_device *device = subdev->device;
   2439	const struct firmware *fw;
   2440	char f[32];
   2441	int ret;
   2442
   2443	snprintf(f, sizeof(f), "nouveau/nv%02x_%s", device->chipset, name);
   2444	ret = request_firmware(&fw, f, device->dev);
   2445	if (ret) {
   2446		snprintf(f, sizeof(f), "nouveau/%s", name);
   2447		ret = request_firmware(&fw, f, device->dev);
   2448		if (ret) {
   2449			nvkm_error(subdev, "failed to load %s\n", name);
   2450			return ret;
   2451		}
   2452	}
   2453
   2454	blob->size = fw->size;
   2455	blob->data = kmemdup(fw->data, blob->size, GFP_KERNEL);
   2456	release_firmware(fw);
   2457	return (blob->data != NULL) ? 0 : -ENOMEM;
   2458}
   2459
   2460int
   2461gf100_gr_load(struct gf100_gr *gr, int ver, const struct gf100_gr_fwif *fwif)
   2462{
   2463	struct nvkm_device *device = gr->base.engine.subdev.device;
   2464
   2465	if (!nvkm_boolopt(device->cfgopt, "NvGrUseFW", false))
   2466		return -EINVAL;
   2467
   2468	if (gf100_gr_load_fw(gr, "fuc409c", &gr->fecs.inst) ||
   2469	    gf100_gr_load_fw(gr, "fuc409d", &gr->fecs.data) ||
   2470	    gf100_gr_load_fw(gr, "fuc41ac", &gr->gpccs.inst) ||
   2471	    gf100_gr_load_fw(gr, "fuc41ad", &gr->gpccs.data))
   2472		return -ENOENT;
   2473
   2474	gr->firmware = true;
   2475	return 0;
   2476}
   2477
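/* Firmware selection table: external firmware (opt-in via the
 * "NvGrUseFW" config option) is tried first, falling back to the
 * built-in microcode otherwise.
 */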
   2478static const struct gf100_gr_fwif
   2479gf100_gr_fwif[] = {
   2480	{ -1, gf100_gr_load, &gf100_gr },
   2481	{ -1, gf100_gr_nofw, &gf100_gr },
   2482	{}
   2483};
   2484
   2485int
   2486gf100_gr_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_gr **pgr)
   2487{
   2488	return gf100_gr_new_(gf100_gr_fwif, device, type, inst, pgr);
   2489}