i915_perf.c
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <linux/kref.h>

#include "gem/i915_gem_pm.h"
#include "gt/intel_gt.h"

#include "i915_selftest.h"

#include "igt_flush_test.h"
#include "lib_sw_fence.h"

#define TEST_OA_CONFIG_UUID "12345678-1234-1234-1234-1234567890ab"

static int
alloc_empty_config(struct i915_perf *perf)
{
	struct i915_oa_config *oa_config;

	oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
	if (!oa_config)
		return -ENOMEM;

	oa_config->perf = perf;
	kref_init(&oa_config->ref);

	strlcpy(oa_config->uuid, TEST_OA_CONFIG_UUID, sizeof(oa_config->uuid));

	mutex_lock(&perf->metrics_lock);

	oa_config->id = idr_alloc(&perf->metrics_idr, oa_config, 2, 0, GFP_KERNEL);
	if (oa_config->id < 0) {
		mutex_unlock(&perf->metrics_lock);
		i915_oa_config_put(oa_config);
		return -ENOMEM;
	}

	mutex_unlock(&perf->metrics_lock);

	return 0;
}

static void
destroy_empty_config(struct i915_perf *perf)
{
	struct i915_oa_config *oa_config = NULL, *tmp;
	int id;

	mutex_lock(&perf->metrics_lock);

	idr_for_each_entry(&perf->metrics_idr, tmp, id) {
		if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
			oa_config = tmp;
			break;
		}
	}

	if (oa_config)
		idr_remove(&perf->metrics_idr, oa_config->id);

	mutex_unlock(&perf->metrics_lock);

	if (oa_config)
		i915_oa_config_put(oa_config);
}

static struct i915_oa_config *
get_empty_config(struct i915_perf *perf)
{
	struct i915_oa_config *oa_config = NULL, *tmp;
	int id;

	mutex_lock(&perf->metrics_lock);

	idr_for_each_entry(&perf->metrics_idr, tmp, id) {
		if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
			oa_config = i915_oa_config_get(tmp);
			break;
		}
	}

	mutex_unlock(&perf->metrics_lock);

	return oa_config;
}
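/*
 * Open a minimal OA stream on the render engine, using the empty test
 * config registered by alloc_empty_config(). The OA report format is
 * chosen per platform: the A32u40_A4u32_B8_C8 layout on graphics
 * version 12, the older C4_B8 layout otherwise.
 */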
static struct i915_perf_stream *
test_stream(struct i915_perf *perf)
{
	struct drm_i915_perf_open_param param = {};
	struct i915_oa_config *oa_config = get_empty_config(perf);
	struct perf_open_properties props = {
		.engine = intel_engine_lookup_user(perf->i915,
						   I915_ENGINE_CLASS_RENDER,
						   0),
		.sample_flags = SAMPLE_OA_REPORT,
		.oa_format = GRAPHICS_VER(perf->i915) == 12 ?
		I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8,
	};
	struct i915_perf_stream *stream;

	if (!oa_config)
		return NULL;

	props.metrics_set = oa_config->id;

	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
	if (!stream) {
		i915_oa_config_put(oa_config);
		return NULL;
	}

	stream->perf = perf;

	mutex_lock(&perf->lock);
	if (i915_oa_stream_init(stream, &param, &props)) {
		kfree(stream);
		stream = NULL;
	}
	mutex_unlock(&perf->lock);

	i915_oa_config_put(oa_config);

	return stream;
}

static void stream_destroy(struct i915_perf_stream *stream)
{
	struct i915_perf *perf = stream->perf;

	mutex_lock(&perf->lock);
	i915_perf_destroy_locked(stream);
	mutex_unlock(&perf->lock);
}

static int live_sanitycheck(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_perf_stream *stream;

	/* Quick check we can create a perf stream */

	stream = test_stream(&i915->perf);
	if (!stream)
		return -EINVAL;

	stream_destroy(stream);
	return 0;
}

static int write_timestamp(struct i915_request *rq, int slot)
{
	u32 *cs;
	int len;

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* PIPE_CONTROL grows by one dword on gen8+ for the 64-bit address */
	len = 5;
	if (GRAPHICS_VER(rq->engine->i915) >= 8)
		len++;

	*cs++ = GFX_OP_PIPE_CONTROL(len);
	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB |
		PIPE_CONTROL_STORE_DATA_INDEX |
		PIPE_CONTROL_WRITE_TIMESTAMP;
	*cs++ = slot * sizeof(u32);
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;

	intel_ring_advance(rq, cs);

	return 0;
}

static ktime_t poll_status(struct i915_request *rq, int slot)
{
	while (!intel_read_status_page(rq->engine, slot) &&
	       !i915_request_completed(rq))
		cpu_relax();

	return ktime_get();
}
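/*
 * live_noa_delay: bracket the stream's noa_wait batch between two
 * PIPE_CONTROL timestamp writes into the status page, then compare the
 * measured GPU (and CPU) delay against the programmed
 * noa_programming_delay.
 */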
static int live_noa_delay(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_perf_stream *stream;
	struct i915_request *rq;
	ktime_t t0, t1;
	u64 expected;
	u32 delay;
	int err;
	int i;

	/* Check that the GPU delay matches expectations */

	stream = test_stream(&i915->perf);
	if (!stream)
		return -ENOMEM;

	expected = atomic64_read(&stream->perf->noa_programming_delay);

	if (stream->engine->class != RENDER_CLASS) {
		err = -ENODEV;
		goto out;
	}

	for (i = 0; i < 4; i++)
		intel_write_status_page(stream->engine, 0x100 + i, 0);

	rq = intel_engine_create_kernel_request(stream->engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out;
	}

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
	}

	err = write_timestamp(rq, 0x100);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

	err = rq->engine->emit_bb_start(rq,
					i915_ggtt_offset(stream->noa_wait), 0,
					I915_DISPATCH_SECURE);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

	err = write_timestamp(rq, 0x102);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	preempt_disable();
	t0 = poll_status(rq, 0x100);
	t1 = poll_status(rq, 0x102);
	preempt_enable();

	pr_info("CPU delay: %lluns, expected %lluns\n",
		ktime_sub(t1, t0), expected);

	delay = intel_read_status_page(stream->engine, 0x102);
	delay -= intel_read_status_page(stream->engine, 0x100);
	delay = intel_gt_clock_interval_to_ns(stream->engine->gt, delay);
	pr_info("GPU delay: %uns, expected %lluns\n",
		delay, expected);

	/* Fail if the delay lies outside [3/4, 3/2] of the expected value */
	if (4 * delay < 3 * expected || 2 * delay > 3 * expected) {
		pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n",
		       delay / 1000,
		       div_u64(3 * expected, 4000),
		       div_u64(3 * expected, 2000));
		err = -EINVAL;
	}

	i915_request_put(rq);
out:
	stream_destroy(stream);
	return err;
}
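/*
 * live_noa_gpr: fill the 16 qword [32 dword] CS_GPR registers with
 * STACK_MAGIC, run the noa_wait batch, then read the registers back
 * through the pinned global HWSP. The values must survive, and the
 * context's poisoned scratch page must remain untouched.
 */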
static int live_noa_gpr(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_perf_stream *stream;
	struct intel_context *ce;
	struct i915_request *rq;
	u32 *cs, *store;
	void *scratch;
	u32 gpr0;
	int err;
	int i;

	/* Check that the delay does not clobber user context state (GPR) */

	stream = test_stream(&i915->perf);
	if (!stream)
		return -ENOMEM;

	gpr0 = i915_mmio_reg_offset(GEN8_RING_CS_GPR(stream->engine->mmio_base, 0));

	ce = intel_context_create(stream->engine);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out;
	}

	/* Poison the ce->vm so we detect writes not to the GGTT gt->scratch */
	scratch = __px_vaddr(ce->vm->scratch[0]);
	memset(scratch, POISON_FREE, PAGE_SIZE);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_ce;
	}
	i915_request_get(rq);

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err) {
			i915_request_add(rq);
			goto out_rq;
		}
	}

	/* Fill the 16 qword [32 dword] GPR with a known unlikely value */
	cs = intel_ring_begin(rq, 2 * 32 + 2);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		i915_request_add(rq);
		goto out_rq;
	}

	*cs++ = MI_LOAD_REGISTER_IMM(32);
	for (i = 0; i < 32; i++) {
		*cs++ = gpr0 + i * sizeof(u32);
		*cs++ = STACK_MAGIC;
	}
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	/* Execute the GPU delay */
	err = rq->engine->emit_bb_start(rq,
					i915_ggtt_offset(stream->noa_wait), 0,
					I915_DISPATCH_SECURE);
	if (err) {
		i915_request_add(rq);
		goto out_rq;
	}

	/* Read the GPR back, using the pinned global HWSP for convenience */
	store = memset32(rq->engine->status_page.addr + 512, 0, 32);
	for (i = 0; i < 32; i++) {
		u32 cmd;

		cs = intel_ring_begin(rq, 4);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(rq);
			goto out_rq;
		}

		/* MI_STORE_REGISTER_MEM takes an extra dword on gen8+ (64b address) */
		cmd = MI_STORE_REGISTER_MEM;
		if (GRAPHICS_VER(i915) >= 8)
			cmd++;
		cmd |= MI_USE_GGTT;

		*cs++ = cmd;
		*cs++ = gpr0 + i * sizeof(u32);
		*cs++ = i915_ggtt_offset(rq->engine->status_page.vma) +
			offset_in_page(store) +
			i * sizeof(u32);
		*cs++ = 0;
		intel_ring_advance(rq, cs);
	}

	i915_request_add(rq);

	if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, HZ / 2) < 0) {
		pr_err("noa_wait timed out\n");
		intel_gt_set_wedged(stream->engine->gt);
		err = -EIO;
		goto out_rq;
	}

	/* Verify that the GPRs contain our expected values */
	for (i = 0; i < 32; i++) {
		if (store[i] == STACK_MAGIC)
			continue;

		pr_err("GPR[%d] lost, found:%08x, expected:%08x!\n",
		       i, store[i], STACK_MAGIC);
		err = -EINVAL;
	}

	/* Verify that the user's scratch page was not used for GPR storage */
	if (memchr_inv(scratch, POISON_FREE, PAGE_SIZE)) {
		pr_err("Scratch page overwritten!\n");
		igt_hexdump(scratch, 4096);
		err = -EINVAL;
	}

out_rq:
	i915_request_put(rq);
out_ce:
	intel_context_put(ce);
out:
	stream_destroy(stream);
	return err;
}

int i915_perf_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_sanitycheck),
		SUBTEST(live_noa_delay),
		SUBTEST(live_noa_gpr),
	};
	struct i915_perf *perf = &i915->perf;
	int err;

	/* Skip if i915 perf isn't initialised or the GT is already wedged */
	if (!perf->metrics_kobj || !perf->ops.enable_metric_set)
		return 0;

	if (intel_gt_is_wedged(to_gt(i915)))
		return 0;

	err = alloc_empty_config(&i915->perf);
	if (err)
		return err;

	err = i915_subtests(tests, i915);

	destroy_empty_config(&i915->perf);

	return err;
}