Hi Shekhar, kernel test robot noticed the following build errors: [auto build test ERROR on drm-tip/drm-tip] url: https://github.com/intel-lab-lkp/linux/commits/Shekhar-Chauhan/drm-i915-Add-Wa_14015150844/20230831-000233 base: git://anongit.freedesktop.org/drm/drm-tip drm-tip patch link: https://lore.kernel.org/r/20230830160001.2395993-1-shekhar.chauhan%40intel.com patch subject: [PATCH] drm/i915: Add Wa_14015150844 config: x86_64-defconfig (https://download.01.org/0day-ci/archive/20230831/202308310238.2Dnlp2IN-lkp@xxxxxxxxx/config) compiler: gcc-11 (Debian 11.3.0-12) 11.3.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20230831/202308310238.2Dnlp2IN-lkp@xxxxxxxxx/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@xxxxxxxxx> | Closes: https://lore.kernel.org/oe-kbuild-all/202308310238.2Dnlp2IN-lkp@xxxxxxxxx/ All errors (new ones prefixed by >>): drivers/gpu/drm/i915/gt/intel_workarounds.c: In function 'rcs_engine_wa_init': >> drivers/gpu/drm/i915/gt/intel_workarounds.c:2330:13: error: implicit declaration of function 'IS_DG2_GRAPHICS_STEP'; did you mean 'IS_GRAPHICS_STEP'? [-Werror=implicit-function-declaration] 2330 | if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_C0, STEP_FOREVER) || | ^~~~~~~~~~~~~~~~~~~~ | IS_GRAPHICS_STEP >> drivers/gpu/drm/i915/gt/intel_workarounds.c:2330:40: error: 'G10' undeclared (first use in this function) 2330 | if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_C0, STEP_FOREVER) || | ^~~ drivers/gpu/drm/i915/gt/intel_workarounds.c:2330:40: note: each undeclared identifier is reported only once for each function it appears in >> drivers/gpu/drm/i915/gt/intel_workarounds.c:2331:40: error: 'G11' undeclared (first use in this function) 2331 | IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) || | ^~~ >> drivers/gpu/drm/i915/gt/intel_workarounds.c:2333:13: error: implicit declaration of function 'IS_MTL_GRAPHICS_STEP'; did you mean 'INTEL_GRAPHICS_STEP'? [-Werror=implicit-function-declaration] 2333 | IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_FOREVER) || | ^~~~~~~~~~~~~~~~~~~~ | INTEL_GRAPHICS_STEP >> drivers/gpu/drm/i915/gt/intel_workarounds.c:2333:40: error: 'M' undeclared (first use in this function) 2333 | IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_FOREVER) || | ^ >> drivers/gpu/drm/i915/gt/intel_workarounds.c:2334:40: error: 'P' undeclared (first use in this function) 2334 | IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_FOREVER)) { | ^ cc1: some warnings being treated as errors vim +2330 drivers/gpu/drm/i915/gt/intel_workarounds.c 2292 2293 static void 2294 rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) 2295 { 2296 struct drm_i915_private *i915 = engine->i915; 2297 struct intel_gt *gt = engine->gt; 2298 2299 if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || 2300 IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { 2301 /* Wa_22014600077 */ 2302 wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, 2303 ENABLE_EU_COUNT_FOR_TDL_FLUSH); 2304 } 2305 2306 if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || 2307 IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || 2308 IS_DG2(i915)) { 2309 /* Wa_1509727124 */ 2310 wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, 2311 SC_DISABLE_POWER_OPTIMIZATION_EBB); 2312 } 2313 2314 if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || 2315 IS_DG2(i915)) { 2316 /* Wa_22012856258 */ 2317 wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, 2318 GEN12_DISABLE_READ_SUPPRESSION); 2319 } 2320 2321 if (IS_DG2(i915)) { 2322 /* 2323 * Wa_22010960976:dg2 2324 * Wa_14013347512:dg2 2325 */ 2326 wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0, 2327 LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK); 2328 } 2329 > 2330 if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_C0, STEP_FOREVER) || > 2331 IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) || 2332 IS_DG2_G12(i915) || > 2333 IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_FOREVER) || > 2334 IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_FOREVER)) { 2335 /* Wa_14015150844 */ 2336 wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0, 2337 ATOMIC_CHAINING_TYPED_WRITES); 2338 } 2339 2340 if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) { 2341 /* Wa_22014600077:dg2 */ 2342 wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, 2343 _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH), 2344 0 /* Wa_14012342262 write-only reg, so skip verification */, 2345 true); 2346 } 2347 2348 if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || 2349 IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { 2350 /* 2351 * Wa_1606700617:tgl,dg1,adl-p 2352 * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p 2353 * Wa_14010826681:tgl,dg1,rkl,adl-p 2354 * Wa_18019627453:dg2 2355 */ 2356 wa_masked_en(wal, 2357 GEN9_CS_DEBUG_MODE1, 2358 FF_DOP_CLOCK_GATE_DISABLE); 2359 } 2360 2361 if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) || 2362 IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { 2363 /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */ 2364 wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ); 2365 2366 /* 2367 * Wa_1407928979:tgl A* 2368 * Wa_18011464164:tgl[B0+],dg1[B0+] 2369 * Wa_22010931296:tgl[B0+],dg1[B0+] 2370 * Wa_14010919138:rkl,dg1,adl-s,adl-p 2371 */ 2372 wa_write_or(wal, GEN7_FF_THREAD_MODE, 2373 GEN12_FF_TESSELATION_DOP_GATE_DISABLE); 2374 2375 /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */ 2376 wa_mcr_masked_en(wal, 2377 GEN10_SAMPLER_MODE, 2378 ENABLE_SMALLPL); 2379 } 2380 2381 if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || 2382 IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { 2383 /* Wa_1409804808 */ 2384 wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, 2385 GEN12_PUSH_CONST_DEREF_HOLD_DIS); 2386 2387 /* Wa_14010229206 */ 2388 wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH); 2389 } 2390 2391 if (IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) { 2392 /* 2393 * Wa_1607297627 2394 * 2395 * On TGL and RKL there are multiple entries for this WA in the 2396 * BSpec; some indicate this is an A0-only WA, others indicate 2397 * it applies to all steppings so we trust the "all steppings." 2398 */ 2399 wa_masked_en(wal, 2400 RING_PSMI_CTL(RENDER_RING_BASE), 2401 GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | 2402 GEN8_RC_SEMA_IDLE_MSG_DISABLE); 2403 } 2404 2405 if (GRAPHICS_VER(i915) == 11) { 2406 /* This is not an Wa. Enable for better image quality */ 2407 wa_masked_en(wal, 2408 _3D_CHICKEN3, 2409 _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE); 2410 2411 /* 2412 * Wa_1405543622:icl 2413 * Formerly known as WaGAPZPriorityScheme 2414 */ 2415 wa_write_or(wal, 2416 GEN8_GARBCNTL, 2417 GEN11_ARBITRATION_PRIO_ORDER_MASK); 2418 2419 /* 2420 * Wa_1604223664:icl 2421 * Formerly known as WaL3BankAddressHashing 2422 */ 2423 wa_write_clr_set(wal, 2424 GEN8_GARBCNTL, 2425 GEN11_HASH_CTRL_EXCL_MASK, 2426 GEN11_HASH_CTRL_EXCL_BIT0); 2427 wa_write_clr_set(wal, 2428 GEN11_GLBLINVL, 2429 GEN11_BANK_HASH_ADDR_EXCL_MASK, 2430 GEN11_BANK_HASH_ADDR_EXCL_BIT0); 2431 2432 /* 2433 * Wa_1405733216:icl 2434 * Formerly known as WaDisableCleanEvicts 2435 */ 2436 wa_mcr_write_or(wal, 2437 GEN8_L3SQCREG4, 2438 GEN11_LQSC_CLEAN_EVICT_DISABLE); 2439 2440 /* Wa_1606682166:icl */ 2441 wa_write_or(wal, 2442 GEN7_SARCHKMD, 2443 GEN7_DISABLE_SAMPLER_PREFETCH); 2444 2445 /* Wa_1409178092:icl */ 2446 wa_mcr_write_clr_set(wal, 2447 GEN11_SCRATCH2, 2448 GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE, 2449 0); 2450 2451 /* WaEnable32PlaneMode:icl */ 2452 wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS, 2453 GEN11_ENABLE_32_PLANE_MODE); 2454 2455 /* 2456 * Wa_1408767742:icl[a2..forever],ehl[all] 2457 * Wa_1605460711:icl[a0..c0] 2458 */ 2459 wa_write_or(wal, 2460 GEN7_FF_THREAD_MODE, 2461 GEN12_FF_TESSELATION_DOP_GATE_DISABLE); 2462 2463 /* Wa_22010271021 */ 2464 wa_masked_en(wal, 2465 GEN9_CS_DEBUG_MODE1, 2466 FF_DOP_CLOCK_GATE_DISABLE); 2467 } 2468 2469 /* 2470 * Intel platforms that support fine-grained preemption (i.e., gen9 and 2471 * beyond) allow the kernel-mode driver to choose between two different 2472 * options for controlling preemption granularity and behavior. 2473 * 2474 * Option 1 (hardware default): 2475 * Preemption settings are controlled in a global manner via 2476 * kernel-only register CS_DEBUG_MODE1 (0x20EC). Any granularity 2477 * and settings chosen by the kernel-mode driver will apply to all 2478 * userspace clients. 2479 * 2480 * Option 2: 2481 * Preemption settings are controlled on a per-context basis via 2482 * register CS_CHICKEN1 (0x2580). CS_CHICKEN1 is saved/restored on 2483 * context switch and is writable by userspace (e.g., via 2484 * MI_LOAD_REGISTER_IMMEDIATE instructions placed in a batch buffer) 2485 * which allows different userspace drivers/clients to select 2486 * different settings, or to change those settings on the fly in 2487 * response to runtime needs. This option was known by name 2488 * "FtrPerCtxtPreemptionGranularityControl" at one time, although 2489 * that name is somewhat misleading as other non-granularity 2490 * preemption settings are also impacted by this decision. 2491 * 2492 * On Linux, our policy has always been to let userspace drivers 2493 * control preemption granularity/settings (Option 2). This was 2494 * originally mandatory on gen9 to prevent ABI breakage (old gen9 2495 * userspace developed before object-level preemption was enabled would 2496 * not behave well if i915 were to go with Option 1 and enable that 2497 * preemption in a global manner). On gen9 each context would have 2498 * object-level preemption disabled by default (see 2499 * WaDisable3DMidCmdPreemption in gen9_ctx_workarounds_init), but 2500 * userspace drivers could opt-in to object-level preemption as they 2501 * saw fit. For post-gen9 platforms, we continue to utilize Option 2; 2502 * even though it is no longer necessary for ABI compatibility when 2503 * enabling a new platform, it does ensure that userspace will be able 2504 * to implement any workarounds that show up requiring temporary 2505 * adjustments to preemption behavior at runtime. 2506 * 2507 * Notes/Workarounds: 2508 * - Wa_14015141709: On DG2 and early steppings of MTL, 2509 * CS_CHICKEN1[0] does not disable object-level preemption as 2510 * it is supposed to (nor does CS_DEBUG_MODE1[0] if we had been 2511 * using Option 1). Effectively this means userspace is unable 2512 * to disable object-level preemption on these platforms/steppings 2513 * despite the setting here. 2514 * 2515 * - Wa_16013994831: May require that userspace program 2516 * CS_CHICKEN1[10] when certain runtime conditions are true. 2517 * Userspace requires Option 2 to be in effect for their update of 2518 * CS_CHICKEN1[10] to be effective. 2519 * 2520 * Other workarounds may appear in the future that will also require 2521 * Option 2 behavior to allow proper userspace implementation. 2522 */ 2523 if (GRAPHICS_VER(i915) >= 9) 2524 wa_masked_en(wal, 2525 GEN7_FF_SLICE_CS_CHICKEN1, 2526 GEN9_FFSC_PERCTX_PREEMPT_CTRL); 2527 2528 if (IS_SKYLAKE(i915) || 2529 IS_KABYLAKE(i915) || 2530 IS_COFFEELAKE(i915) || 2531 IS_COMETLAKE(i915)) { 2532 /* WaEnableGapsTsvCreditFix:skl,kbl,cfl */ 2533 wa_write_or(wal, 2534 GEN8_GARBCNTL, 2535 GEN9_GAPS_TSV_CREDIT_DISABLE); 2536 } 2537 2538 if (IS_BROXTON(i915)) { 2539 /* WaDisablePooledEuLoadBalancingFix:bxt */ 2540 wa_masked_en(wal, 2541 FF_SLICE_CS_CHICKEN2, 2542 GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE); 2543 } 2544 2545 if (GRAPHICS_VER(i915) == 9) { 2546 /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */ 2547 wa_masked_en(wal, 2548 GEN9_CSFE_CHICKEN1_RCS, 2549 GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE); 2550 2551 /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */ 2552 wa_mcr_write_or(wal, 2553 BDW_SCRATCH1, 2554 GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); 2555 2556 /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */ 2557 if (IS_GEN9_LP(i915)) 2558 wa_mcr_write_clr_set(wal, 2559 GEN8_L3SQCREG1, 2560 L3_PRIO_CREDITS_MASK, 2561 L3_GENERAL_PRIO_CREDITS(62) | 2562 L3_HIGH_PRIO_CREDITS(2)); 2563 2564 /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */ 2565 wa_mcr_write_or(wal, 2566 GEN8_L3SQCREG4, 2567 GEN8_LQSC_FLUSH_COHERENT_LINES); 2568 2569 /* Disable atomics in L3 to prevent unrecoverable hangs */ 2570 wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1, 2571 GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0); 2572 wa_mcr_write_clr_set(wal, GEN8_L3SQCREG4, 2573 GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0); 2574 wa_mcr_write_clr_set(wal, GEN9_SCRATCH1, 2575 EVICTION_PERF_FIX_ENABLE, 0); 2576 } 2577 2578 if (IS_HASWELL(i915)) { 2579 /* WaSampleCChickenBitEnable:hsw */ 2580 wa_masked_en(wal, 2581 HSW_HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE); 2582 2583 wa_masked_dis(wal, 2584 CACHE_MODE_0_GEN7, 2585 /* enable HiZ Raw Stall Optimization */ 2586 HIZ_RAW_STALL_OPT_DISABLE); 2587 } 2588 2589 if (IS_VALLEYVIEW(i915)) { 2590 /* WaDisableEarlyCull:vlv */ 2591 wa_masked_en(wal, 2592 _3D_CHICKEN3, 2593 _3D_CHICKEN_SF_DISABLE_OBJEND_CULL); 2594 2595 /* 2596 * WaVSThreadDispatchOverride:ivb,vlv 2597 * 2598 * This actually overrides the dispatch 2599 * mode for all thread types. 2600 */ 2601 wa_write_clr_set(wal, 2602 GEN7_FF_THREAD_MODE, 2603 GEN7_FF_SCHED_MASK, 2604 GEN7_FF_TS_SCHED_HW | 2605 GEN7_FF_VS_SCHED_HW | 2606 GEN7_FF_DS_SCHED_HW); 2607 2608 /* WaPsdDispatchEnable:vlv */ 2609 /* WaDisablePSDDualDispatchEnable:vlv */ 2610 wa_masked_en(wal, 2611 GEN7_HALF_SLICE_CHICKEN1, 2612 GEN7_MAX_PS_THREAD_DEP | 2613 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); 2614 } 2615 2616 if (IS_IVYBRIDGE(i915)) { 2617 /* WaDisableEarlyCull:ivb */ 2618 wa_masked_en(wal, 2619 _3D_CHICKEN3, 2620 _3D_CHICKEN_SF_DISABLE_OBJEND_CULL); 2621 2622 if (0) { /* causes HiZ corruption on ivb:gt1 */ 2623 /* enable HiZ Raw Stall Optimization */ 2624 wa_masked_dis(wal, 2625 CACHE_MODE_0_GEN7, 2626 HIZ_RAW_STALL_OPT_DISABLE); 2627 } 2628 2629 /* 2630 * WaVSThreadDispatchOverride:ivb,vlv 2631 * 2632 * This actually overrides the dispatch 2633 * mode for all thread types. 2634 */ 2635 wa_write_clr_set(wal, 2636 GEN7_FF_THREAD_MODE, 2637 GEN7_FF_SCHED_MASK, 2638 GEN7_FF_TS_SCHED_HW | 2639 GEN7_FF_VS_SCHED_HW | 2640 GEN7_FF_DS_SCHED_HW); 2641 2642 /* WaDisablePSDDualDispatchEnable:ivb */ 2643 if (IS_IVB_GT1(i915)) 2644 wa_masked_en(wal, 2645 GEN7_HALF_SLICE_CHICKEN1, 2646 GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); 2647 } 2648 2649 if (GRAPHICS_VER(i915) == 7) { 2650 /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */ 2651 wa_masked_en(wal, 2652 RING_MODE_GEN7(RENDER_RING_BASE), 2653 GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE); 2654 2655 /* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */ 2656 wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); 2657 2658 /* 2659 * BSpec says this must be set, even though 2660 * WaDisable4x2SubspanOptimization:ivb,hsw 2661 * WaDisable4x2SubspanOptimization isn't listed for VLV. 2662 */ 2663 wa_masked_en(wal, 2664 CACHE_MODE_1, 2665 PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); 2666 2667 /* 2668 * BSpec recommends 8x4 when MSAA is used, 2669 * however in practice 16x4 seems fastest. 2670 * 2671 * Note that PS/WM thread counts depend on the WIZ hashing 2672 * disable bit, which we don't touch here, but it's good 2673 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 2674 */ 2675 wa_masked_field_set(wal, 2676 GEN7_GT_MODE, 2677 GEN6_WIZ_HASHING_MASK, 2678 GEN6_WIZ_HASHING_16x4); 2679 } 2680 2681 if (IS_GRAPHICS_VER(i915, 6, 7)) 2682 /* 2683 * We need to disable the AsyncFlip performance optimisations in 2684 * order to use MI_WAIT_FOR_EVENT within the CS. It should 2685 * already be programmed to '1' on all products. 2686 * 2687 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv 2688 */ 2689 wa_masked_en(wal, 2690 RING_MI_MODE(RENDER_RING_BASE), 2691 ASYNC_FLIP_PERF_DISABLE); 2692 2693 if (GRAPHICS_VER(i915) == 6) { 2694 /* 2695 * Required for the hardware to program scanline values for 2696 * waiting 2697 * WaEnableFlushTlbInvalidationMode:snb 2698 */ 2699 wa_masked_en(wal, 2700 GFX_MODE, 2701 GFX_TLB_INVALIDATE_EXPLICIT); 2702 2703 /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ 2704 wa_masked_en(wal, 2705 _3D_CHICKEN, 2706 _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB); 2707 2708 wa_masked_en(wal, 2709 _3D_CHICKEN3, 2710 /* WaStripsFansDisableFastClipPerformanceFix:snb */ 2711 _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL | 2712 /* 2713 * Bspec says: 2714 * "This bit must be set if 3DSTATE_CLIP clip mode is set 2715 * to normal and 3DSTATE_SF number of SF output attributes 2716 * is more than 16." 2717 */ 2718 _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH); 2719 2720 /* 2721 * BSpec recommends 8x4 when MSAA is used, 2722 * however in practice 16x4 seems fastest. 2723 * 2724 * Note that PS/WM thread counts depend on the WIZ hashing 2725 * disable bit, which we don't touch here, but it's good 2726 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 2727 */ 2728 wa_masked_field_set(wal, 2729 GEN6_GT_MODE, 2730 GEN6_WIZ_HASHING_MASK, 2731 GEN6_WIZ_HASHING_16x4); 2732 2733 /* WaDisable_RenderCache_OperationalFlush:snb */ 2734 wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); 2735 2736 /* 2737 * From the Sandybridge PRM, volume 1 part 3, page 24: 2738 * "If this bit is set, STCunit will have LRA as replacement 2739 * policy. [...] This bit must be reset. LRA replacement 2740 * policy is not supported." 2741 */ 2742 wa_masked_dis(wal, 2743 CACHE_MODE_0, 2744 CM0_STC_EVICT_DISABLE_LRA_SNB); 2745 } 2746 2747 if (IS_GRAPHICS_VER(i915, 4, 6)) 2748 /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ 2749 wa_add(wal, RING_MI_MODE(RENDER_RING_BASE), 2750 0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH), 2751 /* XXX bit doesn't stick on Broadwater */ 2752 IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH, true); 2753 2754 if (GRAPHICS_VER(i915) == 4) 2755 /* 2756 * Disable CONSTANT_BUFFER before it is loaded from the context 2757 * image. For as it is loaded, it is executed and the stored 2758 * address may no longer be valid, leading to a GPU hang. 2759 * 2760 * This imposes the requirement that userspace reload their 2761 * CONSTANT_BUFFER on every batch, fortunately a requirement 2762 * they are already accustomed to from before contexts were 2763 * enabled. 2764 */ 2765 wa_add(wal, ECOSKPD(RENDER_RING_BASE), 2766 0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE), 2767 0 /* XXX bit doesn't stick on Broadwater */, 2768 true); 2769 } 2770 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki