Hi David, David Weinehall <david.weinehall@xxxxxxxxxxxxxxx> writes: > Add a bunch of MOCS entries for gen 9 that were missing from intel_mocs. > Some of these are used by media-sdk; if these entries are missing > the default will instead be to do everything uncached. > Keep in mind that MOCS entries are not for free -- Once you expose a new entry it will become part of the kernel ABI and we won't ever be able to re-use them for another purpose. What this means is we probably don't want to add a bunch of entries if only "some" of them are actually useful to the open-source stack. Some more comments below. > This patch improves media-sdk performance with up to 60% > with the (admittedly synthetic) benchmarks we use in our nightly > testing, without regressing any other benchmarks. > > Signed-off-by: David Weinehall <david.weinehall@xxxxxxxxxxxxxxx> > > diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c > index 92e461c68385..5236a442a14f 100644 > --- a/drivers/gpu/drm/i915/intel_mocs.c > +++ b/drivers/gpu/drm/i915/intel_mocs.c > @@ -103,7 +103,6 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > LE_PFM(0) | LE_SCF(0), > - > /* 0x0010 */ > .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > }, > @@ -123,7 +122,133 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { > LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > LE_PFM(0) | LE_SCF(0), > /* 0x0030 */ > - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + }, > + { Apparently somebody started defining symbolic enums for the current MOCS entries in i915_drm.h, which I guess makes this more obviously tied to the kernel ABI. I suppose it wouldn't hurt if you did the same. > + /* 0x00000037 */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_LLC) | > + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0030 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + }, > + { > + /* 0x00000039 */ > + .control_value = LE_CACHEABILITY(LE_UC) | > + LE_TGT_CACHE(LE_TC_LLC_ELLC) | > + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0030 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + }, > + { > + /* 0x00000033 */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_PAGETABLE) | > + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0030 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + }, What's the use of an entry with PTE-controlled target cache if you're overriding the cacheability control to WB anyway? Seems effectively redundant with the current I915_MOCS_CACHED entry. Same for the 0x33:0x10, 0x13:0x10 and 0x3:0x10 entries below. > + { > + /* 0x00000017 */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_LLC) | > + LE_LRUM(1) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0030 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + }, > + { > + /* 0x00000037 */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_LLC) | > + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x00000039 */ > + .control_value = LE_CACHEABILITY(LE_UC) | > + LE_TGT_CACHE(LE_TC_LLC_ELLC) | > + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, This entry is UC in all caches and seems redundant with the current I915_MOCS_UNCACHED entry. Why do you need it? > + { > + /* 0x00000017 */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_LLC) | > + LE_LRUM(1) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x0000003b */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_LLC_ELLC) | > + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x00000033 */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_PAGETABLE) | > + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x00000019 */ > + .control_value = LE_CACHEABILITY(LE_UC) | > + LE_TGT_CACHE(LE_TC_LLC_ELLC) | > + LE_LRUM(1) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, Another entry marked UC for all caches. > + { > + /* 0x00000003 */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_PAGETABLE) | > + LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x0000001b */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_LLC_ELLC) | > + LE_LRUM(1) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x0000001b */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_LLC_ELLC) | > + LE_LRUM(1) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0030 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + }, > + { > + /* 0x00000013 */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_PAGETABLE) | > + LE_LRUM(1) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > }, > }; > > @@ -135,7 +260,6 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > LE_PFM(0) | LE_SCF(0), > - > /* 0x0010 */ > .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > }, > @@ -145,7 +269,6 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > LE_PFM(0) | LE_SCF(0), > - > /* 0x0030 */ > .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > }, > @@ -155,10 +278,36 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > LE_PFM(0) | LE_SCF(0), > - > /* 0x0030 */ > .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > }, > + { > + /* 0x00000005 */ > + .control_value = LE_CACHEABILITY(LE_UC) | > + LE_TGT_CACHE(LE_TC_LLC) | > + LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0030 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + }, > + { > + /* 0x00000001 */ > + .control_value = LE_CACHEABILITY(LE_UC) | > + LE_TGT_CACHE(LE_TC_PAGETABLE) | > + LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x00000005 */ > + .control_value = LE_CACHEABILITY(LE_UC) | > + LE_TGT_CACHE(LE_TC_LLC) | > + LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > }; > > /** > --- > drivers/gpu/drm/i915/intel_mocs.c | 159 ++++++++++++++++++++++++++++++++++++-- > 1 file changed, 154 insertions(+), 5 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c > index 92e461c68385..5236a442a14f 100644 Huh? Duplicate patch follows? > [...]
Attachment:
signature.asc
Description: PGP signature
_______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx