On Wed, Apr 26, 2017 at 06:00:41PM +0300, David Weinehall wrote: > Add a bunch of MOCS entries for gen 9 that were missing from intel_mocs. > Some of these are used by media-sdk; if these entries are missing > the default will instead be to do everything uncached. > > This patch improves media-sdk performance with up to 60% > with the (admittedly synthetic) benchmarks we use in our nightly > testing, without regressing any other benchmarks. > > Signed-off-by: David Weinehall <david.weinehall@xxxxxxxxxxxxxxx> Hm I guess this discussion is sprawling a bit too much, so top-level summary: - this is kernel/userspace api - which means we must have the corresponding userspace as open-source patches proposed to be merged into the corresponding upstream repos (libva-intel it seems here, since this is for media). Without that this patch is 100% moot for upstream consideration. I also assume that as soon as we have the userspace side open and published for review, all the questions around GETPARAM and how to allocate MOCS entries will be much easier to understand and make a meaningful decision. As-is, half of the picture is flat-out missing, any decision we might make is pretty much guaranteed to be bogus. Which incidentally is why upstream wants both sides of the story as open-source. 
Thanks, Daniel > > diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c > index 92e461c68385..5236a442a14f 100644 > --- a/drivers/gpu/drm/i915/intel_mocs.c > +++ b/drivers/gpu/drm/i915/intel_mocs.c > @@ -103,7 +103,6 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > LE_PFM(0) | LE_SCF(0), > - > /* 0x0010 */ > .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > }, > @@ -123,7 +122,133 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { > LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > LE_PFM(0) | LE_SCF(0), > /* 0x0030 */ > - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + }, > + { > + /* 0x00000037 */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_LLC) | > + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0030 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + }, > + { > + /* 0x00000039 */ > + .control_value = LE_CACHEABILITY(LE_UC) | > + LE_TGT_CACHE(LE_TC_LLC_ELLC) | > + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0030 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + }, > + { > + /* 0x00000033 */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_PAGETABLE) | > + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0030 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + }, > + { > + /* 0x00000017 */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_LLC) | > + LE_LRUM(1) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0030 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + }, > + { > + /* 0x00000037 */ > + .control_value = 
LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_LLC) | > + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x00000039 */ > + .control_value = LE_CACHEABILITY(LE_UC) | > + LE_TGT_CACHE(LE_TC_LLC_ELLC) | > + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x00000017 */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_LLC) | > + LE_LRUM(1) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x0000003b */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_LLC_ELLC) | > + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x00000033 */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_PAGETABLE) | > + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x00000019 */ > + .control_value = LE_CACHEABILITY(LE_UC) | > + LE_TGT_CACHE(LE_TC_LLC_ELLC) | > + LE_LRUM(1) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x00000003 */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_PAGETABLE) | > + LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x0000001b */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_LLC_ELLC) | > 
+ LE_LRUM(1) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x0000001b */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_LLC_ELLC) | > + LE_LRUM(1) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0030 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + }, > + { > + /* 0x00000013 */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_PAGETABLE) | > + LE_LRUM(1) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > }, > }; > > @@ -135,7 +260,6 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > LE_PFM(0) | LE_SCF(0), > - > /* 0x0010 */ > .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > }, > @@ -145,7 +269,6 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > LE_PFM(0) | LE_SCF(0), > - > /* 0x0030 */ > .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > }, > @@ -155,10 +278,36 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > LE_PFM(0) | LE_SCF(0), > - > /* 0x0030 */ > .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > }, > + { > + /* 0x00000005 */ > + .control_value = LE_CACHEABILITY(LE_UC) | > + LE_TGT_CACHE(LE_TC_LLC) | > + LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0030 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + }, > + { > + /* 0x00000001 */ > + .control_value = LE_CACHEABILITY(LE_UC) | > + LE_TGT_CACHE(LE_TC_PAGETABLE) | > + LE_LRUM(0) | LE_AOM(0) | 
LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x00000005 */ > + .control_value = LE_CACHEABILITY(LE_UC) | > + LE_TGT_CACHE(LE_TC_LLC) | > + LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > }; > > /** > --- > drivers/gpu/drm/i915/intel_mocs.c | 159 ++++++++++++++++++++++++++++++++++++-- > 1 file changed, 154 insertions(+), 5 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c > index 92e461c68385..5236a442a14f 100644 > --- a/drivers/gpu/drm/i915/intel_mocs.c > +++ b/drivers/gpu/drm/i915/intel_mocs.c > @@ -103,7 +103,6 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > LE_PFM(0) | LE_SCF(0), > - > /* 0x0010 */ > .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > }, > @@ -123,7 +122,133 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = { > LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > LE_PFM(0) | LE_SCF(0), > /* 0x0030 */ > - .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + }, > + { > + /* 0x00000037 */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_LLC) | > + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0030 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + }, > + { > + /* 0x00000039 */ > + .control_value = LE_CACHEABILITY(LE_UC) | > + LE_TGT_CACHE(LE_TC_LLC_ELLC) | > + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0030 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + }, > + { > + /* 0x00000033 */ > + .control_value = 
LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_PAGETABLE) | > + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0030 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + }, > + { > + /* 0x00000017 */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_LLC) | > + LE_LRUM(1) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0030 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + }, > + { > + /* 0x00000037 */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_LLC) | > + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x00000039 */ > + .control_value = LE_CACHEABILITY(LE_UC) | > + LE_TGT_CACHE(LE_TC_LLC_ELLC) | > + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x00000017 */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_LLC) | > + LE_LRUM(1) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x0000003b */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_LLC_ELLC) | > + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x00000033 */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_PAGETABLE) | > + LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x00000019 */ > + .control_value = LE_CACHEABILITY(LE_UC) | > + LE_TGT_CACHE(LE_TC_LLC_ELLC) | > + 
LE_LRUM(1) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x00000003 */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_PAGETABLE) | > + LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x0000001b */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_LLC_ELLC) | > + LE_LRUM(1) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x0000001b */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_LLC_ELLC) | > + LE_LRUM(1) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0030 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + }, > + { > + /* 0x00000013 */ > + .control_value = LE_CACHEABILITY(LE_WB) | > + LE_TGT_CACHE(LE_TC_PAGETABLE) | > + LE_LRUM(1) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > }, > }; > > @@ -135,7 +260,6 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > LE_PFM(0) | LE_SCF(0), > - > /* 0x0010 */ > .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > }, > @@ -145,7 +269,6 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > LE_LRUM(3) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > LE_PFM(0) | LE_SCF(0), > - > /* 0x0030 */ > .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > }, > @@ -155,10 +278,36 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { > LE_TGT_CACHE(LE_TC_LLC_ELLC) | > LE_LRUM(3) | LE_AOM(0) 
| LE_RSC(0) | LE_SCC(0) | > LE_PFM(0) | LE_SCF(0), > - > /* 0x0030 */ > .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > }, > + { > + /* 0x00000005 */ > + .control_value = LE_CACHEABILITY(LE_UC) | > + LE_TGT_CACHE(LE_TC_LLC) | > + LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0030 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB), > + }, > + { > + /* 0x00000001 */ > + .control_value = LE_CACHEABILITY(LE_UC) | > + LE_TGT_CACHE(LE_TC_PAGETABLE) | > + LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > + { > + /* 0x00000005 */ > + .control_value = LE_CACHEABILITY(LE_UC) | > + LE_TGT_CACHE(LE_TC_LLC) | > + LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) | > + LE_PFM(0) | LE_SCF(0), > + /* 0x0010 */ > + .l3cc_value = L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC), > + }, > }; > > /** > -- > 2.11.0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx