2017-06-26 14:56 GMT+02:00 Lucas Stach <l.stach@xxxxxxxxxxxxxx>: > Am Freitag, den 09.06.2017, 12:21 +0200 schrieb Christian Gmeiner: >> Sadly we can not read any registers via command stream so we need >> to extend the drm_etnaviv_gem_submit struct with performance monitor >> requests. Those requests gets process before and/or after the actual >> submitted command stream. >> >> The Vivante kernel driver has a special ioctl to read all perfmon >> registers at once and return it. There is no connection between >> a specifc command stream and the read perf values. >> >> Signed-off-by: Christian Gmeiner <christian.gmeiner@xxxxxxxxx> >> --- >> include/uapi/drm/etnaviv_drm.h | 16 ++++++++++++++++ >> 1 file changed, 16 insertions(+) >> >> diff --git a/include/uapi/drm/etnaviv_drm.h >> b/include/uapi/drm/etnaviv_drm.h >> index 9e488a0..b67a7dd 100644 >> --- a/include/uapi/drm/etnaviv_drm.h >> +++ b/include/uapi/drm/etnaviv_drm.h >> @@ -150,6 +150,19 @@ struct drm_etnaviv_gem_submit_bo { >> __u64 presumed; /* in/out, presumed buffer address */ >> }; >> >> +/* performance monitor request (pmr) */ >> +#define ETNA_PM_PROCESS_PRE 0x0001 >> +#define ETNA_PM_PROCESS_POST 0x0002 >> +struct drm_etnaviv_gem_submit_pmr { >> + __u32 flags; /* in, when to process request >> (ETNA_PM_PROCESS_x) */ >> + __u8 domain; /* in, pm domain */ >> + __u8 signal; /* in, pm signal */ > > Are we sure that no domain will ever have more than 256 signals? > My crystal ball is not working as expected so I don't know - but I will change it to a __u16. >> + __u16 pad[3]; > > Unnecessary large pad. A single u16 is enough to naturally align the > next member. > Correct. >> + __u32 sequence; /* in, sequence number */ >> + __u32 read_offset; /* in, offset from read_bo */ >> + __u32 read_idx; /* in, index of read_bo buffer */ >> +}; >> + >> /* Each cmdstream submit consists of a table of buffers involved, >> and >> * one or more cmdstream buffers. This allows for conditional >> execution >> * (context-restore), and IB buffers needed for per tile/bin draw >> cmds. >> @@ -175,6 +188,9 @@ struct drm_etnaviv_gem_submit { >> __u64 stream; /* in, ptr to cmdstream */ >> __u32 flags; /* in, mask of ETNA_SUBMIT_x */ >> __s32 fence_fd; /* in/out, fence fd (see >> ETNA_SUBMIT_FENCE_FD_x) */ >> + __u64 pmrs; /* in, ptr to array of submit_pmr's */ >> + __u32 nr_pmrs; /* in, number of submit_pmr's */ >> + __u32 pad; > > This pad isn't needed. > * Pad the entire struct to a multiple of 64-bits if the structure contains 64-bit types - the structure size will otherwise differ on 32-bit versus 64-bit. Having a different structure size hurts when passing arrays of structures to the kernel, or if the kernel checks the structure size, which e.g. the drm core does. $ pahole drivers/gpu/drm/etnaviv/etnaviv.o -C drm_etnaviv_gem_submit struct drm_etnaviv_gem_submit { __u32 fence; /* 0 4 */ __u32 pipe; /* 4 4 */ __u32 exec_state; /* 8 4 */ __u32 nr_bos; /* 12 4 */ __u32 nr_relocs; /* 16 4 */ __u32 stream_size; /* 20 4 */ __u64 bos; /* 24 8 */ __u64 relocs; /* 32 8 */ __u64 stream; /* 40 8 */ __u32 flags; /* 48 4 */ __s32 fence_fd; /* 52 4 */ __u64 pmrs; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ __u32 nr_pmrs; /* 64 4 */ __u32 pad; /* 68 4 */ /* size: 72, cachelines: 2, members: 14 */ /* last cacheline: 8 bytes */ }; >> }; >> >> /* The normal way to synchronize with the GPU is just to CPU_PREP on greets -- Christian Gmeiner, MSc https://www.youtube.com/user/AloryOFFICIAL https://soundcloud.com/christian-gmeiner _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel