23.06.2020 15:09, Mikko Perttunen пишет: > /* Command is an opcode gather from a GEM handle */ > #define DRM_TEGRA_SUBMIT_COMMAND_GATHER 0 > /* Command is an opcode gather from a user pointer */ > #define DRM_TEGRA_SUBMIT_COMMAND_GATHER_UPTR 1 > /* Command is a wait for syncpt fence completion */ > #define DRM_TEGRA_SUBMIT_COMMAND_WAIT_SYNCPT 2 > /* Command is a wait for SYNC_FILE FD completion */ > #define DRM_TEGRA_SUBMIT_COMMAND_WAIT_SYNC_FILE 3 > /* Command is a wait for DRM syncobj completion */ > #define DRM_TEGRA_SUBMIT_COMMAND_WAIT_SYNCOBJ 4 > > /* > * Allow driver to skip command execution if engine > * was not accessed by another channel between > * submissions. > */ > #define DRM_TEGRA_SUBMIT_CONTEXT_SETUP (1<<0) > > struct drm_tegra_submit_command { > __u16 type; > __u16 flags; Shouldn't the "packed" attribute be needed if a non-32bit aligned fields are used? > union { > struct { > /* GEM handle */ > __u32 handle; > > /* > * Offset into GEM object in bytes. > * Must be aligned by 4. > */ > __u64 offset; 64bits for a gather offset is a bit too much, in most cases gathers are under 4K. u32 should be more than enough (maybe even u16 if offset is given in a dword granularity). > /* > * Length of gather in bytes. > * Must be aligned by 4. > */ > __u64 length; u32/16 > } gather; > struct { > __u32 reserved[1]; > > /* > * Pointer to gather data. > * Must be aligned by 4 bytes. > */ > __u64 base; > /* > * Length of gather in bytes. > * Must be aligned by 4. > */ > __u64 length; > } gather_uptr; What about to inline the UPTR gather data and relocs into the drm_tegra_submit_command[] buffer: struct drm_tegra_submit_command { struct { u16 num_words; u16 num_relocs; gather_data[]; drm_tegra_submit_relocation relocs[]; } gather_uptr; }; struct drm_tegra_channel_submit { __u32 num_syncpt_incrs; __u32 syncpt_idx; __u64 commands_ptr; __u32 commands_size; ... }; struct drm_tegra_submit_command example[] = { cmd.gather_uptr{}, ... gather_data[], gather_relocs[], cmd.wait_syncpt{}, ... }; This way we will have only a single copy_from_user() for the whole cmdstream, which should be more efficient to do and nicer from both userspace and kernel perspectives in regards to forming and parsing the commands.