A fresh round of offline design discussions with the internal team has decided that: - we don't want to use an interim circular buffer to copy all of the GuC generated register dumps of one or more 'engine-capture-groups'. - instead, we shall dynamically allocate multiple nodes, each node being a single "engine-capture-dump". A linked list of one or many engine-capture-dumps would result from a single engine-capture-group. - this dynamic allocation should happen during the G2H error-capture notification event which happens before the corresponding G2H context-reset that triggers the i915_gpu_coredump (where we want to avoid memory allocation moving forward). - we also realize that during the linked-list allocation we would need a first pass over the guc-log-error-state-capture head-to-tail entries in order to duplicate global and engine-class register dumps for each engine-instance register dump if we find dependent-engine resets in an engine-capture-group. - later, when i915_gpu_coredump calls into capture_engine, we finally attach the corresponding node from the linked list above (detaching it from that linked list) into i915_gpu_coredump's intel_engine_coredump structure when we have a matching LRCA/guc-id/engine-instance. - we would also have to add a flag through the i915_gpu_coredump top-level trigger through to capture_engine to indicate if the capture was triggered via a GuC context reset or a forced user reset or gt-reset. In the latter case (user/gt reset), we should capture the register values directly from the HW, i.e. the pre-GuC behavior without matching against GuC. ...alan On Tue, 2022-01-18 at 02:03 -0800, Alan Previn wrote: > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c > index b637628905ec..fc80c5f31915 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c+static void __guc_capture_store_snapshot_work(struct intel_guc *guc) .. .. 
> +{ > + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; > + unsigned int buffer_size, read_offset, write_offset, bytes_to_copy, full_count; > + struct guc_log_buffer_state *log_buf_state; > + struct guc_log_buffer_state log_buf_state_local; > + void *src_data, *dst_data = NULL; > + bool new_overflow; > + > + /* Lock to get the pointer to GuC capture-log-buffer-state */ > + mutex_lock(&guc->log_state[GUC_CAPTURE_LOG_BUFFER].lock); > + log_buf_state = guc->log.buf_addr + > + (sizeof(struct guc_log_buffer_state) * GUC_CAPTURE_LOG_BUFFER); > + src_data = guc->log.buf_addr + intel_guc_get_log_buffer_offset(GUC_CAPTURE_LOG_BUFFER); > + > + /* > + * Make a copy of the state structure, inside GuC log buffer > + * (which is uncached mapped), on the stack to avoid reading > + * from it multiple times. > + */ > + memcpy(&log_buf_state_local, log_buf_state, sizeof(struct guc_log_buffer_state)); > + buffer_size = intel_guc_get_log_buffer_size(GUC_CAPTURE_LOG_BUFFER); > + read_offset = log_buf_state_local.read_ptr; > + write_offset = log_buf_state_local.sampled_write_ptr; > + full_count = log_buf_state_local.buffer_full_cnt; > + > + /* Bookkeeping stuff */ > + guc->log_state[GUC_CAPTURE_LOG_BUFFER].flush += log_buf_state_local.flush_to_file; > + new_overflow = intel_guc_check_log_buf_overflow(guc, > + &guc->log_state[GUC_CAPTURE_LOG_BUFFER], > + full_count); > + > + /* Update the state of shared log buffer */ > + log_buf_state->read_ptr = write_offset; > + log_buf_state->flush_to_file = 0; > + > + mutex_unlock(&guc->log_state[GUC_CAPTURE_LOG_BUFFER].lock); > + > + dst_data = guc->capture.priv->out_store.addr; > + if (dst_data) { > + mutex_lock(&guc->capture.priv->out_store.lock); > + > + /* Now copy the actual logs. 
*/ > + if (unlikely(new_overflow)) { > + /* copy the whole buffer in case of overflow */ > + read_offset = 0; > + write_offset = buffer_size; > + } else if (unlikely((read_offset > buffer_size) || > + (write_offset > buffer_size))) { > + drm_err(&i915->drm, "invalid GuC log capture buffer state!\n"); > + /* copy whole buffer as offsets are unreliable */ > + read_offset = 0; > + write_offset = buffer_size; > + } > + > + /* first copy from the tail end of the GuC log capture buffer */ > + if (read_offset > write_offset) { > + guc_capture_store_insert(guc, &guc->capture.priv->out_store, src_data, > + write_offset); > + bytes_to_copy = buffer_size - read_offset; > + } else { > + bytes_to_copy = write_offset - read_offset; > + } > + guc_capture_store_insert(guc, &guc->capture.priv->out_store, src_data + read_offset, > + bytes_to_copy); > + > + mutex_unlock(&guc->capture.priv->out_store.lock); > + } > +} > + > +void intel_guc_capture_store_snapshot(struct intel_guc *guc) > +{ > + if (guc->capture.priv) > + __guc_capture_store_snapshot_work(guc); > +} > + > +static void guc_capture_store_destroy(struct intel_guc *guc) > +{ > + mutex_destroy(&guc->capture.priv->out_store.lock); > + guc->capture.priv->out_store.size = 0; > + kfree(guc->capture.priv->out_store.addr); > + guc->capture.priv->out_store.addr = NULL; > +} > + > +static int guc_capture_store_create(struct intel_guc *guc) > +{ > + /* > + * Make this interim buffer larger than GuC capture output buffer so that we can absorb > + * a little delay when processing the raw capture dumps into text friendly logs > + * for the i915_gpu_coredump output > + */ > + size_t max_dump_size; > + > + GEM_BUG_ON(guc->capture.priv->out_store.addr); > + > + max_dump_size = PAGE_ALIGN(intel_guc_capture_output_min_size_est(guc)); > + max_dump_size = roundup_pow_of_two(max_dump_size); > + > + guc->capture.priv->out_store.addr = kzalloc(max_dump_size, GFP_KERNEL); > + if (!guc->capture.priv->out_store.addr) > + return -ENOMEM; > + > + 
guc->capture.priv->out_store.size = max_dump_size; > + mutex_init(&guc->capture.priv->out_store.lock); > + > + return 0; > +} > +