This is just a quick look. I checked some style issues and did some basic static analysis. I have run:

 - cppcheck (which found a couple of mistakes)
 - flawfinder (did not find anything, to my eye)
 - codespell (which found a couple of typos)

You can run these yourself as well, or check the findings below.

A couple of common things, which you can take or leave:

 - In many places the code does "goto exit" where the exit label just returns. Maybe return straight away instead; there is no need to jump.
 - Some comments start with a capital letter, others do not. Maybe they should all start with a capital. A very small nit, but it makes a nice touch to the code.
 - A lot of one-line comments are written as unnecessary three-line block comments.

On 19.4.2022 16.59, Cai Huoqing wrote:
The NVIDIA Deep Learning Accelerator (NVDLA) is an open source IP which is integrated into NVIDIA Jetson AGX Xavier, so add driver support for this accelerator. Signed-off-by: Cai Huoqing <cai.huoqing@xxxxxxxxx>
... snip
diff --git a/drivers/gpu/drm/nvdla/nvdla_bdma.c b/drivers/gpu/drm/nvdla/nvdla_bdma.c new file mode 100644 index 000000000000..225613f27acf --- /dev/null +++ b/drivers/gpu/drm/nvdla/nvdla_bdma.c
... snip
+static int32_t +processor_bdma_program_slot(struct dla_engine *engine, + struct dla_bdma_surface_desc *bdma_surface, + struct dla_bdma_transfer_desc *transfer) +{ + int32_t ret = 0; + uint64_t source_addr = 0; + uint64_t destination_addr = 0; + uint32_t high, low, reg; + uint8_t bdma_free_slots = 0; + + /* make sure there're enough free slots */ + if (bdma_free_slots <= 0) {
This is always true right now: bdma_free_slots is initialized to 0 and nothing changes it before the check, so the if can be dropped.
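The function body could then start directly with the polling loop, e.g. (a sketch reusing the patch's own helpers):

	/* wait until the engine reports at least one free slot */
	do {
		reg = bdma_reg_read(engine, STATUS);
		reg = (reg & MASK(BDMA_STATUS_0, FREE_SLOT)) >>
			SHIFT(BDMA_STATUS_0, FREE_SLOT);
	} while (reg == 0);

bdma_free_slots also looks unused after its assignment in this function, so it could probably go as well.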
+ do { + reg = bdma_reg_read(engine, STATUS); + reg = (reg & MASK(BDMA_STATUS_0, FREE_SLOT)) >> + SHIFT(BDMA_STATUS_0, FREE_SLOT); + } while (reg == 0); + bdma_free_slots = (uint8_t)reg; + } + + dla_get_dma_address(engine->driver_context, engine->task->task_data, + transfer->source_address, + (void *)&source_addr, + DESTINATION_DMA); + dla_get_dma_address(engine->driver_context, engine->task->task_data, + transfer->destination_address, + (void *)&destination_addr, + DESTINATION_DMA); + + ASSERT_GOTO((transfer->line_repeat <= 8192), + ret, -EINVAL, exit); + ASSERT_GOTO((transfer->surface_repeat <= 8192), + ret, -EINVAL, exit); + ASSERT_GOTO((transfer->line_size % 32) == 0, + ret, -EINVAL, exit); + ASSERT_GOTO(transfer->source_line >= transfer->line_size, + ret, -EINVAL, exit); + ASSERT_GOTO(transfer->destination_line >= transfer->line_size, + ret, -EINVAL, exit); + ASSERT_GOTO(transfer->source_surface >= + (transfer->source_line * transfer->line_repeat), + ret, -EINVAL, exit); + ASSERT_GOTO(transfer->destination_surface >= + (transfer->destination_line * transfer->line_repeat), + ret, -EINVAL, exit); + + /* config registers */ + high = upper_32_bits(source_addr); + low = lower_32_bits(source_addr); + bdma_reg_write(engine, CFG_SRC_ADDR_LOW, low); + bdma_reg_write(engine, CFG_SRC_ADDR_HIGH, high); + high = upper_32_bits(destination_addr); + low = lower_32_bits(destination_addr); + bdma_reg_write(engine, CFG_DST_ADDR_LOW, low); + bdma_reg_write(engine, CFG_DST_ADDR_HIGH, high); + bdma_reg_write(engine, CFG_LINE, (transfer->line_size >> 5) - 1); + reg = (map_mem[bdma_surface->source_type] << + SHIFT(BDMA_CFG_CMD_0, SRC_RAM_TYPE)) | + (map_mem[bdma_surface->destination_type] << + SHIFT(BDMA_CFG_CMD_0, DST_RAM_TYPE)); + bdma_reg_write(engine, CFG_CMD, reg); + bdma_reg_write(engine, CFG_LINE_REPEAT, transfer->line_repeat - 1); + bdma_reg_write(engine, CFG_SRC_LINE, transfer->source_line); + bdma_reg_write(engine, CFG_DST_LINE, transfer->destination_line); + bdma_reg_write(engine, CFG_SURF_REPEAT, transfer->surface_repeat - 1); + bdma_reg_write(engine, CFG_SRC_SURF, transfer->source_surface); + bdma_reg_write(engine, CFG_DST_SURF, transfer->destination_surface); + bdma_reg_write(engine, CFG_OP, FIELD_ENUM(BDMA_CFG_OP_0, EN, ENABLE)); + +exit: + return ret; +}
... snip
diff --git a/drivers/gpu/drm/nvdla/nvdla_cache.c b/drivers/gpu/drm/nvdla/nvdla_cache.c new file mode 100644 index 000000000000..f8bd7b514aab --- /dev/null +++ b/drivers/gpu/drm/nvdla/nvdla_cache.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* + * Copyright (C) 2017-2018 NVIDIA CORPORATION + * Copyright (C) 2022 Cai Huoqing + */ + +#include "nvdla_common.h" +#include "nvdla_drm.h" +#include "nvdla_reg.h" +#include "nvdla_engine.h" + +#define DLA_OP_CACHE_SIZE (DLA_NUM_GROUPS * ((DLA_OP_NUM + 2) * 2)) + +static struct dla_common_op_desc desc_cache[DLA_OP_NUM][DLA_OP_CACHE_SIZE]; +static int32_t desc_refcount[DLA_OP_NUM][DLA_OP_CACHE_SIZE]; + +void +dla_get_refcount(struct dla_common_op_desc *op_desc) +{ + int32_t i; + struct dla_common_op_desc *desc = NULL; + + if (op_desc == NULL) + return; + + if (op_desc->index == -1) + return; + + desc = &desc_cache[op_desc->op_type][0]; + + for (i = 0; i < DLA_OP_CACHE_SIZE; i++, desc++) { + if (desc->index == op_desc->index && + desc->roi_index == op_desc->roi_index) {
Reverse the conditions to reduce nesting:

	if (desc->index != op_desc->index)
		continue;
	if (desc->roi_index != op_desc->roi_index)
		continue;
+ desc_refcount[op_desc->op_type][i]++; + return; + } + } +} + +struct dla_common_op_desc * +dla_get_op_desc(struct dla_engine *engine, + struct dla_task *task, int16_t index, + uint8_t op_type, uint8_t roi_index) +{ + int32_t i; + int32_t ret; + uint64_t op_base; + uint64_t dep_graph_addr; + struct dla_common_op_desc *desc = NULL; + + if (index == -1) { + pr_debug("no desc get due to index==-1\n"); + goto exit; + } + + dep_graph_addr = (sizeof(struct dla_common_op_desc) * + engine->network->num_operations * roi_index); + + desc = &desc_cache[op_type][0]; + + for (i = 0; i < DLA_OP_CACHE_SIZE; i++, desc++) { + if (desc->index == index && desc->roi_index == roi_index) { + if (desc->op_type != op_type) { + pr_err("op_cache[op=%u] contains incorrect entry of op[%u]\n", + op_type, desc->op_type); + continue; + }
Reverse the conditions, so this will be pretty clean:

	if (desc->index != index)
		continue;
	if (desc->roi_index != roi_index)
		continue;
	if (desc->op_type != op_type) {
		pr_err("op_cache[op=%u] contains incorrect entry of op[%u]\n",
		       op_type, desc->op_type);
		continue;
	}
+ desc_refcount[op_type][i]++; + goto exit; + } + } + + desc = &desc_cache[op_type][0]; + + for (i = 0; i < DLA_OP_CACHE_SIZE; i++, desc++) { + if (desc->index == -1) {
Reverse the condition:

	if (desc->index != -1)
		continue;
+ op_base = dep_graph_addr + + (sizeof(struct dla_common_op_desc) * + (uint64_t)index); + ret = dla_data_read(engine->driver_context, + task->task_data, + task->dependency_graph_addr, + (void *)(desc), + sizeof(struct dla_common_op_desc), + op_base); + if (ret) { + desc = NULL; + goto exit; + } + + if (op_type != desc->op_type) { + /* + * op_type of entry read from DRAM should not + * mismatch with given op_type. If they + * mismatches, then wrong entry is fetched, so + * report this issue by throwing error. + */ + pr_err("Fetched [op_type=%u] from DRAM doesn't match with op_type[%u]\n", + desc->op_type, op_type); + desc->op_type = op_type; + desc->index = -1; + desc->roi_index = -1; + desc = NULL; + goto exit; + } + + desc->index = index; + desc->roi_index = roi_index; + + desc_refcount[op_type][i]++; + goto exit; + } + } + +exit: + return desc; +} + +static void +dla_free_op_desc(struct dla_engine *engine, struct dla_common_op_desc *op_desc) +{ + uint64_t op_base; + uint64_t dep_graph_addr; + struct dla_task *task; + + pr_debug("Enter: %s op desc index %u ROI %d\n", __func__, + op_desc->index, op_desc->roi_index);
Possible NULL pointer dereference: op_desc is dereferenced here before the NULL check below.
+ task = engine->task; + dep_graph_addr = (sizeof(struct dla_common_op_desc) * + engine->network->num_operations * + op_desc->roi_index); + + if (op_desc->index == -1) + goto exit;
Possible NULL pointer dereference again: op_desc->index is read above, but the NULL check only comes after it.
+ if (op_desc == NULL) + goto exit;
Or, if op_desc can never be NULL here, this check is unnecessary. Either way, it should come before the dereferences above or be dropped.
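A sketch of the reordering, in case op_desc can be NULL (this also returns directly instead of jumping to exit, per the goto note at the top):

	if (op_desc == NULL)
		return;

	pr_debug("Enter: %s op desc index %u ROI %d\n", __func__,
		 op_desc->index, op_desc->roi_index);

	if (op_desc->index == -1)
		return;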
+ + /** + * TODO: keeping the depth value hardcoded as 0 for now, + * need to replace it once corresponding implementation is done. + */ + op_base = (dep_graph_addr + + (sizeof(struct dla_common_op_desc) * + (uint64_t)op_desc->index)); + + /** + * Flush descriptor to DRAM + */ + dla_data_write(engine->driver_context, + task->task_data, + (void *)op_desc, + task->dependency_graph_addr, + sizeof(struct dla_common_op_desc), + op_base); + + /** + * Release it + */ + op_desc->index = -1; + op_desc->roi_index = -1; +exit: + return; +} + +void +dla_put_op_desc(struct dla_engine *engine, struct dla_common_op_desc *op_desc) +{ + int32_t i; + struct dla_common_op_desc *desc; + + if (op_desc == NULL) + return; + + if (op_desc->index == -1) + return; + + desc = &desc_cache[op_desc->op_type][0]; + + for (i = 0; i < DLA_OP_CACHE_SIZE; i++, desc++) { + if (desc->index == op_desc->index && + desc->roi_index == op_desc->roi_index) {
Reverse the conditions:

	if (desc->index != op_desc->index)
		continue;
	if (desc->roi_index != op_desc->roi_index)
		continue;
+ + desc_refcount[op_desc->op_type][i]--; + + /** + * Free desc if refcount is 0 + */
A pretty redundant comment, and it certainly does not need three lines.
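If the comment stays, one line is enough:

	/* free desc if refcount drops to 0 */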
+ if (desc_refcount[op_desc->op_type][i] == 0) + dla_free_op_desc(engine, op_desc); + + return; + } + } +} + +void +dla_init_op_cache(struct dla_engine *engine) +{ + int32_t i, j; + struct dla_common_op_desc *desc = &desc_cache[0][0]; + + memset((uint8_t *)&desc_cache[0][0], 0, sizeof(desc_cache)); + memset((uint8_t *)&desc_refcount[0][0], 0, sizeof(desc_refcount)); + + for (i = 0; i < DLA_OP_NUM; i++) { + for (j = 0; j < DLA_OP_CACHE_SIZE; j++) { + desc->index = -1; + desc->roi_index = -1; + desc->op_type = (uint8_t)i; + desc++; + } + } +}
... snip
diff --git a/drivers/gpu/drm/nvdla/nvdla_common.h b/drivers/gpu/drm/nvdla/nvdla_common.h new file mode 100644 index 000000000000..38cf43246890 --- /dev/null +++ b/drivers/gpu/drm/nvdla/nvdla_common.h @@ -0,0 +1,835 @@
... snip
+struct dla_conv_op_desc { + /* Performance parameters */ + + /* dla_conv_mode */ + uint8_t conv_mode; + uint8_t data_reuse; + uint8_t weight_reuse; + uint8_t skip_data_rls; + + uint8_t skip_weight_rls; + uint8_t reserved0; + uint16_t entry_per_slice; + + /* dla_data_format */ + uint8_t data_format; + /* dla_pixel_mapping */ + uint8_t pixel_mapping; + /* number of free slices before fetch */ + uint16_t fetch_grain; + + uint8_t reserved_b[8]; + + /* batch_num */ + uint8_t batch; + /* dla_weight_format */ + uint8_t weight_format; + uint8_t data_bank; + uint8_t weight_bank; + + /* the offset in bytes of each data cube in a batch */ + uint32_t batch_stride; + + uint8_t post_extension; + uint8_t pixel_override; + /* number of slices need to be released */ + uint16_t release; + + /* The input cube dimension for CSC */ + uint16_t input_width_csc; + uint16_t input_height_csc; + + uint16_t input_channel_csc; + uint16_t kernel_width_csc; + + uint16_t kernel_height_csc; + uint16_t kernel_channel_csc; + + /* The input cube dimension for CMAC */ + uint16_t input_width_cmac; + uint16_t input_height_cmac; + + /* actual size in bytes */ + uint32_t bytes_per_kernel; + + /* Algorithm parameters */ + + int16_t mean_ry; /* mean value for red in RGB or Y in YUV */ + int16_t mean_gu; /* mean value for green in RGB or U in YUV */ + + int16_t mean_bv; /* mean value for blue in RGB or V in YUV */ + int16_t mean_ax; + + uint8_t mean_format; /* dla_mean_format */ + uint8_t conv_stride_x; + uint8_t conv_stride_y; + uint8_t pad_x_left; + + uint8_t pad_x_right; + uint8_t pad_y_top; + uint8_t pad_y_bottom; + uint8_t dilation_x; + + uint8_t dilation_y; + uint8_t reserved2[2]; + + /* Precision parameters */ + uint8_t pra_truncate; + + uint8_t in_precision; + /* The output precision from CONV, it's the MAC processing precison */
./nvdla_common.h:428: precison ==> precision
+ uint8_t out_precision; + int16_t pad_val; + + /* input converter parameters */ + struct dla_cvt_param in_cvt; + /* output converter parameters, support truncate only */ + struct dla_cvt_param out_cvt; + +} __packed __aligned(4); + +struct dla_conv_stat_desc { + uint32_t data_read_stall; + uint32_t weight_read_stall; + uint32_t data_read_latency; + uint32_t weight_read_latency; + uint32_t saturation_count; + uint32_t nan_data_num; + uint32_t nan_weight_num; + uint32_t inf_data_num; + uint32_t inf_weight_num; +} __packed __aligned(4); + +/** + * @ingroup SDP + * @name Activation functions + * @brief Activation functions supported in SDP + * @{ + */ +#define ACTIVATION_NONE 0 +#define ACTIVATION_RELU 1 +#define ACTIVATION_LUT 2 +#define ACTIVATION_PRELU 3 +/** @} */ + +/** + * @ingroup LUT + * @name LUT size + * @brief LUT sizes for linear and exponentila LUT + * @{ + */ +#define LUT_LINEAR_EXP_TABLE_ENTRY_LOG2 6 +#define LUT_LINEAR_ONLY_TABLE_ENTRY_LOG2 8 +/** @} */ + +/** + * @ingroup LUT + * @name LUT types + * @brief DLA supports two types of LUT, linear and exonential + * @{ + */ +#define LUT_LINEAR_EXP_TABLE 0 +#define LUT_LINEAR_ONLY_TABLE 1 +/** @} */ + +/** + * @ingroup LUT + * @name LUT methods + * @brief DLA supports two types of LUT, linear and exonential + * @{ + */ +#define LUT_METHOD_EXPONENTIAL 0 +#define LUT_METHOD_LINEAR 1 +/** @} */ + +/** + * @ingroup LUT + * @name LUT + * @brief DLA supports two types of LUT, linear and exonential + * @{ + */ +#define LUT_PRI_LINEAR_EXP 0 +#define LUT_PRI_LINEAR_ONLY 1 +/** @} */ + +union dla_lut_offset { + /** + * Number should be substracted on log domain before look up
./nvdla_common.h:505: substracted ==> subtracted
+ * exponetial table it has the same definition as hardware
./nvdla_common.h:506: exponetial ==> exponential
+ * thus input scaling should also take into account when + * set this field. + */ + int8_t exp_offset; + /** + * Number of bits should be right shift before looking + * up linear table + */ + int8_t frac_bits; + uint16_t reserved0; +};
... snip
diff --git a/drivers/gpu/drm/nvdla/nvdla_drm.c b/drivers/gpu/drm/nvdla/nvdla_drm.c new file mode 100644 index 000000000000..9217eee1de3b --- /dev/null +++ b/drivers/gpu/drm/nvdla/nvdla_drm.c @@ -0,0 +1,695 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* + * Copyright (C) 2017-2018 NVIDIA CORPORATION + * Copyright (C) 2022 Cai Huoqing + */ + +#include <linux/dma-buf.h> +#include <linux/dma-mapping.h> +#include <linux/fs.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/of_irq.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/time.h> +#include <linux/uaccess.h> +#include <linux/types.h> + +#include "nvdla_drm.h" +#include "nvdla_ioctl.h" +#include "nvdla_engine.h" + +static struct nvdla_config nvdla_config_os_initial = { + .atom_size = 32, + .bdma_enable = true, + .rubik_enable = true, + .weight_compress_support = true, +}; + +static struct nvdla_config nvdla_config_small = { + //.atom_size = 8, + .atom_size = 32, // nv_large config + .bdma_enable = false, + .rubik_enable = false, + .weight_compress_support = false, +}; + +int64_t dla_get_time_us(void)
Function is never used.
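Either drop it, or if it is kept for an upcoming user later in the series, marking it __maybe_unused keeps analyzers quiet in the meantime (just a sketch):

	int64_t __maybe_unused dla_get_time_us(void)
	{
		return ktime_get_ns() / NSEC_PER_USEC;
	}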
+{ + return ktime_get_ns() / NSEC_PER_USEC; +} + +void dla_reg_write(void *driver_context, uint32_t addr, uint32_t reg) +{ + struct nvdla_device *nvdla_dev = + (struct nvdla_device *)driver_context; + + if (!nvdla_dev) + return; + + writel(reg, nvdla_dev->base + addr); +} + +uint32_t dla_reg_read(void *driver_context, uint32_t addr) +{ + struct nvdla_device *nvdla_dev = + (struct nvdla_device *)driver_context; + + if (!nvdla_dev) + return 0; + + return readl(nvdla_dev->base + addr); +} + +static irqreturn_t nvdla_engine_isr(int32_t irq, void *data) +{ + unsigned long flags; + uint32_t mask; + uint32_t reg; + struct dla_processor *processor = NULL; + struct dla_processor_group *group; + struct dla_engine *engine; + struct nvdla_device *nvdla_dev = (struct nvdla_device *)data; + + if (!nvdla_dev) + return IRQ_NONE; + + engine = nvdla_dev->engine_context; + spin_lock_irqsave(&nvdla_dev->nvdla_lock, flags); + + mask = glb_reg_read(engine, S_INTR_MASK);
The value read into mask is never used. It would be nice to clean these up so that static analyzers do not complain anymore, but it is your choice what you want to do.
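If the read itself matters to the hardware, keep the read but drop the assignment (a sketch; I do not know whether the read has side effects here):

	glb_reg_read(engine, S_INTR_MASK);	/* read only for its side effects */

Otherwise the whole line can go.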
+ reg = glb_reg_read(engine, S_INTR_STATUS); + + if (reg & MASK(GLB_S_INTR_STATUS_0, CACC_DONE_STATUS0)) { + processor = &engine->processors[DLA_OP_CONV]; + group = &processor->groups[0]; + group->events |= (1 << DLA_EVENT_OP_COMPLETED); + } + if (reg & MASK(GLB_S_INTR_STATUS_0, CACC_DONE_STATUS1)) { + processor = &engine->processors[DLA_OP_CONV]; + group = &processor->groups[1]; + group->events |= (1 << DLA_EVENT_OP_COMPLETED); + } + if (reg & MASK(GLB_S_INTR_STATUS_0, SDP_DONE_STATUS0)) { + processor = &engine->processors[DLA_OP_SDP]; + group = &processor->groups[0]; + group->events |= (1 << DLA_EVENT_OP_COMPLETED); + } + if (reg & MASK(GLB_S_INTR_STATUS_0, SDP_DONE_STATUS1)) { + processor = &engine->processors[DLA_OP_SDP]; + group = &processor->groups[1]; + group->events |= (1 << DLA_EVENT_OP_COMPLETED); + } + if (reg & MASK(GLB_S_INTR_STATUS_0, CDP_DONE_STATUS0)) { + processor = &engine->processors[DLA_OP_CDP]; + group = &processor->groups[0]; + group->events |= (1 << DLA_EVENT_OP_COMPLETED); + } + if (reg & MASK(GLB_S_INTR_STATUS_0, CDP_DONE_STATUS1)) { + processor = &engine->processors[DLA_OP_CDP]; + group = &processor->groups[1]; + group->events |= (1 << DLA_EVENT_OP_COMPLETED); + } + if (reg & MASK(GLB_S_INTR_STATUS_0, RUBIK_DONE_STATUS0)) { + processor = &engine->processors[DLA_OP_RUBIK]; + group = &processor->groups[0]; + group->events |= (1 << DLA_EVENT_OP_COMPLETED); + } + if (reg & MASK(GLB_S_INTR_STATUS_0, RUBIK_DONE_STATUS1)) { + processor = &engine->processors[DLA_OP_RUBIK]; + group = &processor->groups[1]; + group->events |= (1 << DLA_EVENT_OP_COMPLETED); + } + if (reg & MASK(GLB_S_INTR_STATUS_0, PDP_DONE_STATUS0)) { + processor = &engine->processors[DLA_OP_PDP]; + group = &processor->groups[0]; + group->events |= (1 << DLA_EVENT_OP_COMPLETED); + } + if (reg & MASK(GLB_S_INTR_STATUS_0, PDP_DONE_STATUS1)) { + processor = &engine->processors[DLA_OP_PDP]; + group = &processor->groups[1]; + group->events |= (1 << DLA_EVENT_OP_COMPLETED); + } + if (reg & MASK(GLB_S_INTR_STATUS_0, BDMA_DONE_STATUS0)) { + processor = &engine->processors[DLA_OP_BDMA]; + group = &processor->groups[0]; + group->events |= (1 << DLA_EVENT_OP_COMPLETED); + } + if (reg & MASK(GLB_S_INTR_STATUS_0, BDMA_DONE_STATUS1)) { + processor = &engine->processors[DLA_OP_BDMA]; + group = &processor->groups[1]; + group->events |= (1 << DLA_EVENT_OP_COMPLETED); + } + if (reg & MASK(GLB_S_INTR_STATUS_0, CDMA_DAT_DONE_STATUS0)) { + processor = &engine->processors[DLA_OP_CONV]; + group = &processor->groups[0]; + group->events |= (1 << DLA_EVENT_CDMA_DT_DONE); + } + if (reg & MASK(GLB_S_INTR_STATUS_0, CDMA_DAT_DONE_STATUS1)) { + processor = &engine->processors[DLA_OP_CONV]; + group = &processor->groups[1]; + group->events |= (1 << DLA_EVENT_CDMA_DT_DONE); + } + if (reg & MASK(GLB_S_INTR_STATUS_0, CDMA_WT_DONE_STATUS0)) { + processor = &engine->processors[DLA_OP_CONV]; + group = &processor->groups[0]; + group->events |= (1 << DLA_EVENT_CDMA_WT_DONE); + } + if (reg & MASK(GLB_S_INTR_STATUS_0, CDMA_WT_DONE_STATUS1)) { + processor = &engine->processors[DLA_OP_CONV]; + group = &processor->groups[1]; + group->events |= (1 << DLA_EVENT_CDMA_WT_DONE); + } + + glb_reg_write(engine, S_INTR_STATUS, reg); + mask = glb_reg_read(engine, S_INTR_MASK);
Never used either.
+ reg = glb_reg_read(engine, S_INTR_STATUS);
Never used.
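If the write-then-read pair at the end is meant to flush the posted write to the device, a comment plus discarding the results would make the intent clear (a sketch, assuming that is the intent):

	glb_reg_write(engine, S_INTR_STATUS, reg);
	/* read back to make sure the posted write has reached the device */
	glb_reg_read(engine, S_INTR_STATUS);

Otherwise both trailing reads can simply be dropped.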
+ + complete(&nvdla_dev->event_notifier); + spin_unlock_irqrestore(&nvdla_dev->nvdla_lock, flags); + + return IRQ_HANDLED; +}
... snip
diff --git a/drivers/gpu/drm/nvdla/nvdla_gem.c b/drivers/gpu/drm/nvdla/nvdla_gem.c new file mode 100644 index 000000000000..cccf6d01a564 --- /dev/null +++ b/drivers/gpu/drm/nvdla/nvdla_gem.c
... snip
+static const struct drm_ioctl_desc nvdla_drm_ioctls[] = { + DRM_IOCTL_DEF_DRV(NVDLA_SUBMIT, nvdla_submit, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(NVDLA_GEM_CREATE, nvdla_gem_create, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(NVDLA_GEM_MMAP, nvdla_gem_map_offset, DRM_RENDER_ALLOW), + /* use DRM_IOCTL_MODE_DESTROY_DUMB to destory */
./nvdla_gem.c:347: destory ==> destroy
+};
... snip
diff --git a/drivers/gpu/drm/nvdla/nvdla_scheduler.c b/drivers/gpu/drm/nvdla/nvdla_scheduler.c new file mode 100644 index 000000000000..b814077478c6 --- /dev/null +++ b/drivers/gpu/drm/nvdla/nvdla_scheduler.c
... snip
+static int +dla_update_dependency(struct dla_engine *engine, + struct dla_consumer *consumer, + struct dla_common_op_desc *op_desc, + uint8_t event, uint8_t roi_index) +{ + int32_t ret = 0; + struct dla_processor *processor; + + if (consumer->index == -1) + goto exit; + + /* Update dependency only if event matches */ + if (event != consumer->event) + goto exit; + + /** + * If consumer index is valid but op desc is NULL means + * op desc for consumer was not pre-fetched + */ + if (op_desc == NULL) { + ret = -EINVAL; + pr_err("Operation descriptor is NULL, consumer index %d", + consumer->index); + goto exit; + } + + pr_debug("Update dependency operation index %d ROI %d DEP_COUNT=%d\n", + op_desc->index, op_desc->roi_index, + op_desc->dependency_count); + op_desc->dependency_count--; + + if (op_desc->dependency_count == 0) { + processor = &engine->processors[op_desc->op_type]; + pr_debug("enable %s in %s as depdency are resolved\n",
./nvdla_scheduler.c:455: depdency ==> dependency
+ processor->name, __func__); + + ret = dla_enable_operation(engine, processor, op_desc); + if (ret) + goto exit; + } +exit: + return ret; +}
... snip
+int +dla_process_events(struct dla_engine *engine, uint32_t *task_complete) +{ + int32_t i; + int32_t ret = 0; + + for (i = 0; i < DLA_OP_NUM; i++) { + struct dla_processor *processor; + + processor = &engine->processors[i]; + ret = dla_handle_events(engine, processor); + /** + * Incase engine status is non-zero, then don't
./nvdla_scheduler.c:905: Incase ==> In case
+ * update the engine status. We should keep its + * status for later cleaning of engine. + */ + if (!engine->status) + engine->status = ret; + } + + if (engine->network->num_operations == engine->num_proc_hwl) + *task_complete = 1; + + return ret; +}
... snip

Argillander