On Fri, Feb 09, 2024 at 04:55:36PM +0530, Dibakar Singh wrote:
> Expose an API qcom_scm_assign_table to allow client drivers to batch
> multiple memory regions in a single hyp assign call.
>
> In the existing situation, if our goal is to process an sg_table and
> transfer its ownership from the current VM to a different VM, we have
> a couple of strategies. The first strategy is to process the entire
> sg_table at once and then transfer the ownership. However, this
> method can have an adverse impact on the system, because NS
> interrupts are disabled for the duration of the SMC call, and that
> duration can be significant for large sg_tables. To address this, we
> can adopt a second strategy: process each sg_list in the sg_table
> individually and reassign memory ownership one entry at a time.
> Although this avoids keeping NS interrupts disabled for an extended
> period, it is slower and hurts performance.
>
> A more efficient strategy is to process the sg_table in batches. This
> addresses both concerns: it avoids prolonged NS interrupt disablement
> when dealing with large sg_tables, and, because entries are processed
> in batches, it is faster than processing each one individually. The
> observations from testing both approaches for performance are as
> follows:
>
> Allocation Size/           256MB       512MB        1024MB
> Algorithm Used             ==========  ===========  ============
>
> Processing each sg_list    73708(us)   149289(us)   266964(us)
> in sg_table one by one
>
> Processing sg_table in     46925(us)   92691(us)    176893(us)
> batches
>
> This implementation serves as a wrapper around the helper function
> __qcom_scm_assign_mem, which takes an sg_list and processes it in
> batches. Each batch is capped at 32 sg_list entries or a total batch
> size of 512 pages (2MB with 4K pages), whichever limit is hit first.
> The choice of these numbers is heuristic, based on the test runs
> conducted: smaller values would compromise performance, while larger
> values would keep non-secure interrupts disabled for an extended
> duration.
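
For readers following along, a client driver would presumably drive the
proposed API roughly as below (hypothetical caller, not part of this
series; the destination VMID and permissions are purely illustrative):

        /* Hand every region described by sgt from HLOS to another VM. */
        u32 srcvms[] = { QCOM_SCM_VMID_HLOS };
        int destvms[] = { QCOM_SCM_VMID_MSS_MSA };      /* example new owner */
        int destperms[] = { QCOM_SCM_PERM_READ | QCOM_SCM_PERM_WRITE };
        int ret;

        ret = qcom_scm_assign_table(sgt, srcvms, ARRAY_SIZE(srcvms),
                                    destvms, destperms,
                                    ARRAY_SIZE(destvms));
        if (ret)
                return ret;     /* the series tries to roll back to HLOS */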
>
> Co-developed-by: Prakruthi Deepak Heragu <quic_pheragu@xxxxxxxxxxx>
> Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@xxxxxxxxxxx>
> Signed-off-by: Dibakar Singh <quic_dibasing@xxxxxxxxxxx>
> ---
>  drivers/firmware/qcom/qcom_scm.c       | 211 +++++++++++++++++++++++++
>  include/linux/firmware/qcom/qcom_scm.h |   7 +
>  2 files changed, 218 insertions(+)
>
> diff --git a/drivers/firmware/qcom/qcom_scm.c b/drivers/firmware/qcom/qcom_scm.c
> index 520de9b5633a..038b96503d65 100644
> --- a/drivers/firmware/qcom/qcom_scm.c
> +++ b/drivers/firmware/qcom/qcom_scm.c
> @@ -21,6 +21,8 @@
>  #include <linux/platform_device.h>
>  #include <linux/reset-controller.h>
>  #include <linux/types.h>
> +#include <linux/scatterlist.h>
> +#include <linux/slab.h>
>
>  #include "qcom_scm.h"
>
> @@ -1048,6 +1050,215 @@ int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz,
>  }
>  EXPORT_SYMBOL_GPL(qcom_scm_assign_mem);
>
> +/**
> + * qcom_scm_assign_mem_batch() - Make a secure call to reassign memory
> + *                               ownership of several memory regions
> + * @mem_regions: A buffer describing the set of memory regions that need
> + *               to be reassigned
> + * @nr_mem_regions: The number of memory regions that need to be reassigned
> + * @srcvms: A buffer populated with the vmid(s) of the current set of owners
> + * @src_sz: The size of the srcvms buffer (in bytes)
> + * @destvms: A buffer populated with the new owners and corresponding
> + *           permission flags
> + * @dest_sz: The size of the destvms buffer (in bytes)
> + *
> + * Return negative errno on failure, 0 on success.
> + */
> +static int qcom_scm_assign_mem_batch(struct qcom_scm_mem_map_info *mem_regions,
> +				     size_t nr_mem_regions, u32 *srcvms,
> +				     size_t src_sz,
> +				     struct qcom_scm_current_perm_info *destvms,
> +				     size_t dest_sz)
> +{
> +	dma_addr_t mem_dma_addr;
> +	size_t mem_regions_sz;
> +	int ret = 0, i;
> +
> +	for (i = 0; i < nr_mem_regions; i++) {
> +		mem_regions[i].mem_addr = cpu_to_le64(mem_regions[i].mem_addr);
> +		mem_regions[i].mem_size = cpu_to_le64(mem_regions[i].mem_size);
> +	}
> +
> +	mem_regions_sz = nr_mem_regions * sizeof(*mem_regions);
> +	mem_dma_addr = dma_map_single(__scm->dev, mem_regions, mem_regions_sz,
> +				      DMA_TO_DEVICE);
> +	if (dma_mapping_error(__scm->dev, mem_dma_addr)) {
> +		dev_err(__scm->dev, "mem_dma_addr mapping failed\n");
> +		return -ENOMEM;
> +	}
> +
> +	ret = __qcom_scm_assign_mem(__scm->dev, virt_to_phys(mem_regions),
> +				    mem_regions_sz, virt_to_phys(srcvms),
> +				    src_sz, virt_to_phys(destvms), dest_sz);
> +
> +	dma_unmap_single(__scm->dev, mem_dma_addr, mem_regions_sz, DMA_TO_DEVICE);
> +	return ret;
> +}
> +
> +/**
> + * qcom_scm_prepare_mem_batch() - Prepare batches of memory regions
> + * @table: A scatter list whose memory needs to be reassigned
> + * @srcvms: A buffer populated with the vmid(s) of the current set of owners
> + * @nr_src: The number of entries in the srcvms buffer
> + * @destvms: A buffer populated with the vmid(s) of the new owners
> + * @destvms_perms: A buffer populated with the permission flags of the new
> + *                 owners
> + * @nr_dest: The number of entries in the destvms buffer
> + * @last_sgl: The last scatter list element; used in case of rollback
> + * @roll_back: Whether we are executing a rollback after a failure
> + *
> + * Return negative errno on failure, 0 on success.
> + */
> +static int qcom_scm_prepare_mem_batch(struct sg_table *table,
> +				      u32 *srcvms, int nr_src,
> +				      int *destvms, int *destvms_perms,
> +				      int nr_dest,
> +				      struct scatterlist *last_sgl,
> +				      bool roll_back)
> +{
> +	struct qcom_scm_current_perm_info *destvms_cp;
> +	struct qcom_scm_mem_map_info *mem_regions_buf;
> +	struct scatterlist *curr_sgl = table->sgl;
> +	dma_addr_t source_dma_addr, dest_dma_addr;
> +	size_t batch_iterator;
> +	size_t batch_start = 0;
> +	size_t destvms_cp_sz;
> +	size_t srcvms_cp_sz;
> +	size_t batch_size;
> +	u32 *srcvms_cp;
> +	int ret = 0;
> +	int i;
> +
> +	if (!table || !table->sgl || !srcvms || !nr_src ||
> +	    !destvms || !destvms_perms || !nr_dest || !table->nents)
> +		return -EINVAL;
> +
> +	srcvms_cp_sz = sizeof(*srcvms_cp) * nr_src;
> +	srcvms_cp = kmemdup(srcvms, srcvms_cp_sz, GFP_KERNEL);
> +	if (!srcvms_cp)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < nr_src; i++)
> +		srcvms_cp[i] = cpu_to_le32(srcvms_cp[i]);
> +
> +	source_dma_addr = dma_map_single(__scm->dev, srcvms_cp,
> +					 srcvms_cp_sz, DMA_TO_DEVICE);

Please use the new tzmem allocator:
https://lore.kernel.org/all/20240205182810.58382-1-brgl@xxxxxxxx/

> +
> +	if (dma_mapping_error(__scm->dev, source_dma_addr)) {
> +		ret = -ENOMEM;
> +		goto out_free_source;
> +	}
> +
> +	destvms_cp_sz = sizeof(*destvms_cp) * nr_dest;
> +	destvms_cp = kzalloc(destvms_cp_sz, GFP_KERNEL);
> +
> +	if (!destvms_cp) {
> +		ret = -ENOMEM;
> +		goto out_unmap_source;
> +	}
> +
> +	for (i = 0; i < nr_dest; i++) {
> +		destvms_cp[i].vmid = cpu_to_le32(destvms[i]);
> +		destvms_cp[i].perm = cpu_to_le32(destvms_perms[i]);
> +		destvms_cp[i].ctx = 0;
> +		destvms_cp[i].ctx_size = 0;
> +	}
> +
> +	dest_dma_addr = dma_map_single(__scm->dev, destvms_cp,
> +				       destvms_cp_sz, DMA_TO_DEVICE);
> +	if (dma_mapping_error(__scm->dev, dest_dma_addr)) {
> +		ret = -ENOMEM;
> +		goto out_free_dest;
> +	}
> +
> +	mem_regions_buf = kcalloc(QCOM_SCM_MAX_BATCH_SECTION, sizeof(*mem_regions_buf),
> +				  GFP_KERNEL);
> +	if (!mem_regions_buf)
> +		return -ENOMEM;
> +
> +	while (batch_start < table->nents) {
> +		batch_size = 0;
> +		batch_iterator = 0;
> +
> +		do {
> +			mem_regions_buf[batch_iterator].mem_addr = page_to_phys(sg_page(curr_sgl));
> +			mem_regions_buf[batch_iterator].mem_size = curr_sgl->length;
> +			batch_size += curr_sgl->length;
> +			batch_iterator++;
> +			if (roll_back && curr_sgl == last_sgl)
> +				break;
> +			curr_sgl = sg_next(curr_sgl);
> +		} while (curr_sgl && batch_iterator < QCOM_SCM_MAX_BATCH_SECTION &&
> +			 curr_sgl->length + batch_size < QCOM_SCM_MAX_BATCH_SIZE);
> +
> +		batch_start += batch_iterator;
> +
> +		ret = qcom_scm_assign_mem_batch(mem_regions_buf, batch_iterator,
> +						srcvms_cp, srcvms_cp_sz,
> +						destvms_cp, destvms_cp_sz);
> +
> +		if (ret) {
> +			dev_info(__scm->dev, "Failed to assign memory protection, ret = %d\n", ret);
> +			last_sgl = curr_sgl;
> +			ret = -EADDRNOTAVAIL;

Probably should not be changing the ret value.

What happens to the memory that has been assigned so far? How do we
reclaim that?
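
Coming back to the tzmem suggestion above: with the allocator API as
posted in that series, the source-vmid staging could look roughly like
the below (untested sketch; it assumes the SCM driver gains a tzmem
pool, called __scm->mempool here, and that the function names survive
review):

        srcvms_cp = qcom_tzmem_alloc(__scm->mempool,
                                     sizeof(*srcvms_cp) * nr_src,
                                     GFP_KERNEL);
        if (!srcvms_cp)
                return -ENOMEM;

        for (i = 0; i < nr_src; i++)
                srcvms_cp[i] = cpu_to_le32(srcvms[i]);

        /*
         * Then pass qcom_tzmem_to_phys(srcvms_cp) to
         * __qcom_scm_assign_mem() and qcom_tzmem_free(srcvms_cp) when
         * done -- no dma_map_single()/dma_unmap_single() pair and no
         * virt_to_phys() on a kmalloc'ed buffer.
         */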
> +			break;
> +		}
> +		if (roll_back && curr_sgl == last_sgl)
> +			break;
> +	}
> +	kfree(mem_regions_buf);
> +
> +	dma_unmap_single(__scm->dev, dest_dma_addr,
> +			 destvms_cp_sz, DMA_TO_DEVICE);
> +
> +out_free_dest:
> +	kfree(destvms_cp);
> +out_unmap_source:
> +	dma_unmap_single(__scm->dev, source_dma_addr,
> +			 srcvms_cp_sz, DMA_TO_DEVICE);
> +out_free_source:
> +	kfree(srcvms_cp);
> +	return ret;
> +}
> +
> +/**
> + * qcom_scm_assign_table() - Make a call to prepare batches of memory
> + *                           regions and reassign memory ownership of
> + *                           several memory regions at once
> + * @table: A scatter list whose memory needs to be reassigned
> + * @srcvms: A buffer populated with the vmid(s) of the current set of owners
> + * @nr_src: The number of entries in the srcvms buffer
> + * @destvms: A buffer populated with the vmid(s) of the new owners
> + * @destvms_perms: A buffer populated with the permission flags of the new
> + *                 owners
> + * @nr_dest: The number of entries in the destvms buffer
> + *
> + * Return negative errno on failure, 0 on success.
> + */
> +int qcom_scm_assign_table(struct sg_table *table,
> +			  u32 *srcvms, int nr_src,
> +			  int *destvms, int *destvms_perms,
> +			  int nr_dest)
> +{
> +	struct scatterlist *last_sgl = NULL;
> +	int rb_ret = 0;
> +	u32 new_dests;
> +	int new_perms;
> +	int ret = 0;
> +
> +	ret = qcom_scm_prepare_mem_batch(table, srcvms, nr_src,
> +					 destvms, destvms_perms, nr_dest,
> +					 last_sgl, false);
> +
> +	if (!ret)
> +		goto out;
> +	new_dests = QCOM_SCM_VMID_HLOS;

We have the original srcvms. Will it always be HLOS? If so, then do we
need to pass srcvms at all?

> +	new_perms = QCOM_SCM_PERM_EXEC | QCOM_SCM_PERM_WRITE | QCOM_SCM_PERM_READ;
> +	rb_ret = qcom_scm_prepare_mem_batch(table, destvms, nr_dest, &new_dests,
> +					    &new_perms, nr_src, last_sgl, true);
> +	WARN_ON_ONCE(rb_ret);
> +out:
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(qcom_scm_assign_table);

Who uses this function?

> +
>  /**
>   * qcom_scm_ocmem_lock_available() - is OCMEM lock/unlock interface available
>   */
> diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h
> index ccaf28846054..abd675c7ef49 100644
> --- a/include/linux/firmware/qcom/qcom_scm.h
> +++ b/include/linux/firmware/qcom/qcom_scm.h
> @@ -8,6 +8,7 @@
>  #include <linux/err.h>
>  #include <linux/types.h>
>  #include <linux/cpumask.h>
> +#include <linux/scatterlist.h>
>
>  #include <dt-bindings/firmware/qcom,scm.h>
>
> @@ -15,6 +16,8 @@
>  #define QCOM_SCM_CPU_PWR_DOWN_L2_ON	0x0
>  #define QCOM_SCM_CPU_PWR_DOWN_L2_OFF	0x1
>  #define QCOM_SCM_HDCP_MAX_REQ_CNT	5
> +#define QCOM_SCM_MAX_BATCH_SECTION	32
> +#define QCOM_SCM_MAX_BATCH_SIZE	SZ_2M
>
>  struct qcom_scm_hdcp_req {
>  	u32 addr;
> @@ -93,6 +96,10 @@ int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size,
>  int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, u64 *src,
>  			const struct qcom_scm_vmperm *newvm,
>  			unsigned int dest_cnt);
> +int qcom_scm_assign_table(struct sg_table *table,
> +			  u32 *srcvms, int nr_src,
> +			  int *destvms, int *destvms_perms,
> +			  int nr_dest);
>
>  bool qcom_scm_ocmem_lock_available(void);
>  int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, u32 size,
> --
> 2.34.1
>
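
One more note on the error path flagged earlier: rather than remapping
the firmware error to -EADDRNOTAVAIL, the loop could record the rollback
position and propagate the original code, along these lines (sketch
only; note that as posted, the new last_sgl value never reaches
qcom_scm_assign_table() anyway, since it is passed by value, so it would
need to become a struct scatterlist **):

        if (ret) {
                dev_err(__scm->dev,
                        "failed to assign memory protection: %d\n", ret);
                *last_sgl = curr_sgl;   /* tell the caller where to roll back from */
                break;                  /* keep the error code from firmware */
        }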