On 12/6/2022 6:26 PM, Christian König wrote: > Am 06.12.22 um 13:54 schrieb Rijo Thomas: >> >> On 12/6/2022 6:11 PM, Christian König wrote: >>> Am 06.12.22 um 13:30 schrieb Rijo Thomas: >>>> For AMD Secure Processor (ASP) to map and access TEE ring buffer, the >>>> ring buffer address sent by host to ASP must be a real physical >>>> address and the pages must be physically contiguous. >>>> >>>> In a virtualized environment though, when the driver is running in a >>>> guest VM, the pages allocated by __get_free_pages() may not be >>>> contiguous in the host (or machine) physical address space. Guests >>>> will see a guest (or pseudo) physical address and not the actual host >>>> (or machine) physical address. The TEE running on ASP cannot decipher >>>> pseudo physical addresses. It needs host or machine physical address. >>>> >>>> To resolve this problem, use DMA APIs for allocating buffers that must >>>> be shared with TEE. This will ensure that the pages are contiguous in >>>> host (or machine) address space. If the DMA handle is an IOVA, >>>> translate it into a physical address before sending it to ASP. >>>> >>>> This patch also exports two APIs (one for buffer allocation and >>>> another to free the buffer). This API can be used by AMD-TEE driver to >>>> share buffers with TEE. >>> Maybe use some other name than dma_buffer for your structure, cause that is usually something completely different in the Linux kernel. >>> >> Sure Christian. I shall rename "struct dma_buffer" to "struct shm_buffer" (Shared Memory Buffer) in the file include/linux/psp-tee.h >> The functions psp_tee_alloc_dmabuf() and psp_tee_free_dmabuf() shall be renamed to psp_tee_alloc_shmbuf() and psp_tee_free_shmbuf(), respectively. >> I shall do correction in next patch revision. I will wait for others to review as well before I post update. > > I strongly suggest to use the name psp_buffer or something similar because shm_buffer is usually used for something else as well. I see that the TEE subsystem defines "struct tee_shm". Okay, I will name the newly added structure as "struct psp_tee_buffer" and the APIs as psp_tee_alloc_buffer() and psp_tee_free_buffer(). Complete function prototype: struct psp_tee_buffer *psp_tee_alloc_buffer(unsigned long size, gfp_t gfp); void psp_tee_free_buffer(struct psp_tee_buffer *buffer); Does this look okay? Thanks, Rijo > > Regards, > Christian. > >> >> Thanks, >> Rijo >> >>> Regards, >>> Christian. >>> >>>> Fixes: 33960acccfbd ("crypto: ccp - add TEE support for Raven Ridge") >>>> Cc: Tom Lendacky <thomas.lendacky@xxxxxxx> >>>> Cc: stable@xxxxxxxxxxxxxxx >>>> Signed-off-by: Rijo Thomas <Rijo-john.Thomas@xxxxxxx> >>>> Co-developed-by: Jeshwanth <JESHWANTHKUMAR.NK@xxxxxxx> >>>> Signed-off-by: Jeshwanth <JESHWANTHKUMAR.NK@xxxxxxx> >>>> Reviewed-by: Devaraj Rangasamy <Devaraj.Rangasamy@xxxxxxx> >>>> --- >>>> drivers/crypto/ccp/psp-dev.c | 6 +- >>>> drivers/crypto/ccp/tee-dev.c | 116 ++++++++++++++++++++++------------- >>>> drivers/crypto/ccp/tee-dev.h | 9 +-- >>>> include/linux/psp-tee.h | 47 ++++++++++++++ >>>> 4 files changed, 127 insertions(+), 51 deletions(-) >>>> >>>> diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c >>>> index c9c741ac8442..2b86158d7435 100644 >>>> --- a/drivers/crypto/ccp/psp-dev.c >>>> +++ b/drivers/crypto/ccp/psp-dev.c >>>> @@ -161,13 +161,13 @@ int psp_dev_init(struct sp_device *sp) >>>> goto e_err; >>>> } >>>> + if (sp->set_psp_master_device) >>>> + sp->set_psp_master_device(sp); >>>> + >>>> ret = psp_init(psp); >>>> if (ret) >>>> goto e_irq; >>>> - if (sp->set_psp_master_device) >>>> - sp->set_psp_master_device(sp); >>>> - >>>> /* Enable interrupt */ >>>> iowrite32(-1, psp->io_regs + psp->vdata->inten_reg); >>>> diff --git a/drivers/crypto/ccp/tee-dev.c b/drivers/crypto/ccp/tee-dev.c >>>> index 5c9d47f3be37..1631d9851e54 100644 >>>> --- a/drivers/crypto/ccp/tee-dev.c >>>> +++ b/drivers/crypto/ccp/tee-dev.c >>>> @@ -12,8 +12,9 @@ >>>> #include <linux/mutex.h> >>>> #include <linux/delay.h> >>>> #include <linux/slab.h> >>>> +#include <linux/dma-direct.h> >>>> +#include <linux/iommu.h> >>>> #include <linux/gfp.h> >>>> -#include <linux/psp-sev.h> >>>> #include <linux/psp-tee.h> >>>> #include "psp-dev.h" >>>> @@ -21,25 +22,64 @@ >>>> static bool psp_dead; >>>> +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp) >>>> +{ >>>> + struct psp_device *psp = psp_get_master_device(); >>>> + struct dma_buffer *dma_buf; >>>> + struct iommu_domain *dom; >>>> + >>>> + if (!psp || !size) >>>> + return NULL; >>>> + >>>> + dma_buf = kzalloc(sizeof(*dma_buf), GFP_KERNEL); >>>> + if (!dma_buf) >>>> + return NULL; >>>> + >>>> + dma_buf->vaddr = dma_alloc_coherent(psp->dev, size, &dma_buf->dma, gfp); >>>> + if (!dma_buf->vaddr || !dma_buf->dma) { >>>> + kfree(dma_buf); >>>> + return NULL; >>>> + } >>>> + >>>> + dma_buf->size = size; >>>> + >>>> + dom = iommu_get_domain_for_dev(psp->dev); >>>> + if (dom) >>>> + dma_buf->paddr = iommu_iova_to_phys(dom, dma_buf->dma); >>>> + else >>>> + dma_buf->paddr = dma_buf->dma; >>>> + >>>> + return dma_buf; >>>> +} >>>> +EXPORT_SYMBOL(psp_tee_alloc_dmabuf); >>>> + >>>> +void psp_tee_free_dmabuf(struct dma_buffer *dma_buf) >>>> +{ >>>> + struct psp_device *psp = psp_get_master_device(); >>>> + >>>> + if (!psp || !dma_buf) >>>> + return; >>>> + >>>> + dma_free_coherent(psp->dev, dma_buf->size, >>>> + dma_buf->vaddr, dma_buf->dma); >>>> + >>>> + kfree(dma_buf); >>>> +} >>>> +EXPORT_SYMBOL(psp_tee_free_dmabuf); >>>> + >>>> static int tee_alloc_ring(struct psp_tee_device *tee, int ring_size) >>>> { >>>> struct ring_buf_manager *rb_mgr = &tee->rb_mgr; >>>> - void *start_addr; >>>> if (!ring_size) >>>> return -EINVAL; >>>> - /* We need actual physical address instead of DMA address, since >>>> - * Trusted OS running on AMD Secure Processor will map this region >>>> - */ >>>> - start_addr = (void *)__get_free_pages(GFP_KERNEL, get_order(ring_size)); >>>> - if (!start_addr) >>>> + rb_mgr->ring_buf = psp_tee_alloc_dmabuf(ring_size, >>>> + GFP_KERNEL | __GFP_ZERO); >>>> + if (!rb_mgr->ring_buf) { >>>> + dev_err(tee->dev, "ring allocation failed\n"); >>>> return -ENOMEM; >>>> - >>>> - memset(start_addr, 0x0, ring_size); >>>> - rb_mgr->ring_start = start_addr; >>>> - rb_mgr->ring_size = ring_size; >>>> - rb_mgr->ring_pa = __psp_pa(start_addr); >>>> + } >>>> mutex_init(&rb_mgr->mutex); >>>> return 0; >>>> @@ -49,15 +89,8 @@ static void tee_free_ring(struct psp_tee_device *tee) >>>> { >>>> struct ring_buf_manager *rb_mgr = &tee->rb_mgr; >>>> - if (!rb_mgr->ring_start) >>>> - return; >>>> + psp_tee_free_dmabuf(rb_mgr->ring_buf); >>>> - free_pages((unsigned long)rb_mgr->ring_start, >>>> - get_order(rb_mgr->ring_size)); >>>> - >>>> - rb_mgr->ring_start = NULL; >>>> - rb_mgr->ring_size = 0; >>>> - rb_mgr->ring_pa = 0; >>>> mutex_destroy(&rb_mgr->mutex); >>>> } >>>> @@ -81,35 +114,36 @@ static int tee_wait_cmd_poll(struct psp_tee_device *tee, unsigned int timeout, >>>> return -ETIMEDOUT; >>>> } >>>> -static >>>> -struct tee_init_ring_cmd *tee_alloc_cmd_buffer(struct psp_tee_device *tee) >>>> +struct dma_buffer *tee_alloc_cmd_buffer(struct psp_tee_device *tee) >>>> { >>>> struct tee_init_ring_cmd *cmd; >>>> + struct dma_buffer *cmd_buffer; >>>> - cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); >>>> - if (!cmd) >>>> + cmd_buffer = psp_tee_alloc_dmabuf(sizeof(*cmd), >>>> + GFP_KERNEL | __GFP_ZERO); >>>> + if (!cmd_buffer) >>>> return NULL; >>>> - cmd->hi_addr = upper_32_bits(tee->rb_mgr.ring_pa); >>>> - cmd->low_addr = lower_32_bits(tee->rb_mgr.ring_pa); >>>> - cmd->size = tee->rb_mgr.ring_size; >>>> + cmd = (struct tee_init_ring_cmd *)cmd_buffer->vaddr; >>>> + cmd->hi_addr = upper_32_bits(tee->rb_mgr.ring_buf->paddr); >>>> + cmd->low_addr = lower_32_bits(tee->rb_mgr.ring_buf->paddr); >>>> + cmd->size = tee->rb_mgr.ring_buf->size; >>>> dev_dbg(tee->dev, "tee: ring address: high = 0x%x low = 0x%x size = %u\n", >>>> cmd->hi_addr, cmd->low_addr, cmd->size); >>>> - return cmd; >>>> + return cmd_buffer; >>>> } >>>> -static inline void tee_free_cmd_buffer(struct tee_init_ring_cmd *cmd) >>>> +static inline void tee_free_cmd_buffer(struct dma_buffer *cmd_buffer) >>>> { >>>> - kfree(cmd); >>>> + psp_tee_free_dmabuf(cmd_buffer); >>>> } >>>> static int tee_init_ring(struct psp_tee_device *tee) >>>> { >>>> int ring_size = MAX_RING_BUFFER_ENTRIES * sizeof(struct tee_ring_cmd); >>>> - struct tee_init_ring_cmd *cmd; >>>> - phys_addr_t cmd_buffer; >>>> + struct dma_buffer *cmd_buffer; >>>> unsigned int reg; >>>> int ret; >>>> @@ -123,21 +157,19 @@ static int tee_init_ring(struct psp_tee_device *tee) >>>> tee->rb_mgr.wptr = 0; >>>> - cmd = tee_alloc_cmd_buffer(tee); >>>> - if (!cmd) { >>>> + cmd_buffer = tee_alloc_cmd_buffer(tee); >>>> + if (!cmd_buffer) { >>>> tee_free_ring(tee); >>>> return -ENOMEM; >>>> } >>>> - cmd_buffer = __psp_pa((void *)cmd); >>>> - >>>> /* Send command buffer details to Trusted OS by writing to >>>> * CPU-PSP message registers >>>> */ >>>> - iowrite32(lower_32_bits(cmd_buffer), >>>> + iowrite32(lower_32_bits(cmd_buffer->paddr), >>>> tee->io_regs + tee->vdata->cmdbuff_addr_lo_reg); >>>> - iowrite32(upper_32_bits(cmd_buffer), >>>> + iowrite32(upper_32_bits(cmd_buffer->paddr), >>>> tee->io_regs + tee->vdata->cmdbuff_addr_hi_reg); >>>> iowrite32(TEE_RING_INIT_CMD, >>>> tee->io_regs + tee->vdata->cmdresp_reg); >>>> @@ -157,7 +189,7 @@ static int tee_init_ring(struct psp_tee_device *tee) >>>> } >>>> free_buf: >>>> - tee_free_cmd_buffer(cmd); >>>> + tee_free_cmd_buffer(cmd_buffer); >>>> return ret; >>>> } >>>> @@ -167,7 +199,7 @@ static void tee_destroy_ring(struct psp_tee_device *tee) >>>> unsigned int reg; >>>> int ret; >>>> - if (!tee->rb_mgr.ring_start) >>>> + if (!tee->rb_mgr.ring_buf->vaddr) >>>> return; >>>> if (psp_dead) >>>> @@ -256,7 +288,7 @@ static int tee_submit_cmd(struct psp_tee_device *tee, enum tee_cmd_id cmd_id, >>>> do { >>>> /* Get pointer to ring buffer command entry */ >>>> cmd = (struct tee_ring_cmd *) >>>> - (tee->rb_mgr.ring_start + tee->rb_mgr.wptr); >>>> + (tee->rb_mgr.ring_buf->vaddr + tee->rb_mgr.wptr); >>>> rptr = ioread32(tee->io_regs + tee->vdata->ring_rptr_reg); >>>> @@ -305,7 +337,7 @@ static int tee_submit_cmd(struct psp_tee_device *tee, enum tee_cmd_id cmd_id, >>>> /* Update local copy of write pointer */ >>>> tee->rb_mgr.wptr += sizeof(struct tee_ring_cmd); >>>> - if (tee->rb_mgr.wptr >= tee->rb_mgr.ring_size) >>>> + if (tee->rb_mgr.wptr >= tee->rb_mgr.ring_buf->size) >>>> tee->rb_mgr.wptr = 0; >>>> /* Trigger interrupt to Trusted OS */ >>>> diff --git a/drivers/crypto/ccp/tee-dev.h b/drivers/crypto/ccp/tee-dev.h >>>> index 49d26158b71e..9238487ee8bf 100644 >>>> --- a/drivers/crypto/ccp/tee-dev.h >>>> +++ b/drivers/crypto/ccp/tee-dev.h >>>> @@ -16,6 +16,7 @@ >>>> #include <linux/device.h> >>>> #include <linux/mutex.h> >>>> +#include <linux/psp-tee.h> >>>> #define TEE_DEFAULT_TIMEOUT 10 >>>> #define MAX_BUFFER_SIZE 988 >>>> @@ -48,17 +49,13 @@ struct tee_init_ring_cmd { >>>> /** >>>> * struct ring_buf_manager - Helper structure to manage ring buffer. >>>> - * @ring_start: starting address of ring buffer >>>> - * @ring_size: size of ring buffer in bytes >>>> - * @ring_pa: physical address of ring buffer >>>> * @wptr: index to the last written entry in ring buffer >>>> + * @ring_buf: ring buffer allocated using DMA api >>>> */ >>>> struct ring_buf_manager { >>>> struct mutex mutex; /* synchronizes access to ring buffer */ >>>> - void *ring_start; >>>> - u32 ring_size; >>>> - phys_addr_t ring_pa; >>>> u32 wptr; >>>> + struct dma_buffer *ring_buf; >>>> }; >>>> struct psp_tee_device { >>>> diff --git a/include/linux/psp-tee.h b/include/linux/psp-tee.h >>>> index cb0c95d6d76b..c0fa922f24d4 100644 >>>> --- a/include/linux/psp-tee.h >>>> +++ b/include/linux/psp-tee.h >>>> @@ -13,6 +13,7 @@ >>>> #include <linux/types.h> >>>> #include <linux/errno.h> >>>> +#include <linux/dma-mapping.h> >>>> /* This file defines the Trusted Execution Environment (TEE) interface commands >>>> * and the API exported by AMD Secure Processor driver to communicate with >>>> @@ -40,6 +41,20 @@ enum tee_cmd_id { >>>> TEE_CMD_ID_UNMAP_SHARED_MEM, >>>> }; >>>> +/** >>>> + * struct dma_buffer - Structure for a DMA buffer. >>>> + * @dma: DMA buffer address >>>> + * @paddr: Physical address of DMA buffer >>>> + * @vaddr: CPU virtual address of DMA buffer >>>> + * @size: Size of DMA buffer in bytes >>>> + */ >>>> +struct dma_buffer { >>>> + dma_addr_t dma; >>>> + phys_addr_t paddr; >>>> + void *vaddr; >>>> + unsigned long size; >>>> +}; >>>> + >>>> #ifdef CONFIG_CRYPTO_DEV_SP_PSP >>>> /** >>>> * psp_tee_process_cmd() - Process command in Trusted Execution Environment >>>> @@ -75,6 +90,28 @@ int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, size_t len, >>>> */ >>>> int psp_check_tee_status(void); >>>> +/** >>>> + * psp_tee_alloc_dmabuf() - Allocates memory of requested size and flags using >>>> + * dma_alloc_coherent() API. >>>> + * >>>> + * This function can be used to allocate a shared memory region between the >>>> + * host and PSP TEE. >>>> + * >>>> + * Returns: >>>> + * non-NULL a valid pointer to struct dma_buffer >>>> + * NULL on failure >>>> + */ >>>> +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp); >>>> + >>>> +/** >>>> + * psp_tee_free_dmabuf() - Deallocates memory using dma_free_coherent() API. >>>> + * >>>> + * This function can be used to release shared memory region between host >>>> + * and PSP TEE. >>>> + * >>>> + */ >>>> +void psp_tee_free_dmabuf(struct dma_buffer *dma_buffer); >>>> + >>>> #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ >>>> static inline int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, >>>> @@ -87,5 +124,15 @@ static inline int psp_check_tee_status(void) >>>> { >>>> return -ENODEV; >>>> } >>>> + >>>> +static inline >>>> +struct dma_buffer *psp_tee_alloc_dmabuf(unsigned long size, gfp_t gfp) >>>> +{ >>>> + return NULL; >>>> +} >>>> + >>>> +static inline void psp_tee_free_dmabuf(struct dma_buffer *dma_buffer) >>>> +{ >>>> +} >>>> #endif /* CONFIG_CRYPTO_DEV_SP_PSP */ >>>> #endif /* __PSP_TEE_H_ */ >