From: Boqun Feng <boqun.feng@xxxxxxxxx> Sent: Tuesday, September 1, 2020 8:01 PM > > This patch introduces two types of GPADL: HV_GPADL_{BUFFER, RING}. The > types of GPADL are purely a concept in the guest, IOW the hypervisor > treats them as the same. > > The reason of introducing the types of GPADL is to support guests whose s/of/for/ > page size is not 4k (the page size of the Hyper-V hypervisor). In these > guests, both the headers and the data parts of the ringbuffers need to > be aligned to the PAGE_SIZE, because 1) some of the ringbuffers will be > mapped into userspace and 2) we use a "double mapping" mechanism to > support fast wrap-around, and "double mapping" relies on ringbuffers > being page-aligned. However, the Hyper-V hypervisor only uses 4k > (HV_HYP_PAGE_SIZE) headers. Our solution to this is that we always make > the headers of ringbuffers take one guest page and when GPADL is > established between the guest and hypervisor, only the first 4k of > the header is used. To handle this special case, we need the types of GPADL > to distinguish the different guest memory usage for GPADL. > > A type enum is introduced along with several general interfaces to > describe the differences between normal buffer GPADL and ringbuffer > GPADL. > > Signed-off-by: Boqun Feng <boqun.feng@xxxxxxxxx> > --- > drivers/hv/channel.c | 159 +++++++++++++++++++++++++++++++++++------ > include/linux/hyperv.h | 44 +++++++++++- > 2 files changed, 182 insertions(+), 21 deletions(-) > > diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c > index 1cbe8fc931fc..7c443fd567e4 100644 > --- a/drivers/hv/channel.c > +++ b/drivers/hv/channel.c > @@ -35,6 +35,98 @@ static unsigned long virt_to_hvpfn(void *addr) > return paddr >> HV_HYP_PAGE_SHIFT; > } > > +/* > + * hv_gpadl_size - Return the real size of a gpadl, the size that Hyper-V uses > + * > + * For BUFFER gpadl, Hyper-V uses the exact same size as the guest does.
> + * > + * For RING gpadl, in each ring, the guest uses one PAGE_SIZE as the header > + * (because of the alignment requirement), however, the hypervisor only > + * uses the first HV_HYP_PAGE_SIZE as the header, therefore leaving a > + * (PAGE_SIZE - HV_HYP_PAGE_SIZE) gap. And since there are two rings in a > + * ringbuffer, So the total size for a RING gpadl that Hyper-V uses is the Unneeded word "So" > + * total size that the guest uses minus twice the gap size. > + */ > +static inline u32 hv_gpadl_size(enum hv_gpadl_type type, u32 size) > +{ > + switch (type) { > + case HV_GPADL_BUFFER: > + return size; > + case HV_GPADL_RING: > + /* The size of a ringbuffer must be page-aligned */ > + BUG_ON(size % PAGE_SIZE); > + /* > + * Two things to notice here: > + * 1) We're processing two ring buffers as a unit > + * 2) We're skipping any space larger than HV_HYP_PAGE_SIZE in > + * the first guest-size page of each of the two ring buffers. > + * So we effectively subtract out two guest-size pages, and add > + * back two Hyper-V size pages. > + */ > + return size - 2 * (PAGE_SIZE - HV_HYP_PAGE_SIZE); > + } > + BUG(); > + return 0; > +} > + > +/* > + * hv_ring_gpadl_send_offset - Calculate the send offset in a ring gpadl based > + * on the offset in the guest > + * > + * @send_offset: the offset (in bytes) where the send ringbuffer starts in the > + * virtual address space of the guest > + */ > +static inline u32 hv_ring_gpadl_send_offset(u32 send_offset) > +{ > + > + /* > + * For RING gpadl, in each ring, the guest uses one PAGE_SIZE as the > + * header (because of the alignment requirement), however, the > + * hypervisor only uses the first HV_HYP_PAGE_SIZE as the header, > + * therefore leaving a (PAGE_SIZE - HV_HYP_PAGE_SIZE) gap. > + * > + * And to calculate the effective send offset in gpadl, we need to > + * subtract this gap.
> + */ > + return send_offset - (PAGE_SIZE - HV_HYP_PAGE_SIZE); > +} > + > +/* > + * hv_gpadl_hvpfn - Return the Hyper-V page PFN of the @i th Hyper-V page in > + * the gpadl > + * > + * @type: the type of the gpadl > + * @kbuffer: the pointer to the gpadl in the guest > + * @size: the total size (in bytes) of the gpadl > + * @send_offset: the offset (in bytes) where the send ringbuffer starts in the > + * virtual address space of the guest > + * @i: the index > + */ > +static inline u64 hv_gpadl_hvpfn(enum hv_gpadl_type type, void *kbuffer, > + u32 size, u32 send_offset, int i) > +{ > + int send_idx = hv_ring_gpadl_send_offset(send_offset) >> HV_HYP_PAGE_SHIFT; > + unsigned long delta = 0UL; > + > + switch (type) { > + case HV_GPADL_BUFFER: > + break; > + case HV_GPADL_RING: > + if (i == 0) > + delta = 0; > + else if (i <= send_idx) > + delta = PAGE_SIZE - HV_HYP_PAGE_SIZE; > + else > + delta = 2 * (PAGE_SIZE - HV_HYP_PAGE_SIZE); > + break; > + default: > + BUG(); > + break; > + } > + > + return virt_to_hvpfn(kbuffer + delta + (HV_HYP_PAGE_SIZE * i)); > +} > + > /* > * vmbus_setevent- Trigger an event notification on the specified > * channel. 
> @@ -160,7 +252,8 @@ EXPORT_SYMBOL_GPL(vmbus_send_modifychannel); > /* > * create_gpadl_header - Creates a gpadl for the specified buffer > */ > -static int create_gpadl_header(void *kbuffer, u32 size, > +static int create_gpadl_header(enum hv_gpadl_type type, void *kbuffer, > + u32 size, u32 send_offset, > struct vmbus_channel_msginfo **msginfo) > { > int i; > @@ -173,7 +266,7 @@ static int create_gpadl_header(void *kbuffer, u32 size, > > int pfnsum, pfncount, pfnleft, pfncurr, pfnsize; > > - pagecount = size >> HV_HYP_PAGE_SHIFT; > + pagecount = hv_gpadl_size(type, size) >> HV_HYP_PAGE_SHIFT; > > /* do we need a gpadl body msg */ > pfnsize = MAX_SIZE_CHANNEL_MESSAGE - > @@ -200,10 +293,10 @@ static int create_gpadl_header(void *kbuffer, u32 size, > gpadl_header->range_buflen = sizeof(struct gpa_range) + > pagecount * sizeof(u64); > gpadl_header->range[0].byte_offset = 0; > - gpadl_header->range[0].byte_count = size; > + gpadl_header->range[0].byte_count = hv_gpadl_size(type, size); > for (i = 0; i < pfncount; i++) > - gpadl_header->range[0].pfn_array[i] = virt_to_hvpfn( > - kbuffer + HV_HYP_PAGE_SIZE * i); > + gpadl_header->range[0].pfn_array[i] = hv_gpadl_hvpfn( > + type, kbuffer, size, send_offset, i); > *msginfo = msgheader; > > pfnsum = pfncount; > @@ -254,8 +347,8 @@ static int create_gpadl_header(void *kbuffer, u32 size, > * so the hypervisor guarantees that this is ok. 
> */ > for (i = 0; i < pfncurr; i++) > - gpadl_body->pfn[i] = virt_to_hvpfn( > - kbuffer + HV_HYP_PAGE_SIZE * (pfnsum + i)); > + gpadl_body->pfn[i] = hv_gpadl_hvpfn(type, > + kbuffer, size, send_offset, pfnsum + i); > > /* add to msg header */ > list_add_tail(&msgbody->msglistentry, > @@ -281,10 +374,10 @@ static int create_gpadl_header(void *kbuffer, u32 size, > gpadl_header->range_buflen = sizeof(struct gpa_range) + > pagecount * sizeof(u64); > gpadl_header->range[0].byte_offset = 0; > - gpadl_header->range[0].byte_count = size; > + gpadl_header->range[0].byte_count = hv_gpadl_size(type, size); > for (i = 0; i < pagecount; i++) > - gpadl_header->range[0].pfn_array[i] = virt_to_hvpfn( > - kbuffer + HV_HYP_PAGE_SIZE * i); > + gpadl_header->range[0].pfn_array[i] = hv_gpadl_hvpfn( > + type, kbuffer, size, send_offset, i); > > *msginfo = msgheader; > } > @@ -297,15 +390,20 @@ static int create_gpadl_header(void *kbuffer, u32 size, > } > > /* > - * vmbus_establish_gpadl - Establish a GPADL for the specified buffer > + * __vmbus_establish_gpadl - Establish a GPADL for a buffer or ringbuffer > * > * @channel: a channel > + * @type: the type of the corresponding GPADL, only meaningful for the guest. 
> * @kbuffer: from kmalloc or vmalloc > * @size: page-size multiple > + * @send_offset: the offset (in bytes) where the send ring buffer starts, > + * should be 0 for BUFFER type gpadl > * @gpadl_handle: some funky thing > */ > -int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, > - u32 size, u32 *gpadl_handle) > +static int __vmbus_establish_gpadl(struct vmbus_channel *channel, > + enum hv_gpadl_type type, void *kbuffer, > + u32 size, u32 send_offset, > + u32 *gpadl_handle) > { > struct vmbus_channel_gpadl_header *gpadlmsg; > struct vmbus_channel_gpadl_body *gpadl_body; > @@ -319,7 +417,7 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void > *kbuffer, > next_gpadl_handle = > (atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1); > > - ret = create_gpadl_header(kbuffer, size, &msginfo); > + ret = create_gpadl_header(type, kbuffer, size, send_offset, &msginfo); > if (ret) > return ret; > > @@ -400,6 +498,21 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void > *kbuffer, > kfree(msginfo); > return ret; > } > + > +/* > + * vmbus_establish_gpadl - Establish a GPADL for the specified buffer > + * > + * @channel: a channel > + * @kbuffer: from kmalloc or vmalloc > + * @size: page-size multiple > + * @gpadl_handle: some funky thing > + */ > +int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, > + u32 size, u32 *gpadl_handle) > +{ > + return __vmbus_establish_gpadl(channel, HV_GPADL_BUFFER, kbuffer, size, > + 0U, gpadl_handle); > +} > EXPORT_SYMBOL_GPL(vmbus_establish_gpadl); > > static int __vmbus_open(struct vmbus_channel *newchannel, > @@ -438,10 +551,11 @@ static int __vmbus_open(struct vmbus_channel *newchannel, > /* Establish the gpadl for the ring buffer */ > newchannel->ringbuffer_gpadlhandle = 0; > > - err = vmbus_establish_gpadl(newchannel, > - page_address(newchannel->ringbuffer_page), > - (send_pages + recv_pages) << PAGE_SHIFT, > - &newchannel->ringbuffer_gpadlhandle); > + err = 
__vmbus_establish_gpadl(newchannel, HV_GPADL_RING, > + page_address(newchannel->ringbuffer_page), > + (send_pages + recv_pages) << PAGE_SHIFT, > + newchannel->ringbuffer_send_offset << PAGE_SHIFT, > + &newchannel->ringbuffer_gpadlhandle); > if (err) > goto error_clean_ring; > > @@ -462,7 +576,13 @@ static int __vmbus_open(struct vmbus_channel *newchannel, > open_msg->openid = newchannel->offermsg.child_relid; > open_msg->child_relid = newchannel->offermsg.child_relid; > open_msg->ringbuffer_gpadlhandle = newchannel->ringbuffer_gpadlhandle; > - open_msg->downstream_ringbuffer_pageoffset = newchannel->ringbuffer_send_offset; > + /* > + * The unit of ->downstream_ringbuffer_pageoffset is HV_HYP_PAGE and > + * the unit of ->ringbuffer_send_offset is PAGE, so here we first > + * calculate it into bytes and then convert into HV_HYP_PAGE. > + */ > + open_msg->downstream_ringbuffer_pageoffset = > + hv_ring_gpadl_send_offset(newchannel->ringbuffer_send_offset << PAGE_SHIFT) >> HV_HYP_PAGE_SHIFT; Line length? > open_msg->target_vp = hv_cpu_number_to_vp_number(newchannel->target_cpu); > > if (userdatalen) > @@ -556,7 +676,6 @@ int vmbus_open(struct vmbus_channel *newchannel, > } > EXPORT_SYMBOL_GPL(vmbus_open); > > - > /* > * vmbus_teardown_gpadl -Teardown the specified GPADL handle > */ > diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h > index 38100e80360a..7d16dd28aa48 100644 > --- a/include/linux/hyperv.h > +++ b/include/linux/hyperv.h > @@ -29,6 +29,48 @@ > > #pragma pack(push, 1) > > +/* > + * Types for GPADL, which decide how the GPADL header is created. > + * > + * There is not much difference between BUFFER and RING if PAGE_SIZE is the > + * same as HV_HYP_PAGE_SIZE.
> + * > + * If PAGE_SIZE is bigger than HV_HYP_PAGE_SIZE, the headers of ring buffers > + * will be of PAGE_SIZE, however, only the first HV_HYP_PAGE will be put > + * into gpadl, therefore the number for HV_HYP_PAGE and the indexes of each > + * HV_HYP_PAGE will be different between different types of GPADL, for example > + * if PAGE_SIZE is 64K: > + * > + * BUFFER: > + * > + * gva: |-- 64k --|-- 64k --| ... | > + * gpa: | 4k | 4k | ... | 4k | 4k | 4k | ... | 4k | > + * index: 0 1 2 15 16 17 18 .. 31 32 ... > + * | | ... | | | ... | ... > + * v V V V V V > + * gpadl: | 4k | 4k | ... | 4k | 4k | 4k | ... | 4k | ... | > + * index: 0 1 2 ... 15 16 17 18 .. 31 32 ... > + * > + * RING: > + * > + * | header | data | header | data | > + * gva: |-- 64k --|-- 64k --| ... |-- 64k --|-- 64k --| ... | > + * gpa: | 4k | .. | 4k | 4k | ... | 4k | ... | 4k | .. | 4k | .. | ... | > + * index: 0 1 16 17 18 31 ... n n+1 n+16 ... 2n > + * | / / / | / / > + * | / / / | / / > + * | / / ... / ... | / ... / > + * | / / / | / / > + * | / / / | / / > + * V V V V V V v > + * gpadl: | 4k | 4k | ... | ... | 4k | 4k | ... | > + * index: 0 1 2 ... 16 ... n-15 n-14 n-13 ... 2n-30 > + */ > +enum hv_gpadl_type { > + HV_GPADL_BUFFER, > + HV_GPADL_RING > +}; > + > /* Single-page buffer */ > struct hv_page_buffer { > u32 len; > @@ -111,7 +153,7 @@ struct hv_ring_buffer { > } feature_bits; > > /* Pad it to PAGE_SIZE so that data starts on page boundary */ > - u8 reserved2[4028]; > + u8 reserved2[PAGE_SIZE - 68]; > > /* > * Ring data starts here + RingDataStartOffset > -- > 2.28.0