On 26. 06. 21, 8:30, longli@xxxxxxxxxxxxxxxxx wrote:
Azure Blob storage provides scalable and durable data storage for Azure. (https://azure.microsoft.com/en-us/services/storage/blobs/) This driver adds support for accelerated access to Azure Blob storage. As an alternative to REST APIs, it provides a fast data path that uses host native network stack and secure direct data link for storage server access.
...
Cc: Jiri Slaby <jirislaby@xxxxxxxxxx>
I am just curious, why am I CCed? Anyway, some comments below:
--- /dev/null +++ b/drivers/hv/azure_blob.c @@ -0,0 +1,655 @@
...
+static void az_blob_on_channel_callback(void *context) +{ + struct vmbus_channel *channel = (struct vmbus_channel *)context;
Have you fed this patch through checkpatch?
+ const struct vmpacket_descriptor *desc; + + az_blob_dbg("entering interrupt from vmbus\n"); + foreach_vmbus_pkt(desc, channel) { + struct az_blob_vsp_request_ctx *request_ctx; + struct az_blob_vsp_response *response; + u64 cmd_rqst = vmbus_request_addr(&channel->requestor, + desc->trans_id); + if (cmd_rqst == VMBUS_RQST_ERROR) { + az_blob_err("incorrect transaction id %llu\n", + desc->trans_id); + continue; + } + request_ctx = (struct az_blob_vsp_request_ctx *) cmd_rqst; + response = hv_pkt_data(desc); + + az_blob_dbg("got response for request %pUb status %u " + "response_len %u\n", + &request_ctx->request->guid, response->error, + response->response_len); + request_ctx->request->response.status = response->error; + request_ctx->request->response.response_len = + response->response_len; + complete(&request_ctx->wait_vsp); + } + +}
...
+static long az_blob_ioctl_user_request(struct file *filp, unsigned long arg) +{ + struct az_blob_device *dev = &az_blob_dev; + struct az_blob_file_ctx *file_ctx = filp->private_data; + char __user *argp = (char __user *) arg; + struct az_blob_request_sync request; + struct az_blob_vsp_request_ctx request_ctx; + unsigned long flags; + int ret; + size_t request_start, request_num_pages = 0; + size_t response_start, response_num_pages = 0; + size_t data_start, data_num_pages = 0, total_num_pages; + struct page **request_pages = NULL, **response_pages = NULL; + struct page **data_pages = NULL; + struct vmbus_packet_mpb_array *desc; + u64 *pfn_array; + int desc_size; + int page_idx; + struct az_blob_vsp_request *vsp_request;
Ugh, see further.
+ + /* Fast fail if device is being removed */ + if (dev->removing) + return -ENODEV; + + if (!az_blob_safe_file_access(filp)) { + az_blob_dbg("process %d(%s) changed security contexts after" + " opening file descriptor\n", + task_tgid_vnr(current), current->comm); + return -EACCES; + } + + if (copy_from_user(&request, argp, sizeof(request))) { + az_blob_dbg("don't have permission to user provided buffer\n"); + return -EFAULT; + } + + az_blob_dbg("az_blob ioctl request guid %pUb timeout %u request_len %u" + " response_len %u data_len %u request_buffer %llx " + "response_buffer %llx data_buffer %llx\n", + &request.guid, request.timeout, request.request_len, + request.response_len, request.data_len, request.request_buffer, + request.response_buffer, request.data_buffer); + + if (!request.request_len || !request.response_len) + return -EINVAL; + + if (request.data_len && request.data_len < request.data_valid) + return -EINVAL; + + init_completion(&request_ctx.wait_vsp); + request_ctx.request = &request; + + /* + * Need to set rw to READ to have page table set up for passing to VSP. + * Setting it to WRITE will cause the page table entry not allocated + * as it's waiting on Copy-On-Write on next page fault. This doesn't + * work in this scenario because VSP wants all the pages to be present. + */ + ret = get_buffer_pages(READ, (void __user *) request.request_buffer,
Does this need __force for sparse not to complain?
+ request.request_len, &request_pages, &request_start, + &request_num_pages); + if (ret) + goto get_user_page_failed; + + ret = get_buffer_pages(READ, (void __user *) request.response_buffer, + request.response_len, &response_pages, &response_start, + &response_num_pages); + if (ret) + goto get_user_page_failed; + + if (request.data_len) { + ret = get_buffer_pages(READ, + (void __user *) request.data_buffer, request.data_len, + &data_pages, &data_start, &data_num_pages); + if (ret) + goto get_user_page_failed; + } + + total_num_pages = request_num_pages + response_num_pages + + data_num_pages; + if (total_num_pages > AZ_BLOB_MAX_PAGES) { + az_blob_dbg("number of DMA pages %lu buffer exceeding %u\n", + total_num_pages, AZ_BLOB_MAX_PAGES); + ret = -EINVAL; + goto get_user_page_failed; + } + + /* Construct a VMBUS packet and send it over to VSP */ + desc_size = sizeof(struct vmbus_packet_mpb_array) + + sizeof(u64) * total_num_pages; + desc = kzalloc(desc_size, GFP_KERNEL);
Smells like a call for struct_size().
+ vsp_request = kzalloc(sizeof(*vsp_request), GFP_KERNEL); + if (!desc || !vsp_request) { + kfree(desc); + kfree(vsp_request); + ret = -ENOMEM; + goto get_user_page_failed; + } + + desc->range.offset = 0; + desc->range.len = total_num_pages * PAGE_SIZE; + pfn_array = desc->range.pfn_array; + page_idx = 0; + + if (request.data_len) { + fill_in_page_buffer(pfn_array, &page_idx, data_pages, + data_num_pages); + vsp_request->data_buffer_offset = data_start; + vsp_request->data_buffer_length = request.data_len; + vsp_request->data_buffer_valid = request.data_valid; + } + + fill_in_page_buffer(pfn_array, &page_idx, request_pages, + request_num_pages); + vsp_request->request_buffer_offset = request_start + + data_num_pages * PAGE_SIZE; + vsp_request->request_buffer_length = request.request_len; + + fill_in_page_buffer(pfn_array, &page_idx, response_pages, + response_num_pages); + vsp_request->response_buffer_offset = response_start + + (data_num_pages + request_num_pages) * PAGE_SIZE; + vsp_request->response_buffer_length = request.response_len; + + vsp_request->version = 0; + vsp_request->timeout_ms = request.timeout; + vsp_request->operation_type = AZ_BLOB_DRIVER_USER_REQUEST; + guid_copy(&vsp_request->transaction_id, &request.guid); + + spin_lock_irqsave(&file_ctx->vsp_pending_lock, flags); + list_add_tail(&request_ctx.list, &file_ctx->vsp_pending_requests); + spin_unlock_irqrestore(&file_ctx->vsp_pending_lock, flags); + + az_blob_dbg("sending request to VSP\n"); + az_blob_dbg("desc_size %u desc->range.len %u desc->range.offset %u\n", + desc_size, desc->range.len, desc->range.offset); + az_blob_dbg("vsp_request data_buffer_offset %u data_buffer_length %u " + "data_buffer_valid %u request_buffer_offset %u " + "request_buffer_length %u response_buffer_offset %u " + "response_buffer_length %u\n", + vsp_request->data_buffer_offset, + vsp_request->data_buffer_length, + vsp_request->data_buffer_valid, + vsp_request->request_buffer_offset, + vsp_request->request_buffer_length, + vsp_request->response_buffer_offset, + vsp_request->response_buffer_length); + + ret = vmbus_sendpacket_mpb_desc(dev->device->channel, desc, desc_size, + vsp_request, sizeof(*vsp_request), (u64) &request_ctx); + + kfree(desc); + kfree(vsp_request); + if (ret) + goto vmbus_send_failed; + + wait_for_completion(&request_ctx.wait_vsp);
Provided this is ioctl, this should likely be interruptible. You don't want to account to I/O load. The same likely for az_blob_fop_release.
+ + /* + * At this point, the response is already written to request + * by VMBUS completion handler, copy them to user-mode buffers + * and return to user-mode + */ + if (copy_to_user(argp + + offsetof(struct az_blob_request_sync, + response.status), + &request.response.status,
This is ugly, why don't you make argp the appropriate pointer instead of char *? You'd need not do copy_to_user twice then, right?
+ sizeof(request.response.status))) { + ret = -EFAULT; + goto vmbus_send_failed; + } + + if (copy_to_user(argp + + offsetof(struct az_blob_request_sync, + response.response_len),
The same here.
+ &request.response.response_len, + sizeof(request.response.response_len))) + ret = -EFAULT; + +vmbus_send_failed: + spin_lock_irqsave(&file_ctx->vsp_pending_lock, flags); + list_del(&request_ctx.list); + if (list_empty(&file_ctx->vsp_pending_requests)) + wake_up(&file_ctx->wait_vsp_pending); + spin_unlock_irqrestore(&file_ctx->vsp_pending_lock, flags); + +get_user_page_failed: + free_buffer_pages(request_num_pages, request_pages); + free_buffer_pages(response_num_pages, response_pages); + free_buffer_pages(data_num_pages, data_pages); + + return ret; +}
This function is overly long. Care to split it (e.g. moving away the initialization of the structs and the debug stuff)?
+ +static long az_blob_fop_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + long ret = -EIO;
EINVAL would be more appropriate.
+ + switch (cmd) { + case IOCTL_AZ_BLOB_DRIVER_USER_REQUEST: + if (_IOC_SIZE(cmd) != sizeof(struct az_blob_request_sync)) + return -EINVAL;
How can that happen, provided the switch (cmd) and case?
+ ret = az_blob_ioctl_user_request(filp, arg); + break;
Simply: return az_blob_ioctl_user_request(filp, arg);
+ + default: + az_blob_dbg("unrecognized IOCTL code %u\n", cmd); + } + + return ret;
So return -EINVAL here directly now.
+}
...
+static int az_blob_connect_to_vsp(struct hv_device *device, u32 ring_size) +{ + int ret;
...
+ int rc;
Sometimes ret, sometimes rc, could you unify the two?
+static int __init az_blob_drv_init(void) +{ + int ret; + + ret = vmbus_driver_register(&az_blob_drv); + return ret;
Simply: return vmbus_driver_register(&az_blob_drv); regards, -- -- js suse labs