On Tue, Apr 10, 2018 at 12:06:47PM -0700, Alistair Strachan wrote: > +static int do_create_fd_scoped_permission( > + struct vsoc_device_region *region_p, > + struct fd_scoped_permission_node *np, > + struct fd_scoped_permission_arg *__user arg) > +{ > + struct file *managed_filp; > + s32 managed_fd; > + atomic_t *owner_ptr = NULL; > + struct vsoc_device_region *managed_region_p; > + > + if (copy_from_user(&np->permission, &arg->perm, sizeof(*np)) || > + copy_from_user(&managed_fd, > + &arg->managed_region_fd, sizeof(managed_fd))) { > + return -EFAULT; > + } > + managed_filp = fdget(managed_fd).file; > + /* Check that it's a valid fd, */ > + if (!managed_filp || vsoc_validate_filep(managed_filp)) > + return -EPERM; > + /* EEXIST if the given fd already has a permission. */ > + if (((struct vsoc_private_data *)managed_filp->private_data)-> > + fd_scoped_permission_node) > + return -EEXIST; > + managed_region_p = vsoc_region_from_filep(managed_filp); > + /* Check that the provided region is managed by this one */ > + if (&vsoc_dev.regions[managed_region_p->managed_by] != region_p) > + return -EPERM; > + /* The area must be well formed and have non-zero size */ > + if (np->permission.begin_offset >= np->permission.end_offset) > + return -EINVAL; > + /* The area must fit in the memory window */ > + if (np->permission.end_offset > > + vsoc_device_region_size(managed_region_p)) > + return -ERANGE; > + /* The area must be in the region data section */ > + if (np->permission.begin_offset < > + managed_region_p->offset_of_region_data) > + return -ERANGE; > + /* The area must be page aligned */ > + if (!PAGE_ALIGNED(np->permission.begin_offset) || > + !PAGE_ALIGNED(np->permission.end_offset)) > + return -EINVAL; > + /* The owner flag must reside in the owner memory */ > + if (np->permission.owner_offset + sizeof(np->permission.owner_offset) > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ There is a potential integer overflow here > + vsoc_device_region_size(region_p)) > + return -ERANGE; > + /* The owner flag must reside in the data section */ > + if (np->permission.owner_offset < region_p->offset_of_region_data) > + return -EINVAL; > + /* Owner offset must be naturally aligned in the window */ > + if (np->permission.owner_offset & > + (sizeof(np->permission.owner_offset) - 1)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ but then we prevent it here so that's fine. But it would be better to reverse these checks so that it's easier to review. > + return -EINVAL; > + /* The owner value must change to claim the memory */ > + if (np->permission.owned_value == VSOC_REGION_FREE) > + return -EINVAL; > + owner_ptr = > + (atomic_t *)shm_off_to_virtual_addr(region_p->region_begin_offset + > + np->permission.owner_offset); > + /* We've already verified that this is in the shared memory window, so > + * it should be safe to write to this address. > + */ > + if (atomic_cmpxchg(owner_ptr, > + VSOC_REGION_FREE, > + np->permission.owned_value) != VSOC_REGION_FREE) { > + return -EBUSY; > + } > + ((struct vsoc_private_data *)managed_filp->private_data)-> > + fd_scoped_permission_node = np; > + /* The file offset needs to be adjusted if the calling > + * process did any read/write operations on the fd > + * before creating the permission. > + */ > + if (managed_filp->f_pos) { > + if (managed_filp->f_pos > np->permission.end_offset) { > + /* If the offset is beyond the permission end, set it > + * to the end. > + */ > + managed_filp->f_pos = np->permission.end_offset; > + } else { > + /* If the offset is within the permission interval > + * keep it there otherwise reset it to zero. > + */ > + if (managed_filp->f_pos < np->permission.begin_offset) { > + managed_filp->f_pos = 0; > + } else { > + managed_filp->f_pos -= > + np->permission.begin_offset; > + } > + } > + } > + return 0; > +} > + [ snip ] > + > +/** > + * Implements the inner logic of cond_wait. Copies to and from userspace are > + * done in the helper function below. > + */ > +static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg) > +{ > + DEFINE_WAIT(wait); > + u32 region_number = iminor(file_inode(filp)); > + struct vsoc_region_data *data = vsoc_dev.regions_data + region_number; > + struct hrtimer_sleeper timeout, *to = NULL; > + int ret = 0; > + struct vsoc_device_region *region_p = vsoc_region_from_filep(filp); > + atomic_t *address = NULL; > + struct timespec ts; > + > + /* Ensure that the offset is aligned */ > + if (arg->offset & (sizeof(uint32_t) - 1)) > + return -EADDRNOTAVAIL; > + /* Ensure that the offset is within shared memory */ > + if (((uint64_t)arg->offset) + region_p->region_begin_offset + > + sizeof(uint32_t) > region_p->region_end_offset) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This could overflow. > + return -E2BIG; > + address = shm_off_to_virtual_addr(region_p->region_begin_offset + > + arg->offset); > + > + /* Ensure that the type of wait is valid */ > + switch (arg->wait_type) { > + case VSOC_WAIT_IF_EQUAL: > + break; > + case VSOC_WAIT_IF_EQUAL_TIMEOUT: > + to = &timeout; > + break; > + default: > + return -EINVAL; > + } > + > + if (to) { > + /* Copy the user-supplied timesec into the kernel structure. > + * We do things this way to flatten differences between 32 bit > + * and 64 bit timespecs. > + */ > + ts.tv_sec = arg->wake_time_sec; > + ts.tv_nsec = arg->wake_time_nsec; > + > + if (!timespec_valid(&ts)) > + return -EINVAL; > + hrtimer_init_on_stack(&to->timer, CLOCK_MONOTONIC, > + HRTIMER_MODE_ABS); > + hrtimer_set_expires_range_ns(&to->timer, timespec_to_ktime(ts), > + current->timer_slack_ns); > + > + hrtimer_init_sleeper(to, current); > + } > + > + while (1) { > + prepare_to_wait(&data->futex_wait_queue, &wait, > + TASK_INTERRUPTIBLE); > + /* > + * Check the sentinel value after prepare_to_wait. If the value > + * changes after this check the writer will call signal, > + * changing the task state from INTERRUPTIBLE to RUNNING. That > + * will ensure that schedule() will eventually schedule this > + * task. > + */ > + if (atomic_read(address) != arg->value) { > + ret = 0; > + break; > + } > + if (to) { > + hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); > + if (likely(to->task)) > + freezable_schedule(); > + hrtimer_cancel(&to->timer); > + if (!to->task) { > + ret = -ETIMEDOUT; > + break; > + } > + } else { > + freezable_schedule(); > + } > + /* Count the number of times that we woke up. This is useful > + * for unit testing. > + */ > + ++arg->wakes; > + if (signal_pending(current)) { > + ret = -EINTR; > + break; > + } > + } > + finish_wait(&data->futex_wait_queue, &wait); > + if (to) > + destroy_hrtimer_on_stack(&to->timer); > + return ret; > +} > + [ snip ] > + > +static int vsoc_probe_device(struct pci_dev *pdev, > + const struct pci_device_id *ent) > +{ > + int result; > + int i; > + resource_size_t reg_size; > + dev_t devt; > + > + vsoc_dev.dev = pdev; > + result = pci_enable_device(pdev); > + if (result) { > + dev_err(&pdev->dev, > + "pci_enable_device failed %s: error %d\n", > + pci_name(pdev), result); > + return result; > + } > + vsoc_dev.enabled_device = 1; > + result = pci_request_regions(pdev, "vsoc"); > + if (result < 0) { > + dev_err(&pdev->dev, "pci_request_regions failed\n"); > + vsoc_remove_device(pdev); > + return -EBUSY; > + } > + vsoc_dev.requested_regions = 1; > + /* Set up the control registers in BAR 0 */ > + reg_size = pci_resource_len(pdev, REGISTER_BAR); > + if (reg_size > MAX_REGISTER_BAR_LEN) > + vsoc_dev.regs = > + pci_iomap(pdev, REGISTER_BAR, MAX_REGISTER_BAR_LEN); > + else > + vsoc_dev.regs = pci_iomap(pdev, REGISTER_BAR, reg_size); > + > + if (!vsoc_dev.regs) { > + dev_err(&pdev->dev, > + "cannot ioremap registers of size %zu\n", > + (size_t)reg_size); > + vsoc_remove_device(pdev); > + return -EBUSY; > + } > + > + /* Map the shared memory in BAR 2 */ > + vsoc_dev.shm_phys_start = pci_resource_start(pdev, SHARED_MEMORY_BAR); > + vsoc_dev.shm_size = pci_resource_len(pdev, SHARED_MEMORY_BAR); > + > + dev_info(&pdev->dev, "shared memory @ DMA %p size=0x%zx\n", > + (void *)vsoc_dev.shm_phys_start, vsoc_dev.shm_size); > + /* TODO(ghartman): ioremap_wc should work here */ > + vsoc_dev.kernel_mapped_shm = ioremap_nocache( > + vsoc_dev.shm_phys_start, vsoc_dev.shm_size); > + if (!vsoc_dev.kernel_mapped_shm) { > + dev_err(&vsoc_dev.dev->dev, "cannot iomap region\n"); > + vsoc_remove_device(pdev); > + return -EBUSY; > + } > + > + vsoc_dev.layout = > + (struct vsoc_shm_layout_descriptor *)vsoc_dev.kernel_mapped_shm; > + dev_info(&pdev->dev, "major_version: %d\n", > + vsoc_dev.layout->major_version); > + dev_info(&pdev->dev, "minor_version: %d\n", > + vsoc_dev.layout->minor_version); > + dev_info(&pdev->dev, "size: 0x%x\n", vsoc_dev.layout->size); > + dev_info(&pdev->dev, "regions: %d\n", vsoc_dev.layout->region_count); > + if (vsoc_dev.layout->major_version != > + CURRENT_VSOC_LAYOUT_MAJOR_VERSION) { > + dev_err(&vsoc_dev.dev->dev, > + "driver supports only major_version %d\n", > + CURRENT_VSOC_LAYOUT_MAJOR_VERSION); > + vsoc_remove_device(pdev); > + return -EBUSY; > + } > + result = alloc_chrdev_region(&devt, 0, vsoc_dev.layout->region_count, > + VSOC_DEV_NAME); > + if (result) { > + dev_err(&vsoc_dev.dev->dev, "alloc_chrdev_region failed\n"); > + vsoc_remove_device(pdev); > + return -EBUSY; > + } > + vsoc_dev.major = MAJOR(devt); > + cdev_init(&vsoc_dev.cdev, &vsoc_ops); > + vsoc_dev.cdev.owner = THIS_MODULE; > + result = cdev_add(&vsoc_dev.cdev, devt, vsoc_dev.layout->region_count); > + if (result) { > + dev_err(&vsoc_dev.dev->dev, "cdev_add error\n"); > + vsoc_remove_device(pdev); > + return -EBUSY; > + } > + vsoc_dev.cdev_added = 1; > + vsoc_dev.class = class_create(THIS_MODULE, VSOC_DEV_NAME); > + if (!vsoc_dev.class) { ^^^^^^^^^^^^^^^ This should be if (IS_ERR(vsoc_dev.class)) {. The test in vsoc_remove_device() needs to be updated as well. > + dev_err(&vsoc_dev.dev->dev, "class_create failed\n"); > + vsoc_remove_device(pdev); > + return -EBUSY; > + } > + vsoc_dev.regions = (struct vsoc_device_region *) > + (vsoc_dev.kernel_mapped_shm + > + vsoc_dev.layout->vsoc_region_desc_offset); > + vsoc_dev.msix_entries = kcalloc( > + vsoc_dev.layout->region_count, > + sizeof(vsoc_dev.msix_entries[0]), GFP_KERNEL); > + if (!vsoc_dev.msix_entries) { > + dev_err(&vsoc_dev.dev->dev, > + "unable to allocate msix_entries\n"); > + vsoc_remove_device(pdev); > + return -ENOSPC; > + } > + vsoc_dev.regions_data = kcalloc( > + vsoc_dev.layout->region_count, > + sizeof(vsoc_dev.regions_data[0]), GFP_KERNEL); > + if (!vsoc_dev.regions_data) { > + dev_err(&vsoc_dev.dev->dev, > + "unable to allocate regions' data\n"); > + vsoc_remove_device(pdev); > + return -ENOSPC; > + } > + for (i = 0; i < vsoc_dev.layout->region_count; ++i) > + vsoc_dev.msix_entries[i].entry = i; > + > + result = pci_enable_msix_exact(vsoc_dev.dev, vsoc_dev.msix_entries, > + vsoc_dev.layout->region_count); > + if (result) { > + dev_info(&pdev->dev, "pci_enable_msix failed: %d\n", result); > + vsoc_remove_device(pdev); > + return -ENOSPC; > + } > + /* Check that all regions are well formed */ > + for (i = 0; i < vsoc_dev.layout->region_count; ++i) { > + const struct vsoc_device_region *region = vsoc_dev.regions + i; > + > + if (!PAGE_ALIGNED(region->region_begin_offset) || > + !PAGE_ALIGNED(region->region_end_offset)) { > + dev_err(&vsoc_dev.dev->dev, > + "region %d not aligned (%x:%x)", i, > + region->region_begin_offset, > + region->region_end_offset); > + vsoc_remove_device(pdev); > + return -EFAULT; > + } > + if (region->region_begin_offset >= region->region_end_offset || > + region->region_end_offset > vsoc_dev.shm_size) { > + dev_err(&vsoc_dev.dev->dev, > + "region %d offsets are wrong: %x %x %zx", > + i, region->region_begin_offset, > + region->region_end_offset, vsoc_dev.shm_size); > + vsoc_remove_device(pdev); > + return -EFAULT; > + } > + if (region->managed_by >= vsoc_dev.layout->region_count) { > + dev_err(&vsoc_dev.dev->dev, > + "region %d has invalid owner: %u", > + i, region->managed_by); > + vsoc_remove_device(pdev); > + return -EFAULT; > + } > + } > + vsoc_dev.msix_enabled = 1; > + for (i = 0; i < vsoc_dev.layout->region_count; ++i) { > + const struct vsoc_device_region *region = vsoc_dev.regions + i; > + size_t name_sz = sizeof(vsoc_dev.regions_data[i].name) - 1; > + const struct vsoc_signal_table_layout *h_to_g_signal_table = > + ®ion->host_to_guest_signal_table; > + const struct vsoc_signal_table_layout *g_to_h_signal_table = > + ®ion->guest_to_host_signal_table; > + > + vsoc_dev.regions_data[i].name[name_sz] = '\0'; > + memcpy(vsoc_dev.regions_data[i].name, region->device_name, > + name_sz); > + dev_info(&pdev->dev, "region %d name=%s\n", > + i, vsoc_dev.regions_data[i].name); > + init_waitqueue_head( > + &vsoc_dev.regions_data[i].interrupt_wait_queue); > + init_waitqueue_head(&vsoc_dev.regions_data[i].futex_wait_queue); > + vsoc_dev.regions_data[i].incoming_signalled = > + vsoc_dev.kernel_mapped_shm + > + region->region_begin_offset + > + h_to_g_signal_table->interrupt_signalled_offset; > + vsoc_dev.regions_data[i].outgoing_signalled = > + vsoc_dev.kernel_mapped_shm + > + region->region_begin_offset + > + g_to_h_signal_table->interrupt_signalled_offset; > + > + result = request_irq( > + vsoc_dev.msix_entries[i].vector, > + vsoc_interrupt, 0, > + vsoc_dev.regions_data[i].name, > + vsoc_dev.regions_data + i); > + if (result) { > + dev_info(&pdev->dev, > + "request_irq failed irq=%d vector=%d\n", > + i, vsoc_dev.msix_entries[i].vector); > + vsoc_remove_device(pdev); > + return -ENOSPC; > + } > + vsoc_dev.regions_data[i].irq_requested = 1; > + if (!device_create(vsoc_dev.class, NULL, > + MKDEV(vsoc_dev.major, i), > + NULL, vsoc_dev.regions_data[i].name)) { > + dev_err(&vsoc_dev.dev->dev, "device_create failed\n"); > + vsoc_remove_device(pdev); > + return -EBUSY; > + } > + vsoc_dev.regions_data[i].device_created = 1; > + } > + return 0; > +} > + > +/* > + * This should undo all of the allocations in the probe function in reverse > + * order. > + * > + * Notes: > + * > + * The device may have been partially initialized, so double check > + * that the allocations happened. > + * > + * This function may be called multiple times, so mark resources as freed > + * as they are deallocated. > + */ > +static void vsoc_remove_device(struct pci_dev *pdev) > +{ > + int i; > + /* > + * pdev is the first thing to be set on probe and the last thing > + * to be cleared here. If it's NULL then there is no cleanup. > + */ > + if (!pdev || !vsoc_dev.dev) > + return; > + dev_info(&pdev->dev, "remove_device\n"); > + if (vsoc_dev.regions_data) { > + for (i = 0; i < vsoc_dev.layout->region_count; ++i) { > + if (vsoc_dev.regions_data[i].device_created) { > + device_destroy(vsoc_dev.class, > + MKDEV(vsoc_dev.major, i)); > + vsoc_dev.regions_data[i].device_created = 0; > + } > + if (vsoc_dev.regions_data[i].irq_requested) > + free_irq(vsoc_dev.msix_entries[i].vector, NULL); > + vsoc_dev.regions_data[i].irq_requested = 0; > + } > + kfree(vsoc_dev.regions_data); > + vsoc_dev.regions_data = 0; > + } > + if (vsoc_dev.msix_enabled) { > + pci_disable_msix(pdev); > + vsoc_dev.msix_enabled = 0; > + } > + kfree(vsoc_dev.msix_entries); > + vsoc_dev.msix_entries = 0; > + vsoc_dev.regions = 0; > + if (vsoc_dev.class) { > + class_destroy(vsoc_dev.class); > + vsoc_dev.class = 0; > + } > + if (vsoc_dev.cdev_added) { > + cdev_del(&vsoc_dev.cdev); > + vsoc_dev.cdev_added = 0; > + } > + if (vsoc_dev.major && vsoc_dev.layout) { > + unregister_chrdev_region(MKDEV(vsoc_dev.major, 0), > + vsoc_dev.layout->region_count); > + vsoc_dev.major = 0; > + } > + vsoc_dev.layout = 0; > + if (vsoc_dev.kernel_mapped_shm && pdev) { ^^^^ These tests can be removed since we checked at the start of the function. > + pci_iounmap(pdev, vsoc_dev.kernel_mapped_shm); > + vsoc_dev.kernel_mapped_shm = 0; > + } > + if (vsoc_dev.regs && pdev) { ^^^^ > + pci_iounmap(pdev, vsoc_dev.regs); > + vsoc_dev.regs = 0; > + } > + if (vsoc_dev.requested_regions && pdev) { ^^^^ > + pci_release_regions(pdev); > + vsoc_dev.requested_regions = 0; > + } > + if (vsoc_dev.enabled_device && pdev) { ^^^^ > + pci_disable_device(pdev); > + vsoc_dev.enabled_device = 0; > + } > + /* Do this last: it indicates that the device is not initialized. */ > + vsoc_dev.dev = NULL; > +} > + regards, dan carpenter _______________________________________________ devel mailing list devel@xxxxxxxxxxxxxxxxxxxxxx http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel