Hi Steve, On Tue, Apr 19, 2016 at 10:34:23AM -0400, Steven Rostedt wrote: > From: Steven Rostedt <rostedt@xxxxxxxxxxx> > > In order to add the ability to let tasks that are filtered by the events > have their children also be traced on fork (and then not traced on exit), > convert the array into a pid bitmask. Most of the time the number of pids is > only 32768 pids or a 4k bitmask, which is the same size as the default list > currently is, and that list could grow if more pids are listed. > > This also greatly simplifies the code. > > Suggested-by: "H. Peter Anvin" <hpa@xxxxxxxxx> > Signed-off-by: Steven Rostedt <rostedt@xxxxxxxxxxx> > --- [SNIP] > @@ -1571,7 +1572,7 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf, > struct seq_file *m = filp->private_data; > struct trace_array *tr = m->private; > struct trace_pid_list *filtered_pids = NULL; > - struct trace_pid_list *pid_list = NULL; > + struct trace_pid_list *pid_list; > struct trace_event_file *file; > struct trace_parser parser; > unsigned long val; > @@ -1579,7 +1580,7 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf, > ssize_t read = 0; > ssize_t ret = 0; > pid_t pid; > - int i; > + int nr_pids = 0; > > if (!cnt) > return 0; > @@ -1592,10 +1593,43 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf, > return -ENOMEM; > > mutex_lock(&event_mutex); > + filtered_pids = rcu_dereference_protected(tr->filtered_pids, > + lockdep_is_held(&event_mutex)); > + > /* > - * Load as many pids into the array before doing a > - * swap from the tr->filtered_pids to the new list. > + * Always recreate a new array. The write is an all or nothing > + * operation. Always create a new array when adding new pids by > + * the user. If the operation fails, then the current list is > + * not modified. > */ > + pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL); > + if (!pid_list) { > + read = -ENOMEM; > + goto out; > + } > + pid_list->pid_max = READ_ONCE(pid_max); > + /* Only truncating will shrink pid_max */ > + if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max) > + pid_list->pid_max = filtered_pids->pid_max; > + pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3); > + if (!pid_list->pids) { > + kfree(pid_list); > + read = -ENOMEM; > + goto out; > + } > + if (filtered_pids) { > + /* copy the current bits to the new max */ > + pid = find_first_bit(filtered_pids->pids, > + filtered_pids->pid_max); > + while (pid < filtered_pids->pid_max) { > + set_bit(pid, pid_list->pids); > + pid = find_next_bit(filtered_pids->pids, > + filtered_pids->pid_max, > + pid + 1); > + nr_pids++; > + } Why not just use memcpy and keep nr_pids in the pid_list? Thanks, Namhyung > + } > + > while (cnt > 0) { > > this_pos = 0; > @@ -1613,92 +1647,35 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf, > ret = -EINVAL; > if (kstrtoul(parser.buffer, 0, &val)) > break; > - if (val > INT_MAX) > + if (val >= pid_list->pid_max) > break; > > pid = (pid_t)val; > > - ret = -ENOMEM; > - if (!pid_list) { > - pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL); > - if (!pid_list) > - break; > - > - filtered_pids = rcu_dereference_protected(tr->filtered_pids, > - lockdep_is_held(&event_mutex)); > - if (filtered_pids) > - pid_list->order = filtered_pids->order; > - else > - pid_list->order = 0; > - > - pid_list->pids = (void *)__get_free_pages(GFP_KERNEL, > - pid_list->order); > - if (!pid_list->pids) > - break; > - > - if (filtered_pids) { > - pid_list->nr_pids = filtered_pids->nr_pids; > - memcpy(pid_list->pids, filtered_pids->pids, > - pid_list->nr_pids * sizeof(pid_t)); > - } else > - pid_list->nr_pids = 0; > - } > - > - if (pid_list->nr_pids >= max_pids(pid_list)) { > - pid_t *pid_page; > - > - pid_page = (void *)__get_free_pages(GFP_KERNEL, > - pid_list->order + 1); > - if (!pid_page) > - break; > - memcpy(pid_page, pid_list->pids, > - pid_list->nr_pids * sizeof(pid_t)); > - free_pages((unsigned long)pid_list->pids, pid_list->order); > - > - pid_list->order++; > - pid_list->pids = pid_page; > - } > + set_bit(pid, pid_list->pids); > + nr_pids++; > > - pid_list->pids[pid_list->nr_pids++] = pid; > trace_parser_clear(&parser); > ret = 0; > } > trace_parser_put(&parser); > > if (ret < 0) { > - if (pid_list) > - free_pages((unsigned long)pid_list->pids, pid_list->order); > + vfree(pid_list->pids); > kfree(pid_list); > - mutex_unlock(&event_mutex); > - return ret; > - } > - > - if (!pid_list) { > - mutex_unlock(&event_mutex); > - return ret; > + read = ret; > + goto out; > } > > - sort(pid_list->pids, pid_list->nr_pids, sizeof(pid_t), cmp_pid, NULL); > - > - /* Remove duplicates */ > - for (i = 1; i < pid_list->nr_pids; i++) { > - int start = i; > - > - while (i < pid_list->nr_pids && > - pid_list->pids[i - 1] == pid_list->pids[i]) > - i++; > - > - if (start != i) { > - if (i < pid_list->nr_pids) { > - memmove(&pid_list->pids[start], &pid_list->pids[i], > - (pid_list->nr_pids - i) * sizeof(pid_t)); > - pid_list->nr_pids -= i - start; > - i = start; > - } else > - pid_list->nr_pids = start; > - } > + if (!nr_pids) { > + /* Cleared the list of pids */ > + vfree(pid_list->pids); > + kfree(pid_list); > + read = ret; > + if (!filtered_pids) > + goto out; > + pid_list = NULL; > } > - > rcu_assign_pointer(tr->filtered_pids, pid_list); > > list_for_each_entry(file, &tr->events, list) { > @@ -1708,7 +1685,7 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf, > if (filtered_pids) { > synchronize_sched(); > > - free_pages((unsigned long)filtered_pids->pids, filtered_pids->order); > + vfree(filtered_pids->pids); > kfree(filtered_pids); > } else { > /* > @@ -1745,10 +1722,12 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf, > */ > on_each_cpu(ignore_task_cpu, tr, 1); > > + out: > mutex_unlock(&event_mutex); > > ret = read; > - *ppos += read; > + if (read > 0) > + *ppos += read; > > return ret; > } > -- > 2.8.0.rc3 > > -- To unsubscribe from this list: send the line "unsubscribe linux-trace-users" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html