On Thu, Apr 14, 2022 at 3:58 PM Yordan Karadzhov <y.karadz@xxxxxxxxx> wrote: > > > > On 8.04.22 г. 13:03 ч., Tzvetomir Stoyanov (VMware) wrote: > > Initial perf support for trace-cruncher, using libperf. As a first > > stage, collecting of stack trace samples of given process is supported. > > > > Signed-off-by: Tzvetomir Stoyanov (VMware) <tz.stoyanov@xxxxxxxxx> > > --- > > setup.py | 9 +- > > src/perfpy-utils.c | 896 +++++++++++++++++++++++++++++++++++++++++++++ > > src/perfpy-utils.h | 43 +++ > > src/perfpy.c | 141 +++++++ > > 4 files changed, 1087 insertions(+), 2 deletions(-) > > create mode 100644 src/perfpy-utils.c > > create mode 100644 src/perfpy-utils.h > > create mode 100644 src/perfpy.c > > > [ .. ] > > +int py_perf_handle_destroy(struct perf_handle *perf) > > +{ > > + if (!perf || !perf->running) > > + return 0; > > + > > + perf->running = false; > > + pthread_join(perf->reader, NULL); > > + fsync(perf->fd); > > + if (perf->command && perf->pid > 0) { > > + kill(perf->pid, SIGINT); > > + perf->pid = 0; > > Maybe we can free 'perf->command' and set it to NULL here. Freeing the command string should be done in py_perf_handle_free(), it looks more logical. The py_perf_handle_destroy() disables the running perf samples. It should be possible to run sampling again on the same command, by calling PyPerf_enable(). > > > + } > > + > > + return 0; > > +} > > + [ ... ] > > + > > +PyObject *PyPerfSampler_new(PyObject *self, PyObject *args, PyObject *kwargs) > > The name of this function has to be 'PyPerf_sampler_instance' > > +{ > > + char *kwlist[] = {"pid", "command", "freq", "argv", NULL}; > > 'command' argument look redundant. It is the same as argv[0] There is a little difference between 'command' and argv[0]. The 'command' argument is supposed to include the full path to the application file that should be traced, while argv[0] could be just the file name, without the path. Resolving the full path of the file is done in the python code and passed to the API. For example, in: ./perf_sampling.py ls -al 'command' is '/usr/bin/ls' argv[0] is 'ls' > > > + PyObject *py_perf, *py_arg, *py_args = NULL; > > + struct perf_handle *perf = NULL; > > + int freq = 10, pid = 0; > > + char *command = NULL; > > + char **argv = NULL; > > + int i, argc; > > + > > + if (!PyArg_ParseTupleAndKeywords(args, > > + kwargs, > > + "|isiO", > > + kwlist, > > + &pid, > > + &command, > > + &freq, > > + &py_args > > + )) { > > + return NULL; > > + } > > + > > + if (pid == 0 && !command) { > > We have to handle also the case when both 'pid' and 'command' (argv) are provided. > Also the case when 'pid' is negative. I'll add a check for negative pid. In case both 'pid' and 'command' are provided, 'command' is with higher priority - that logic is in new_perf_sampling_handle(). > > > + PyErr_Format(PERF_ERROR, "PID or command must be specified"); > > + return NULL; > > + } > > + > > + if (command && py_args) { > > + if (!PyList_CheckExact(py_args)) { > > + PyErr_SetString(PERF_ERROR, "Failed to parse argv list"); > > + return NULL; > > + } > > + argc = PyList_Size(py_args); > > + argv = calloc(argc + 1, sizeof(char *)); > > + for (i = 0; i < argc; i++) { > > + py_arg = PyList_GetItem(py_args, i); > > + if (!PyUnicode_Check(py_arg)) > > + continue; > > + argv[i] = strdup(PyUnicode_DATA(py_arg)); > > + if (!argv[i]) > > + return NULL; > > + } > > + argv[i] = NULL; > This was allocated using calloc(). No need to set it to NULL. > > > + } > > + > > + perf = new_perf_sampling_handle(freq, pid, command, argv); > > + > > + if (!perf) { > > + PyErr_SetString(PERF_ERROR, "Failed create new perf context"); > > + return NULL; > > + } > > + > > + py_perf = PyPerf_New(perf); > > + > > + return py_perf; > > +} [ ... ] > > +static int increase_file_limit(void) > > +{ > > + struct rlimit lim; > > + > > + if (getrlimit(RLIMIT_NOFILE, &lim)) > > + return -1; > > + > > + if (lim.rlim_cur < lim.rlim_max) { > > + lim.rlim_cur = lim.rlim_max; > > + } else { > > + lim.rlim_cur += 100; > > + lim.rlim_max += 100; > > I wonder where this number 100 comes from? It is just a safe step to increase the limit. Perf could use a lot of file descriptors, on some systems the default limit is not enough. > > > + } > > + > > + return setrlimit(RLIMIT_NOFILE, &lim); > > +} > > + > > +static int perf_maps_init(struct perf_handle *perf) > > +{ > > + int ret; > > + > > + if (!perf->threads) > > + perf->threads = create_thread_map(perf); > > + if (!perf->threads) > > + return -1; > > + > > + perf_evlist__set_maps(perf->evlist, perf->cpus, perf->threads); > > + do { > > + ret = perf_evlist__open(perf->evlist); > > + if (!ret) > > + break; > > + if (ret != -EMFILE) > > + goto out; > > + ret = increase_file_limit(); > > + if (ret) > > + goto out; > > + } while (ret); > > + > > This loops seems over-complicated. Something like this reads easier (at least for me) > > for(;;) { > if (perf_evlist__open(perf->evlist) == 0) > break; > > if (ret != -EMFILE) > goto out; > > if (increase_file_limit() != 0) > goto out; > } > The return code should be preserved, as in case of an error it is returned. Also, the error for not enough file descriptors should be handled. > > + ret = perf_evlist__mmap(perf->evlist, 4); > > +out: > > + if (ret) > > + perf_evlist__close(perf->evlist); > > + return ret; > > +} > > + [ ... ] > > +static PyMethodDef PyPerfEventSampler_methods[] = { > > + {"id", > > + (PyCFunction) PyPerfSampler_id, > > + METH_NOARGS, > > + "get sample id" > > + }, > > + {"ip", > > + (PyCFunction) PyPerfSampler_ip, > > + METH_NOARGS, > > + "get sample ip" > > + }, > > + {"pid", > > + (PyCFunction) PyPerfSampler_pid, > > + METH_NOARGS, > > + "get sample pid" > > + }, > > + {"tid", > > + (PyCFunction) PyPerfSampler_tid, > > + METH_NOARGS, > > + "get sample tid" > > + }, > > + {"tid_comm", > > + (PyCFunction) PyPerfSampler_tid_comm, > > + METH_VARARGS | METH_KEYWORDS, > > + "get sample tid" > > + }, > > + {"time", > > + (PyCFunction) PyPerfSampler_time, > > + METH_NOARGS, > > + "get sample timestamp" > > + }, > > + {"cpu", > > + (PyCFunction) PyPerfSampler_cpu, > > + METH_NOARGS, > > + "get sample cpu" > > + }, > > + {"stack_count", > > + (PyCFunction) PyPerfSampler_nr, > > + METH_NOARGS, > > + "get sample stack count" > > + }, > > + {"stack", > > + (PyCFunction) PyPerfSampler_ips, > > + METH_NOARGS, > > + "get sample stack" > > + }, > > + {NULL} > > So far I've been using as a convention that the 'C' function that implements a method of the Python module has a name > that starts with the type of the module (or object from the module) as a prefix, followed by the name of the method itself. > > For example the name of the function that implements 'stack()' must be 'PyPerfEventSampler_stack()'. > Thanks for this clarification, I'll check the names of the methods. > Thanks! > Yordan > Thank you for that review! I'll address your comments in v2. > > +}; > > +C_OBJECT_WRAPPER(perf_event_sample, PyPerfEventSampler, NO_DESTROY, py_perf_sample_free); > > + > > +static PyMethodDef perfpy_methods[] = { > > + {"sampler_instance", > > + (PyCFunction) PyPerfSampler_new, > > + METH_VARARGS | METH_KEYWORDS, > > + "Allocate new perf sampler instance" > > + }, > > + {NULL} > > +}; > > + > > +static int perf_error_print(enum libperf_print_level level, > > + const char *fmt, va_list ap) > > +{ > > + return vfprintf(stderr, fmt, ap); > > +} > > + > > +static struct PyModuleDef perfpy_module = { > > + PyModuleDef_HEAD_INIT, > > + "perfpy", > > + "Python interface for Perf.", > > + -1, > > + perfpy_methods > > +}; > > + > > +PyMODINIT_FUNC PyInit_perfpy(void) > > +{ > > + > > + if (!PyPerfTypeInit()) > > + return NULL; > > + if (!PyPerfEventSamplerTypeInit()) > > + return NULL; > > + > > + PERF_ERROR = PyErr_NewException("tracecruncher.perfpy.perf_error", > > + NULL, NULL); > > + > > + PyObject *module = PyModule_Create(&perfpy_module); > > + > > + PyModule_AddObject(module, "perf_error", PERF_ERROR); > > + PyModule_AddObject(module, "perf_handle", (PyObject *) &PyPerfType); > > + PyModule_AddObject(module, "perf_event_sample", (PyObject *) &PyPerfEventSamplerType); > > + > > + if (geteuid() != 0) { > > + PyErr_SetString(PERF_ERROR, > > + "Permission denied. Root privileges are required."); > > + return NULL; > > + } > > + > > + libperf_init(perf_error_print); > > + > > + return module; > > +} -- Tzvetomir (Ceco) Stoyanov VMware Open Source Technology Center