Memory management folks. Please review this patch. Specifically, the "map_pages()" function below. On Thu, 06 Jun 2024 17:17:43 -0400 Steven Rostedt <rostedt@xxxxxxxxxxx> wrote: > From: "Steven Rostedt (Google)" <rostedt@xxxxxxxxxxx> > > Add an option to the trace_instance kernel command line parameter that > allows it to use the reserved memory from memmap boot parameter. > > memmap=12M$0x284500000 trace_instance=boot_mapped@0x284500000:12M > > The above will reserve 12 megs at the physical address 0x284500000. > The second parameter will create a "boot_mapped" instance and use the > memory reserved as the memory for the ring buffer. > > That will create an instance called "boot_mapped": > > /sys/kernel/tracing/instances/boot_mapped > > Note, because the ring buffer is using a defined memory range, it will > act just like a memory mapped ring buffer. It will not have a snapshot > buffer, as it can't swap out the buffer. The snapshot files as well as any > tracers that use a snapshot will not be present in the boot_mapped > instance. > > Cc: linux-mm@xxxxxxxxx > Signed-off-by: Steven Rostedt (Google) <rostedt@xxxxxxxxxxx> > --- > .../admin-guide/kernel-parameters.txt | 9 +++ > kernel/trace/trace.c | 75 +++++++++++++++++-- > 2 files changed, 78 insertions(+), 6 deletions(-) > > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt > index b600df82669d..ff26b6094e79 100644 > --- a/Documentation/admin-guide/kernel-parameters.txt > +++ b/Documentation/admin-guide/kernel-parameters.txt > @@ -6754,6 +6754,15 @@ > the same thing would happen if it was left off). The irq_handler_entry > event, and all events under the "initcall" system. > > + If memory has been reserved (see memmap for x86), the instance > + can use that memory: > + > + memmap=12M$0x284500000 trace_instance=boot_map@0x284500000:12M > + > + The above will create a "boot_map" instance that uses the physical > + memory at 0x284500000 that is 12Megs. 
The per CPU buffers of that > + instance will be split up accordingly. > + > trace_options=[option-list] > [FTRACE] Enable or disable tracer options at boot. > The option-list is a comma delimited list of options > diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c > index 622fe670949d..13e89023f33b 100644 > --- a/kernel/trace/trace.c > +++ b/kernel/trace/trace.c > @@ -9504,6 +9504,31 @@ static int instance_mkdir(const char *name) > return ret; > } > > +static u64 map_pages(u64 start, u64 size) > +{ > + struct page **pages; > + phys_addr_t page_start; > + unsigned int page_count; > + unsigned int i; > + void *vaddr; > + > + page_count = DIV_ROUND_UP(size, PAGE_SIZE); > + > + page_start = start; > + pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL); > + if (!pages) > + return 0; > + > + for (i = 0; i < page_count; i++) { > + phys_addr_t addr = page_start + i * PAGE_SIZE; > + pages[i] = pfn_to_page(addr >> PAGE_SHIFT); > + } > + vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL); > + kfree(pages); > + > + return (u64)(unsigned long)vaddr; > +} If for some reason the memmap=nn$ss fails, but this still gets called, will the above just map over any memory? That is, is it possible that the kernel could have used this memory? Is there a way to detect this? That is, I don't want this to succeed if the memory location it's about to map to is used by the kernel, or will be used by user space. -- Steve > + > /** > * trace_array_get_by_name - Create/Lookup a trace array, given its name. > * @name: The name of the trace array to be looked up/created. 
> @@ -10350,6 +10375,7 @@ __init static void enable_instances(void) > { > struct trace_array *tr; > char *curr_str; > + char *name; > char *str; > char *tok; > > @@ -10358,19 +10384,56 @@ __init static void enable_instances(void) > str = boot_instance_info; > > while ((curr_str = strsep(&str, "\t"))) { > + unsigned long start = 0; > + unsigned long size = 0; > + unsigned long addr = 0; > > tok = strsep(&curr_str, ","); > + name = strsep(&tok, "@"); > + if (tok) { > + start = memparse(tok, &tok); > + if (!start) { > + pr_warn("Tracing: Invalid boot instance address for %s\n", > + name); > + continue; > + } > + } > > - if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE)) > - do_allocate_snapshot(tok); > + if (start) { > + if (*tok != ':') { > + pr_warn("Tracing: No size specified for instance %s\n", name); > + continue; > + } > + tok++; > + size = memparse(tok, &tok); > + if (!size) { > + pr_warn("Tracing: Invalid boot instance size for %s\n", > + name); > + continue; > + } > + addr = map_pages(start, size); > + if (addr) { > + pr_info("Tracing: mapped boot instance %s at physical memory 0x%lx of size 0x%lx\n", > + name, start, size); > + } else { > + pr_warn("Tracing: Failed to map boot instance %s\n", name); > + continue; > + } > + } else { > + /* Only non mapped buffers have snapshot buffers */ > + if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE)) > + do_allocate_snapshot(tok); > + } > > - tr = trace_array_get_by_name(tok, NULL); > + tr = trace_array_create_systems(name, NULL, addr, size); > if (!tr) { > - pr_warn("Failed to create instance buffer %s\n", curr_str); > + pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str); > continue; > } > - /* Allow user space to delete it */ > - trace_array_put(tr); > + > + /* Only allow non mapped buffers to be deleted */ > + if (!start) > + trace_array_put(tr); > > while ((tok = strsep(&curr_str, ","))) { > early_enable_events(tr, tok, true);