On Fri, Feb 17, 2023 at 11:19 AM Daniel Müller <deso@xxxxxxxxxx> wrote: > > This change adds support for attaching uprobes to shared objects located > in APKs, which is relevant for Android systems where various libraries Is there a good link with description of APK that we can record somewhere in the comments for future us? Also, does .apk contains only shared libraries, or it could be also just a binary? > may reside in APKs. To make that happen, we extend the syntax for the > "binary path" argument to attach to with that supported by various > Android tools: > <archive>!/<binary-in-archive> > > For example: > /system/app/test-app/test-app.apk!/lib/arm64-v8a/libc++_shared.so > > APKs need to be specified via full path, i.e., we do not attempt to > resolve mere file names by searching system directories. mere? > > We cannot currently test this functionality end-to-end in an automated > fashion, because it relies on an Android system being present, but there > is no support for that in CI. I have tested the functionality manually, > by creating a libbpf program containing a uretprobe, attaching it to a > function inside a shared object inside an APK, and verifying the sanity > of the returned values. > > Signed-off-by: Daniel Müller <deso@xxxxxxxxxx> > --- > tools/lib/bpf/libbpf.c | 84 ++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 80 insertions(+), 4 deletions(-) > > diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c > index a474f49..79ab85f 100644 > --- a/tools/lib/bpf/libbpf.c > +++ b/tools/lib/bpf/libbpf.c > @@ -53,6 +53,7 @@ > #include "libbpf_internal.h" > #include "hashmap.h" > #include "bpf_gen_internal.h" > +#include "zip.h" > > #ifndef BPF_FS_MAGIC > #define BPF_FS_MAGIC 0xcafe4a11 > @@ -10702,6 +10703,60 @@ static long elf_find_func_offset_from_elf_file(const char *binary_path, const ch > return ret; > } > > +/* Find offset of function name in archive specified by path. Currently > + * supported are .zip files that do not compress their contents (as used on > + * Android in the form of APKs, for example). "file_name" is the name of the > + * ELF file inside the archive. "func_name" matches symbol name or name@@LIB > + * for library functions. These double spaces after dot were not intended, let's not add more. > + */ > +static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name, > + const char *func_name) > +{ > + struct zip_archive *archive; > + struct zip_entry entry; > + long ret = -ENOENT; > + Elf *elf; > + > + archive = zip_archive_open(archive_path); > + if (!archive) { > + pr_warn("failed to open %s\n", archive_path); add "zip: " prefix? > + return -LIBBPF_ERRNO__FORMAT; > + } > + > + if (zip_archive_find_entry(archive, file_name, &entry)) { > + pr_warn("zip: could not find archive member %s in %s\n", file_name, archive_path); > + ret = -LIBBPF_ERRNO__FORMAT; > + goto out; > + } > + > + if (entry.compression) { > + pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name, > + archive_path); > + ret = -LIBBPF_ERRNO__FORMAT; > + goto out; > + } > + > + elf = elf_memory((void *)entry.data, entry.data_length); > + if (!elf) { > + pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path, I kind of like preserving the "archive/path!/file/path" consistently through error messages when referring to file within APK, WDYT? > + elf_errmsg(-1)); > + ret = -LIBBPF_ERRNO__FORMAT; > + goto out; > + } > + > + ret = elf_find_func_offset(elf, file_name, func_name); > + if (ret > 0) { > + ret += entry.data_offset; > + pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", func_name, > + archive_path, ret); so for debugging I feel like we'll want to know both entry.data_offset and original ELF offset, let's report all three offset (including the final calculated one)? > + } > + elf_end(elf); > + > +out: > + zip_archive_close(archive); > + return ret; > +} > + > static const char *arch_specific_lib_paths(void) > { > /* > @@ -10789,6 +10844,9 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, > { > DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); > char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL; > + const char *archive_path = NULL; > + const char *archive_sep = NULL; nit: combine on a single line? > + char full_archive_path[PATH_MAX]; > char full_binary_path[PATH_MAX]; > struct bpf_link *link; > size_t ref_ctr_off; > @@ -10806,9 +10864,21 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, > if (!binary_path) > return libbpf_err_ptr(-EINVAL); > > - if (!strchr(binary_path, '/')) { > - err = resolve_full_path(binary_path, full_binary_path, > - sizeof(full_binary_path)); > + /* Check if "binary_path" refers to an archive. */ > + archive_sep = strstr(binary_path, "!/"); > + if (archive_sep) { > + if (archive_sep - binary_path >= sizeof(full_archive_path)) { very unlikely to happen, I wouldn't bother checking, especially that strncpy will just truncate and make us fail anyways > + return libbpf_err_ptr(-EINVAL); > + } > + > + strncpy(full_archive_path, binary_path, archive_sep - binary_path); let's use saner libbpf_strlcpy() instead of strncpy, we stopped using strncpy relatively recently > + full_archive_path[archive_sep - binary_path] = 0; strlcpy makes sure the resulting string is zero-terminated. But note that full_archive_path[0] is not guaranteed to be zero, so strncpy/strlcpy might preserve some garbage in front. Let's make sure full_archive_path[0] = '\0'; before manipulating that buffer > + archive_path = full_archive_path; > + > + strcpy(full_binary_path, archive_sep + 2); > + binary_path = full_binary_path; no need to copy, just `binary_path = archive_sep + 2;`? And thus we can reuse full_binary_path buffer for archive path (we can rename it to be more generic "full_path" name or something) > + } else if (!strchr(binary_path, '/')) { > + err = resolve_full_path(binary_path, full_binary_path, sizeof(full_binary_path)); > if (err) { > pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", > prog->name, binary_path, err); > @@ -10820,7 +10890,13 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, > if (func_name) { > long sym_off; > > - sym_off = elf_find_func_offset_from_elf_file(binary_path, func_name); > + if (archive_path) { > + sym_off = elf_find_func_offset_from_archive(archive_path, binary_path, > + func_name); > + binary_path = archive_path; > + } else { > + sym_off = elf_find_func_offset_from_elf_file(binary_path, func_name); > + } > if (sym_off < 0) > return libbpf_err_ptr(sym_off); > func_offset += sym_off; > -- > 2.30.2 >