On Tue, Mar 08, 2022 at 10:56:30AM +0200, Elena Reshetova wrote: > This is just a quick trial to trace inputs received > from the host/VMM in the same way as user inputs. > > Signed-off-by: Elena Reshetova <elena.reshetova@xxxxxxxxx> > --- > smatch_kernel_host_data.c | 1320 ++++++++++++++++++++++++++++++++++ > smatch_points_to_host_data.c | 334 +++++++++ The changes to smatch.h and check_list.h are missing. > 2 files changed, 1654 insertions(+) > create mode 100755 smatch_kernel_host_data.c > create mode 100755 smatch_points_to_host_data.c > > diff --git a/smatch_kernel_host_data.c b/smatch_kernel_host_data.c > new file mode 100755 > index 00000000..540875c5 > --- /dev/null > +++ b/smatch_kernel_host_data.c > @@ -0,0 +1,1320 @@ > +/* > + * Copyright (C) 2011 Dan Carpenter. > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version 2 > + * of the License, or (at your option) any later version. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, see http://www.gnu.org/copyleft/gpl.txt > + */ > + > +/* Note: The below code is just a quick trial to modify the > + * smatch_kernel_host_data.c to work on data received from a > + * untrusted host/VMM. > + * Similar as smatch_kernel_user_data.c it works with > + * smatch_points_to_host_data.c code. It also uses some helper functions > + * from the check_host_input.c pattern. > + */ > + > +#include "smatch.h" > +#include "smatch_slist.h" > +#include "smatch_extra.h" > +#include <math.h> > + > +const char *host_input_funcs[] = { > + "inb", "inw", "inl", "inb_p", "inw_p", "inl_p", "insb", "insw", "insl", "get_dma_residue", "ioread8", "ioread16", "ioread32", > + "ioread16be", "ioread32be", "ioread64_lo_hi", "ioread64_hi_lo", "ioread64be_lo_hi", "ioread64be_hi_lo", "ioread8_rep", > + "ioread16_rep", "ioread32_rep", "__ioread32_copy", "iomap_readq", "iomap_readb", "iomap_readw", "iomap_readl", "memcpy_fromio", > + "mmio_insb", "mmio_insw", "mmio_insl", "readb", "readw", "readl", "readq", "readsb", "readsw", "readsl", "readsq", "__readb", "__readw", > + "__readl", "__readq", "__readsb", "__readsw", "__readsl", "__readsq", "__raw_readb", "__raw_readw", "__raw_readl", "__raw_readq", > + "lo_hi_readq", "hi_lo_readq", "lo_hi_readq_relaxed", "hi_lo_readq_relaxed", "readb_relaxed", "readw_relaxed", "readl_relaxed", > + "readq_relaxed", "native_read_msr", "native_read_msr_safe", "__rdmsr", "rdmsrl", "rdmsrl_safe", "rdmsr_on_cpu", "rdmsrl_on_cpu", > + "rdmsr_on_cpus", "rdmsr_safe_on_cpu", "rdmsrl_safe_on_cpu", "paravirt_read_msr", "paravirt_read_msr_safe", "read_msr", "msr_read", > + "native_apic_msr_read", "native_apic_mem_read", "native_apic_icr_read", "apic_read", "apic_icr_read", "native_x2apic_icr_read", > + "io_apic_read", "native_io_apic_read", "__ioapic_read_entry", "ioapic_read_entry", "vp_ioread8", "vp_ioread16", "vp_ioread32", > + "__virtio_cread_many", "virtio_cread", "virtio_cread_le", "virtio_cread8", "virtio_cread16", "virtio_cread32", "virtio_cread64", > + "virtio_cread_bytes", "virtio16_to_cpu", "virtio32_to_cpu", "virtio64_to_cpu", "__virtio16_to_cpu", "__virtio32_to_cpu", > + "__virtio64_to_cpu", "virtqueue_get_buf", "vringh16_to_cpu", "vringh32_to_cpu", "vringh64_to_cpu", "tap16_to_cpu", "tun16_to_cpu", > + "read_pci_config", "read_pci_config_byte", "read_pci_config_16", "raw_pci_read", "pci_read", "pci_read_config_byte", > + "pci_read_config_word", "pci_read_config_dword", "pci_bus_read_config_byte", "pci_bus_read_config_word", > + "pci_bus_read_config_dword", "pci_generic_config_read", "pci_generic_config_read32", "pci_user_read_config_byte", > + "pci_user_read_config_word", "pci_user_read_config_dword", "pcie_capability_read_word", "pcie_capability_read_dword", > + "pci_read_vpd", "serial8250_early_in", "serial_dl_read", "serial8250_in_MCR", "serial_in", "serial_port_in", "serial_icr_read", > + "serial8250_rx_chars", "dw8250_readl_ext", "udma_readl", "sio_read_reg", "irq_readl_be", "irq_reg_readl", "fw_cfg_read_blob", > + "acpi_os_read_iomem", "acpi_os_read_port", "acpi_hw_read_multiple", "acpi_hw_read", "acpi_hw_read_port", "acpi_hw_register_read", > + "acpi_hw_gpe_read", "apei_read", "acpi_read", "__apei_exec_read_register", "cpc_read", "hv_get_register", "iosf_mbi_read", > + "cpuid", "cpuid_count", "cpuid_eax", "cpuid_ebx", "cpuid_ecx", "cpuid_edx" > + > +}; > + > + > +static int my_id; > +static int my_call_id; > + > +STATE(called); This state is not used here. Even in smatch_kernel_user_data.c it should be moved out of there into a separate file... > +static unsigned long func_gets_host_data; > +static struct stree *start_states; > + > +static void save_start_states(struct statement *stmt) > +{ > + start_states = clone_stree(__get_cur_stree()); > +} > + > +static void free_start_states(void) > +{ > + free_stree(&start_states); > +} > + No need to do this these days. It's stored in get_start_states(). > +static struct smatch_state *empty_state(struct sm_state *sm) > +{ > + return alloc_estate_empty(); > +} > + > +static struct smatch_state *new_state(struct symbol *type) > +{ > + struct smatch_state *state; > + > + if (!type || type_is_ptr(type)) > + return NULL; > + > + state = alloc_estate_whole(type); > + estate_set_new(state); > + > + return state; This code is supposed to differentiate between places where we return user data because it was passed to us or we got fresh user data inside the function. In smatch_kernel_user_data.c it's only a single question "did we recieve user data? Y/N." But it should be "Was variable foo->bar user data." I would probably pull returned user data out into a separate file these days. And have a different file for if a variable currently holds user data or if we pass user data to a function. > +} > + > +static void pre_merge_hook(struct sm_state *cur, struct sm_state *other) > +{ > + struct smatch_state *user = cur->state; > + struct smatch_state *extra; > + struct smatch_state *state; > + struct range_list *rl; > + > + extra = __get_state(SMATCH_EXTRA, cur->name, cur->sym); > + if (!extra) > + return; > + rl = rl_intersection(estate_rl(user), estate_rl(extra)); > + state = alloc_estate_rl(clone_rl(rl)); > + if (estate_capped(user) || is_capped_var_sym(cur->name, cur->sym)) > + estate_set_capped(state); > + if (estate_treat_untagged(user)) > + estate_set_treat_untagged(state); Tagged is for ARM tagged pointers. It's probably not something you need. Search for "tag" and delete. > + if (estates_equiv(state, cur->state)) > + return; > + set_state(my_id, cur->name, cur->sym, state); > +} > + [ snip ] > + > +extern uint get_arg_bitmask(struct expression *expr); > +static struct expression *ignore_param_set; > +extern bool is_ignored_func(struct expression *expr); > + > +static void match_host_input(const char *fn, struct expression *expr) > +{ > + > + uint arg_bitmask = 0; > + > + if (!expr) > + return; > + > + arg_bitmask = get_arg_bitmask(expr); > + > + if (!arg_bitmask) /* function returns host data, handled via match_returns_host_rl */ > + return; > + > + if (is_ignored_func(expr)) > + return; > + > + func_gets_host_data = true; > + ignore_param_set = expr; > + > + switch((uint)log2(arg_bitmask)) { > + case 0xC: > + tag_argument(expr, 2); > + tag_argument(expr, 3); > + break; > + case 0x36: > + tag_argument(expr, 1); > + tag_argument(expr, 2); > + tag_argument(expr, 3); > + tag_argument(expr, 4); > + break; > + case 0x74: > + tag_argument(expr, 2); > + tag_argument(expr, 3); > + tag_argument(expr, 4); > + tag_argument(expr, 5); > + break; > + default: > + tag_argument(expr, (uint)log2(arg_bitmask)); > + break; > + } > + > + return; > +} This function would be better re-written with param key API. Sorry that Smatch doesn't have documentation. :( An example, of how that works is in check_unwind.c, instead of a bitmap you would just have multiple entries in the table. I'm typing directly into my email client so this might not compile... struct host_fn_info { const char *name; int type; int param; const char *key; const sval_t *implies_start, *implies_end; func_hook *call_back; }; static struct host_fn_info func_table[] = { { "memcpy_fromio", HOST_DATA, 0, "*$" }, ... { "cpuid", HOST_DATA, 1, "*$" }, { "cpuid", HOST_DATA, 2, "*$" }, { "cpuid", HOST_DATA, 3, "*$" }, { "cpuid", HOST_DATA, 4, "*$" }, }; static void set_param_host_data(struct expression *expr, const char *name, struct symbol *sym, void *data) { struct expression *arg; struct range_list *rl; arg = gen_expression_from_name_sym(name, sym); if (strcmp(key, "*$") == 0) { tag_as_user_data(arg); return; } // make sure that smatch_extra was run first get_absolute_rl(arg, &rl); set_state(my_id, name, sym, alloc_estate_rl(rl)); } The code you have should work though and it should propagate. What we need is some simple test cases. //--- void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx); unsigned int a, b, c, d e; unsigned int test(void) { cpuid(0, &a, &b, &c, &d); __smatch_states("host"); return a; } unsigned int test2(void) { unsigned int x = test(); __smatch_states("host"); } regards, dan carpenter