Powered by Linux
Re: [PATCH] Quick trial on tracing host inputs — Semantic Matching Tool

Re: [PATCH] Quick trial on tracing host inputs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Mar 08, 2022 at 10:56:30AM +0200, Elena Reshetova wrote:
> This is just a quick trial to trace inputs received
> from the host/VMM in the same way as user inputs.
> 
> Signed-off-by: Elena Reshetova <elena.reshetova@xxxxxxxxx>
> ---
>  smatch_kernel_host_data.c    | 1320 ++++++++++++++++++++++++++++++++++
>  smatch_points_to_host_data.c |  334 +++++++++

The changes to smatch.h and check_list.h are missing.

>  2 files changed, 1654 insertions(+)
>  create mode 100755 smatch_kernel_host_data.c
>  create mode 100755 smatch_points_to_host_data.c
> 
> diff --git a/smatch_kernel_host_data.c b/smatch_kernel_host_data.c
> new file mode 100755
> index 00000000..540875c5
> --- /dev/null
> +++ b/smatch_kernel_host_data.c
> @@ -0,0 +1,1320 @@
> +/*
> + * Copyright (C) 2011 Dan Carpenter.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, see http://www.gnu.org/copyleft/gpl.txt 
> + */
> +
> +/* Note: The below code is just a quick trial to modify the
> + * smatch_kernel_host_data.c to work on data received from a
> + * untrusted host/VMM. 
> + * Similar as smatch_kernel_user_data.c it works with 
> + * smatch_points_to_host_data.c code. It also uses some helper functions
> + * from the check_host_input.c pattern. 
> + */
> +
> +#include "smatch.h"
> +#include "smatch_slist.h"
> +#include "smatch_extra.h"
> +#include <math.h>
> +
> +const char *host_input_funcs[] = {
> +	"inb", "inw", "inl", "inb_p", "inw_p", "inl_p", "insb", "insw", "insl", "get_dma_residue", "ioread8", "ioread16", "ioread32",
> +	"ioread16be", "ioread32be", "ioread64_lo_hi", "ioread64_hi_lo", "ioread64be_lo_hi", "ioread64be_hi_lo", "ioread8_rep",
> +	"ioread16_rep", "ioread32_rep", "__ioread32_copy", "iomap_readq", "iomap_readb", "iomap_readw", "iomap_readl", "memcpy_fromio",
> +	"mmio_insb", "mmio_insw", "mmio_insl", "readb", "readw", "readl", "readq", "readsb", "readsw", "readsl", "readsq", "__readb", "__readw",
> +	"__readl", "__readq", "__readsb", "__readsw", "__readsl", "__readsq", "__raw_readb", "__raw_readw", "__raw_readl", "__raw_readq",
> +	"lo_hi_readq", "hi_lo_readq", "lo_hi_readq_relaxed", "hi_lo_readq_relaxed", "readb_relaxed", "readw_relaxed", "readl_relaxed",
> +	"readq_relaxed", "native_read_msr", "native_read_msr_safe", "__rdmsr", "rdmsrl", "rdmsrl_safe", "rdmsr_on_cpu", "rdmsrl_on_cpu",
> +	"rdmsr_on_cpus", "rdmsr_safe_on_cpu", "rdmsrl_safe_on_cpu", "paravirt_read_msr", "paravirt_read_msr_safe", "read_msr", "msr_read",
> +	"native_apic_msr_read", "native_apic_mem_read", "native_apic_icr_read", "apic_read", "apic_icr_read", "native_x2apic_icr_read",
> +	"io_apic_read", "native_io_apic_read", "__ioapic_read_entry", "ioapic_read_entry", "vp_ioread8", "vp_ioread16", "vp_ioread32",
> +	"__virtio_cread_many", "virtio_cread", "virtio_cread_le", "virtio_cread8", "virtio_cread16", "virtio_cread32", "virtio_cread64",
> +	"virtio_cread_bytes", "virtio16_to_cpu", "virtio32_to_cpu", "virtio64_to_cpu", "__virtio16_to_cpu", "__virtio32_to_cpu",
> +	"__virtio64_to_cpu", "virtqueue_get_buf", "vringh16_to_cpu", "vringh32_to_cpu", "vringh64_to_cpu", "tap16_to_cpu", "tun16_to_cpu",
> +	"read_pci_config", "read_pci_config_byte", "read_pci_config_16", "raw_pci_read", "pci_read", "pci_read_config_byte",
> +	"pci_read_config_word", "pci_read_config_dword", "pci_bus_read_config_byte", "pci_bus_read_config_word",
> +	"pci_bus_read_config_dword", "pci_generic_config_read", "pci_generic_config_read32", "pci_user_read_config_byte",
> +	"pci_user_read_config_word", "pci_user_read_config_dword", "pcie_capability_read_word", "pcie_capability_read_dword",
> +	"pci_read_vpd", "serial8250_early_in", "serial_dl_read", "serial8250_in_MCR", "serial_in", "serial_port_in", "serial_icr_read",
> +	"serial8250_rx_chars", "dw8250_readl_ext", "udma_readl", "sio_read_reg", "irq_readl_be", "irq_reg_readl", "fw_cfg_read_blob",
> +	"acpi_os_read_iomem", "acpi_os_read_port", "acpi_hw_read_multiple", "acpi_hw_read", "acpi_hw_read_port", "acpi_hw_register_read",
> +	"acpi_hw_gpe_read", "apei_read", "acpi_read", "__apei_exec_read_register", "cpc_read", "hv_get_register", "iosf_mbi_read",
> +	"cpuid", "cpuid_count", "cpuid_eax", "cpuid_ebx", "cpuid_ecx", "cpuid_edx"
> +
> +};
> +
> +
> +static int my_id;
> +static int my_call_id;
> +
> +STATE(called);

This state is not used here.  Even in smatch_kernel_user_data.c it
should be moved out of there into a separate file...

> +static unsigned long func_gets_host_data;
> +static struct stree *start_states;
> +
> +static void save_start_states(struct statement *stmt)
> +{
> +	start_states = clone_stree(__get_cur_stree());
> +}
> +
> +static void free_start_states(void)
> +{
> +	free_stree(&start_states);
> +}
> +

No need to do this these days.  It's stored in get_start_states().

> +static struct smatch_state *empty_state(struct sm_state *sm)
> +{
> +	return alloc_estate_empty();
> +}
> +
> +static struct smatch_state *new_state(struct symbol *type)
> +{
> +	struct smatch_state *state;
> +
> +	if (!type || type_is_ptr(type))
> +		return NULL;
> +
> +	state = alloc_estate_whole(type);
> +	estate_set_new(state);
> +
> +	return state;

This code is supposed to differentiate between places where we return
user data because it was passed to us or we got fresh user data inside
the function.  In smatch_kernel_user_data.c it's only a single question
"did we recieve user data? Y/N."  But it should be "Was variable foo->bar
user data."

I would probably pull returned user data out into a separate file these
days.  And have a different file for if a variable currently holds user
data or if we pass user data to a function.

> +}
> +
> +static void pre_merge_hook(struct sm_state *cur, struct sm_state *other)
> +{
> +	struct smatch_state *user = cur->state;
> +	struct smatch_state *extra;
> +	struct smatch_state *state;
> +	struct range_list *rl;
> +
> +	extra = __get_state(SMATCH_EXTRA, cur->name, cur->sym);
> +	if (!extra)
> +		return;
> +	rl = rl_intersection(estate_rl(user), estate_rl(extra));
> +	state = alloc_estate_rl(clone_rl(rl));
> +	if (estate_capped(user) || is_capped_var_sym(cur->name, cur->sym))
> +		estate_set_capped(state);
> +	if (estate_treat_untagged(user))
> +		estate_set_treat_untagged(state);

Tagged is for ARM tagged pointers.  It's probably not something you
need.  Search for "tag" and delete.

> +	if (estates_equiv(state, cur->state))
> +		return;
> +	set_state(my_id, cur->name, cur->sym, state);
> +}
> +

[ snip ]

> +
> +extern uint get_arg_bitmask(struct expression *expr);
> +static struct expression *ignore_param_set;
> +extern bool is_ignored_func(struct expression *expr);
> +
> +static void match_host_input(const char *fn, struct expression *expr)
> +{
> +
> +    uint arg_bitmask = 0;
> +
> +	if (!expr)
> +		return;
> +
> +	arg_bitmask = get_arg_bitmask(expr);
> +
> +    if (!arg_bitmask)  	/* function returns host data, handled via match_returns_host_rl */
> +    	return;
> +
> +	if (is_ignored_func(expr))
> +        return;
> +
> +	func_gets_host_data = true;
> +	ignore_param_set = expr;
> +
> +	switch((uint)log2(arg_bitmask)) {
> +		case 0xC:
> +			tag_argument(expr, 2);
> +			tag_argument(expr, 3);
> +			break;
> +		case 0x36:
> +			tag_argument(expr, 1);
> +			tag_argument(expr, 2);
> +			tag_argument(expr, 3);
> +			tag_argument(expr, 4);
> +			break;		
> +		case 0x74:
> +			tag_argument(expr, 2);
> +			tag_argument(expr, 3);
> +			tag_argument(expr, 4);
> +			tag_argument(expr, 5);
> +			break;		
> +		default:
> +			tag_argument(expr, (uint)log2(arg_bitmask));
> +			break;
> +	}
> +
> +	return;
> +}

This function would be better re-written with param key API.  Sorry
that Smatch doesn't have documentation.  :(  An example, of how that
works is in check_unwind.c, instead of a bitmap you would just have
multiple entries in the table.

I'm typing directly into my email client so this might not compile...

struct host_fn_info {
        const char *name;
        int type;
        int param;
        const char *key;
        const sval_t *implies_start, *implies_end;
        func_hook *call_back;
};

static struct host_fn_info func_table[] = {
	{ "memcpy_fromio", HOST_DATA, 0, "*$" },
	...
	{ "cpuid", HOST_DATA, 1, "*$" },
	{ "cpuid", HOST_DATA, 2, "*$" },
	{ "cpuid", HOST_DATA, 3, "*$" },
	{ "cpuid", HOST_DATA, 4, "*$" },
};

static void set_param_host_data(struct expression *expr, const char *name, struct symbol *sym, void *data)
{
	struct expression *arg;
	struct range_list *rl;

	arg = gen_expression_from_name_sym(name, sym);

	if (strcmp(key, "*$") == 0) {
		tag_as_user_data(arg);
		return;
	}

	// make sure that smatch_extra was run first
	get_absolute_rl(arg, &rl);
	set_state(my_id, name, sym, alloc_estate_rl(rl));
}

The code you have should work though and it should propagate.

What we need is some simple test cases.

//---

void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx,
           unsigned int *ecx, unsigned int *edx);

unsigned int a, b, c, d e;
unsigned int test(void)
{
	cpuid(0, &a, &b, &c, &d);
	__smatch_states("host");
	return a;
}

unsigned int test2(void)
{
	unsigned int x = test();
	__smatch_states("host");
}

regards,
dan carpenter




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Big List of Linux Books]

  Powered by Linux