Powered by Linux
[PATCH 1/1] check_host_input: add a pattern — Semantic Matching Tool

[PATCH 1/1] check_host_input: add a pattern

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The check_host_input pattern helps identify locations
where a guest kernel can take potentially malicious
input from the untrusted host in a confidential computing
threat model. The output of the pattern facilitates
manual code audit and helps cross-verify fuzzing
coverage.

Signed-off-by: Elena Reshetova <elena.reshetova@xxxxxxxxx>
---
 check_host_input.c | 886 +++++++++++++++++++++++++++++++++++++++++++++
 check_list.h       |   2 +
 2 files changed, 888 insertions(+)
 create mode 100644 check_host_input.c

diff --git a/check_host_input.c b/check_host_input.c
new file mode 100644
index 00000000..dc18ab34
--- /dev/null
+++ b/check_host_input.c
@@ -0,0 +1,886 @@
+/*
+ * Smatch pattern to facilitate the hardening of the Linux guest kernel
+ * for Confidential Cloud Computing threat model. 
+ * In this model the Linux guest kernel cannot trust the values
+ * it obtains using low level IO functions because they can be provided
+ * by a potentially malicious host or VMM. Instead it needs to make
+ * sure the code that handles processing of such values is hardened,
+ * free of memory safety issues and other potential security issues. 
+ *
+ * This smatch pattern helps to identify such places.
+ * Currently it covers most of MSR, portIO, MMIO and cpuid reading primitives.
+ * The full list of covered functions is stored in host_input_funcs array.
+ * The output of the pattern can be used to facilitate code audit, as
+ * well as to verify that utilized fuzzing strategy can reach all the
+ * code paths that can take a low-level input from a potentially malicious host.
+ *
+ * When run, the pattern produces two types of findings: errors and warnings.
+ * This is done to help prioritize the issues for the manual code audit.
+ * However, if time permits, all locations reported by the pattern should be checked.
+ *
+ * Written based on existing smatch patterns.
+ * 
+ * Author: Elena Reshetova <elena.reshetova@xxxxxxxxx>
+ * Copyright (c) 2021-22, Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ */
+#include "smatch.h"
+#include "smatch_slist.h"
+#include "smatch_extra.h"
+#include <math.h>
+
+/* Checker id assigned in check_host_input(); owner of every state below. */
+static int my_id;
+
+/* Per-symbol states tracked by this checker. */
+STATE(local_variables);   /* set by match_declarations() for each declared symbol */
+STATE(local_from_host);   /* set by host_input_check() on a symbol that directly receives a host read */
+STATE(tainted_from_host); /* set by match_assign()/match_statement() when a host value propagates */
+STATE(called_funcs);      /* set by match_function_def(); read by get_func_start_lineno() */
+
+/* Name printed in every finding emitted by this checker. */
+static const char* pattern_name = "check_host_input";
+
+/* Low-level functions, and their higher-level wrappers, that can take
+ * an input from a host/VMM. check_host_input() registers
+ * host_input_check() as a function hook for every name in this list. */
+const char *host_input_funcs[] = {
+    "inb", "inw", "inl", "inb_p", "inw_p", "inl_p", "insb", "insw", "insl", "get_dma_residue", "ioread8", "ioread16", "ioread32",
+    "ioread16be", "ioread32be", "ioread64_lo_hi", "ioread64_hi_lo", "ioread64be_lo_hi", "ioread64be_hi_lo", "ioread8_rep",
+    "ioread16_rep", "ioread32_rep", "__ioread32_copy", "iomap_readq", "iomap_readb", "iomap_readw", "iomap_readl", "memcpy_fromio",
+    "mmio_insb", "mmio_insw", "mmio_insl", "readb", "readw", "readl", "readq", "readsb", "readsw", "readsl", "readsq", "__readb", "__readw",
+    "__readl", "__readq", "__readsb", "__readsw", "__readsl", "__readsq", "__raw_readb", "__raw_readw", "__raw_readl", "__raw_readq",
+    "lo_hi_readq", "hi_lo_readq", "lo_hi_readq_relaxed", "hi_lo_readq_relaxed", "readb_relaxed", "readw_relaxed", "readl_relaxed",
+    "readq_relaxed", "native_read_msr", "native_read_msr_safe", "__rdmsr", "rdmsrl", "rdmsrl_safe", "rdmsr_on_cpu", "rdmsrl_on_cpu",
+    "rdmsr_on_cpus", "rdmsr_safe_on_cpu", "rdmsrl_safe_on_cpu", "paravirt_read_msr", "paravirt_read_msr_safe", "read_msr", "msr_read",
+    "native_apic_msr_read", "native_apic_mem_read", "native_apic_icr_read", "apic_read", "apic_icr_read", "native_x2apic_icr_read",
+    "io_apic_read", "native_io_apic_read", "__ioapic_read_entry", "ioapic_read_entry", "vp_ioread8", "vp_ioread16", "vp_ioread32",
+    "__virtio_cread_many", "virtio_cread", "virtio_cread_le", "virtio_cread8", "virtio_cread16", "virtio_cread32", "virtio_cread64",
+    "virtio_cread_bytes", "virtio16_to_cpu", "virtio32_to_cpu", "virtio64_to_cpu", "__virtio16_to_cpu", "__virtio32_to_cpu",
+    "__virtio64_to_cpu", "virtqueue_get_buf", "vringh16_to_cpu", "vringh32_to_cpu", "vringh64_to_cpu", "tap16_to_cpu", "tun16_to_cpu",
+    "read_pci_config", "read_pci_config_byte", "read_pci_config_16", "raw_pci_read", "pci_read", "pci_read_config_byte",
+    "pci_read_config_word", "pci_read_config_dword", "pci_bus_read_config_byte", "pci_bus_read_config_word",
+    "pci_bus_read_config_dword", "pci_generic_config_read", "pci_generic_config_read32", "pci_user_read_config_byte",
+    "pci_user_read_config_word", "pci_user_read_config_dword", "pcie_capability_read_word", "pcie_capability_read_dword",
+    "pci_read_vpd", "serial8250_early_in", "serial_dl_read", "serial8250_in_MCR", "serial_in", "serial_port_in", "serial_icr_read",
+    "serial8250_rx_chars", "dw8250_readl_ext", "udma_readl", "sio_read_reg", "irq_readl_be", "irq_reg_readl", "fw_cfg_read_blob",
+    "acpi_os_read_iomem", "acpi_os_read_port", "acpi_hw_read_multiple", "acpi_hw_read", "acpi_hw_read_port", "acpi_hw_register_read",
+    "acpi_hw_gpe_read", "apei_read", "acpi_read", "__apei_exec_read_register", "cpc_read", "hv_get_register", "iosf_mbi_read",
+    "cpuid", "cpuid_count", "cpuid_eax", "cpuid_ebx", "cpuid_ecx", "cpuid_edx"
+};
+
+
+/* The below functions are the 'write' counterparts of the
+ * above 'read' primitives. Report them as warnings
+ * since we don't care if the potentially malicious input
+ * from the host is given back to the host for a 'write'.
+ * TODO: write a full list */
+static const char* out_func_table[] = {
+                "outb","outw","outl","outb_p","outw_p","outl_p","outsb","outsw","outsl",
+                "iowrite8","iowrite16","iowrite32","iowrite16be","iowrite32be",
+                "iowrite64be","iowrite64","iowrite64_lo_hi","iowrite64_hi_lo",
+                "iowrite64be_lo_hi","iowrite64be_hi_lo", "pio_write64_lo_hi","pio_write64_hi_lo",
+                "iowrite8_rep","iowrite16_rep","iowrite32_rep",
+                "pio_write64be_lo_hi","pio_write64be_hi_lo",  "mmio_outb","mmio_outw","mmio_outl",
+                "writeb","writew","writel","writeq","__raw_writeb","__raw_writew", "__raw_writel",
+                "__raw_writeq", "native_write_msr_safe", "writesb","writesw","writesl","writesq",
+                "wrmsrl", "wrmsrl_safe", "paravirt_write_msr", "wrmsrl_safe_on_cpu", "apic_write"
+            };
+
+/* Macros whose expansion reads from the host; matched by name in
+ * is_host_macro(). */
+static const char *host_macros[] = {
+    "virtio_cread_le",
+};
+
+/* Printing primitives. A tainted value passed to one of these is
+ * reported as a warning, since they have a much lower probability
+ * of containing problems. */
+static const char *safe_func_table[] = {
+    "printk",
+    "_printk",
+    "dev_printk",
+    "_dev_err",
+    "_dev_warn",
+    "_dev_notice",
+    "_dev_info",
+    "_dev_alert",
+    "_dev_crit",
+    "_dev_emerg",
+    "dev_dbg",
+    "__dynamic_dev_dbg",
+    "__dynamic_pr_debug",
+    "sysfs_emit",
+};
+
+/* cpuid reading helpers; host_input_check() inspects their first
+ * argument to skip leaves that are not in the untrusted set. */
+static const char *cpuid_func_table[] = {
+    "cpuid",
+    "cpuid_count",
+    "cpuid_eax",
+    "cpuid_ebx",
+    "cpuid_ecx",
+    "cpuid_edx",
+};
+
+/* cpuid leaves that is_untrusted_cpuid() reports as untrusted when the
+ * leaf number is passed as a constant. */
+static const unsigned long untrusted_cpuids[] = {
+    0x2, 0x5, 0x6, 0x9, 0xb, 0xc, 0xf,
+    0x10, 0x16, 0x17, 0x18, 0x1a, 0x1b, 0x1f,
+    0x80000002, 0x80000003, 0x80000004,
+    0x80000005, 0x80000006, 0x80000007,
+};
+
+/* This struct describes functions from the
+ * host_input_funcs array that do not return the value
+ * read from the host directly (i.e. via return value),
+ * but pass it via arguments.
+ * This helps the pattern to fetch the correct
+ * arguments in such cases and taint them for further
+ * analysis. */
+typedef struct {
+       /* name of the host input function */
+       const char* func_name;
+       /* marks the output argument(s); is_special_arg() passes the
+        * position of the highest set bit to get_argument_from_call_expr() */
+       uint arg_bitmask;
+} in_func_args;
+
+/* Host input functions that hand the value read from the host back
+ * through one or more of their arguments. "argument N" in the group
+ * comments below is 1-based; for the single-argument groups bit N-1 of
+ * arg_bitmask is set. */
+static const in_func_args func_args[] = {
+    /* argument 1 only */
+    { "memcpy_fromio",              0x1 },
+    { "apei_read",                  0x1 },
+    { "acpi_hw_read",               0x1 },
+    { "__ioread32_copy",            0x1 },
+    /* argument 2 only */
+    { "readsb",                     0x2 },
+    { "readsw",                     0x2 },
+    { "readsl",                     0x2 },
+    { "readsq",                     0x2 },
+    { "acpi_os_read_iomem",         0x2 },
+    { "acpi_os_read_port",          0x2 },
+    { "ioread8_rep",                0x2 },
+    { "ioread16_rep",               0x2 },
+    { "ioread32_rep",               0x2 },
+    { "rdmsrl_safe",                0x2 },
+    { "msr_read",                   0x2 },
+    { "fw_cfg_read_blob",           0x2 },
+    { "acpi_hw_read_port",          0x2 },
+    { "acpi_hw_register_read",      0x2 },
+    /* argument 3 only */
+    { "rdmsrl_on_cpu",              0x4 },
+    { "rdmsr_on_cpus",              0x4 },
+    { "rdmsrl_safe_on_cpu",         0x4 },
+    { "__virtio_cread_many",        0x4 },
+    { "virtio_cread_bytes",         0x4 },
+    { "pci_read_config_byte",       0x4 },
+    { "pci_read_config_word",       0x4 },
+    { "pci_read_config_dword",      0x4 },
+    { "pcie_capability_read_word",  0x4 },
+    { "pcie_capability_read_dword", 0x4 },
+    { "pci_user_read_config_word",  0x4 },
+    { "pci_user_read_config_byte",  0x4 },
+    { "pci_user_read_config_dword", 0x4 },
+    { "cpc_read",                   0x4 },
+    /* argument 4 only */
+    { "pci_bus_read_config_byte",   0x8 },
+    { "pci_bus_read_config_word",   0x8 },
+    { "pci_bus_read_config_dword",  0x8 },
+    { "pci_read_vpd",               0x8 },
+    { "iosf_mbi_read",              0x8 },
+    /* arguments 3 and 4 */
+    { "rdmsr_on_cpu",               0xC },
+    { "rdmsr_safe_on_cpu",          0xC },
+    /* argument 5 only */
+    { "pci_read",                   0x10 },
+    { "pci_generic_config_read",    0x10 },
+    { "pci_generic_config_read32",  0x10 },
+    /* argument 6 only */
+    { "raw_pci_read",               0x20 },
+    /* NOTE(review): 0x36 and 0x74 below do not equal the bit patterns
+     * implied by "arguments 2-5" (0x1E) and "arguments 3-6" (0x3C).
+     * is_special_arg() compares against these exact values to detect
+     * the multi-argument case, so any correction has to change both
+     * places together. */
+    /* arguments 2-5 */
+    { "cpuid",                      0x36 },
+    /* arguments 3-6 */
+    { "cpuid_count",                0x74 },
+
+};
+
+/* Look up the source line on which the function named func_name starts.
+ * match_function_def() records function definitions under the
+ * called_funcs state; the first such entry whose name occurs inside
+ * func_name supplies the line. Returns -1 when func_name is NULL or
+ * no entry matches. The result anchors the relative line offset used
+ * for the finding hashes. */
+static int get_func_start_lineno(char* func_name)
+{
+    struct sm_state *entry;
+
+    if (func_name == NULL)
+        return -1;
+
+    FOR_EACH_MY_SM(my_id, __get_cur_stree(), entry) {
+        if (!entry->sym)
+            continue;
+        if (strstr(func_name, entry->name) == NULL)
+            continue;
+        if (!slist_has_state(entry->possible, &called_funcs))
+            continue;
+        return entry->sym->pos.line;
+    } END_FOR_EACH_SM(entry);
+
+    return -1;
+}
+
+/* djb2 string hash (seed 5381, multiplier 33) over the characters of
+ * str, finished with one extra multiply-by-33 step that mixes in num. */
+unsigned long djb2_hash(const char *str, int num)
+{
+        unsigned long acc = 5381;
+        const char *p;
+
+        for (p = str; *p != '\0'; p++)
+            acc = acc * 33 + *p;
+        return acc * 33 + num;
+}
+
+/* Produce the djb2 hash for a given expression.
+ * Used to generate a unique identifier for each reported issue, so
+ * that previously seen results can be transferred automatically.
+ *
+ * The hash combines the expression text with the line offset of the
+ * current position relative to the start of the current function.
+ * For non-parsable expressions and expressions containing temporary
+ * variables (like __UNIQUE_ID_*, $expr_) the fixed string "complex" is
+ * hashed instead of the text: together with the line offset this is
+ * more stable and avoids results that do not transfer between audits
+ * of different versions. */
+unsigned long produce_expression_hash(struct expression *expr)
+{
+    unsigned long hash;
+    int line_offset = get_lineno() - get_func_start_lineno(get_function());
+    char *str = expr_to_str(expr);
+
+    if (str && !strstr(str, "__UNIQUE_ID_") && !strstr(str, "$expr_"))
+        hash = djb2_hash(str, line_offset);
+    else
+        hash = djb2_hash("complex", line_offset);
+
+    /* expr_to_str() hands back an allocated copy; release it */
+    if (str)
+        free_string(str);
+    return hash;
+}
+
+/* Peel unary pre/post operators and all cast flavours off an
+ * expression until a node of another kind is reached. Returns that
+ * node, or NULL if the chain ends in a NULL operand (or expr is NULL). */
+static struct expression* strip_pre_post_ops(struct expression *expr)
+{
+    for (;;) {
+        if (!expr)
+            return NULL;
+
+        switch (expr->type) {
+        case EXPR_PREOP:
+        case EXPR_POSTOP:
+            expr = expr->unop;
+            break;
+        case EXPR_CAST:
+        case EXPR_FORCE_CAST:
+        case EXPR_IMPLIED_CAST:
+            expr = expr->cast_expression;
+            break;
+        default:
+            /* nothing more to strip */
+            return expr;
+        }
+    }
+}
+
+/* Report whether fn matches, by symbol name, any of the count entries
+ * of table. */
+static bool fn_listed_in(struct expression *fn, const char **table, int count)
+{
+    const char **entry;
+
+    for (entry = table; entry < table + count; entry++) {
+        if (sym_name_is(*entry, fn))
+            return true;
+    }
+    return false;
+}
+
+/* Tell whether fn is an output ('write') primitive or a printing
+ * function. A tainted host value passed into such a function only
+ * warrants a warning rather than an error.
+ * TODO: need to add checking arguments for these functions */
+static bool is_safe_function(struct expression *fn)
+{
+    return fn_listed_in(fn, out_func_table, ARRAY_SIZE(out_func_table))
+        || fn_listed_in(fn, safe_func_table, ARRAY_SIZE(safe_func_table));
+}
+
+/* Tell whether macro is exactly one of the names in host_macros. */
+static bool is_host_macro(const char *macro)
+{
+    int remaining = ARRAY_SIZE(host_macros);
+
+    while (remaining-- > 0) {
+        if (!strcmp(host_macros[remaining], macro))
+            return true;
+    }
+    return false;
+}
+
+
+/* AFTER_DEF_HOOK callback: record the function being defined under
+ * the called_funcs state so that get_func_start_lineno() can later
+ * recover the line on which it starts. Symbols without an identifier
+ * are skipped, as there is no name to record them under. */
+static void match_function_def(struct symbol *sym)
+{
+    if (!sym || !sym->ident)
+        return;
+    set_state(my_id, sym->ident->name, sym, &called_funcs);
+}
+
+/* Return 1 when the state tracked for expr may be local_from_host or
+ * tainted_from_host, i.e. the symbol holds (directly or through
+ * propagation) a value received from the host; 0 otherwise, including
+ * when no state is tracked for expr. */
+static int is_symbol_tainted(struct expression *expr)
+{
+    struct sm_state *state = get_sm_state_expr(my_id, expr);
+
+    if (state == NULL)
+        return 0;
+    if (slist_has_state(state->possible, &local_from_host))
+        return 1;
+    return slist_has_state(state->possible, &tainted_from_host) ? 1 : 0;
+}
+
+/* Return 1 when the state tracked for expr may be local_variables or
+ * local_from_host, i.e. the symbol was declared inside the function
+ * being analysed; 0 otherwise, including when no state is tracked. */
+static int is_symbol_local(struct expression *expr)
+{
+    struct sm_state *state = get_sm_state_expr(my_id, expr);
+
+    if (state == NULL)
+        return 0;
+    if (slist_has_state(state->possible, &local_variables))
+        return 1;
+    return slist_has_state(state->possible, &local_from_host) ? 1 : 0;
+}
+
+/* Helper to check if a given expression contains
+ * (either directly or via propagation) a value
+ * received from the host.
+ *
+ * Compound expressions are checked recursively on their operands,
+ * symbols and dereferences via is_symbol_tainted(). Any other kind of
+ * expression falls back to a textual search of the tainted symbol
+ * names inside the expression string.
+ * Returns 1 when tainted, 0 otherwise. */
+static int is_expression_tainted(struct expression *expr)
+{
+    struct sm_state *sm;
+    char *expr_str;
+
+    if (!expr)
+        return 0;
+
+    expr = strip_pre_post_ops(expr);
+    /* stripping may end on a NULL operand */
+    if (!expr)
+        return 0;
+
+    if (expr->type == EXPR_CALL){
+        /* calls are handled in match_call, no need to parse */
+        return 0;
+    }
+
+    if((expr->type == EXPR_BINOP) || (expr->type == EXPR_COMPARE)
+        || (expr->type == EXPR_COMMA) || (expr->type == EXPR_LOGICAL)
+        || (expr->type == EXPR_ASSIGNMENT)) {
+        /* need to check both left and right expressions */
+        if (is_expression_tainted(expr->left))
+            return 1;
+        if (is_expression_tainted(expr->right))
+            return 1;
+        return 0;
+    }
+
+    if((expr->type == EXPR_CONDITIONAL) || (expr->type == EXPR_SELECT)) {
+        /* need to check each part */
+        if (is_expression_tainted(expr->conditional))
+            return 1;
+        if (is_expression_tainted(expr->cond_true))
+            return 1;
+        if (is_expression_tainted(expr->cond_false))
+            return 1;
+        return 0;
+    }
+
+    if ((expr->type == EXPR_SYMBOL) || (expr->type == EXPR_DEREF)) {
+        if (is_symbol_tainted(expr))
+            return 1;
+        return 0;
+    }
+
+    /* render the expression once instead of once per tracked state */
+    expr_str = expr_to_str(expr);
+    if (!expr_str) {
+        sm_error("expression was not parsed.");
+        return 0;
+    }
+
+    /* the below matching has a small number of false
+     * positives when a tainted symbol from host happens
+     * to match another symbol. ntimer (tainted) vs. hpetp->hp_ntimer
+     * to be fixed later */
+
+    FOR_EACH_MY_SM(my_id, __get_cur_stree(), sm) {
+        if ((strstr(expr_str, sm->name) != NULL)
+            && (slist_has_state(sm->possible, &local_from_host)
+            || slist_has_state(sm->possible, &tainted_from_host))) {
+                free_string(expr_str);
+                return 1;
+        }
+    } END_FOR_EACH_SM(sm);
+
+    free_string(expr_str);
+    return 0;
+}
+
+
+/* DECLARATION_HOOK callback: builds the list of local variables of
+ * the function by marking each declared symbol as local_variables.
+ * Symbols without an identifier are skipped, as there is no name to
+ * record them under. */
+static void match_declarations(struct symbol *sym)
+{
+    if (!sym || !sym->ident)
+        return;
+    set_state(my_id, sym->ident->name, sym, &local_variables);
+}
+
+/* Checks all return expressions for tainted values */
+static void match_return(struct expression *ret_value)
+{
+    unsigned long hash;
+
+    if (!ret_value)
+        return;
+
+    if (is_expression_tainted(ret_value)) {
+        hash = produce_expression_hash(ret_value);
+        sm_error("{%lu}\n\t'%s' return an expression containing a propagated value from the host '%s';",
+                    hash, pattern_name, expr_to_str(ret_value));
+    }
+}
+
+
+/* STMT_HOOK callback.
+ * Checks all STMT_ITERATOR expressions (pre/post statements and
+ * pre/post conditions) for tainted values and reports one error per
+ * iterator statement.
+ * Also catches the virtio_cread_le macro invocation here and marks the
+ * argument that receives the host value as tainted. */
+static void match_statement(struct statement *stmt)
+{
+    unsigned long hash;
+    struct expression *expr = NULL, *arg;
+    const char *macro;
+    char *fn_str;
+
+    if (!stmt)
+        return;
+
+    /* virtio_cread_le macro needs a special handling, otherwise we miss results */
+    macro = get_macro_name(stmt->pos);
+    if ((macro) && (is_host_macro(macro)) && (stmt->type == STMT_EXPRESSION)
+        && (stmt->expression) && (stmt->expression->type == EXPR_CALL)) {
+        fn_str = expr_to_str(stmt->expression->fn);
+        if (fn_str && (strstr(fn_str, "vdev->config->get") != NULL)) {
+            arg = get_argument_from_call_expr(stmt->expression->args, 2);
+            arg = strip_pre_post_ops(arg);
+            /* ->symbol is only meaningful for a plain symbol expression */
+            if (arg && (arg->type == EXPR_SYMBOL) && arg->symbol && arg->symbol->ident)
+                set_state(my_id, arg->symbol->ident->name, arg->symbol, &tainted_from_host);
+        }
+        free_string(fn_str);
+    }
+
+    if (stmt->type != STMT_ITERATOR)
+        return;
+
+    if ((stmt->iterator_pre_statement) && (stmt->iterator_pre_statement->type == STMT_EXPRESSION)
+        && (stmt->iterator_pre_statement->expression)
+        && (is_expression_tainted(stmt->iterator_pre_statement->expression)))
+        expr = stmt->iterator_pre_statement->expression;
+
+    if ((stmt->iterator_post_statement) && (stmt->iterator_post_statement->type == STMT_EXPRESSION)
+        && (stmt->iterator_post_statement->expression)
+        && (is_expression_tainted(stmt->iterator_post_statement->expression)))
+        expr = stmt->iterator_post_statement->expression;
+
+    if ((stmt->iterator_pre_condition) && (is_expression_tainted(stmt->iterator_pre_condition)))
+        expr = stmt->iterator_pre_condition;
+
+    if ((stmt->iterator_post_condition) && (is_expression_tainted(stmt->iterator_post_condition)))
+        expr = stmt->iterator_post_condition;
+
+    /* The above logic only stores the latest tainted expr.
+     * This is ok since one warning per line is enough */
+    if (expr) {
+        hash = produce_expression_hash(expr);
+        sm_error("{%lu}\n\t'%s' an expression containing a propagated value from the host '%s' used in iterator;",
+                hash, pattern_name, expr_to_str(expr));
+    }
+
+}
+
+/* Position of the highest set bit of mask (0 for bit 0).
+ * mask must be non-zero. */
+static unsigned int host_arg_top_bit(unsigned int mask)
+{
+    unsigned int idx = 0;
+
+    while (mask >>= 1)
+        idx++;
+    return idx;
+}
+
+/* Helper to check if a given function returns
+ * a value from the host via one of its arguments.
+ *
+ * expr is the call expression, fn the hooked function name and hash
+ * the identifier printed with any finding.
+ * Returns false when the called function is not listed in func_args.
+ * Otherwise returns true and stores the respective argument in *arg.
+ * For functions that return the host value via multiple arguments
+ * only the smatch finding is produced here and *arg is set to NULL
+ * (very rare case now). */
+static bool is_special_arg(struct expression *expr, const char *fn, unsigned long hash, struct expression **arg)
+{
+    uint arg_bitmask = 0;
+    unsigned int top_idx;
+    struct expression *texpr = NULL;
+
+    if ((!expr) || (!fn))
+        return false;
+
+    for (int i = 0; i < ARRAY_SIZE(func_args); i++) {
+        if (sym_name_is(func_args[i].func_name, expr->fn)){
+            arg_bitmask = func_args[i].arg_bitmask;
+            break;
+        }
+    }
+
+    if (!arg_bitmask)
+        return false;
+
+    /* integer bit position instead of floating-point log2() */
+    top_idx = host_arg_top_bit(arg_bitmask);
+    *arg = get_argument_from_call_expr(expr->args, top_idx);
+
+    if (arg_bitmask == 0xC) {
+        /* function returns values via 3 and 4 args */
+        texpr = get_argument_from_call_expr(expr->args, top_idx - 1);
+        sm_error("{%lu}\n\t'%s' read from the host using function '%s' to values %s and %s;",
+                hash, pattern_name, fn, expr_to_str(texpr), expr_to_str(*arg));
+        *arg = NULL;
+    }
+
+    if ((arg_bitmask == 0x36) || (arg_bitmask == 0x74)){
+        /* function returns values via multiple args */
+        sm_error("{%lu}\n\t'%s' read from the host using function '%s' to multiple values;",
+                hash, pattern_name, fn);
+        *arg = NULL;
+    }
+
+    return true;
+}
+
+/* If checked is an assignment with host_input_expression on one side,
+ * return the expression on the other side; otherwise return NULL. */
+static struct expression* match_expression(struct expression *host_input_expression, struct expression *checked)
+{
+    if (checked == NULL || checked->type != EXPR_ASSIGNMENT)
+        return NULL;
+
+    if (host_input_expression == checked->left)
+        return checked->right;
+    return (host_input_expression == checked->right) ? checked->left : NULL;
+}
+
+/* Parse the condition of an iterator statement (the pre-condition if
+ * present, else the post-condition). When the condition is a
+ * comparison and one of its operands is an assignment involving
+ * host_input_expression, store the other side of that assignment in
+ * *left_return and return true. Otherwise *left_return is NULL and
+ * false is returned. */
+static bool handle_iterator_statement(struct statement *stmt, struct expression *host_input_expression,
+                                    struct expression **left_return)
+{
+    struct expression *cond;
+    struct expression *sides[2];
+    int i;
+
+    *left_return = NULL;
+
+    if (stmt == NULL)
+        return false;
+
+    cond = stmt->iterator_pre_condition ? stmt->iterator_pre_condition
+                                        : stmt->iterator_post_condition;
+    if (cond == NULL || cond->type != EXPR_COMPARE)
+        return false;
+
+    sides[0] = strip_expr(cond->left);
+    sides[1] = strip_expr(cond->right);
+
+    for (i = 0; i < 2; i++) {
+        *left_return = match_expression(host_input_expression, sides[i]);
+        if (*left_return)
+            return true;
+    }
+
+    return false;
+}
+
+/* Tell whether the cpuid leaf value is one whose output is treated as
+ * untrusted: the 0x40000002..0x400000FF range or a leaf listed in
+ * untrusted_cpuids. */
+static bool is_untrusted_cpuid(unsigned long long value)
+{
+    const unsigned long *leaf;
+    const unsigned long *end = untrusted_cpuids + ARRAY_SIZE(untrusted_cpuids);
+
+    /* sw defined cpuid excluding KVM cpuids and identification string */
+    if (value >= 0x40000002 && value <= 0x400000FF)
+        return true;
+
+    for (leaf = untrusted_cpuids; leaf != end; leaf++) {
+        if (*leaf == value)
+            return true;
+    }
+    return false;
+}
+
+/* Main routine */
+static void host_input_check(const char *fn, struct expression *expr, void *data)
+{
+    struct statement *stmt = NULL;
+    struct expression *left = NULL, *cpuid_op = NULL, *stmt_expr_clean = NULL;
+    struct symbol *left_sym = NULL;
+    unsigned long hash;
+    bool is_cpuid_func = false;
+
+    if ((!expr) || (!fn) || (!expr_to_str(expr->fn)))
+        return;
+    
+    for (int i = 0; i < ARRAY_SIZE(cpuid_func_table); i++) {
+        if (sym_name_is(cpuid_func_table[i], expr->fn)) {
+            is_cpuid_func = true;
+            break;
+        }
+    }
+
+    if (is_cpuid_func) {
+        cpuid_op = get_argument_from_call_expr(expr->args, 0);
+        if ((cpuid_op->type == EXPR_VALUE) && (!is_untrusted_cpuid(cpuid_op->value)))
+            return;
+    }
+
+    stmt = last_ptr_list((struct ptr_list *)big_statement_stack);
+
+    if (unreachable())
+        return;
+
+    hash = produce_expression_hash(expr);
+
+    if (is_special_arg(expr, fn, hash, &left)) {
+        if (!left) 
+        /* Special case of multi-argument host input functions.
+         * Very rare case, just report them in is_special_arg() for now */
+            return;
+    } else if (stmt->type == STMT_EXPRESSION) {
+
+        if ((!stmt->expression) || (stmt->expression == expr)
+            || (!expr_to_str(stmt->expression))) {
+            sm_warning("{%lu}\n\t'%s' empty read from the host using function '%s';",
+                hash, pattern_name, fn);
+            return;
+        }
+
+        stmt_expr_clean = strip_pre_post_ops(stmt->expression);
+
+        if (strstr(expr_to_str(stmt_expr_clean), expr_to_str(expr->fn)) == NULL) {
+            /* stmt expr normally includes analyzed host input func
+             * report these cases as unsure for now*/
+            sm_warning("{%lu}\n\t'%s' potential read from the host using function '%s';",
+                hash, pattern_name, fn);
+            return;
+        }
+
+        if (stmt_expr_clean->type == EXPR_CALL) {
+            if (is_safe_function(stmt_expr_clean->fn))
+                sm_warning("{%lu}\n\t'%s' read from the host using function '%s' as argument into function '%s';",
+                    hash, pattern_name, fn, expr_to_str(stmt_expr_clean->fn));
+            else
+                sm_error("{%lu}\n\t'%s' read from the host using function '%s' as argument into function '%s';",
+                    hash, pattern_name, fn, expr_to_str(stmt_expr_clean->fn));                
+            return;
+        }
+
+        if ((stmt_expr_clean->type != EXPR_BINOP) &&
+            (stmt_expr_clean->type != EXPR_ASSIGNMENT)) {
+            sm_warning("{%lu}\n\t'%s' no assigment read from the host using function '%s';",
+                hash, pattern_name, fn);
+            return;
+        }
+        left = strip_expr(stmt_expr_clean->left);
+    } else if (stmt->type == STMT_RETURN) {
+            if (stmt->expression == expr)
+                sm_error("{%lu}\n\t'%s' read from the host using function '%s' returned as a value;",
+                    hash, pattern_name, fn);
+            else
+                sm_error("{%lu}\n\t'%s' read from the host using function '%s' returned as a part of an expression;",
+                 hash, pattern_name, fn);
+            return;
+    } else if (stmt->type == STMT_IF) {
+            sm_warning("{%lu}\n\t'%s' empty read from the host using function '%s' used inside an 'if' clause;",
+             hash, pattern_name, fn);
+            return;
+    } else if (stmt->type == STMT_ITERATOR) {
+            /* report usage within an iterator as errors
+             * because it migth be used as bound or index for memory
+             * access. We dont have too many of them in filtered
+             * output so it is ok not to parse this deeper */
+            if (!handle_iterator_statement(stmt, expr, &left)){
+                sm_error("{%lu}\n\t'%s' empty read from the host using function '%s' used inside a condition for iteration;",
+             hash, pattern_name, fn);
+                return; 
+            }
+    } else if (stmt->type == STMT_SWITCH) {
+            sm_warning("{%lu}\n\t'%s' read from the host using function '%s' used inside a 'switch' clause;",
+             hash, pattern_name, fn);
+            return;
+    } else if (stmt->type == STMT_DECLARATION) {
+                FOR_EACH_PTR(stmt->declaration, left_sym) {
+                    /* we are going to take the last declaration */
+                    ;
+                } END_FOR_EACH_PTR(left_sym);
+                goto symb;
+    } else if (stmt->type == STMT_COMPOUND) {
+        sm_error("{%lu}\n\t'%s' read from the host using function '%s' into a compound statement;",
+            hash, pattern_name, fn);
+        return;            
+    } else {
+        sm_error("{%lu}\n\t'%s' not covered statement type when reading from the host using function '%s';",
+            hash, pattern_name, fn);
+        return;
+    }
+
+preop:
+
+    left = strip_pre_post_ops(left);
+
+    if (left->type == EXPR_DEREF) {
+        sm_error("{%lu}\n\t'%s' read from the host using function '%s' to a member of the structure '%s';",
+            hash, pattern_name, fn, expr_to_str(left));
+        return;           
+    }
+
+    if (left->type == EXPR_BINOP) {
+        /* trying to extract the leftmost symbol from this expression */
+        /* we have a case like foo[smth1][smth2] +- number = ... here */
+        /* strip the right element of the binop in order to unfold the leftmost symbol foo*/
+        left = left->left;
+        goto preop;
+    }
+
+    if (left->type != EXPR_SYMBOL) {
+        sm_error("{%lu}\n\t'%s' read from the host using function '%s' to a complex expression '%s', type is %d;",
+            hash, pattern_name, fn, expr_to_str(left), left->type);
+        return;
+    }
+
+    if (!left->symbol) {
+        /* this is the case when we have cast like (void)readl(addr); */
+        sm_warning("{%lu}\n\t'%s' empty read from the host using function '%s';",
+            hash, pattern_name, fn);
+        return;
+    }
+
+    left_sym = left->symbol;
+
+    if (!is_symbol_local(left))
+        sm_error("{%lu}\n\t'%s' read from the host using function '%s' into a non-local variable '%s';",
+                hash, pattern_name, fn, left_sym->ident->name);
+    else {
+
+symb:
+        if (!left_sym) {
+            sm_error("Empty left_sym. Abort");
+            return;
+        }
+
+        if (!is_int_type(left_sym->ctype.base_type)) {
+            sm_error("{%lu}\n\t'%s' read from the host using function '%s' to a non int type local variable '%s', type is %s;",
+                    hash, pattern_name, fn, left_sym->ident->name, type_to_str(left_sym->ctype.base_type));        
+        } else {
+            sm_warning("{%lu}\n\t'%s' read from the host using function '%s' to an int type local variable '%s', type is %s;",
+                        hash, pattern_name, fn, left_sym->ident->name, type_to_str(left_sym->ctype.base_type));
+        }
+        set_state(my_id, left_sym->ident->name, left_sym, &local_from_host);
+    }
+
+    return; 
+}
+
+/* Destination and finding hash of the assignment currently being
+ * analysed by match_assign(); kept across its recursive calls. */
+static struct expression *top_level_left_assign = NULL;
+unsigned long top_level_left_hash = 0;
+
+/* ASSIGNMENT_HOOK callback.
+ * Tracks propagation of the values received from the host.
+ * Currently only limited to a single function.
+ *
+ * For a top level assignment the destination is remembered and the
+ * right hand side is unwrapped (recursively for binary operations)
+ * until symbols are reached. When a tainted symbol is found, the
+ * propagation is reported and the destination symbol is marked as
+ * tainted_from_host. */
+static void match_assign(struct expression *expr)
+{
+    struct expression *current = expr;
+
+    if (!expr)
+        return;
+
+    if (current->type == EXPR_ASSIGNMENT) {
+        /* this is a top level call, need to store expr
+         * for further processing */
+        top_level_left_hash = produce_expression_hash(expr);
+        top_level_left_assign = current->left;
+        top_level_left_assign = strip_pre_post_ops(top_level_left_assign);
+        current = strip_expr(current->right);
+        if (!current)
+            return;
+        if (current->type == EXPR_CALL) {
+            /* nothing to do here, will be handled by match_call */
+            return;
+        }
+    }
+
+    current = strip_pre_post_ops(current);
+    if (!current)
+        return;
+
+    if (current->type == EXPR_GENERIC) {
+        current = strip_expr(current->control);
+        if (!current)
+            return;
+    }
+
+    if (current->type == EXPR_SYMBOL) {
+
+        if (!is_symbol_tainted(current))
+            return;
+
+        if (!top_level_left_assign)
+            return;
+
+        if (top_level_left_assign->type != EXPR_SYMBOL) {
+            sm_error("{%lu}\n\t'%s' propagating read value from the host '%s' into a different complex variable '%s';",
+                            top_level_left_hash, pattern_name, expr_to_str(current), expr_to_str(top_level_left_assign));
+            return;
+        }
+
+        if ((strstr(expr_to_str(top_level_left_assign), "__sm_fake_") != NULL) ||
+            (strstr(expr_to_str(top_level_left_assign), "__fake_param") != NULL)) {
+            /* we are handling a fake variable/expression here, don't do anything for now */
+            return;
+        }
+
+        /* a symbol expression may carry no resolved symbol; nothing to track then */
+        if ((!current->symbol) || (!top_level_left_assign->symbol)
+            || (!top_level_left_assign->symbol->ident))
+            return;
+
+        if (current->symbol->ident == top_level_left_assign->symbol->ident)
+            return;
+
+        if (is_symbol_local(top_level_left_assign))
+            sm_warning("{%lu}\n\t'%s' propagating read value from the host '%s' into a different local value '%s';",
+                    top_level_left_hash, pattern_name, expr_to_str(current), expr_to_str(top_level_left_assign));
+        else
+            sm_error("{%lu}\n\t'%s' propagating read value from the host '%s' into a different non-local value '%s';",
+                    top_level_left_hash, pattern_name, expr_to_str(current), expr_to_str(top_level_left_assign));
+
+        set_state(my_id, top_level_left_assign->symbol->ident->name,
+                        top_level_left_assign->symbol, &tainted_from_host);
+        top_level_left_assign = NULL;
+    }
+
+    if (current->type == EXPR_BINOP) {
+        /* here we need to match against both left or right
+         * expressions since both of them can match.
+         * So start unwrapping both parts until we reach symbols */
+        match_assign(current->left);
+        match_assign(current->right);
+    }
+
+    return;
+}
+
+/* FUNCTION_CALL_HOOK callback.
+ * Catches cases when a (tainted) value from the host is
+ * used as argument to another function. A warning is emitted when
+ * the callee is an output or printing function (see
+ * is_safe_function()), an error otherwise. */
+static void match_call(struct expression *expr)
+{
+    struct expression *arg;
+    bool is_safe_func = false;
+    const char *message, *function_name;
+    unsigned long hash;
+
+    if ((!expr) || (!expr->fn))
+        return;
+
+    if (parse_error)
+        return;
+
+    if (is_impossible_path())
+        return;
+
+    hash = produce_expression_hash(expr);
+    is_safe_func = is_safe_function(expr->fn);
+
+    FOR_EACH_PTR(expr->args, arg) {
+        arg = strip_pre_post_ops(arg);
+        if (!arg)
+            continue;
+        if ((arg->type != EXPR_BINOP) && (arg->type != EXPR_SYMBOL)) {
+            /* we only care of the above two types because
+             * they are the ones that can contain tainted values
+             * from the host */
+            continue;
+        }
+
+        if ((arg->type == EXPR_BINOP) && !is_expression_tainted(arg))
+            continue;
+        if ((arg->type == EXPR_SYMBOL) && !is_symbol_tainted(arg))
+            continue;
+
+        /* symbol_name/member are union members: only read the one
+         * that belongs to the callee expression type */
+        if ((expr->fn->type == EXPR_SYMBOL) && expr->fn->symbol_name)
+            function_name = expr->fn->symbol_name->name;
+        else if ((expr->fn->type == EXPR_DEREF) && expr->fn->member)
+            function_name = expr->fn->member->name;
+        else
+            function_name = expr_to_str(expr);
+
+        if (arg->type == EXPR_BINOP)
+            message = "{%lu}\n\t'%s' propagating an expression containing a tainted value from the host '%s' into a function '%s';";
+        else
+            message = "{%lu}\n\t'%s' propagating a tainted value from the host '%s' into a function '%s';";
+
+        if (is_safe_func)
+            sm_warning(message, hash, pattern_name, expr_to_str(arg), function_name);
+        else
+            sm_error(message, hash, pattern_name, expr_to_str(arg), function_name);
+
+    } END_FOR_EACH_PTR(arg);
+}
+
+/* Checker entry point, listed in check_list.h via CK(check_host_input).
+ * Stores the checker id, hooks host_input_check() onto every function
+ * named in host_input_funcs and registers the generic hooks this
+ * checker relies on. */
+void check_host_input(int id)
+{
+    my_id = id;
+    /* one function hook per host input primitive */
+    for (int i = 0; i < ARRAY_SIZE(host_input_funcs); i++) 
+        add_function_hook(host_input_funcs[i], host_input_check, NULL);
+    add_hook(&match_declarations, DECLARATION_HOOK); /* marks declared symbols as local_variables */
+    add_hook(&match_assign, ASSIGNMENT_HOOK);        /* tracks propagation through assignments */
+    add_hook(&match_call, FUNCTION_CALL_HOOK);       /* tainted values passed as call arguments */
+    add_hook(&match_return, RETURN_HOOK);            /* tainted values in return expressions */
+    add_hook(match_statement, STMT_HOOK);            /* iterator statements and virtio_cread_le */
+    add_hook(&match_function_def, AFTER_DEF_HOOK);   /* records function definitions as called_funcs */
+}
diff --git a/check_list.h b/check_list.h
index 97856dcf..7fab4147 100644
--- a/check_list.h
+++ b/check_list.h
@@ -179,6 +179,8 @@ CK(register_param_bits_clear)
 CK(check_do_while_loop_limit)
 
 /* <- your test goes here */
+CK(check_host_input)
+
 /* CK(register_template) */
 
 /* kernel specific */
-- 
2.25.1




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Big List of Linux Books]

  Powered by Linux