Re: [PATCH v3 52/70] i386/tdx: handle TDG.VP.VMCALL<GetQuote>

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Nov 15, 2023 at 02:15:01AM -0500, Xiaoyao Li wrote:
> From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
> 
> For GetQuote, delegate a request to Quote Generation Service.
> Add property "quote-generation-socket" to tdx-guest, which is a property
> of type SocketAddress to specify Quote Generation Service(QGS).
> 
> On request, connect to the QGS, read request buffer from shared guest
> memory, send the request buffer to the server and store the response
> into shared guest memory and notify TD guest by interrupt.
> 
> command line example:
>   qemu-system-x86_64 \
>     -object '{"qom-type":"tdx-guest","id":"tdx0","quote-generation-socket":{"type": "vsock", "cid":"2","port":"1234"}}' \
>     -machine confidential-guest-support=tdx0
> 
> Signed-off-by: Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
> Codeveloped-by: Chenyi Qiang <chenyi.qiang@xxxxxxxxx>
> Signed-off-by: Chenyi Qiang <chenyi.qiang@xxxxxxxxx>
> Signed-off-by: Xiaoyao Li <xiaoyao.li@xxxxxxxxx>
> ---
> Changes in v3:
> - rename property "quote-generation-service" to "quote-generation-socket";
> - change the type of "quote-generation-socket" from str to
>   SocketAddress;
> - squash next patch into this one;
> ---
>  qapi/qom.json         |   5 +-
>  target/i386/kvm/tdx.c | 430 ++++++++++++++++++++++++++++++++++++++++++
>  target/i386/kvm/tdx.h |   6 +
>  3 files changed, 440 insertions(+), 1 deletion(-)

> @@ -969,6 +1001,7 @@ static void tdx_guest_class_init(ObjectClass *oc, void *data)
>  {
>  }
>  
> +#define TDG_VP_VMCALL_GET_QUOTE                         0x10002ULL
>  #define TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT      0x10004ULL
>  
>  #define TDG_VP_VMCALL_SUCCESS           0x0000000000000000ULL
> @@ -977,6 +1010,400 @@ static void tdx_guest_class_init(ObjectClass *oc, void *data)
>  #define TDG_VP_VMCALL_GPA_INUSE         0x8000000000000001ULL
>  #define TDG_VP_VMCALL_ALIGN_ERROR       0x8000000000000002ULL
>  
> +#define TDX_GET_QUOTE_STRUCTURE_VERSION 1ULL
> +
> +#define TDX_VP_GET_QUOTE_SUCCESS                0ULL
> +#define TDX_VP_GET_QUOTE_IN_FLIGHT              (-1ULL)
> +#define TDX_VP_GET_QUOTE_ERROR                  0x8000000000000000ULL
> +#define TDX_VP_GET_QUOTE_QGS_UNAVAILABLE        0x8000000000000001ULL
> +
> +/* Limit to avoid resource starvation. */
> +#define TDX_GET_QUOTE_MAX_BUF_LEN       (128 * 1024)
> +#define TDX_MAX_GET_QUOTE_REQUEST       16
> +
> +/* Format of pages shared with guest. */
> +struct tdx_get_quote_header {
> +    /* Format version: must be 1 in little endian. */
> +    uint64_t structure_version;
> +
> +    /*
> +     * GetQuote status code in little endian:
> +     *   Guest must set error_code to 0 to avoid information leak.
> +     *   Qemu sets this before interrupting guest.
> +     */
> +    uint64_t error_code;
> +
> +    /*
> +     * in-message size in little endian: The message will follow this header.
> +     * The in-message will be send to QGS.
> +     */
> +    uint32_t in_len;
> +
> +    /*
> +     * out-message size in little endian:
> +     * On request, out_len must be zero to avoid information leak.
> +     * On return, message size from QGS. Qemu overwrites this field.
> +     * The message will follows this header.  The in-message is overwritten.
> +     */
> +    uint32_t out_len;
> +
> +    /*
> +     * Message buffer follows.
> +     * Guest sets message that will be send to QGS.  If out_len > in_len, guest
> +     * should zero remaining buffer to avoid information leak.
> +     * Qemu overwrites this buffer with a message returned from QGS.
> +     */
> +};
> +
> +static hwaddr tdx_shared_bit(X86CPU *cpu)
> +{
> +    return (cpu->phys_bits > 48) ? BIT_ULL(51) : BIT_ULL(47);
> +}
> +
> +struct tdx_get_quote_task {
> +    uint32_t apic_id;
> +    hwaddr gpa;
> +    uint64_t buf_len;
> +    char *out_data;
> +    uint64_t out_len;
> +    struct tdx_get_quote_header hdr;
> +    int event_notify_interrupt;
> +    QIOChannelSocket *ioc;
> +};
> +
> +struct x86_msi {
> +    union {
> +        struct {
> +            uint32_t    reserved_0              : 2,
> +                        dest_mode_logical       : 1,
> +                        redirect_hint           : 1,
> +                        reserved_1              : 1,
> +                        virt_destid_8_14        : 7,
> +                        destid_0_7              : 8,
> +                        base_address            : 12;
> +        } QEMU_PACKED x86_address_lo;
> +        uint32_t address_lo;
> +    };
> +    union {
> +        struct {
> +            uint32_t    reserved        : 8,
> +                        destid_8_31     : 24;
> +        } QEMU_PACKED x86_address_hi;
> +        uint32_t address_hi;
> +    };
> +    union {
> +        struct {
> +            uint32_t    vector                  : 8,
> +                        delivery_mode           : 3,
> +                        dest_mode_logical       : 1,
> +                        reserved                : 2,
> +                        active_low              : 1,
> +                        is_level                : 1;
> +        } QEMU_PACKED x86_data;
> +        uint32_t data;
> +    };
> +};
> +
> +static void tdx_td_notify(struct tdx_get_quote_task *t)
> +{
> +    struct x86_msi x86_msi;
> +    struct kvm_msi msi;
> +    int ret;
> +
> +    /* It is optional for host VMM to interrupt TD. */
> +    if(!(32 <= t->event_notify_interrupt && t->event_notify_interrupt <= 255))
> +        return;
> +
> +    x86_msi = (struct x86_msi) {
> +        .x86_address_lo  = {
> +            .reserved_0 = 0,
> +            .dest_mode_logical = 0,
> +            .redirect_hint = 0,
> +            .reserved_1 = 0,
> +            .virt_destid_8_14 = 0,
> +            .destid_0_7 = t->apic_id & 0xff,
> +        },
> +        .x86_address_hi = {
> +            .reserved = 0,
> +            .destid_8_31 = t->apic_id >> 8,
> +        },
> +        .x86_data = {
> +            .vector = t->event_notify_interrupt,
> +            .delivery_mode = APIC_DM_FIXED,
> +            .dest_mode_logical = 0,
> +            .reserved = 0,
> +            .active_low = 0,
> +            .is_level = 0,
> +        },
> +    };
> +    msi = (struct kvm_msi) {
> +        .address_lo = x86_msi.address_lo,
> +        .address_hi = x86_msi.address_hi,
> +        .data = x86_msi.data,
> +        .flags = 0,
> +        .devid = 0,
> +    };
> +    ret = kvm_vm_ioctl(kvm_state, KVM_SIGNAL_MSI, &msi);
> +    if (ret < 0) {
> +        /* In this case, no better way to tell it to guest.  Log it. */
> +        error_report("TDX: injection %d failed, interrupt lost (%s).\n",
> +                     t->event_notify_interrupt, strerror(-ret));
> +    }
> +}
> +
> +static void tdx_get_quote_read(void *opaque)
> +{
> +    struct tdx_get_quote_task *t = opaque;
> +    ssize_t size = 0;
> +    Error *err = NULL;

This error is set, but never read and more importantly
never freed.  If you're not going to use it just pass
NULL to the methods, otherwise use error_report_err to
print and free it.

> +    MachineState *ms;
> +    TdxGuest *tdx;
> +
> +    while (true) {
> +        char *buf;
> +        size_t buf_size;
> +
> +        if (t->out_len < t->buf_len) {
> +            buf = t->out_data + t->out_len;
> +            buf_size = t->buf_len - t->out_len;
> +        } else {
> +            /*
> +             * The received data is too large to fit in the shared GPA.
> +             * Discard the received data and try to know the data size.
> +             */
> +            buf = t->out_data;
> +            buf_size = t->buf_len;
> +        }
> +
> +        size = qio_channel_read(QIO_CHANNEL(t->ioc), buf, buf_size, &err);
> +        if (!size) {
> +            break;
> +        }
> +
> +        if (size < 0) {
> +            if (size == QIO_CHANNEL_ERR_BLOCK) {
> +                return;
> +            } else {
> +                break;
> +            }
> +        }
> +        t->out_len += size;
> +    }
> +    /*
> +     * If partial read successfully but return error at last, also treat it
> +     * as failure.
> +     */
> +    if (size < 0) {
> +        t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE);
> +        goto error;
> +    }
> +    if (t->out_len > 0 && t->out_len > t->buf_len) {
> +        /*
> +         * There is no specific error code defined for this case(E2BIG) at the
> +         * moment.
> +         * TODO: Once an error code for this case is defined in GHCI spec ,
> +         * update the error code.
> +         */
> +        t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_ERROR);
> +        t->hdr.out_len = cpu_to_le32(t->out_len);
> +        goto error_hdr;
> +    }
> +
> +    if (address_space_write(
> +            &address_space_memory, t->gpa + sizeof(t->hdr),
> +            MEMTXATTRS_UNSPECIFIED, t->out_data, t->out_len) != MEMTX_OK) {
> +        goto error;
> +    }
> +    /*
> +     * Even if out_len == 0, it's a success.  It's up to the QGS-client contract
> +     * how to interpret the zero-sized message as return message.
> +     */
> +    t->hdr.out_len = cpu_to_le32(t->out_len);
> +    t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_SUCCESS);
> +
> +error:
> +    if (t->hdr.error_code != cpu_to_le64(TDX_VP_GET_QUOTE_SUCCESS)) {
> +        t->hdr.out_len = cpu_to_le32(0);
> +    }
> +error_hdr:
> +    if (address_space_write(
> +            &address_space_memory, t->gpa,
> +            MEMTXATTRS_UNSPECIFIED, &t->hdr, sizeof(t->hdr)) != MEMTX_OK) {
> +        error_report("TDX: failed to update GetQuote header.");
> +    }
> +    tdx_td_notify(t);
> +
> +    qemu_set_fd_handler(t->ioc->fd, NULL, NULL, NULL);
> +    qio_channel_close(QIO_CHANNEL(t->ioc), &err);

Likely overwriting a previously set 'err'

> +    object_unref(OBJECT(t->ioc));
> +    g_free(t->out_data);
> +    g_free(t);
> +
> +    /* Maintain the number of in-flight requests. */
> +    ms = MACHINE(qdev_get_machine());
> +    tdx = TDX_GUEST(ms->cgs);
> +    qemu_mutex_lock(&tdx->lock);
> +    tdx->quote_generation_num--;
> +    qemu_mutex_unlock(&tdx->lock);
> +}
> +
> +/*
> + * TODO: If QGS doesn't reply for long time, make it an error and interrupt
> + * guest.
> + */
> +static void tdx_handle_get_quote_connected(QIOTask *task, gpointer opaque)
> +{
> +    struct tdx_get_quote_task *t = opaque;
> +    Error *err = NULL;

Same leak problem in this method

> +    char *in_data = NULL;

g_autofree for simpler cleanup

> +    MachineState *ms;
> +    TdxGuest *tdx;
> +
> +    t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_ERROR);
> +    if (qio_task_propagate_error(task, NULL)) {
> +        t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE);
> +        goto error;
> +    }
> +
> +    in_data = g_malloc(le32_to_cpu(t->hdr.in_len));

If 't->hdr.in_len' comes from the guest then it needs
bounds checking, otherwise it's a trivial denial of service
to make QEMU allocate all of host RAM.

> +    if (!in_data) {
> +        goto error;
> +    }
> +
> +    if (address_space_read(&address_space_memory, t->gpa + sizeof(t->hdr),
> +                           MEMTXATTRS_UNSPECIFIED, in_data,
> +                           le32_to_cpu(t->hdr.in_len)) != MEMTX_OK) {
> +        goto error;
> +    }
> +
> +    qio_channel_set_blocking(QIO_CHANNEL(t->ioc), false, NULL);
> +
> +    if (qio_channel_write_all(QIO_CHANNEL(t->ioc), in_data,
> +                              le32_to_cpu(t->hdr.in_len), &err) ||
> +        err) {
> +        t->hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE);
> +        goto error;
> +    }
> +
> +    g_free(in_data);
> +    qemu_set_fd_handler(t->ioc->fd, tdx_get_quote_read, NULL, t);

Don't use qemu_set_fd_handler() with QIOChannel objects.
qio_channel_add_watch() is the API for dealing with event
callbacks.

> +
> +    return;
> +error:
> +    t->hdr.out_len = cpu_to_le32(0);
> +
> +    if (address_space_write(
> +            &address_space_memory, t->gpa,
> +            MEMTXATTRS_UNSPECIFIED, &t->hdr, sizeof(t->hdr)) != MEMTX_OK) {
> +        error_report("TDX: failed to update GetQuote header.\n");
> +    }
> +    tdx_td_notify(t);
> +
> +    qio_channel_close(QIO_CHANNEL(t->ioc), &err);
> +    object_unref(OBJECT(t->ioc));
> +    g_free(t);
> +    g_free(in_data);
> +
> +    /* Maintain the number of in-flight requests. */
> +    ms = MACHINE(qdev_get_machine());
> +    tdx = TDX_GUEST(ms->cgs);
> +    qemu_mutex_lock(&tdx->lock);
> +    tdx->quote_generation_num--;
> +    qemu_mutex_unlock(&tdx->lock);
> +    return;
> +}
> +
> +static void tdx_handle_get_quote(X86CPU *cpu, struct kvm_tdx_vmcall *vmcall)
> +{
> +    hwaddr gpa = vmcall->in_r12;
> +    uint64_t buf_len = vmcall->in_r13;
> +    struct tdx_get_quote_header hdr;
> +    MachineState *ms;
> +    TdxGuest *tdx;
> +    QIOChannelSocket *ioc;
> +    struct tdx_get_quote_task *t;
> +
> +    vmcall->status_code = TDG_VP_VMCALL_INVALID_OPERAND;
> +
> +    /* GPA must be shared. */
> +    if (!(gpa & tdx_shared_bit(cpu))) {
> +        return;
> +    }
> +    gpa &= ~tdx_shared_bit(cpu);
> +
> +    if (!QEMU_IS_ALIGNED(gpa, 4096) || !QEMU_IS_ALIGNED(buf_len, 4096)) {
> +        vmcall->status_code = TDG_VP_VMCALL_ALIGN_ERROR;
> +        return;
> +    }
> +    if (buf_len == 0) {
> +        return;
> +    }
> +
> +    if (address_space_read(&address_space_memory, gpa, MEMTXATTRS_UNSPECIFIED,
> +                           &hdr, sizeof(hdr)) != MEMTX_OK) {
> +        return;
> +    }
> +    if (le64_to_cpu(hdr.structure_version) != TDX_GET_QUOTE_STRUCTURE_VERSION) {
> +        return;
> +    }
> +    /*
> +     * Paranoid: Guest should clear error_code and out_len to avoid information
> +     * leak.  Enforce it.  The initial value of them doesn't matter for qemu to
> +     * process the request.
> +     */
> +    if (le64_to_cpu(hdr.error_code) != TDX_VP_GET_QUOTE_SUCCESS ||
> +        le32_to_cpu(hdr.out_len) != 0) {
> +        return;
> +    }
> +
> +    /* Only safe-guard check to avoid too large buffer size. */
> +    if (buf_len > TDX_GET_QUOTE_MAX_BUF_LEN ||
> +        le32_to_cpu(hdr.in_len) > TDX_GET_QUOTE_MAX_BUF_LEN ||
> +        le32_to_cpu(hdr.in_len) > buf_len) {
> +        return;
> +    }
> +
> +    /* Mark the buffer in-flight. */
> +    hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_IN_FLIGHT);
> +    if (address_space_write(&address_space_memory, gpa, MEMTXATTRS_UNSPECIFIED,
> +                            &hdr, sizeof(hdr)) != MEMTX_OK) {
> +        return;
> +    }
> +
> +    ms = MACHINE(qdev_get_machine());
> +    tdx = TDX_GUEST(ms->cgs);
> +    ioc = qio_channel_socket_new();
> +
> +    t = g_malloc(sizeof(*t));
> +    t->apic_id = tdx->event_notify_apic_id;
> +    t->gpa = gpa;
> +    t->buf_len = buf_len;
> +    t->out_data = g_malloc(t->buf_len);
> +    t->out_len = 0;
> +    t->hdr = hdr;
> +    t->ioc = ioc;
> +
> +    qemu_mutex_lock(&tdx->lock);
> +    if (!tdx->quote_generation ||
> +        /* Prevent too many in-flight get-quote request. */
> +        tdx->quote_generation_num >= TDX_MAX_GET_QUOTE_REQUEST) {
> +        qemu_mutex_unlock(&tdx->lock);
> +        vmcall->status_code = TDG_VP_VMCALL_RETRY;
> +        object_unref(OBJECT(ioc));
> +        g_free(t->out_data);
> +        g_free(t);
> +        return;
> +    }
> +    tdx->quote_generation_num++;
> +    t->event_notify_interrupt = tdx->event_notify_interrupt;
> +    qio_channel_socket_connect_async(
> +        ioc, tdx->quote_generation, tdx_handle_get_quote_connected, t, NULL,
> +        NULL);
> +    qemu_mutex_unlock(&tdx->lock);
> +
> +    vmcall->status_code = TDG_VP_VMCALL_SUCCESS;
> +}
> +
>  static void tdx_handle_setup_event_notify_interrupt(X86CPU *cpu,
>                                                      struct kvm_tdx_vmcall *vmcall)
>  {
> @@ -1005,6 +1432,9 @@ static void tdx_handle_vmcall(X86CPU *cpu, struct kvm_tdx_vmcall *vmcall)
>      }
>  
>      switch (vmcall->subfunction) {
> +    case TDG_VP_VMCALL_GET_QUOTE:
> +        tdx_handle_get_quote(cpu, vmcall);
> +        break;
>      case TDG_VP_VMCALL_SETUP_EVENT_NOTIFY_INTERRUPT:
>          tdx_handle_setup_event_notify_interrupt(cpu, vmcall);
>          break;
> diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
> index 4a8d67cc9fdb..4a989805493e 100644
> --- a/target/i386/kvm/tdx.h
> +++ b/target/i386/kvm/tdx.h
> @@ -5,8 +5,10 @@
>  #include CONFIG_DEVICES /* CONFIG_TDX */
>  #endif
>  
> +#include <linux/kvm.h>
>  #include "exec/confidential-guest-support.h"
>  #include "hw/i386/tdvf.h"
> +#include "io/channel-socket.h"
>  #include "sysemu/kvm.h"
>  
>  #define TYPE_TDX_GUEST "tdx-guest"
> @@ -47,6 +49,10 @@ typedef struct TdxGuest {
>      /* runtime state */
>      int event_notify_interrupt;
>      uint32_t event_notify_apic_id;
> +
> +    /* GetQuote */
> +    int quote_generation_num;
> +    SocketAddress *quote_generation;
>  } TdxGuest;

IMHO all the quote generation logic would benefit from being split
out into completely separate, self-contained files

eg 'tdx-quote-generation.{c,h}'

This should define an object "TdxQuoteGenerator" which holds the
two fields quote_generation_num and quote_generation, and exposes
a high-level API for each command, taking inputs & outputs
and doing serialization to/from the socket.  This API should
verify all command inputs, e.g. the length field, to prevent
guest denial of service.

The tdx_handle_get_quote() method could then call into this API.

This will give us clean separation between interaction with guest
memory, and interaction with the socket.

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|





[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux