Hi Balbir, So I got this running and it seems to work well. I have some comments on the implementation though, see below ... On Mon, 2016-03-21 at 18:17 +1100, Balbir Singh wrote: > Basic infrastructure for queuing a task to a specifici CPU and > the use of that in setting ILE (Little Endian Interrupt Handling) > on power via h_set_mode hypercall > > Signed-off-by: Balbir Singh <bsingharora@xxxxxxxxx> > diff --git a/include/kvm/kvm.h b/include/kvm/kvm.h > index 37155db..731abee 100644 > --- a/include/kvm/kvm.h > +++ b/include/kvm/kvm.h > @@ -15,6 +15,7 @@ > > #define SIGKVMEXIT (SIGRTMIN + 0) > #define SIGKVMPAUSE (SIGRTMIN + 1) > +#define SIGKVMTASK (SIGRTMIN + 2) > > #define KVM_PID_FILE_PATH "/.lkvm/" > #define HOME_DIR getenv("HOME") > diff --git a/kvm-cpu.c b/kvm-cpu.c > index ad4441b..438414f 100644 > --- a/kvm-cpu.c > +++ b/kvm-cpu.c > @@ -83,10 +83,59 @@ void kvm_cpu__reboot(struct kvm *kvm) > } > } > > +static void kvm_cpu__run_task(int sig, siginfo_t * info, void *context) > +{ > + union sigval val; > + struct kvm_cpu_task *task_ptr; > + > + if (!info) { > + pr_warning("signal queued without info\n"); > + return; > + } > + > + val = info->si_value; > + task_ptr = val.sival_ptr; > + if (!task_ptr) { > + pr_warning("Task queued without data\n"); > + return; > + } > + > + if (!task_ptr->task || !task_ptr->data) { > + pr_warning("Failed to get task information\n"); > + return; > + } > + > + task_ptr->task(task_ptr->data); > + free(task_ptr); > +} I don't think it's safe to do the actual task call from signal context. Rather it should set a flag that the main loop detects and then runs the task there. > +int kvm_cpu__queue_task(struct kvm_cpu *cpu, void (*task)(void *data), > + void *data) > +{ > + struct kvm_cpu_task *task_ptr = NULL; > + union sigval val; > + > + task_ptr = malloc(sizeof(struct kvm_cpu_task)); > + if (!task_ptr) > + return -ENOMEM; > + > + task_ptr->task = task; > + task_ptr->data = data; > + val.sival_ptr = task_ptr; > + > + pthread_sigqueue(cpu->thread, SIGKVMTASK, val); > + return 0; > +} I think it would be nicer if this interface dealt with waiting for the response. Rather than the caller having to do it. Possibly in future we'll want to do an async task, but we can refactor the code then to skip doing the wait. > diff --git a/powerpc/include/kvm/kvm-cpu-arch.h b/powerpc/include/kvm/kvm-cpu-arch.h > index 01eafdf..033b702 100644 > --- a/powerpc/include/kvm/kvm-cpu-arch.h > +++ b/powerpc/include/kvm/kvm-cpu-arch.h > @@ -38,6 +38,8 @@ > > #define POWER7_EXT_IRQ 0 > > +#define LPCR_ILE (1 << (63-38)) > + > struct kvm; > > struct kvm_cpu { > diff --git a/powerpc/spapr.h b/powerpc/spapr.h > index 8b294d1..f851f4a 100644 > --- a/powerpc/spapr.h > +++ b/powerpc/spapr.h > @@ -27,7 +27,7 @@ typedef uintptr_t target_phys_addr_t; > #define H_HARDWARE -1 /* Hardware error */ > #define H_FUNCTION -2 /* Function not supported */ > #define H_PARAMETER -4 /* Parameter invalid, out-of-range or conflicting */ > - > +#define H_P2 -55 > #define H_SET_DABR 0x28 > #define H_LOGICAL_CI_LOAD 0x3c > #define H_LOGICAL_CI_STORE 0x40 > @@ -41,7 +41,18 @@ typedef uintptr_t target_phys_addr_t; > #define H_EOI 0x64 > #define H_IPI 0x6c > #define H_XIRR 0x74 > -#define MAX_HCALL_OPCODE H_XIRR > +#define H_SET_MODE 0x31C > +#define MAX_HCALL_OPCODE H_SET_MODE > + > +/* Values for 2nd argument to H_SET_MODE */ > +#define H_SET_MODE_RESOURCE_SET_CIABR 1 > +#define H_SET_MODE_RESOURCE_SET_DAWR 2 > +#define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3 > +#define H_SET_MODE_RESOURCE_LE 4 > + > +/* Flags for H_SET_MODE_RESOURCE_LE */ > +#define H_SET_MODE_ENDIAN_BIG 0 > +#define H_SET_MODE_ENDIAN_LITTLE 1 > > /* > * The hcalls above are standardized in PAPR and implemented by pHyp > diff --git a/powerpc/spapr_hcall.c b/powerpc/spapr_hcall.c > index ff1d63a..682fad5 100644 > --- a/powerpc/spapr_hcall.c > +++ b/powerpc/spapr_hcall.c > @@ -18,6 +18,9 @@ > > #include <stdio.h> > #include <assert.h> > +#include <sys/eventfd.h> > + > +static int task_event; > > static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1]; > static spapr_hcall_fn kvmppc_hypercall_table[KVMPPC_HCALL_MAX - > @@ -74,6 +77,113 @@ static target_ulong h_logical_dcbf(struct kvm_cpu *vcpu, target_ulong opcode, ta > return H_SUCCESS; > } > > +struct lpcr_data { > + struct kvm_cpu *cpu; > + int mode; > +}; > + > +static int get_cpu_lpcr(struct kvm_cpu *vcpu, target_ulong *lpcr) > +{ > + struct kvm_one_reg reg = { > + .id = KVM_REG_PPC_LPCR_64, > + .addr = (__u64)lpcr > + }; > + > + return ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, ®); > +} > + > +static int set_cpu_lpcr(struct kvm_cpu *vcpu, target_ulong *lpcr) This function has a reasonable name .. > +{ > + struct kvm_one_reg reg = { > + .id = KVM_REG_PPC_LPCR_64, > + .addr = (__u64)lpcr > + }; > + > + return ioctl(vcpu->vcpu_fd, KVM_SET_ONE_REG, ®); > +} > + > +static void set_lpcr_cpu(void *data) But then this one is *very* similar. I think this should actually be called set_cpu_ile(), because that's what it does. And maybe have "task" in the name because it's the version for using with kvm_cpu__queue_task(). > +{ > + struct lpcr_data *fn_data = (struct lpcr_data *)data; > + int ret; > + target_ulong lpcr; > + u64 task_done = 1; > + > + if (!fn_data || !fn_data->cpu) > + return; This should be hard errors IMHO. > + ret = get_cpu_lpcr(fn_data->cpu, &lpcr); > + if (ret < 0) > + return; Uh oh! It looks like most code calls die() if KVM_SET_ONE_REG fails, that would be preferable I think than running some cpus with a different endian :) > + if (fn_data->mode == H_SET_MODE_ENDIAN_BIG) > + lpcr &= ~LPCR_ILE; > + else > + lpcr |= LPCR_ILE; > + > + ret = set_cpu_lpcr(fn_data->cpu, &lpcr); > + if (ret < 0) > + return; > + > + free(data); I don't think we should be doing the free here. > + if (write(task_event, &task_done, sizeof(task_done)) < 0) > + pr_warning("Failed to notify of lpcr task done\n"); > +} > + > +#define for_each_vcpu(cpu, kvm, i) \ > + for ((i) = 0, (cpu) = (kvm)->cpus[i]; (i) < (kvm)->nrcpus; (i)++, (cpu) = (kvm)->cpus[i]) That should probably be in a header. > > +static target_ulong h_set_mode(struct kvm_cpu *vcpu, target_ulong opcode, target_ulong *args) > +{ > + int ret = H_SUCCESS; That init should be removed. > + struct kvm *kvm = vcpu->kvm; > + struct kvm_cpu *cpu; > + int i; > + > + switch (args[1]) { > + case H_SET_MODE_RESOURCE_LE: { > + u64 total_done = 0; > + u64 task_read; > + > + task_event = eventfd(0, 0); > + if (task_event < 0) { > + pr_warning("Failed to create task_event"); > + break; That will return H_SUCCESS which is not OK. > + } > + for_each_vcpu(cpu, kvm, i) { > + struct lpcr_data *data; > + > + data = malloc(sizeof(struct lpcr_data)); Is there any reason not to do this synchronously? That would allow you to put data on the stack. And also avoid the while loop below. > + if (!data) { > + ret = H_P2; > + break; > + } > + data->cpu = cpu; > + data->mode = args[0]; > + > + kvm_cpu__queue_task(cpu, set_lpcr_cpu, data); > + } > + > + while ((int)total_done < kvm->nrcpus) { > + int err; > + err = read(task_event, &task_read, sizeof(task_read)); > + if (err < 0) { > + ret = H_P2; > + break; > + } > + total_done += task_read; > + } > + close(task_event); > + break; > + } > + default: > + ret = H_FUNCTION; > + break; > + } > + return (ret < 0) ? H_P2 : H_SUCCESS; > +} I think that ends up being correct, but it's pretty obscure. ie. for an unsupported resource we should return H_P2, and you get that to happen by setting ret to H_FUNCTION (-2). cheers -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html