On 30/03/16 16:39, Michael Ellerman wrote: > Hi Balbir, > > So I got this running and it seems to work well. > > I have some comments on the implementation though, see below ... > > On Mon, 2016-03-21 at 18:17 +1100, Balbir Singh wrote: > >> Basic infrastructure for queuing a task to a specifici CPU and >> the use of that in setting ILE (Little Endian Interrupt Handling) >> on power via h_set_mode hypercall >> >> Signed-off-by: Balbir Singh <bsingharora@xxxxxxxxx> >> diff --git a/include/kvm/kvm.h b/include/kvm/kvm.h >> index 37155db..731abee 100644 >> --- a/include/kvm/kvm.h >> +++ b/include/kvm/kvm.h >> @@ -15,6 +15,7 @@ >> >> #define SIGKVMEXIT (SIGRTMIN + 0) >> #define SIGKVMPAUSE (SIGRTMIN + 1) >> +#define SIGKVMTASK (SIGRTMIN + 2) >> >> #define KVM_PID_FILE_PATH "/.lkvm/" >> #define HOME_DIR getenv("HOME") >> diff --git a/kvm-cpu.c b/kvm-cpu.c >> index ad4441b..438414f 100644 >> --- a/kvm-cpu.c >> +++ b/kvm-cpu.c >> @@ -83,10 +83,59 @@ void kvm_cpu__reboot(struct kvm *kvm) >> } >> } >> >> +static void kvm_cpu__run_task(int sig, siginfo_t * info, void *context) >> +{ >> + union sigval val; >> + struct kvm_cpu_task *task_ptr; >> + >> + if (!info) { >> + pr_warning("signal queued without info\n"); >> + return; >> + } >> + >> + val = info->si_value; >> + task_ptr = val.sival_ptr; >> + if (!task_ptr) { >> + pr_warning("Task queued without data\n"); >> + return; >> + } >> + >> + if (!task_ptr->task || !task_ptr->data) { >> + pr_warning("Failed to get task information\n"); >> + return; >> + } >> + >> + task_ptr->task(task_ptr->data); >> + free(task_ptr); >> +} > I don't think it's safe to do the actual task call from signal context. Rather > it should set a flag that the main loop detects and then runs the task there. I don't think it matters. We do other stuff like kvm__pause() from signal context. The problem I see with the main loop detection is that we could potentially have both modes queued up >> +int kvm_cpu__queue_task(struct kvm_cpu *cpu, void (*task)(void *data), >> + void *data) >> +{ >> + struct kvm_cpu_task *task_ptr = NULL; >> + union sigval val; >> + >> + task_ptr = malloc(sizeof(struct kvm_cpu_task)); >> + if (!task_ptr) >> + return -ENOMEM; >> + >> + task_ptr->task = task; >> + task_ptr->data = data; >> + val.sival_ptr = task_ptr; >> + >> + pthread_sigqueue(cpu->thread, SIGKVMTASK, val); >> + return 0; >> +} > I think it would be nicer if this interface dealt with waiting for the > response. Rather than the caller having to do it. > > Possibly in future we'll want to do an async task, but we can refactor the code > then to skip doing the wait. I wrote the core code (powerpc independent bits) to be async with an example of our code of how to do sync. >> diff --git a/powerpc/include/kvm/kvm-cpu-arch.h b/powerpc/include/kvm/kvm-cpu-arch.h >> index 01eafdf..033b702 100644 >> --- a/powerpc/include/kvm/kvm-cpu-arch.h >> +++ b/powerpc/include/kvm/kvm-cpu-arch.h >> @@ -38,6 +38,8 @@ >> >> #define POWER7_EXT_IRQ 0 >> >> +#define LPCR_ILE (1 << (63-38)) >> + >> struct kvm; >> >> struct kvm_cpu { >> diff --git a/powerpc/spapr.h b/powerpc/spapr.h >> index 8b294d1..f851f4a 100644 >> --- a/powerpc/spapr.h >> +++ b/powerpc/spapr.h >> @@ -27,7 +27,7 @@ typedef uintptr_t target_phys_addr_t; >> #define H_HARDWARE -1 /* Hardware error */ >> #define H_FUNCTION -2 /* Function not supported */ >> #define H_PARAMETER -4 /* Parameter invalid, out-of-range or conflicting */ >> - >> +#define H_P2 -55 >> #define H_SET_DABR 0x28 >> #define H_LOGICAL_CI_LOAD 0x3c >> #define H_LOGICAL_CI_STORE 0x40 >> @@ -41,7 +41,18 @@ typedef uintptr_t target_phys_addr_t; >> #define H_EOI 0x64 >> #define H_IPI 0x6c >> #define H_XIRR 0x74 >> -#define MAX_HCALL_OPCODE H_XIRR >> +#define H_SET_MODE 0x31C >> +#define MAX_HCALL_OPCODE H_SET_MODE >> + >> +/* Values for 2nd argument to H_SET_MODE */ >> +#define H_SET_MODE_RESOURCE_SET_CIABR 1 >> +#define H_SET_MODE_RESOURCE_SET_DAWR 2 >> +#define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3 >> +#define H_SET_MODE_RESOURCE_LE 4 >> + >> +/* Flags for H_SET_MODE_RESOURCE_LE */ >> +#define H_SET_MODE_ENDIAN_BIG 0 >> +#define H_SET_MODE_ENDIAN_LITTLE 1 >> >> /* >> * The hcalls above are standardized in PAPR and implemented by pHyp >> diff --git a/powerpc/spapr_hcall.c b/powerpc/spapr_hcall.c >> index ff1d63a..682fad5 100644 >> --- a/powerpc/spapr_hcall.c >> +++ b/powerpc/spapr_hcall.c >> @@ -18,6 +18,9 @@ >> >> #include <stdio.h> >> #include <assert.h> >> +#include <sys/eventfd.h> >> + >> +static int task_event; >> >> static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1]; >> static spapr_hcall_fn kvmppc_hypercall_table[KVMPPC_HCALL_MAX - >> @@ -74,6 +77,113 @@ static target_ulong h_logical_dcbf(struct kvm_cpu *vcpu, target_ulong opcode, ta >> return H_SUCCESS; >> } >> >> +struct lpcr_data { >> + struct kvm_cpu *cpu; >> + int mode; >> +}; >> + >> +static int get_cpu_lpcr(struct kvm_cpu *vcpu, target_ulong *lpcr) >> +{ >> + struct kvm_one_reg reg = { >> + .id = KVM_REG_PPC_LPCR_64, >> + .addr = (__u64)lpcr >> + }; >> + >> + return ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, ®); >> +} >> + >> +static int set_cpu_lpcr(struct kvm_cpu *vcpu, target_ulong *lpcr) > This function has a reasonable name .. > >> +{ >> + struct kvm_one_reg reg = { >> + .id = KVM_REG_PPC_LPCR_64, >> + .addr = (__u64)lpcr >> + }; >> + >> + return ioctl(vcpu->vcpu_fd, KVM_SET_ONE_REG, ®); >> +} >> + >> +static void set_lpcr_cpu(void *data) > But then this one is *very* similar. > > I think this should actually be called set_cpu_ile(), because that's what it > does. And maybe have "task" in the name because it's the version for using with > kvm_cpu__queue_task(). I'll change this to set_cpu_ile -- good catch > >> +{ >> + struct lpcr_data *fn_data = (struct lpcr_data *)data; >> + int ret; >> + target_ulong lpcr; >> + u64 task_done = 1; >> + >> + if (!fn_data || !fn_data->cpu) >> + return; > This should be hard errors IMHO. I wanted to avoid hard errors to see if the OS can fail with an OOPS later. My concern is that hard errors always leave the system in a very bad state with no scope to debug. I can change that if required >> + ret = get_cpu_lpcr(fn_data->cpu, &lpcr); >> + if (ret < 0) >> + return; > Uh oh! > > It looks like most code calls die() if KVM_SET_ONE_REG fails, that would be > preferable I think than running some cpus with a different endian :) >> + if (fn_data->mode == H_SET_MODE_ENDIAN_BIG) >> + lpcr &= ~LPCR_ILE; >> + else >> + lpcr |= LPCR_ILE; >> + >> + ret = set_cpu_lpcr(fn_data->cpu, &lpcr); >> + if (ret < 0) >> + return; >> + >> + free(data); > I don't think we should be doing the free here. >From the point the callback gets the data, it owns it. Otherwise we'd have to implement an I am done with this processing >> + if (write(task_event, &task_done, sizeof(task_done)) < 0) >> + pr_warning("Failed to notify of lpcr task done\n"); >> +} >> + >> +#define for_each_vcpu(cpu, kvm, i) \ >> + for ((i) = 0, (cpu) = (kvm)->cpus[i]; (i) < (kvm)->nrcpus; (i)++, (cpu) = (kvm)->cpus[i]) > That should probably be in a header. Yep, I did not see any use outside this function, but I can move it >> +static target_ulong h_set_mode(struct kvm_cpu *vcpu, target_ulong opcode, target_ulong *args) >> +{ >> + int ret = H_SUCCESS; > That init should be removed. OK, I'll revisit the error handling >> + struct kvm *kvm = vcpu->kvm; >> + struct kvm_cpu *cpu; >> + int i; >> + >> + switch (args[1]) { >> + case H_SET_MODE_RESOURCE_LE: { >> + u64 total_done = 0; >> + u64 task_read; >> + >> + task_event = eventfd(0, 0); >> + if (task_event < 0) { >> + pr_warning("Failed to create task_event"); >> + break; > That will return H_SUCCESS which is not OK. Good catch! >> + } >> + for_each_vcpu(cpu, kvm, i) { >> + struct lpcr_data *data; >> + >> + data = malloc(sizeof(struct lpcr_data)); > Is there any reason not to do this synchronously? > > That would allow you to put data on the stack. And also avoid the while loop > below. Synchronous implies a two way communication mechanism between this vCPU and all others to communicate begin and end of a task > >> + if (!data) { >> + ret = H_P2; >> + break; >> + } >> + data->cpu = cpu; >> + data->mode = args[0]; >> + >> + kvm_cpu__queue_task(cpu, set_lpcr_cpu, data); >> + } >> + >> + while ((int)total_done < kvm->nrcpus) { >> + int err; >> + err = read(task_event, &task_read, sizeof(task_read)); >> + if (err < 0) { >> + ret = H_P2; >> + break; >> + } >> + total_done += task_read; >> + } >> + close(task_event); >> + break; >> + } >> + default: >> + ret = H_FUNCTION; >> + break; >> + } >> + return (ret < 0) ? H_P2 : H_SUCCESS; >> +} > I think that ends up being correct, but it's pretty obscure. ie. for an > unsupported resource we should return H_P2, and you get that to happen by > setting ret to H_FUNCTION (-2). Good catch! I'll fix this > > cheers > Thanks for the review Balbir -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html