Re: [PATCH 2/4] Implement H_SET_MODE for ppc64le

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Balbir,

So I got this running and it seems to work well.

I have some comments on the implementation though, see below ...

On Mon, 2016-03-21 at 18:17 +1100, Balbir Singh wrote:

> Basic infrastructure for queuing a task to a specifici CPU and
> the use of that in setting ILE (Little Endian Interrupt Handling)
> on power via h_set_mode hypercall
> 
> Signed-off-by: Balbir Singh <bsingharora@xxxxxxxxx>
> diff --git a/include/kvm/kvm.h b/include/kvm/kvm.h
> index 37155db..731abee 100644
> --- a/include/kvm/kvm.h
> +++ b/include/kvm/kvm.h
> @@ -15,6 +15,7 @@
>  
>  #define SIGKVMEXIT		(SIGRTMIN + 0)
>  #define SIGKVMPAUSE		(SIGRTMIN + 1)
> +#define SIGKVMTASK		(SIGRTMIN + 2)
>  
>  #define KVM_PID_FILE_PATH	"/.lkvm/"
>  #define HOME_DIR		getenv("HOME")
> diff --git a/kvm-cpu.c b/kvm-cpu.c
> index ad4441b..438414f 100644
> --- a/kvm-cpu.c
> +++ b/kvm-cpu.c
> @@ -83,10 +83,59 @@ void kvm_cpu__reboot(struct kvm *kvm)
>  	}
>  }
>  
> +static void kvm_cpu__run_task(int sig, siginfo_t * info, void *context)
> +{
> +	union sigval val;
> +	struct kvm_cpu_task *task_ptr;
> +
> +	if (!info) {
> +		pr_warning("signal queued without info\n");
> +		return;
> +	}
> +
> +	val = info->si_value;
> +	task_ptr = val.sival_ptr;
> +	if (!task_ptr) {
> +		pr_warning("Task queued without data\n");
> +		return;
> +	}
> +
> +	if (!task_ptr->task || !task_ptr->data) {
> +		pr_warning("Failed to get task information\n");
> +		return;
> +	}
> +
> +	task_ptr->task(task_ptr->data);
> +	free(task_ptr);
> +}

I don't think it's safe to do the actual task call from signal context. Rather
it should set a flag that the main loop detects and then runs the task there.

> +int kvm_cpu__queue_task(struct kvm_cpu *cpu, void (*task)(void *data),
> +			void *data)
> +{
> +	struct kvm_cpu_task *task_ptr = NULL;
> +	union sigval val;
> +
> +	task_ptr = malloc(sizeof(struct kvm_cpu_task));
> +	if (!task_ptr)
> +		return -ENOMEM;
> +
> +	task_ptr->task = task;
> +	task_ptr->data = data;
> +	val.sival_ptr = task_ptr;
> +
> +	pthread_sigqueue(cpu->thread, SIGKVMTASK, val);
> +	return 0;
> +}

I think it would be nicer if this interface dealt with waiting for the
response. Rather than the caller having to do it.

Possibly in future we'll want to do an async task, but we can refactor the code
then to skip doing the wait.

> diff --git a/powerpc/include/kvm/kvm-cpu-arch.h b/powerpc/include/kvm/kvm-cpu-arch.h
> index 01eafdf..033b702 100644
> --- a/powerpc/include/kvm/kvm-cpu-arch.h
> +++ b/powerpc/include/kvm/kvm-cpu-arch.h
> @@ -38,6 +38,8 @@
>  
>  #define POWER7_EXT_IRQ	0
>  
> +#define LPCR_ILE (1 << (63-38))
> +
>  struct kvm;
>  
>  struct kvm_cpu {
> diff --git a/powerpc/spapr.h b/powerpc/spapr.h
> index 8b294d1..f851f4a 100644
> --- a/powerpc/spapr.h
> +++ b/powerpc/spapr.h
> @@ -27,7 +27,7 @@ typedef uintptr_t target_phys_addr_t;
>  #define H_HARDWARE	-1	/* Hardware error */
>  #define H_FUNCTION	-2	/* Function not supported */
>  #define H_PARAMETER	-4	/* Parameter invalid, out-of-range or conflicting */
> -
> +#define H_P2		-55
>  #define H_SET_DABR		0x28
>  #define H_LOGICAL_CI_LOAD	0x3c
>  #define H_LOGICAL_CI_STORE	0x40
> @@ -41,7 +41,18 @@ typedef uintptr_t target_phys_addr_t;
>  #define H_EOI			0x64
>  #define H_IPI			0x6c
>  #define H_XIRR			0x74
> -#define MAX_HCALL_OPCODE	H_XIRR
> +#define H_SET_MODE		0x31C
> +#define MAX_HCALL_OPCODE	H_SET_MODE
> +
> +/* Values for 2nd argument to H_SET_MODE */
> +#define H_SET_MODE_RESOURCE_SET_CIABR		1
> +#define H_SET_MODE_RESOURCE_SET_DAWR		2
> +#define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE	3
> +#define H_SET_MODE_RESOURCE_LE			4
> +
> +/* Flags for H_SET_MODE_RESOURCE_LE */
> +#define H_SET_MODE_ENDIAN_BIG		0
> +#define H_SET_MODE_ENDIAN_LITTLE	1
>  
>  /*
>   * The hcalls above are standardized in PAPR and implemented by pHyp
> diff --git a/powerpc/spapr_hcall.c b/powerpc/spapr_hcall.c
> index ff1d63a..682fad5 100644
> --- a/powerpc/spapr_hcall.c
> +++ b/powerpc/spapr_hcall.c
> @@ -18,6 +18,9 @@
>  
>  #include <stdio.h>
>  #include <assert.h>
> +#include <sys/eventfd.h>
> +
> +static int task_event;
>  
>  static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1];
>  static spapr_hcall_fn kvmppc_hypercall_table[KVMPPC_HCALL_MAX -
> @@ -74,6 +77,113 @@ static target_ulong h_logical_dcbf(struct kvm_cpu *vcpu, target_ulong opcode, ta
>  	return H_SUCCESS;
>  }
>  
> +struct lpcr_data {
> +	struct kvm_cpu	*cpu;
> +	int		mode;
> +};
> +
> +static int get_cpu_lpcr(struct kvm_cpu *vcpu, target_ulong *lpcr)
> +{
> +	struct kvm_one_reg reg = {
> +		.id = KVM_REG_PPC_LPCR_64,
> +		.addr = (__u64)lpcr
> +	};
> +
> +	return ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg);
> +}
> +
> +static int set_cpu_lpcr(struct kvm_cpu *vcpu, target_ulong *lpcr)

This function has a reasonable name ..

> +{
> +	struct kvm_one_reg reg = {
> +		.id = KVM_REG_PPC_LPCR_64,
> +		.addr = (__u64)lpcr
> +	};
> +
> +	return ioctl(vcpu->vcpu_fd, KVM_SET_ONE_REG, &reg);
> +}
> +
> +static void set_lpcr_cpu(void *data)

But then this one is *very* similar.

I think this should actually be called set_cpu_ile(), because that's what it
does. And maybe have "task" in the name because it's the version for using with
kvm_cpu__queue_task().

> +{
> +	struct lpcr_data *fn_data = (struct lpcr_data *)data;
> +	int ret;
> +	target_ulong lpcr;
> +	u64 task_done = 1;
> +
> +	if (!fn_data || !fn_data->cpu)
> +		return;

This should be hard errors IMHO.

> +	ret = get_cpu_lpcr(fn_data->cpu, &lpcr);
> +	if (ret < 0)
> +		return;

Uh oh!

It looks like most code calls die() if KVM_SET_ONE_REG fails, that would be
preferable I think than running some cpus with a different endian :)

> +	if (fn_data->mode == H_SET_MODE_ENDIAN_BIG)
> +		lpcr &= ~LPCR_ILE;
> +	else
> +		lpcr |= LPCR_ILE;
> +
> +	ret = set_cpu_lpcr(fn_data->cpu, &lpcr);
> +	if (ret < 0)
> +		return;
> +
> +	free(data);

I don't think we should be doing the free here.

> +	if (write(task_event, &task_done, sizeof(task_done)) < 0)
> +		pr_warning("Failed to notify of lpcr task done\n");
> +}
> +
> +#define for_each_vcpu(cpu, kvm, i) \
> +	for ((i) = 0, (cpu) = (kvm)->cpus[i]; (i) < (kvm)->nrcpus; (i)++, (cpu) = (kvm)->cpus[i])

That should probably be in a header.
>
> +static target_ulong h_set_mode(struct kvm_cpu *vcpu, target_ulong opcode, target_ulong *args)
> +{
> +	int ret = H_SUCCESS;

That init should be removed.

> +	struct kvm *kvm = vcpu->kvm;
> +	struct kvm_cpu *cpu;
> +	int i;
> +
> +	switch (args[1]) {
> +	case H_SET_MODE_RESOURCE_LE: {
> +		u64 total_done = 0;
> +		u64 task_read;
> +
> +		task_event = eventfd(0, 0);
> +		if (task_event < 0) {
> +			pr_warning("Failed to create task_event");
> +			break;

That will return H_SUCCESS which is not OK.

> +		}
> +		for_each_vcpu(cpu, kvm, i) {
> +			struct lpcr_data *data;
> +
> +			data = malloc(sizeof(struct lpcr_data));

Is there any reason not to do this synchronously?

That would allow you to put data on the stack. And also avoid the while loop
below.

> +			if (!data) {
> +				ret = H_P2;
> +				break;
> +			}
> +			data->cpu = cpu;
> +			data->mode = args[0];
> +
> +			kvm_cpu__queue_task(cpu, set_lpcr_cpu, data);
> +		}
> +
> +		while ((int)total_done < kvm->nrcpus) {
> +			int err;
> +			err = read(task_event, &task_read, sizeof(task_read));
> +			if (err < 0) {
> +				ret = H_P2;
> +				break;
> +			}
> +			total_done += task_read;
> +		}
> +		close(task_event);
> +		break;
> +	}
> +	default:
> +		ret = H_FUNCTION;
> +		break;
> +	}
> +	return (ret < 0) ? H_P2 : H_SUCCESS;
> +}

I think that ends up being correct, but it's pretty obscure. ie. for an
unsupported resource we should return H_P2, and you get that to happen by
setting ret to H_FUNCTION (-2).

cheers

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux