Hi Juan,

There are some significant fixes in xen-ia64-unstable.hg right now,
especially the xencomm patches. All of the patches are ia64-only.
Since it's unlikely that Keir will pull xen-ia64 again prior to the
3.0.3 release, how would you like to handle getting these fixes into
your tree? I've done it two different ways so far:

1. Clone xen-3.0.3-testing, clone xen-ia64-unstable. Pull
   xen-ia64-unstable into xen-3.0.3-testing, hg merge, then use
   pull-xen-unstable as normal. The problem with this is that your
   changeset id will reflect the extra changesets pulled in from
   ia64-land. (A rough command sketch of this is in the P.S. below.)

   OR

2. Start with the same clones. Create two patches and apply them in
   the spec-file.

Since method #1 is obvious and simple, I documented method #2 below.
Here are the steps I used to generate the patches attached to this
mail:

# 1. Clone upstream repos and create temporary trees
hg clone http://xenbits.xensource.com/ext/xen-ia64-unstable.hg
hg clone http://xenbits.xensource.com/xen-3.0.3-testing.hg
hg clone http://free.linux.hp.com/~agriffis/pull-xen-unstable.hg
hg clone http://hg.et.redhat.com/kernel/linux-2.6-xen-fedora
cp -al xen-3.0.3-testing.hg xen-3.0.3-testing.hg+ia64
cp -al linux-2.6-xen-fedora linux-2.6-xen-fedora+ia64

# 2. Remember tip changesets
ia64_cset=$(hg -R xen-ia64-unstable.hg parents | \
    awk -F'[ :]' '/^changeset:/{print $(NF-1);exit}')
testing_cset=$(hg -R xen-3.0.3-testing.hg parents | \
    awk -F'[ :]' '/^changeset:/{print $(NF-1);exit}')

# 3. Merge ia64 into testing
cd xen-3.0.3-testing.hg+ia64
hg pull ../xen-ia64-unstable.hg
hg merge
hg ci -m "merge xen-ia64-unstable.hg"
cd ..

# 4. Generate hypervisor patch
hg -R xen-3.0.3-testing.hg+ia64 di -r $testing_cset -r tip | \
    filterdiff -p1 -i 'xen/*' --strip=1 > ../xen-ia64-$ia64_cset.patch

# 5. Generate linux patch
cd linux-2.6-xen-fedora+ia64
hg -R ../xen-3.0.3-testing.hg+ia64 di -r $testing_cset -r tip | \
    ../pull-xen-unstable.hg/pull-xen-unstable --filter | \
    patch -p1
hg add
hg remove --after
hg ci -m "update to xen-ia64-unstable-$ia64_cset"
hg export tip > ../linux-2.6-xen-ia64-$ia64_cset.patch

Which method do you prefer?

Thanks,
Aron
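P.S. In case it's useful, here is a rough, untested sketch of the
commands for method #1, assuming the same clone URLs as in step 1
above:

# Method #1 sketch (untested): merge the ia64 changesets directly
# into a 3.0.3-testing clone, then use pull-xen-unstable as normal.
hg clone http://xenbits.xensource.com/xen-3.0.3-testing.hg
hg clone http://xenbits.xensource.com/ext/xen-ia64-unstable.hg
hg clone http://free.linux.hp.com/~agriffis/pull-xen-unstable.hg
cd xen-3.0.3-testing.hg
hg pull ../xen-ia64-unstable.hg
hg merge
hg ci -m "merge xen-ia64-unstable.hg"
cd ..
# Then run pull-xen-unstable against this merged tree as usual.
# Note: the resulting changeset id will include the extra ia64
# changesets, which is the drawback mentioned above.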
# HG changeset patch # User agriffis@xxxxxxxxxxxxxxxxxxxxxxxxx # Date 1160612360 14400 # Node ID 85a15e585061cc195e3eb9888179b3cb18c69d5e # Parent 47c098fdce14af6def37c12074d6271cb5c13b10 update to xen-ia64-unstable-11745 diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/Kconfig --- a/arch/ia64/Kconfig Wed Sep 20 15:35:23 2006 +0200 +++ b/arch/ia64/Kconfig Wed Oct 11 20:19:20 2006 -0400 @@ -71,6 +71,20 @@ config XEN_IA64_VDSO_PARAVIRT default y help vDSO paravirtualization + +config XEN_IA64_EXPOSE_P2M + bool "Xen/IA64 exposure p2m table" + depends on XEN + default y + help + expose p2m from xen + +config XEN_IA64_EXPOSE_P2M_USE_DTR + bool "Xen/IA64 map p2m table with dtr" + depends on XEN_IA64_EXPOSE_P2M + default y + help + use dtr to map the exposed p2m table config SCHED_NO_NO_OMIT_FRAME_POINTER bool diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/kernel/setup.c --- a/arch/ia64/kernel/setup.c Wed Sep 20 15:35:23 2006 +0200 +++ b/arch/ia64/kernel/setup.c Wed Oct 11 20:19:20 2006 -0400 @@ -75,6 +75,8 @@ EXPORT_SYMBOL(__per_cpu_offset); #endif #ifdef CONFIG_XEN +unsigned long kernel_start_pa; + static int xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -490,6 +492,7 @@ setup_arch (char **cmdline_p) #ifdef CONFIG_XEN if (is_running_on_xen()) { + kernel_start_pa = KERNEL_START - ia64_tpa(KERNEL_START); setup_xen_features(); /* Register a call for panic conditions. */ atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/xen/Makefile --- a/arch/ia64/xen/Makefile Wed Sep 20 15:35:23 2006 +0200 +++ b/arch/ia64/xen/Makefile Wed Oct 11 20:19:20 2006 -0400 @@ -3,6 +3,7 @@ # obj-y := hypercall.o xenivt.o xenentry.o xensetup.o xenpal.o xenhpski.o \ - hypervisor.o pci-dma-xen.o util.o + hypervisor.o pci-dma-xen.o util.o xencomm.o xcom_hcall.o \ + xcom_privcmd.o pci-dma-xen-y := ../../i386/kernel/pci-dma-xen.o diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/xen/hypervisor.c --- a/arch/ia64/xen/hypervisor.c Wed Sep 20 15:35:23 2006 +0200 +++ b/arch/ia64/xen/hypervisor.c Wed Oct 11 20:19:20 2006 -0400 @@ -40,59 +40,11 @@ int running_on_xen; int running_on_xen; EXPORT_SYMBOL(running_on_xen); -//XXX xen/ia64 copy_from_guest() is broken. -// This is a temporal work around until it is fixed. -// used by balloon.c netfront.c - -// get_xen_guest_handle is defined only when __XEN_TOOLS__ is defined -// if the definition in arch-ia64.h is changed, this must be updated. 
-#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) - -int -ia64_xenmem_reservation_op(unsigned long op, - struct xen_memory_reservation* reservation__) -{ - struct xen_memory_reservation reservation = *reservation__; - unsigned long* frame_list; - unsigned long nr_extents = reservation__->nr_extents; - int ret = 0; - get_xen_guest_handle(frame_list, reservation__->extent_start); - - BUG_ON(op != XENMEM_increase_reservation && - op != XENMEM_decrease_reservation && - op != XENMEM_populate_physmap); - - while (nr_extents > 0) { - int tmp_ret; - volatile unsigned long dummy; - - set_xen_guest_handle(reservation.extent_start, frame_list); - reservation.nr_extents = nr_extents; - - dummy = frame_list[0];// re-install tlb entry before hypercall - tmp_ret = ____HYPERVISOR_memory_op(op, &reservation); - if (tmp_ret < 0) { - if (ret == 0) { - ret = tmp_ret; - } - break; - } - if (tmp_ret == 0) { - //XXX dirty work around for skbuff_ctor() - // of a non-privileged domain, - if ((op == XENMEM_increase_reservation || - op == XENMEM_populate_physmap) && - !is_initial_xendomain() && - reservation.extent_order > 0) - return ret; - } - frame_list += tmp_ret; - nr_extents -= tmp_ret; - ret += tmp_ret; - } - return ret; -} -EXPORT_SYMBOL(ia64_xenmem_reservation_op); +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M +static int p2m_expose_init(void); +#else +#define p2m_expose_init() (-ENOSYS) +#endif //XXX same as i386, x86_64 contiguous_bitmap_set(), contiguous_bitmap_clear() // move those to lib/contiguous_bitmap? @@ -371,8 +323,6 @@ int int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count) { - __u64 va1, va2, pa1, pa2; - if (cmd == GNTTABOP_map_grant_ref) { unsigned int i; for (i = 0; i < count; i++) { @@ -380,29 +330,7 @@ HYPERVISOR_grant_table_op(unsigned int c (struct gnttab_map_grant_ref*)uop + i); } } - va1 = (__u64)uop & PAGE_MASK; - pa1 = pa2 = 0; - if ((REGION_NUMBER(va1) == 5) && - ((va1 - KERNEL_START) >= KERNEL_TR_PAGE_SIZE)) { - pa1 = ia64_tpa(va1); - if (cmd <= GNTTABOP_transfer) { - static uint32_t uop_size[GNTTABOP_transfer + 1] = { - sizeof(struct gnttab_map_grant_ref), - sizeof(struct gnttab_unmap_grant_ref), - sizeof(struct gnttab_setup_table), - sizeof(struct gnttab_dump_table), - sizeof(struct gnttab_transfer), - }; - va2 = (__u64)uop + (uop_size[cmd] * count) - 1; - va2 &= PAGE_MASK; - if (va1 != va2) { - /* maximum size of uop is 2pages */ - BUG_ON(va2 > va1 + PAGE_SIZE); - pa2 = ia64_tpa(va2); - } - } - } - return ____HYPERVISOR_grant_table_op(cmd, uop, count, pa1, pa2); + return xencomm_mini_hypercall_grant_table_op(cmd, uop, count); } EXPORT_SYMBOL(HYPERVISOR_grant_table_op); @@ -526,6 +454,10 @@ out: privcmd_resource_min, privcmd_resource_max, (privcmd_resource_max - privcmd_resource_min) >> 20); BUG_ON(privcmd_resource_min >= privcmd_resource_max); + + // XXX this should be somewhere appropriate + (void)p2m_expose_init(); + return 0; } late_initcall(xen_ia64_privcmd_init); @@ -831,3 +763,276 @@ time_resume(void) /* Just trigger a tick. 
*/ ia64_cpu_local_tick(); } + +/////////////////////////////////////////////////////////////////////////// +// expose p2m table +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M +#include <linux/cpu.h> +#include <asm/uaccess.h> + +int p2m_initialized __read_mostly = 0; + +unsigned long p2m_min_low_pfn __read_mostly; +unsigned long p2m_max_low_pfn __read_mostly; +unsigned long p2m_convert_min_pfn __read_mostly; +unsigned long p2m_convert_max_pfn __read_mostly; + +static struct resource p2m_resource = { + .name = "Xen p2m table", + .flags = IORESOURCE_MEM, +}; +static unsigned long p2m_assign_start_pfn __read_mostly; +static unsigned long p2m_assign_end_pfn __read_mostly; +volatile const pte_t* p2m_pte __read_mostly; + +#define GRNULE_PFN PTRS_PER_PTE +static unsigned long p2m_granule_pfn __read_mostly = GRNULE_PFN; + +#define ROUNDDOWN(x, y) ((x) & ~((y) - 1)) +#define ROUNDUP(x, y) (((x) + (y) - 1) & ~((y) - 1)) + +#define P2M_PREFIX "Xen p2m: " + +static int xen_ia64_p2m_expose __read_mostly = 1; +module_param(xen_ia64_p2m_expose, int, 0); +MODULE_PARM_DESC(xen_ia64_p2m_expose, + "enable/disable xen/ia64 p2m exposure optimization\n"); + +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR +static int xen_ia64_p2m_expose_use_dtr __read_mostly = 1; +module_param(xen_ia64_p2m_expose_use_dtr, int, 0); +MODULE_PARM_DESC(xen_ia64_p2m_expose_use_dtr, + "use/unuse dtr to map exposed p2m table\n"); + +static const int p2m_page_shifts[] = { + _PAGE_SIZE_4K, + _PAGE_SIZE_8K, + _PAGE_SIZE_16K, + _PAGE_SIZE_64K, + _PAGE_SIZE_256K, + _PAGE_SIZE_1M, + _PAGE_SIZE_4M, + _PAGE_SIZE_16M, + _PAGE_SIZE_64M, + _PAGE_SIZE_256M, +}; + +struct p2m_itr_arg { + unsigned long vaddr; + unsigned long pteval; + unsigned long log_page_size; +}; +static struct p2m_itr_arg p2m_itr_arg __read_mostly; + +// This should be in asm-ia64/kregs.h +#define IA64_TR_P2M_TABLE 3 + +static void +p2m_itr(void* info) +{ + struct p2m_itr_arg* arg = (struct p2m_itr_arg*)info; + ia64_itr(0x2, IA64_TR_P2M_TABLE, + arg->vaddr, arg->pteval, arg->log_page_size); + ia64_srlz_d(); +} + +static int +p2m_expose_dtr_call(struct notifier_block *self, + unsigned long event, void* ptr) +{ + unsigned int cpu = (unsigned int)(long)ptr; + if (event != CPU_ONLINE) + return 0; + if (!(p2m_initialized && xen_ia64_p2m_expose_use_dtr)) + smp_call_function_single(cpu, &p2m_itr, &p2m_itr_arg, 1, 1); + return 0; +} + +static struct notifier_block p2m_expose_dtr_hotplug_notifier = { + .notifier_call = p2m_expose_dtr_call, + .next = NULL, + .priority = 0 +}; +#endif + +static int +p2m_expose_init(void) +{ + unsigned long num_pfn; + unsigned long size = 0; + unsigned long p2m_size = 0; + unsigned long align = ~0UL; + int error = 0; +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR + int i; + unsigned long page_size; + unsigned long log_page_size = 0; +#endif + + if (!xen_ia64_p2m_expose) + return -ENOSYS; + if (p2m_initialized) + return 0; + +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR + error = register_cpu_notifier(&p2m_expose_dtr_hotplug_notifier); + if (error < 0) + return error; +#endif + + lock_cpu_hotplug(); + if (p2m_initialized) + goto out; + +#ifdef CONFIG_DISCONTIGMEM + p2m_min_low_pfn = min_low_pfn; + p2m_max_low_pfn = max_low_pfn; +#else + p2m_min_low_pfn = 0; + p2m_max_low_pfn = max_pfn; +#endif + +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR + if (xen_ia64_p2m_expose_use_dtr) { + unsigned long granule_pfn = 0; + p2m_size = p2m_max_low_pfn - p2m_min_low_pfn; + for (i = 0; + i < sizeof(p2m_page_shifts)/sizeof(p2m_page_shifts[0]); + i++) { + log_page_size = p2m_page_shifts[i]; + page_size 
= 1UL << log_page_size; + if (page_size < p2m_size) + continue; + + granule_pfn = max(page_size >> PAGE_SHIFT, + p2m_granule_pfn); + p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn, + granule_pfn); + p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn, + granule_pfn); + num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn; + size = num_pfn << PAGE_SHIFT; + p2m_size = num_pfn / PTRS_PER_PTE; + p2m_size = ROUNDUP(p2m_size, granule_pfn << PAGE_SHIFT); + if (p2m_size == page_size) + break; + } + if (p2m_size != page_size) { + printk(KERN_ERR "p2m_size != page_size\n"); + error = -EINVAL; + goto out; + } + align = max(privcmd_resource_align, granule_pfn << PAGE_SHIFT); + } else +#endif + { + BUG_ON(p2m_granule_pfn & (p2m_granule_pfn - 1)); + p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn, + p2m_granule_pfn); + p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn, p2m_granule_pfn); + num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn; + size = num_pfn << PAGE_SHIFT; + p2m_size = num_pfn / PTRS_PER_PTE; + p2m_size = ROUNDUP(p2m_size, p2m_granule_pfn << PAGE_SHIFT); + align = max(privcmd_resource_align, + p2m_granule_pfn << PAGE_SHIFT); + } + + // use privcmd region + error = allocate_resource(&iomem_resource, &p2m_resource, p2m_size, + privcmd_resource_min, privcmd_resource_max, + align, NULL, NULL); + if (error) { + printk(KERN_ERR P2M_PREFIX + "can't allocate region for p2m exposure " + "[0x%016lx, 0x%016lx) 0x%016lx\n", + p2m_convert_min_pfn, p2m_convert_max_pfn, p2m_size); + goto out; + } + + p2m_assign_start_pfn = p2m_resource.start >> PAGE_SHIFT; + p2m_assign_end_pfn = p2m_resource.end >> PAGE_SHIFT; + + error = HYPERVISOR_expose_p2m(p2m_convert_min_pfn, + p2m_assign_start_pfn, + size, p2m_granule_pfn); + if (error) { + printk(KERN_ERR P2M_PREFIX "failed expose p2m hypercall %d\n", + error); + printk(KERN_ERR P2M_PREFIX "conv 0x%016lx assign 0x%016lx " + "size 0x%016lx granule 0x%016lx\n", + p2m_convert_min_pfn, p2m_assign_start_pfn, + size, p2m_granule_pfn);; + release_resource(&p2m_resource); + goto out; + } + p2m_pte = (volatile const pte_t*)pfn_to_kaddr(p2m_assign_start_pfn); +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR + if (xen_ia64_p2m_expose_use_dtr) { + p2m_itr_arg.vaddr = (unsigned long)__va(p2m_assign_start_pfn + << PAGE_SHIFT); + p2m_itr_arg.pteval = pte_val(pfn_pte(p2m_assign_start_pfn, + PAGE_KERNEL)); + p2m_itr_arg.log_page_size = log_page_size; + smp_mb(); + smp_call_function(&p2m_itr, &p2m_itr_arg, 1, 1); + p2m_itr(&p2m_itr_arg); + } +#endif + smp_mb(); + p2m_initialized = 1; + printk(P2M_PREFIX "assign p2m table of [0x%016lx, 0x%016lx)\n", + p2m_convert_min_pfn << PAGE_SHIFT, + p2m_convert_max_pfn << PAGE_SHIFT); + printk(P2M_PREFIX "to [0x%016lx, 0x%016lx) (%ld KBytes)\n", + p2m_assign_start_pfn << PAGE_SHIFT, + p2m_assign_end_pfn << PAGE_SHIFT, + p2m_size / 1024); +out: + unlock_cpu_hotplug(); + return error; +} + +#ifdef notyet +void +p2m_expose_cleanup(void) +{ + BUG_ON(!p2m_initialized); +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR + unregister_cpu_notifier(&p2m_expose_dtr_hotplug_notifier); +#endif + release_resource(&p2m_resource); +} +#endif + +//XXX inlinize? 
+unsigned long +p2m_phystomach(unsigned long gpfn) +{ + volatile const pte_t* pte; + unsigned long mfn; + unsigned long pteval; + + if (!p2m_initialized || + gpfn < p2m_min_low_pfn || gpfn > p2m_max_low_pfn + /* || !pfn_valid(gpfn) */) + return INVALID_MFN; + pte = p2m_pte + (gpfn - p2m_convert_min_pfn); + + mfn = INVALID_MFN; + if (likely(__get_user(pteval, (unsigned long __user *)pte) == 0 && + pte_present(__pte(pteval)) && + pte_pfn(__pte(pteval)) != (INVALID_MFN >> PAGE_SHIFT))) + mfn = (pteval & _PFN_MASK) >> PAGE_SHIFT; + + return mfn; +} + +EXPORT_SYMBOL_GPL(p2m_initialized); +EXPORT_SYMBOL_GPL(p2m_min_low_pfn); +EXPORT_SYMBOL_GPL(p2m_max_low_pfn); +EXPORT_SYMBOL_GPL(p2m_convert_min_pfn); +EXPORT_SYMBOL_GPL(p2m_convert_max_pfn); +EXPORT_SYMBOL_GPL(p2m_pte); +EXPORT_SYMBOL_GPL(p2m_phystomach); +#endif diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/xen/util.c --- a/arch/ia64/xen/util.c Wed Sep 20 15:35:23 2006 +0200 +++ b/arch/ia64/xen/util.c Wed Oct 11 20:19:20 2006 -0400 @@ -27,6 +27,8 @@ #include <linux/vmalloc.h> #include <asm/uaccess.h> #include <xen/driver_util.h> +#include <xen/interface/memory.h> +#include <asm/hypercall.h> struct vm_struct *alloc_vm_area(unsigned long size) { diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/xen/xensetup.S --- a/arch/ia64/xen/xensetup.S Wed Sep 20 15:35:23 2006 +0200 +++ b/arch/ia64/xen/xensetup.S Wed Oct 11 20:19:20 2006 -0400 @@ -22,12 +22,11 @@ GLOBAL_ENTRY(early_xen_setup) mov cr.iva=r10 -#if XSI_BASE != 0xf100000000000000UL - /* Backward compatibility. */ -(isBP) mov r2=0x600 + /* Set xsi base. */ +#define FW_HYPERCALL_SET_SHARED_INFO_VA 0x600 +(isBP) mov r2=FW_HYPERCALL_SET_SHARED_INFO_VA (isBP) movl r28=XSI_BASE;; (isBP) break 0x1000;; -#endif br.ret.sptk.many rp ;; @@ -37,18 +36,18 @@ END(early_xen_setup) /* Stub for suspend. Just force the stacked registers to be written in memory. */ -GLOBAL_ENTRY(HYPERVISOR_suspend) +GLOBAL_ENTRY(xencomm_arch_hypercall_suspend) + mov r15=r32 + ;; alloc r20=ar.pfs,0,0,0,0 - mov r14=2 - mov r15=r12 - ;; + mov r2=__HYPERVISOR_sched_op + ;; /* We don't want to deal with RSE. */ flushrs - mov r2=__HYPERVISOR_sched_op - st4 [r12]=r14 + mov r14=2 // SCHEDOP_shutdown ;; break 0x1000 ;; mov ar.pfs=r20 br.ret.sptk.many b0 -END(HYPERVISOR_suspend) +END(xencomm_arch_hypercall_suspend) diff -r 47c098fdce14 -r 85a15e585061 drivers/xen/privcmd/privcmd.c --- a/drivers/xen/privcmd/privcmd.c Wed Sep 20 15:35:23 2006 +0200 +++ b/drivers/xen/privcmd/privcmd.c Wed Oct 11 20:19:20 2006 -0400 @@ -82,18 +82,7 @@ static int privcmd_ioctl(struct inode *i : "r8", "r10", "memory" ); } #elif defined (__ia64__) - __asm__ __volatile__ ( - ";; mov r14=%2; mov r15=%3; " - "mov r16=%4; mov r17=%5; mov r18=%6;" - "mov r2=%1; break 0x1000;; mov %0=r8 ;;" - : "=r" (ret) - : "r" (hypercall.op), - "r" (hypercall.arg[0]), - "r" (hypercall.arg[1]), - "r" (hypercall.arg[2]), - "r" (hypercall.arg[3]), - "r" (hypercall.arg[4]) - : "r14","r15","r16","r17","r18","r2","r8","memory"); + ret = privcmd_hypercall(&hypercall); #endif } break; diff -r 47c098fdce14 -r 85a15e585061 include/asm-ia64/hypercall.h --- a/include/asm-ia64/hypercall.h Wed Sep 20 15:35:23 2006 +0200 +++ b/include/asm-ia64/hypercall.h Wed Oct 11 20:19:20 2006 -0400 @@ -33,11 +33,12 @@ #ifndef __HYPERCALL_H__ #define __HYPERCALL_H__ -#include <linux/string.h> /* memcpy() */ - #ifndef __HYPERVISOR_H__ # error "please don't include this file directly" #endif + +#include <asm/xen/xcom_hcall.h> +struct xencomm_handle; /* * Assembler stubs for hyper-calls. 
@@ -157,157 +158,117 @@ (type)__res; \ }) -static inline int -HYPERVISOR_sched_op_compat( - int cmd, unsigned long arg) -{ - return _hypercall2(int, sched_op_compat, cmd, arg); -} - -static inline int -HYPERVISOR_sched_op( - int cmd, void *arg) + +static inline int +xencomm_arch_hypercall_sched_op(int cmd, struct xencomm_handle *arg) { return _hypercall2(int, sched_op, cmd, arg); } static inline long -HYPERVISOR_set_timer_op( - u64 timeout) -{ - unsigned long timeout_hi = (unsigned long)(timeout>>32); - unsigned long timeout_lo = (unsigned long)timeout; - return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); -} - -static inline int -HYPERVISOR_dom0_op( - dom0_op_t *dom0_op) -{ - dom0_op->interface_version = DOM0_INTERFACE_VERSION; - return _hypercall1(int, dom0_op, dom0_op); -} - -static inline int -HYPERVISOR_multicall( - void *call_list, int nr_calls) -{ - return _hypercall2(int, multicall, call_list, nr_calls); -} - -//XXX xen/ia64 copy_from_guest() is broken. -// This is a temporal work around until it is fixed. -static inline int -____HYPERVISOR_memory_op( - unsigned int cmd, void *arg) -{ - return _hypercall2(int, memory_op, cmd, arg); -} - -#include <xen/interface/memory.h> -#ifdef CONFIG_VMX_GUEST -# define ia64_xenmem_reservation_op(op, xmr) (0) -#else -int ia64_xenmem_reservation_op(unsigned long op, - struct xen_memory_reservation* reservation__); -#endif -static inline int -HYPERVISOR_memory_op( - unsigned int cmd, void *arg) -{ - switch (cmd) { - case XENMEM_increase_reservation: - case XENMEM_decrease_reservation: - case XENMEM_populate_physmap: - return ia64_xenmem_reservation_op(cmd, - (struct xen_memory_reservation*)arg); - default: - return ____HYPERVISOR_memory_op(cmd, arg); - } - /* NOTREACHED */ -} - -static inline int -HYPERVISOR_event_channel_op( - int cmd, void *arg) -{ - int rc = _hypercall2(int, event_channel_op, cmd, arg); - if (unlikely(rc == -ENOSYS)) { - struct evtchn_op op; - op.cmd = cmd; - memcpy(&op.u, arg, sizeof(op.u)); - rc = _hypercall1(int, event_channel_op_compat, &op); - } - return rc; -} - -static inline int -HYPERVISOR_acm_op( - unsigned int cmd, void *arg) -{ - return _hypercall2(int, acm_op, cmd, arg); -} - -static inline int -HYPERVISOR_xen_version( - int cmd, void *arg) -{ - return _hypercall2(int, xen_version, cmd, arg); -} - -static inline int -HYPERVISOR_console_io( - int cmd, int count, char *str) -{ - return _hypercall3(int, console_io, cmd, count, str); -} - -static inline int -HYPERVISOR_physdev_op( - int cmd, void *arg) -{ - int rc = _hypercall2(int, physdev_op, cmd, arg); - if (unlikely(rc == -ENOSYS)) { - struct physdev_op op; - op.cmd = cmd; - memcpy(&op.u, arg, sizeof(op.u)); - rc = _hypercall1(int, physdev_op_compat, &op); - } - return rc; -} - -//XXX __HYPERVISOR_grant_table_op is used for this hypercall constant. 
-static inline int -____HYPERVISOR_grant_table_op( - unsigned int cmd, void *uop, unsigned int count, - unsigned long pa1, unsigned long pa2) -{ - return _hypercall5(int, grant_table_op, cmd, uop, count, pa1, pa2); +HYPERVISOR_set_timer_op(u64 timeout) +{ + unsigned long timeout_hi = (unsigned long)(timeout >> 32); + unsigned long timeout_lo = (unsigned long)timeout; + return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); +} + +static inline int +xencomm_arch_hypercall_dom0_op(struct xencomm_handle *op) +{ + return _hypercall1(int, dom0_op, op); +} + +static inline int +xencomm_arch_hypercall_sysctl(struct xencomm_handle *op) +{ + return _hypercall1(int, sysctl, op); +} + +static inline int +xencomm_arch_hypercall_domctl(struct xencomm_handle *op) +{ + return _hypercall1(int, domctl, op); +} + +static inline int +xencomm_arch_hypercall_multicall(struct xencomm_handle *call_list, + int nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); +} + +static inline int +xencomm_arch_hypercall_memory_op(unsigned int cmd, struct xencomm_handle *arg) +{ + return _hypercall2(int, memory_op, cmd, arg); +} + +static inline int +xencomm_arch_hypercall_event_channel_op(int cmd, struct xencomm_handle *arg) +{ + return _hypercall2(int, event_channel_op, cmd, arg); +} + +static inline int +xencomm_arch_hypercall_acm_op(unsigned int cmd, struct xencomm_handle *arg) +{ + return _hypercall2(int, acm_op, cmd, arg); +} + +static inline int +xencomm_arch_hypercall_xen_version(int cmd, struct xencomm_handle *arg) +{ + return _hypercall2(int, xen_version, cmd, arg); +} + +static inline int +xencomm_arch_hypercall_console_io(int cmd, int count, + struct xencomm_handle *str) +{ + return _hypercall3(int, console_io, cmd, count, str); +} + +static inline int +xencomm_arch_hypercall_physdev_op(int cmd, struct xencomm_handle *arg) +{ + return _hypercall2(int, physdev_op, cmd, arg); +} + +static inline int +xencomm_arch_hypercall_grant_table_op(unsigned int cmd, + struct xencomm_handle *uop, + unsigned int count) +{ + return _hypercall3(int, grant_table_op, cmd, uop, count); } int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count); -static inline int -HYPERVISOR_vcpu_op( - int cmd, int vcpuid, void *extra_args) -{ - return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); -} - -extern int HYPERVISOR_suspend(unsigned long srec); - -static inline unsigned long -HYPERVISOR_hvm_op( - int cmd, void *arg) +extern int xencomm_arch_hypercall_suspend(struct xencomm_handle *arg); + +static inline int +xencomm_arch_hypercall_callback_op(int cmd, struct xencomm_handle *arg) +{ + return _hypercall2(int, callback_op, cmd, arg); +} + +static inline unsigned long +xencomm_arch_hypercall_hvm_op(int cmd, void *arg) { return _hypercall2(unsigned long, hvm_op, cmd, arg); } static inline int -HYPERVISOR_callback_op( - int cmd, void *arg) -{ - return _hypercall2(int, callback_op, cmd, arg); +HYPERVISOR_physdev_op(int cmd, void *arg) +{ + switch (cmd) { + case PHYSDEVOP_eoi: + return _hypercall1(int, ia64_fast_eoi, + ((struct physdev_eoi *)arg)->irq); + default: + return xencomm_hypercall_physdev_op(cmd, arg); + } } extern fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs); @@ -417,7 +378,46 @@ HYPERVISOR_add_physmap(unsigned long gpf return ret; } +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M +static inline unsigned long +HYPERVISOR_expose_p2m(unsigned long conv_start_gpfn, + unsigned long assign_start_gpfn, + unsigned long expose_size, unsigned long granule_pfn) +{ + return 
_hypercall5(unsigned long, ia64_dom0vp_op, + IA64_DOM0VP_expose_p2m, conv_start_gpfn, + assign_start_gpfn, expose_size, granule_pfn); +} +#endif + // for balloon driver #define HYPERVISOR_update_va_mapping(va, new_val, flags) (0) +/* Use xencomm to do hypercalls. */ +#ifdef MODULE +#define HYPERVISOR_sched_op xencomm_mini_hypercall_sched_op +#define HYPERVISOR_event_channel_op xencomm_mini_hypercall_event_channel_op +#define HYPERVISOR_callback_op xencomm_mini_hypercall_callback_op +#define HYPERVISOR_multicall xencomm_mini_hypercall_multicall +#define HYPERVISOR_xen_version xencomm_mini_hypercall_xen_version +#define HYPERVISOR_console_io xencomm_mini_hypercall_console_io +#define HYPERVISOR_hvm_op xencomm_mini_hypercall_hvm_op +#ifdef CONFIG_VMX_GUEST +#define HYPERVISOR_memory_op 0 +#else +#define HYPERVISOR_memory_op xencomm_mini_hypercall_memory_op +#endif +#else +#define HYPERVISOR_sched_op xencomm_hypercall_sched_op +#define HYPERVISOR_event_channel_op xencomm_hypercall_event_channel_op +#define HYPERVISOR_callback_op xencomm_hypercall_callback_op +#define HYPERVISOR_multicall xencomm_hypercall_multicall +#define HYPERVISOR_xen_version xencomm_hypercall_xen_version +#define HYPERVISOR_console_io xencomm_hypercall_console_io +#define HYPERVISOR_hvm_op xencomm_hypercall_hvm_op +#define HYPERVISOR_memory_op xencomm_hypercall_memory_op +#endif + +#define HYPERVISOR_suspend xencomm_hypercall_suspend + #endif /* __HYPERCALL_H__ */ diff -r 47c098fdce14 -r 85a15e585061 include/asm-ia64/hypervisor.h --- a/include/asm-ia64/hypervisor.h Wed Sep 20 15:35:23 2006 +0200 +++ b/include/asm-ia64/hypervisor.h Wed Oct 11 20:19:20 2006 -0400 @@ -74,9 +74,6 @@ HYPERVISOR_yield( { int rc = HYPERVISOR_sched_op(SCHEDOP_yield, NULL); - if (rc == -ENOSYS) - rc = HYPERVISOR_sched_op_compat(SCHEDOP_yield, 0); - return rc; } @@ -85,9 +82,6 @@ HYPERVISOR_block( void) { int rc = HYPERVISOR_sched_op(SCHEDOP_block, NULL); - - if (rc == -ENOSYS) - rc = HYPERVISOR_sched_op_compat(SCHEDOP_block, 0); return rc; } @@ -101,9 +95,6 @@ HYPERVISOR_shutdown( }; int rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown); - - if (rc == -ENOSYS) - rc = HYPERVISOR_sched_op_compat(SCHEDOP_shutdown, reason); return rc; } @@ -121,8 +112,6 @@ HYPERVISOR_poll( set_xen_guest_handle(sched_poll.ports, ports); rc = HYPERVISOR_sched_op(SCHEDOP_poll, &sched_poll); - if (rc == -ENOSYS) - rc = HYPERVISOR_sched_op_compat(SCHEDOP_yield, 0); return rc; } diff -r 47c098fdce14 -r 85a15e585061 include/asm-ia64/maddr.h --- a/include/asm-ia64/maddr.h Wed Sep 20 15:35:23 2006 +0200 +++ b/include/asm-ia64/maddr.h Wed Oct 11 20:19:20 2006 -0400 @@ -10,11 +10,26 @@ #define INVALID_P2M_ENTRY (~0UL) +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M +extern int p2m_initialized; +extern unsigned long p2m_min_low_pfn; +extern unsigned long p2m_max_low_pfn; +extern unsigned long p2m_convert_min_pfn; +extern unsigned long p2m_convert_max_pfn; +extern volatile const pte_t* p2m_pte; +unsigned long p2m_phystomach(unsigned long gpfn); +#else +#define p2m_initialized (0) +#define p2m_phystomach(gpfn) INVALID_MFN +#endif + /* XXX xen page size != page size */ static inline unsigned long pfn_to_mfn_for_dma(unsigned long pfn) { unsigned long mfn; + if (p2m_initialized) + return p2m_phystomach(pfn); mfn = HYPERVISOR_phystomach(pfn); BUG_ON(mfn == 0); // XXX BUG_ON(mfn == INVALID_P2M_ENTRY); // XXX diff -r 47c098fdce14 -r 85a15e585061 include/asm-ia64/xen/privop.h --- a/include/asm-ia64/xen/privop.h Wed Sep 20 15:35:23 2006 +0200 +++ b/include/asm-ia64/xen/privop.h Wed Oct 11 
20:19:20 2006 -0400 @@ -14,12 +14,9 @@ #define IA64_PARAVIRTUALIZED -#if 0 -#undef XSI_BASE /* At 1 MB, before per-cpu space but still addressable using addl instead of movl. */ #define XSI_BASE 0xfffffffffff00000 -#endif /* Address of mapped regs. */ #define XMAPPEDREGS_BASE (XSI_BASE + XSI_SIZE) diff -r 47c098fdce14 -r 85a15e585061 include/xen/interface/arch-ia64.h --- a/include/xen/interface/arch-ia64.h Wed Sep 20 15:35:23 2006 +0200 +++ b/include/xen/interface/arch-ia64.h Wed Oct 11 20:19:20 2006 -0400 @@ -47,18 +47,6 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); #ifndef __ASSEMBLY__ typedef unsigned long xen_ulong_t; - -#define GPFN_MEM (0UL << 56) /* Guest pfn is normal mem */ -#define GPFN_FRAME_BUFFER (1UL << 56) /* VGA framebuffer */ -#define GPFN_LOW_MMIO (2UL << 56) /* Low MMIO range */ -#define GPFN_PIB (3UL << 56) /* PIB base */ -#define GPFN_IOSAPIC (4UL << 56) /* IOSAPIC base */ -#define GPFN_LEGACY_IO (5UL << 56) /* Legacy I/O base */ -#define GPFN_GFW (6UL << 56) /* Guest Firmware */ -#define GPFN_HIGH_MMIO (7UL << 56) /* High MMIO range */ - -#define GPFN_IO_MASK (7UL << 56) /* Guest pfn is I/O type */ -#define GPFN_INV_MASK (31UL << 59) /* Guest pfn is invalid */ #define INVALID_MFN (~0UL) @@ -336,33 +324,33 @@ typedef struct vcpu_guest_context vcpu_g typedef struct vcpu_guest_context vcpu_guest_context_t; DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); -// dom0 vp op +/* dom0 vp op */ #define __HYPERVISOR_ia64_dom0vp_op __HYPERVISOR_arch_0 -#define IA64_DOM0VP_ioremap 0 // map io space in machine - // address to dom0 physical - // address space. - // currently physical - // assignedg address equals to - // machine address -#define IA64_DOM0VP_phystomach 1 // convert a pseudo physical - // page frame number - // to the corresponding - // machine page frame number. - // if no page is assigned, - // INVALID_MFN or GPFN_INV_MASK - // is returned depending on - // domain's non-vti/vti mode. -#define IA64_DOM0VP_machtophys 3 // convert a machine page - // frame number - // to the corresponding - // pseudo physical page frame - // number of the caller domain -#define IA64_DOM0VP_zap_physmap 17 // unmap and free pages - // contained in the specified - // pseudo physical region -#define IA64_DOM0VP_add_physmap 18 // assigne machine page frane - // to dom0's pseudo physical - // address space. +/* Map io space in machine address to dom0 physical address space. + Currently physical assigned address equals to machine address. */ +#define IA64_DOM0VP_ioremap 0 + +/* Convert a pseudo physical page frame number to the corresponding + machine page frame number. If no page is assigned, INVALID_MFN or + GPFN_INV_MASK is returned depending on domain's non-vti/vti mode. */ +#define IA64_DOM0VP_phystomach 1 + +/* Convert a machine page frame number to the corresponding pseudo physical + page frame number of the caller domain. */ +#define IA64_DOM0VP_machtophys 3 + +/* Reserved for future use. */ +#define IA64_DOM0VP_iounmap 4 + +/* Unmap and free pages contained in the specified pseudo physical region. */ +#define IA64_DOM0VP_zap_physmap 5 + +/* Assign machine page frame to dom0's pseudo physical address space. */ +#define IA64_DOM0VP_add_physmap 6 + +/* expose the p2m table into domain */ +#define IA64_DOM0VP_expose_p2m 7 + // flags for page assignement to pseudo physical address space #define _ASSIGN_readonly 0 #define ASSIGN_readonly (1UL << _ASSIGN_readonly) @@ -395,15 +383,12 @@ struct xen_ia64_boot_param { #endif /* !__ASSEMBLY__ */ -/* Address of shared_info in domain virtual space. 
- This is the default address, for compatibility only. */ -#define XSI_BASE 0xf100000000000000 - /* Size of the shared_info area (this is not related to page size). */ #define XSI_SHIFT 14 #define XSI_SIZE (1 << XSI_SHIFT) /* Log size of mapped_regs area (64 KB - only 4KB is used). */ #define XMAPPEDREGS_SHIFT 12 +#define XMAPPEDREGS_SIZE (1 << XMAPPEDREGS_SHIFT) /* Offset of XASI (Xen arch shared info) wrt XSI_BASE. */ #define XMAPPEDREGS_OFS XSI_SIZE @@ -435,6 +420,17 @@ struct xen_ia64_boot_param { #define HYPERPRIVOP_GET_PSR 0x19 #define HYPERPRIVOP_MAX 0x19 +/* Fast and light hypercalls. */ +#define __HYPERVISOR_ia64_fast_eoi 0x0200 + +/* Xencomm macros. */ +#define XENCOMM_INLINE_MASK 0xf800000000000000UL +#define XENCOMM_INLINE_FLAG 0x8000000000000000UL + +#define XENCOMM_IS_INLINE(addr) \ + (((unsigned long)(addr) & XENCOMM_INLINE_MASK) == XENCOMM_INLINE_FLAG) +#define XENCOMM_INLINE_ADDR(addr) \ + ((unsigned long)(addr) & ~XENCOMM_INLINE_MASK) #endif /* __HYPERVISOR_IF_IA64_H__ */ /* diff -r 47c098fdce14 -r 85a15e585061 lib/Makefile --- a/lib/Makefile Wed Sep 20 15:35:23 2006 +0200 +++ b/lib/Makefile Wed Oct 11 20:19:20 2006 -0400 @@ -52,9 +52,7 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_SWIOTLB) += swiotlb.o -ifneq ($(CONFIG_XEN_IA64_DOM0_NON_VP),y) swiotlb-$(CONFIG_XEN) := ../arch/i386/kernel/swiotlb.o -endif hostprogs-y := gen_crc32table clean-files := crc32table.h diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/Kconfig.orig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/arch/ia64/Kconfig.orig Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,590 @@ +# +# For a description of the syntax of this configuration file, +# see Documentation/kbuild/kconfig-language.txt. +# + +mainmenu "IA-64 Linux Kernel Configuration" + +source "init/Kconfig" + +menu "Processor type and features" + +config IA64 + bool + default y + help + The Itanium Processor Family is Intel's 64-bit successor to + the 32-bit X86 line. The IA-64 Linux project has a home + page at <http://www.linuxia64.org/> and a mailing list at + <linux-ia64@xxxxxxxxxxxxxxx>. + +config 64BIT + bool + default y + +config MMU + bool + default y + +config SWIOTLB + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + default y + +config GENERIC_FIND_NEXT_BIT + bool + default y + +config GENERIC_CALIBRATE_DELAY + bool + default y + +config TIME_INTERPOLATION + bool + default y + +config DMI + bool + default y + +config EFI + bool + default y + +config GENERIC_IOMAP + bool + default y + +config XEN + bool "Xen hypervisor support" + default y + help + Enable Xen hypervisor support. Resulting kernel runs + both as a guest OS on Xen and natively on hardware. + +config XEN_IA64_VDSO_PARAVIRT + bool + depends on XEN && !ITANIUM + default y + help + vDSO paravirtualization + +config SCHED_NO_NO_OMIT_FRAME_POINTER + bool + default y + +config IA64_UNCACHED_ALLOCATOR + bool + select GENERIC_ALLOCATOR + +config DMA_IS_DMA32 + bool + default y + +config DMA_IS_NORMAL + bool + depends on IA64_SGI_SN2 + default y + +config AUDIT_ARCH + bool + default y + +choice + prompt "System type" + default IA64_GENERIC + +config IA64_GENERIC + bool "generic" + select ACPI + select PCI + select NUMA + select ACPI_NUMA + help + This selects the system type of your hardware. A "generic" kernel + will run on any supported IA-64 system. However, if you configure + a kernel for your specific system, it will be faster and smaller. 
+ + generic For any supported IA-64 system + DIG-compliant For DIG ("Developer's Interface Guide") compliant systems + HP-zx1/sx1000 For HP systems + HP-zx1/sx1000+swiotlb For HP systems with (broken) DMA-constrained devices. + SGI-SN2 For SGI Altix systems + Ski-simulator For the HP simulator <http://www.hpl.hp.com/research/linux/ski/> + + If you don't know what to do, choose "generic". + +config IA64_DIG + bool "DIG-compliant" + +config IA64_HP_ZX1 + bool "HP-zx1/sx1000" + help + Build a kernel that runs on HP zx1 and sx1000 systems. This adds + support for the HP I/O MMU. + +config IA64_HP_ZX1_SWIOTLB + bool "HP-zx1/sx1000 with software I/O TLB" + help + Build a kernel that runs on HP zx1 and sx1000 systems even when they + have broken PCI devices which cannot DMA to full 32 bits. Apart + from support for the HP I/O MMU, this includes support for the software + I/O TLB, which allows supporting the broken devices at the expense of + wasting some kernel memory (about 2MB by default). + +config IA64_SGI_SN2 + bool "SGI-SN2" + help + Selecting this option will optimize the kernel for use on sn2 based + systems, but the resulting kernel binary will not run on other + types of ia64 systems. If you have an SGI Altix system, it's safe + to select this option. If in doubt, select ia64 generic support + instead. + +config IA64_HP_SIM + bool "Ski-simulator" + +endchoice + +choice + prompt "Processor type" + default ITANIUM + +config ITANIUM + bool "Itanium" + help + Select your IA-64 processor type. The default is Itanium. + This choice is safe for all IA-64 systems, but may not perform + optimally on systems with, say, Itanium 2 or newer processors. + +config MCKINLEY + bool "Itanium 2" + help + Select this to configure for an Itanium 2 (McKinley) processor. + +endchoice + +choice + prompt "Kernel page size" + default IA64_PAGE_SIZE_16KB + +config IA64_PAGE_SIZE_4KB + bool "4KB" + help + This lets you select the page size of the kernel. For best IA-64 + performance, a page size of 8KB or 16KB is recommended. For best + IA-32 compatibility, a page size of 4KB should be selected (the vast + majority of IA-32 binaries work perfectly fine with a larger page + size). For Itanium 2 or newer systems, a page size of 64KB can also + be selected. + + 4KB For best IA-32 compatibility + 8KB For best IA-64 performance + 16KB For best IA-64 performance + 64KB Requires Itanium 2 or newer processor. + + If you don't know what to do, choose 16KB. + +config IA64_PAGE_SIZE_8KB + bool "8KB" + +config IA64_PAGE_SIZE_16KB + bool "16KB" + +config IA64_PAGE_SIZE_64KB + depends on !ITANIUM + bool "64KB" + +endchoice + +choice + prompt "Page Table Levels" + default PGTABLE_3 + +config PGTABLE_3 + bool "3 Levels" + +config PGTABLE_4 + depends on !IA64_PAGE_SIZE_64KB + bool "4 Levels" + +endchoice + +source kernel/Kconfig.hz + +config IA64_BRL_EMU + bool + depends on ITANIUM + default y + +# align cache-sensitive data to 128 bytes +config IA64_L1_CACHE_SHIFT + int + default "7" if MCKINLEY + default "6" if ITANIUM + +config IA64_CYCLONE + bool "Cyclone (EXA) Time Source support" + help + Say Y here to enable support for IBM EXA Cyclone time source. + If you're unsure, answer N. 
+ +config IOSAPIC + bool + depends on !IA64_HP_SIM + default y + +config IA64_SGI_SN_XP + tristate "Support communication between SGI SSIs" + depends on IA64_GENERIC || IA64_SGI_SN2 + select IA64_UNCACHED_ALLOCATOR + help + An SGI machine can be divided into multiple Single System + Images which act independently of each other and have + hardware based memory protection from the others. Enabling + this feature will allow for direct communication between SSIs + based on a network adapter and DMA messaging. + +config FORCE_MAX_ZONEORDER + int "MAX_ORDER (11 - 17)" if !HUGETLB_PAGE + range 11 17 if !HUGETLB_PAGE + default "17" if HUGETLB_PAGE + default "11" + +config SMP + bool "Symmetric multi-processing support" + help + This enables support for systems with more than one CPU. If you have + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. + + If you say N here, the kernel will run on single and multiprocessor + systems, but will use only one CPU of a multiprocessor system. If + you say Y here, the kernel will run on many, but not all, + single processor systems. On a single processor system, the kernel + will run faster if you say N here. + + See also the <file:Documentation/smp.txt> and the SMP-HOWTO + available at <http://www.tldp.org/docs.html#howto>. + + If you don't know what to do here, say N. + +config NR_CPUS + int "Maximum number of CPUs (2-1024)" + range 2 1024 + depends on SMP + default "1024" + help + You should set this to the number of CPUs in your system, but + keep in mind that a kernel compiled for, e.g., 2 CPUs will boot but + only use 2 CPUs on a >2 CPU system. Setting this to a value larger + than 64 will cause the use of a CPU mask array, causing a small + performance hit. + +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" + depends on SMP && EXPERIMENTAL + select HOTPLUG + default n + ---help--- + Say Y here to experiment with turning CPUs off and on. CPUs + can be controlled through /sys/devices/system/cpu/cpu#. + Say N if you want to disable CPU hotplug. + +config ARCH_ENABLE_MEMORY_HOTPLUG + def_bool y + +config SCHED_SMT + bool "SMT scheduler support" + depends on SMP + help + Improves the CPU scheduler's decision making when dealing with + Intel IA64 chips with MultiThreading at a cost of slightly increased + overhead in some places. If unsure say N here. + +config PERMIT_BSP_REMOVE + bool "Support removal of Bootstrap Processor" + depends on HOTPLUG_CPU + default n + ---help--- + Say Y here if your platform SAL will support removal of BSP with HOTPLUG_CPU + support. + +config FORCE_CPEI_RETARGET + bool "Force assumption that CPEI can be re-targetted" + depends on PERMIT_BSP_REMOVE + default n + ---help--- + Say Y if you need to force the assumption that CPEI can be re-targetted to + any cpu in the system. This hint is available via ACPI 3.0 specifications. + Tiger4 systems are capable of re-directing CPEI to any CPU other than BSP. + This option it useful to enable this feature on older BIOS's as well. + You can also enable this by using boot command line option force_cpei=1. + +config PREEMPT + bool "Preemptible Kernel" + help + This option reduces the latency of the kernel when reacting to + real-time or interactive events by allowing a low priority process to + be preempted even if it is in kernel mode executing a system call. + This allows applications to run more reliably even when the system is + under load. 
+ + Say Y here if you are building a kernel for a desktop, embedded + or real-time system. Say N if you are unsure. + +source "mm/Kconfig" + +config ARCH_SELECT_MEMORY_MODEL + def_bool y + +config ARCH_DISCONTIGMEM_ENABLE + def_bool y + help + Say Y to support efficient handling of discontiguous physical memory, + for architectures which are either NUMA (Non-Uniform Memory Access) + or have huge holes in the physical address space for other reasons. + See <file:Documentation/vm/numa> for more. + +config ARCH_FLATMEM_ENABLE + def_bool y + +config ARCH_SPARSEMEM_ENABLE + def_bool y + depends on ARCH_DISCONTIGMEM_ENABLE + +config ARCH_DISCONTIGMEM_DEFAULT + def_bool y if (IA64_SGI_SN2 || IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB) + depends on ARCH_DISCONTIGMEM_ENABLE + +config NUMA + bool "NUMA support" + depends on !IA64_HP_SIM && !FLATMEM + default y if IA64_SGI_SN2 + help + Say Y to compile the kernel to support NUMA (Non-Uniform Memory + Access). This option is for configuring high-end multiprocessor + server systems. If in doubt, say N. + +config NODES_SHIFT + int "Max num nodes shift(3-10)" + range 3 10 + default "10" + depends on NEED_MULTIPLE_NODES + help + This option specifies the maximum number of nodes in your SSI system. + MAX_NUMNODES will be 2^(This value). + If in doubt, use the default. + +# VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent. +# VIRTUAL_MEM_MAP has been retained for historical reasons. +config VIRTUAL_MEM_MAP + bool "Virtual mem map" + depends on !SPARSEMEM + default y if !IA64_HP_SIM + help + Say Y to compile the kernel with support for a virtual mem map. + This code also only takes effect if a memory hole of greater than + 1 Gb is found during boot. You must turn this option on if you + require the DISCONTIGMEM option for your machine. If you are + unsure, say Y. + +config HOLES_IN_ZONE + bool + default y if VIRTUAL_MEM_MAP + +config HAVE_ARCH_EARLY_PFN_TO_NID + def_bool y + depends on NEED_MULTIPLE_NODES + +config HAVE_ARCH_NODEDATA_EXTENSION + def_bool y + depends on NUMA + +config IA32_SUPPORT + bool "Support for Linux/x86 binaries" + help + IA-64 processors can execute IA-32 (X86) instructions. By + saying Y here, the kernel will include IA-32 system call + emulation support which makes it possible to transparently + run IA-32 Linux binaries on an IA-64 Linux system. + If in doubt, say Y. + +config COMPAT + bool + depends on IA32_SUPPORT + default y + +config IA64_MCA_RECOVERY + tristate "MCA recovery from errors other than TLB." + +config PERFMON + bool "Performance monitor support" + help + Selects whether support for the IA-64 performance monitor hardware + is included in the kernel. This makes some kernel data-structures a + little bigger and slows down execution a bit, but it is generally + a good idea to turn this on. If you're unsure, say Y. + +config IA64_PALINFO + tristate "/proc/pal support" + help + If you say Y here, you are able to get PAL (Processor Abstraction + Layer) information in /proc/pal. This contains useful information + about the processors in your systems, such as cache and TLB sizes + and the PAL firmware version in use. + + To use this option, you have to ensure that the "/proc file system + support" (CONFIG_PROC_FS) is enabled, too. 
+ +config SGI_SN + def_bool y if (IA64_SGI_SN2 || IA64_GENERIC) + +source "drivers/sn/Kconfig" + +source "drivers/firmware/Kconfig" + +source "fs/Kconfig.binfmt" + +endmenu + +menu "Power management and ACPI" + +source "kernel/power/Kconfig" + +source "drivers/acpi/Kconfig" + +if PM + +source "arch/ia64/kernel/cpufreq/Kconfig" + +endif + +endmenu + +if !IA64_HP_SIM + +menu "Bus options (PCI, PCMCIA)" + +config PCI + bool "PCI support" + help + Real IA-64 machines all have PCI/PCI-X/PCI Express busses. Say Y + here unless you are using a simulator without PCI support. + +config PCI_DOMAINS + bool + default PCI + +config XEN_PCIDEV_FRONTEND + bool "Xen PCI Frontend" + depends on PCI && XEN + default y + help + The PCI device frontend driver allows the kernel to import arbitrary + PCI devices from a PCI backend to support PCI driver domains. + +config XEN_PCIDEV_FE_DEBUG + bool "Xen PCI Frontend Debugging" + depends on XEN_PCIDEV_FRONTEND + default n + help + Enables some debug statements within the PCI Frontend. + +source "drivers/pci/pcie/Kconfig" + +source "drivers/pci/Kconfig" + +source "drivers/pci/hotplug/Kconfig" + +source "drivers/pcmcia/Kconfig" + +endmenu + +endif + +source "net/Kconfig" + +source "drivers/Kconfig" + +source "fs/Kconfig" + +source "lib/Kconfig" + +# +# Use the generic interrupt handling code in kernel/irq/: +# +config GENERIC_HARDIRQS + bool + default y + +config GENERIC_IRQ_PROBE + bool + default y + +config GENERIC_PENDING_IRQ + bool + depends on GENERIC_HARDIRQS && SMP + default y + +config IRQ_PER_CPU + bool + default y + +source "arch/ia64/hp/sim/Kconfig" + +menu "Instrumentation Support" + depends on EXPERIMENTAL + +source "arch/ia64/oprofile/Kconfig" + +config KPROBES + bool "Kprobes (EXPERIMENTAL)" + depends on EXPERIMENTAL && MODULES + help + Kprobes allows you to trap at almost any kernel address and + execute a callback function. register_kprobe() establishes + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". +endmenu + +source "arch/ia64/Kconfig.debug" + +source "security/Kconfig" + +source "crypto/Kconfig" + +# +# override default values of drivers/xen/Kconfig +# +if XEN +config XEN_UTIL + default n + +config HAVE_ARCH_ALLOC_SKB + default y + +config HAVE_ARCH_DEV_ALLOC_SKB + default y + +config XEN_BALLOON + default y + +config XEN_SKBUFF + default y + +config XEN_DEVMEM + default n + +config XEN_REBOOT + default y + +config XEN_SMPBOOT + default n +endif + +source "drivers/xen/Kconfig" diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/kernel/setup.c.orig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/arch/ia64/kernel/setup.c.orig Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,1020 @@ +/* + * Architecture-specific setup. + * + * Copyright (C) 1998-2001, 2003-2004 Hewlett-Packard Co + * David Mosberger-Tang <davidm@xxxxxxxxxx> + * Stephane Eranian <eranian@xxxxxxxxxx> + * Copyright (C) 2000, 2004 Intel Corp + * Rohit Seth <rohit.seth@xxxxxxxxx> + * Suresh Siddha <suresh.b.siddha@xxxxxxxxx> + * Gordon Jin <gordon.jin@xxxxxxxxx> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@xxxxxxxxxxx> + * + * 12/26/04 S.Siddha, G.Jin, R.Seth + * Add multi-threading and multi-core detection + * 11/12/01 D.Mosberger Convert get_cpuinfo() to seq_file based show_cpuinfo(). 
+ * 04/04/00 D.Mosberger renamed cpu_initialized to cpu_online_map + * 03/31/00 R.Seth cpu_initialized and current->processor fixes + * 02/04/00 D.Mosberger some more get_cpuinfo fixes... + * 02/01/00 R.Seth fixed get_cpuinfo for SMP + * 01/07/99 S.Eranian added the support for command line argument + * 06/24/99 W.Drummond added boot_cpu_data. + * 05/28/05 Z. Menyhart Dynamic stride size for "flush_icache_range()" + */ +#include <linux/module.h> +#include <linux/init.h> + +#include <linux/acpi.h> +#include <linux/bootmem.h> +#include <linux/console.h> +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/reboot.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <linux/string.h> +#include <linux/threads.h> +#include <linux/screen_info.h> +#include <linux/dmi.h> +#include <linux/serial.h> +#include <linux/serial_core.h> +#include <linux/efi.h> +#include <linux/initrd.h> +#include <linux/pm.h> +#include <linux/cpufreq.h> + +#include <asm/ia32.h> +#include <asm/machvec.h> +#include <asm/mca.h> +#include <asm/meminit.h> +#include <asm/page.h> +#include <asm/patch.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/sal.h> +#include <asm/sections.h> +#include <asm/serial.h> +#include <asm/setup.h> +#include <asm/smp.h> +#include <asm/system.h> +#include <asm/unistd.h> +#include <asm/system.h> +#ifdef CONFIG_XEN +#include <asm/hypervisor.h> +#endif +#include <linux/dma-mapping.h> + +#if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE) +# error "struct cpuinfo_ia64 too big!" +#endif + +#ifdef CONFIG_SMP +unsigned long __per_cpu_offset[NR_CPUS]; +EXPORT_SYMBOL(__per_cpu_offset); +#endif + +#ifdef CONFIG_XEN +static int +xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + HYPERVISOR_shutdown(SHUTDOWN_crash); + /* we're never actually going to get here... */ + return NOTIFY_DONE; +} + +static struct notifier_block xen_panic_block = { + .notifier_call = xen_panic_event, + .next = NULL, + .priority = 0 /* try to go last */ +}; +#endif + +extern void ia64_setup_printk_clock(void); + +DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info); +DEFINE_PER_CPU(unsigned long, local_per_cpu_offset); +DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8); +unsigned long ia64_cycles_per_usec; +struct ia64_boot_param *ia64_boot_param; +struct screen_info screen_info; +unsigned long vga_console_iobase; +unsigned long vga_console_membase; + +static struct resource data_resource = { + .name = "Kernel data", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +static struct resource code_resource = { + .name = "Kernel code", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; +extern void efi_initialize_iomem_resources(struct resource *, + struct resource *); +extern char _text[], _end[], _etext[]; + +unsigned long ia64_max_cacheline_size; + +int dma_get_cache_alignment(void) +{ + return ia64_max_cacheline_size; +} +EXPORT_SYMBOL(dma_get_cache_alignment); + +unsigned long ia64_iobase; /* virtual address for I/O accesses */ +EXPORT_SYMBOL(ia64_iobase); +struct io_space io_space[MAX_IO_SPACES]; +EXPORT_SYMBOL(io_space); +unsigned int num_io_spaces; + +/* + * "flush_icache_range()" needs to know what processor dependent stride size to use + * when it makes i-cache(s) coherent with d-caches. + */ +#define I_CACHE_STRIDE_SHIFT 5 /* Safest way to go: 32 bytes by 32 bytes */ +unsigned long ia64_i_cache_stride_shift = ~0; + +/* + * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1). 
This + * mask specifies a mask of address bits that must be 0 in order for two buffers to be + * mergeable by the I/O MMU (i.e., the end address of the first buffer and the start + * address of the second buffer must be aligned to (merge_mask+1) in order to be + * mergeable). By default, we assume there is no I/O MMU which can merge physically + * discontiguous buffers, so we set the merge_mask to ~0UL, which corresponds to a iommu + * page-size of 2^64. + */ +unsigned long ia64_max_iommu_merge_mask = ~0UL; +EXPORT_SYMBOL(ia64_max_iommu_merge_mask); + +/* + * We use a special marker for the end of memory and it uses the extra (+1) slot + */ +struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1] __initdata; +int num_rsvd_regions __initdata; + + +/* + * Filter incoming memory segments based on the primitive map created from the boot + * parameters. Segments contained in the map are removed from the memory ranges. A + * caller-specified function is called with the memory ranges that remain after filtering. + * This routine does not assume the incoming segments are sorted. + */ +int __init +filter_rsvd_memory (unsigned long start, unsigned long end, void *arg) +{ + unsigned long range_start, range_end, prev_start; + void (*func)(unsigned long, unsigned long, int); + int i; + +#if IGNORE_PFN0 + if (start == PAGE_OFFSET) { + printk(KERN_WARNING "warning: skipping physical page 0\n"); + start += PAGE_SIZE; + if (start >= end) return 0; + } +#endif + /* + * lowest possible address(walker uses virtual) + */ + prev_start = PAGE_OFFSET; + func = arg; + + for (i = 0; i < num_rsvd_regions; ++i) { + range_start = max(start, prev_start); + range_end = min(end, rsvd_region[i].start); + + if (range_start < range_end) + call_pernode_memory(__pa(range_start), range_end - range_start, func); + + /* nothing more available in this segment */ + if (range_end == end) return 0; + + prev_start = rsvd_region[i].end; + } + /* end of memory marker allows full processing inside loop body */ + return 0; +} + +static int __init +rsvd_region_cmp(struct rsvd_region *lhs, struct rsvd_region *rhs) +{ + if (lhs->start > rhs->start) + return 1; + if (lhs->start < rhs->start) + return -1; + + if (lhs->end > rhs->end) + return 1; + if (lhs->end < rhs->end) + return -1; + + return 0; +} + +static void __init +sort_regions (struct rsvd_region *rsvd_region, int max) +{ + int num = max; + int j; + + /* simple bubble sorting */ + while (max--) { + for (j = 0; j < max; ++j) { + if (rsvd_region_cmp(&rsvd_region[j], + &rsvd_region[j + 1]) > 0) { + struct rsvd_region tmp; + tmp = rsvd_region[j]; + rsvd_region[j] = rsvd_region[j + 1]; + rsvd_region[j + 1] = tmp; + } + } + } + + for (j = 0; j < num - 1; j++) { + int k; + unsigned long start = rsvd_region[j].start; + unsigned long end = rsvd_region[j].end; + int collapsed; + + for (k = j + 1; k < num; k++) { + BUG_ON(start > rsvd_region[k].start); + if (end < rsvd_region[k].start) { + k--; + break; + } + end = max(end, rsvd_region[k].end); + } + if (k == num) + k--; + rsvd_region[j].end = end; + collapsed = k - j; + num -= collapsed; + for (k = j + 1; k < num; k++) { + rsvd_region[k] = rsvd_region[k + collapsed]; + } + } + + num_rsvd_regions = num; + for (j = 0; j < num; j++) { + printk("rsvd_region[%d]: [0x%016lx, 0x%06lx)\n", + j, rsvd_region[j].start, rsvd_region[j].end); + } +} + +/* + * Request address space for all standard resources + */ +static int __init register_memory(void) +{ + code_resource.start = ia64_tpa(_text); + code_resource.end = ia64_tpa(_etext) - 1; + 
data_resource.start = ia64_tpa(_etext); + data_resource.end = ia64_tpa(_end) - 1; + efi_initialize_iomem_resources(&code_resource, &data_resource); + + return 0; +} + +__initcall(register_memory); + +/** + * reserve_memory - setup reserved memory areas + * + * Setup the reserved memory areas set aside for the boot parameters, + * initrd, etc. There are currently %IA64_MAX_RSVD_REGIONS defined, + * see include/asm-ia64/meminit.h if you need to define more. + */ +void __init +reserve_memory (void) +{ + int n = 0; + + /* + * none of the entries in this table overlap + */ + rsvd_region[n].start = (unsigned long) ia64_boot_param; + rsvd_region[n].end = rsvd_region[n].start + sizeof(*ia64_boot_param); + n++; + + rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->efi_memmap); + rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->efi_memmap_size; + n++; + + rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->command_line); + rsvd_region[n].end = (rsvd_region[n].start + + strlen(__va(ia64_boot_param->command_line)) + 1); + n++; + + rsvd_region[n].start = (unsigned long) ia64_imva((void *)KERNEL_START); + rsvd_region[n].end = (unsigned long) ia64_imva(_end); + n++; + +#ifdef CONFIG_XEN + if (is_running_on_xen()) { + rsvd_region[n].start = (unsigned long)__va((HYPERVISOR_shared_info->arch.start_info_pfn << PAGE_SHIFT)); + rsvd_region[n].end = rsvd_region[n].start + PAGE_SIZE; + n++; + } +#endif + +#ifdef CONFIG_BLK_DEV_INITRD + if (ia64_boot_param->initrd_start) { + rsvd_region[n].start = (unsigned long)__va(ia64_boot_param->initrd_start); + rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->initrd_size; + n++; + } +#endif + + efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end); + n++; + + /* end of memory marker */ + rsvd_region[n].start = ~0UL; + rsvd_region[n].end = ~0UL; + n++; + + num_rsvd_regions = n; + BUG_ON(IA64_MAX_RSVD_REGIONS + 1 < n); + + sort_regions(rsvd_region, num_rsvd_regions); +} + +/** + * find_initrd - get initrd parameters from the boot parameter structure + * + * Grab the initrd start and end from the boot parameter struct given us by + * the boot loader. + */ +void __init +find_initrd (void) +{ +#ifdef CONFIG_BLK_DEV_INITRD + if (ia64_boot_param->initrd_start) { + initrd_start = (unsigned long)__va(ia64_boot_param->initrd_start); + initrd_end = initrd_start+ia64_boot_param->initrd_size; + + printk(KERN_INFO "Initial ramdisk at: 0x%lx (%lu bytes)\n", + initrd_start, ia64_boot_param->initrd_size); + } +#endif +} + +static void __init +io_port_init (void) +{ + unsigned long phys_iobase; + + /* + * Set `iobase' based on the EFI memory map or, failing that, the + * value firmware left in ar.k0. + * + * Note that in ia32 mode, IN/OUT instructions use ar.k0 to compute + * the port's virtual address, so ia32_load_state() loads it with a + * user virtual address. But in ia64 mode, glibc uses the + * *physical* address in ar.k0 to mmap the appropriate area from + * /dev/mem, and the inX()/outX() interfaces use MMIO. In both + * cases, user-mode can only use the legacy 0-64K I/O port space. + * + * ar.k0 is not involved in kernel I/O port accesses, which can use + * any of the I/O port spaces and are done via MMIO using the + * virtual mmio_base from the appropriate io_space[]. 
+ */ + phys_iobase = efi_get_iobase(); + if (!phys_iobase) { + phys_iobase = ia64_get_kr(IA64_KR_IO_BASE); + printk(KERN_INFO "No I/O port range found in EFI memory map, " + "falling back to AR.KR0 (0x%lx)\n", phys_iobase); + } + ia64_iobase = (unsigned long) ioremap(phys_iobase, 0); + ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase)); + + /* setup legacy IO port space */ + io_space[0].mmio_base = ia64_iobase; + io_space[0].sparse = 1; + num_io_spaces = 1; +} + +/** + * early_console_setup - setup debugging console + * + * Consoles started here require little enough setup that we can start using + * them very early in the boot process, either right after the machine + * vector initialization, or even before if the drivers can detect their hw. + * + * Returns non-zero if a console couldn't be setup. + */ +static inline int __init +early_console_setup (char *cmdline) +{ + int earlycons = 0; + +#ifdef CONFIG_XEN +#ifndef CONFIG_IA64_HP_SIM + if (is_running_on_xen()) { + extern struct console hpsim_cons; + hpsim_cons.flags |= CON_BOOT; + register_console(&hpsim_cons); + earlycons++; + } +#endif +#endif +#ifdef CONFIG_SERIAL_SGI_L1_CONSOLE + { + extern int sn_serial_console_early_setup(void); + if (!sn_serial_console_early_setup()) + earlycons++; + } +#endif +#ifdef CONFIG_EFI_PCDP + if (!efi_setup_pcdp_console(cmdline)) + earlycons++; +#endif +#ifdef CONFIG_SERIAL_8250_CONSOLE + if (!early_serial_console_init(cmdline)) + earlycons++; +#endif + + return (earlycons) ? 0 : -1; +} + +static inline void +mark_bsp_online (void) +{ +#ifdef CONFIG_SMP + /* If we register an early console, allow CPU 0 to printk */ + cpu_set(smp_processor_id(), cpu_online_map); +#endif +} + +#ifdef CONFIG_SMP +static void __init +check_for_logical_procs (void) +{ + pal_logical_to_physical_t info; + s64 status; + + status = ia64_pal_logical_to_phys(0, &info); + if (status == -1) { + printk(KERN_INFO "No logical to physical processor mapping " + "available\n"); + return; + } + if (status) { + printk(KERN_ERR "ia64_pal_logical_to_phys failed with %ld\n", + status); + return; + } + /* + * Total number of siblings that BSP has. Though not all of them + * may have booted successfully. The correct number of siblings + * booted is in info.overview_num_log. + */ + smp_num_siblings = info.overview_tpc; + smp_num_cpucores = info.overview_cpp; +} +#endif + +static __initdata int nomca; +static __init int setup_nomca(char *s) +{ + nomca = 1; + return 0; +} +early_param("nomca", setup_nomca); + +void __init +setup_arch (char **cmdline_p) +{ + unw_init(); + +#ifdef CONFIG_XEN + if (is_running_on_xen()) { + setup_xen_features(); + /* Register a call for panic conditions. 
*/ + atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); + } +#endif + + ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist); + + *cmdline_p = __va(ia64_boot_param->command_line); + strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE); + + efi_init(); + io_port_init(); + + parse_early_param(); + +#ifdef CONFIG_IA64_GENERIC + machvec_init(NULL); +#endif + + if (early_console_setup(*cmdline_p) == 0) + mark_bsp_online(); + +#ifdef CONFIG_ACPI + /* Initialize the ACPI boot-time table parser */ + acpi_table_init(); +# ifdef CONFIG_ACPI_NUMA + acpi_numa_init(); +# endif +#else +# ifdef CONFIG_SMP + smp_build_cpu_map(); /* happens, e.g., with the Ski simulator */ +# endif +#endif /* CONFIG_APCI_BOOT */ + + find_memory(); + + /* process SAL system table: */ + ia64_sal_init(__va(efi.sal_systab)); + + ia64_setup_printk_clock(); + +#ifdef CONFIG_SMP + cpu_physical_id(0) = hard_smp_processor_id(); + + cpu_set(0, cpu_sibling_map[0]); + cpu_set(0, cpu_core_map[0]); + + check_for_logical_procs(); + if (smp_num_cpucores > 1) + printk(KERN_INFO + "cpu package is Multi-Core capable: number of cores=%d\n", + smp_num_cpucores); + if (smp_num_siblings > 1) + printk(KERN_INFO + "cpu package is Multi-Threading capable: number of siblings=%d\n", + smp_num_siblings); +#endif + + cpu_init(); /* initialize the bootstrap CPU */ + mmu_context_init(); /* initialize context_id bitmap */ + +#ifdef CONFIG_ACPI + acpi_boot_init(); +#endif + +#ifdef CONFIG_VT + if (!conswitchp) { +# if defined(CONFIG_DUMMY_CONSOLE) + conswitchp = &dummy_con; +# endif +# if defined(CONFIG_VGA_CONSOLE) + /* + * Non-legacy systems may route legacy VGA MMIO range to system + * memory. vga_con probes the MMIO hole, so memory looks like + * a VGA device to it. The EFI memory map can tell us if it's + * memory so we can avoid this problem. + */ + if (efi_mem_type(0xA0000) != EFI_CONVENTIONAL_MEMORY) + conswitchp = &vga_con; +# endif + } +#ifdef CONFIG_XEN + if (is_running_on_xen()) { + shared_info_t *s = HYPERVISOR_shared_info; + + xen_start_info = __va(s->arch.start_info_pfn << PAGE_SHIFT); + + printk("Running on Xen! start_info_pfn=0x%lx nr_pages=%ld " + "flags=0x%x\n", s->arch.start_info_pfn, + xen_start_info->nr_pages, xen_start_info->flags); + + if (!is_initial_xendomain()) { + extern int console_use_vt; + conswitchp = NULL; + console_use_vt = 0; + } + } +#endif +#endif + + /* enable IA-64 Machine Check Abort Handling unless disabled */ + if (!nomca) + ia64_mca_init(); + + platform_setup(cmdline_p); + paging_init(); +#ifdef CONFIG_XEN + contiguous_bitmap_init(max_pfn); +#endif +} + +/* + * Display cpu info for all cpu's. 
+ */ +static int +show_cpuinfo (struct seq_file *m, void *v) +{ +#ifdef CONFIG_SMP +# define lpj c->loops_per_jiffy +# define cpunum c->cpu +#else +# define lpj loops_per_jiffy +# define cpunum 0 +#endif + static struct { + unsigned long mask; + const char *feature_name; + } feature_bits[] = { + { 1UL << 0, "branchlong" }, + { 1UL << 1, "spontaneous deferral"}, + { 1UL << 2, "16-byte atomic ops" } + }; + char family[32], features[128], *cp, sep; + struct cpuinfo_ia64 *c = v; + unsigned long mask; + unsigned long proc_freq; + int i; + + mask = c->features; + + switch (c->family) { + case 0x07: memcpy(family, "Itanium", 8); break; + case 0x1f: memcpy(family, "Itanium 2", 10); break; + default: sprintf(family, "%u", c->family); break; + } + + /* build the feature string: */ + memcpy(features, " standard", 10); + cp = features; + sep = 0; + for (i = 0; i < (int) ARRAY_SIZE(feature_bits); ++i) { + if (mask & feature_bits[i].mask) { + if (sep) + *cp++ = sep; + sep = ','; + *cp++ = ' '; + strcpy(cp, feature_bits[i].feature_name); + cp += strlen(feature_bits[i].feature_name); + mask &= ~feature_bits[i].mask; + } + } + if (mask) { + /* print unknown features as a hex value: */ + if (sep) + *cp++ = sep; + sprintf(cp, " 0x%lx", mask); + } + + proc_freq = cpufreq_quick_get(cpunum); + if (!proc_freq) + proc_freq = c->proc_freq / 1000; + + seq_printf(m, + "processor : %d\n" + "vendor : %s\n" + "arch : IA-64\n" + "family : %s\n" + "model : %u\n" + "revision : %u\n" + "archrev : %u\n" + "features :%s\n" /* don't change this---it _is_ right! */ + "cpu number : %lu\n" + "cpu regs : %u\n" + "cpu MHz : %lu.%06lu\n" + "itc MHz : %lu.%06lu\n" + "BogoMIPS : %lu.%02lu\n", + cpunum, c->vendor, family, c->model, c->revision, c->archrev, + features, c->ppn, c->number, + proc_freq / 1000, proc_freq % 1000, + c->itc_freq / 1000000, c->itc_freq % 1000000, + lpj*HZ/500000, (lpj*HZ/5000) % 100); +#ifdef CONFIG_SMP + seq_printf(m, "siblings : %u\n", cpus_weight(cpu_core_map[cpunum])); + if (c->threads_per_core > 1 || c->cores_per_socket > 1) + seq_printf(m, + "physical id: %u\n" + "core id : %u\n" + "thread id : %u\n", + c->socket_id, c->core_id, c->thread_id); +#endif + seq_printf(m,"\n"); + + return 0; +} + +static void * +c_start (struct seq_file *m, loff_t *pos) +{ +#ifdef CONFIG_SMP + while (*pos < NR_CPUS && !cpu_isset(*pos, cpu_online_map)) + ++*pos; +#endif + return *pos < NR_CPUS ? 
cpu_data(*pos) : NULL; +} + +static void * +c_next (struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} + +static void +c_stop (struct seq_file *m, void *v) +{ +} + +struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo +}; + +static void __cpuinit +identify_cpu (struct cpuinfo_ia64 *c) +{ + union { + unsigned long bits[5]; + struct { + /* id 0 & 1: */ + char vendor[16]; + + /* id 2 */ + u64 ppn; /* processor serial number */ + + /* id 3: */ + unsigned number : 8; + unsigned revision : 8; + unsigned model : 8; + unsigned family : 8; + unsigned archrev : 8; + unsigned reserved : 24; + + /* id 4: */ + u64 features; + } field; + } cpuid; + pal_vm_info_1_u_t vm1; + pal_vm_info_2_u_t vm2; + pal_status_t status; + unsigned long impl_va_msb = 50, phys_addr_size = 44; /* Itanium defaults */ + int i; + + for (i = 0; i < 5; ++i) + cpuid.bits[i] = ia64_get_cpuid(i); + + memcpy(c->vendor, cpuid.field.vendor, 16); +#ifdef CONFIG_SMP + c->cpu = smp_processor_id(); + + /* below default values will be overwritten by identify_siblings() + * for Multi-Threading/Multi-Core capable cpu's + */ + c->threads_per_core = c->cores_per_socket = c->num_log = 1; + c->socket_id = -1; + + identify_siblings(c); +#endif + c->ppn = cpuid.field.ppn; + c->number = cpuid.field.number; + c->revision = cpuid.field.revision; + c->model = cpuid.field.model; + c->family = cpuid.field.family; + c->archrev = cpuid.field.archrev; + c->features = cpuid.field.features; + + status = ia64_pal_vm_summary(&vm1, &vm2); + if (status == PAL_STATUS_SUCCESS) { + impl_va_msb = vm2.pal_vm_info_2_s.impl_va_msb; + phys_addr_size = vm1.pal_vm_info_1_s.phys_add_size; + } + c->unimpl_va_mask = ~((7L<<61) | ((1L << (impl_va_msb + 1)) - 1)); + c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1)); +} + +void +setup_per_cpu_areas (void) +{ + /* start_kernel() requires this... */ +#ifdef CONFIG_ACPI_HOTPLUG_CPU + prefill_possible_map(); +#endif +} + +/* + * Calculate the max. cache line size. + * + * In addition, the minimum of the i-cache stride sizes is calculated for + * "flush_icache_range()". 
+ */ +static void __cpuinit +get_max_cacheline_size (void) +{ + unsigned long line_size, max = 1; + unsigned int cache_size = 0; + u64 l, levels, unique_caches; + pal_cache_config_info_t cci; + s64 status; + + status = ia64_pal_cache_summary(&levels, &unique_caches); + if (status != 0) { + printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n", + __FUNCTION__, status); + max = SMP_CACHE_BYTES; + /* Safest setup for "flush_icache_range()" */ + ia64_i_cache_stride_shift = I_CACHE_STRIDE_SHIFT; + goto out; + } + + for (l = 0; l < levels; ++l) { + status = ia64_pal_cache_config_info(l, /* cache_type (data_or_unified)= */ 2, + &cci); + if (status != 0) { + printk(KERN_ERR + "%s: ia64_pal_cache_config_info(l=%lu, 2) failed (status=%ld)\n", + __FUNCTION__, l, status); + max = SMP_CACHE_BYTES; + /* The safest setup for "flush_icache_range()" */ + cci.pcci_stride = I_CACHE_STRIDE_SHIFT; + cci.pcci_unified = 1; + } + line_size = 1 << cci.pcci_line_size; + if (line_size > max) + max = line_size; + if (cache_size < cci.pcci_cache_size) + cache_size = cci.pcci_cache_size; + if (!cci.pcci_unified) { + status = ia64_pal_cache_config_info(l, + /* cache_type (instruction)= */ 1, + &cci); + if (status != 0) { + printk(KERN_ERR + "%s: ia64_pal_cache_config_info(l=%lu, 1) failed (status=%ld)\n", + __FUNCTION__, l, status); + /* The safest setup for "flush_icache_range()" */ + cci.pcci_stride = I_CACHE_STRIDE_SHIFT; + } + } + if (cci.pcci_stride < ia64_i_cache_stride_shift) + ia64_i_cache_stride_shift = cci.pcci_stride; + } + out: +#ifdef CONFIG_SMP + max_cache_size = max(max_cache_size, cache_size); +#endif + if (max > ia64_max_cacheline_size) + ia64_max_cacheline_size = max; +} + +/* + * cpu_init() initializes state that is per-CPU. This function acts + * as a 'CPU state barrier', nothing should get across. + */ +void __cpuinit +cpu_init (void) +{ + extern void __cpuinit ia64_mmu_init (void *); + unsigned long num_phys_stacked; + pal_vm_info_2_u_t vmi; + unsigned int max_ctx; + struct cpuinfo_ia64 *cpu_info; + void *cpu_data; + + cpu_data = per_cpu_init(); + + /* + * We set ar.k3 so that assembly code in MCA handler can compute + * physical addresses of per cpu variables with a simple: + * phys = ar.k3 + &per_cpu_var + */ + ia64_set_kr(IA64_KR_PER_CPU_DATA, + ia64_tpa(cpu_data) - (long) __per_cpu_start); + + get_max_cacheline_size(); + + /* + * We can't pass "local_cpu_data" to identify_cpu() because we haven't called + * ia64_mmu_init() yet. And we can't call ia64_mmu_init() first because it + * depends on the data returned by identify_cpu(). We break the dependency by + * accessing cpu_data() through the canonical per-CPU address. + */ + cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start); + identify_cpu(cpu_info); + +#ifdef CONFIG_MCKINLEY + { +# define FEATURE_SET 16 + struct ia64_pal_retval iprv; + + if (cpu_info->family == 0x1f) { + PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, FEATURE_SET, 0); + if ((iprv.status == 0) && (iprv.v0 & 0x80) && (iprv.v2 & 0x80)) + PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES, + (iprv.v1 | 0x80), FEATURE_SET, 0); + } + } +#endif + + /* Clear the stack memory reserved for pt_regs: */ + memset(task_pt_regs(current), 0, sizeof(struct pt_regs)); + + ia64_set_kr(IA64_KR_FPU_OWNER, 0); + + /* + * Initialize the page-table base register to a global + * directory with all zeroes. This ensure that we can handle + * TLB-misses to user address-space even before we created the + * first user address-space. 
This may happen, e.g., due to + * aggressive use of lfetch.fault. + */ + ia64_set_kr(IA64_KR_PT_BASE, __pa(ia64_imva(empty_zero_page))); + + /* + * Initialize default control register to defer speculative faults except + * for those arising from TLB misses, which are not deferred. The + * kernel MUST NOT depend on a particular setting of these bits (in other words, + * the kernel must have recovery code for all speculative accesses). Turn on + * dcr.lc as per recommendation by the architecture team. Most IA-32 apps + * shouldn't be affected by this (moral: keep your ia32 locks aligned and you'll + * be fine). + */ + ia64_setreg(_IA64_REG_CR_DCR, ( IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR + | IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC)); + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + if (current->mm) + BUG(); + + ia64_mmu_init(ia64_imva(cpu_data)); + ia64_mca_cpu_init(ia64_imva(cpu_data)); + +#ifdef CONFIG_IA32_SUPPORT + ia32_cpu_init(); +#endif + + /* Clear ITC to eliminiate sched_clock() overflows in human time. */ + ia64_set_itc(0); + + /* disable all local interrupt sources: */ + ia64_set_itv(1 << 16); + ia64_set_lrr0(1 << 16); + ia64_set_lrr1(1 << 16); + ia64_setreg(_IA64_REG_CR_PMV, 1 << 16); + ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16); + + /* clear TPR & XTP to enable all interrupt classes: */ + ia64_setreg(_IA64_REG_CR_TPR, 0); +#ifdef CONFIG_SMP + normal_xtp(); +#endif + + /* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */ + if (ia64_pal_vm_summary(NULL, &vmi) == 0) + max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1; + else { + printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n"); + max_ctx = (1U << 15) - 1; /* use architected minimum */ + } + while (max_ctx < ia64_ctx.max_ctx) { + unsigned int old = ia64_ctx.max_ctx; + if (cmpxchg(&ia64_ctx.max_ctx, old, max_ctx) == old) + break; + } + + if (ia64_pal_rse_info(&num_phys_stacked, NULL) != 0) { + printk(KERN_WARNING "cpu_init: PAL RSE info failed; assuming 96 physical " + "stacked regs\n"); + num_phys_stacked = 96; + } + /* size of physical stacked register partition plus 8 bytes: */ + __get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8; + platform_cpu_init(); + +#ifdef CONFIG_XEN + /* Need to be moved into platform_cpu_init later */ + if (is_running_on_xen()) { + extern void xen_smp_intr_init(void); + xen_smp_intr_init(); + } +#endif + + pm_idle = default_idle; +} + +/* + * On SMP systems, when the scheduler does migration-cost autodetection, + * it needs a way to flush as much of the CPU's caches as possible. + */ +void sched_cacheflush(void) +{ + ia64_sal_cache_flush(3); +} + +void __init +check_bugs (void) +{ + ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles, + (unsigned long) __end___mckinley_e9_bundles); +} + +static int __init run_dmi_scan(void) +{ + dmi_scan_machine(); + return 0; +} +core_initcall(run_dmi_scan); diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/xen/util.c.orig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/arch/ia64/xen/util.c.orig Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,115 @@ +/****************************************************************************** + * arch/ia64/xen/util.c + * This file is the ia64 counterpart of drivers/xen/util.c + * + * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <asm/uaccess.h> +#include <xen/driver_util.h> + +struct vm_struct *alloc_vm_area(unsigned long size) +{ + int order; + unsigned long virt; + unsigned long nr_pages; + struct vm_struct* area; + + order = get_order(size); + virt = __get_free_pages(GFP_KERNEL, order); + if (virt == 0) { + goto err0; + } + nr_pages = 1 << order; + scrub_pages(virt, nr_pages); + + area = kmalloc(sizeof(*area), GFP_KERNEL); + if (area == NULL) { + goto err1; + } + + area->flags = VM_IOREMAP;//XXX + area->addr = (void*)virt; + area->size = size; + area->pages = NULL; //XXX + area->nr_pages = nr_pages; + area->phys_addr = 0; /* xenbus_map_ring_valloc uses this field! */ + + return area; + +err1: + free_pages(virt, order); +err0: + return NULL; + +} +EXPORT_SYMBOL_GPL(alloc_vm_area); + +void free_vm_area(struct vm_struct *area) +{ + unsigned int order = get_order(area->size); + unsigned long i; + unsigned long phys_addr = __pa(area->addr); + + // This area is used for foreign page mappping. + // So underlying machine page may not be assigned. + for (i = 0; i < (1 << order); i++) { + unsigned long ret; + unsigned long gpfn = (phys_addr >> PAGE_SHIFT) + i; + struct xen_memory_reservation reservation = { + .nr_extents = 1, + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + set_xen_guest_handle(reservation.extent_start, &gpfn); + ret = HYPERVISOR_memory_op(XENMEM_populate_physmap, + &reservation); + BUG_ON(ret != 1); + } + free_pages((unsigned long)area->addr, order); + kfree(area); +} +EXPORT_SYMBOL_GPL(free_vm_area); + +void lock_vm_area(struct vm_struct *area) +{ + // nothing +} +EXPORT_SYMBOL_GPL(lock_vm_area); + +void unlock_vm_area(struct vm_struct *area) +{ + // nothing +} +EXPORT_SYMBOL_GPL(unlock_vm_area); + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/xen/xcom_hcall.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/arch/ia64/xen/xcom_hcall.c Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,469 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Tristan Gingold <tristan.gingold@xxxxxxxx> + */ +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/gfp.h> +#include <linux/module.h> +#include <xen/interface/xen.h> +#include <xen/interface/dom0_ops.h> +#include <xen/interface/memory.h> +#include <xen/interface/xencomm.h> +#include <xen/interface/version.h> +#include <xen/interface/sched.h> +#include <xen/interface/event_channel.h> +#include <xen/interface/physdev.h> +#include <xen/interface/grant_table.h> +#include <xen/interface/callback.h> +#include <xen/interface/acm_ops.h> +#include <xen/interface/hvm/params.h> +#include <xen/public/privcmd.h> +#include <asm/hypercall.h> +#include <asm/page.h> +#include <asm/uaccess.h> +#include <asm/xen/xencomm.h> + +/* Xencomm notes: + * This file defines hypercalls to be used by xencomm. The hypercalls simply + * create inlines descriptors for pointers and then call the raw arch hypercall + * xencomm_arch_hypercall_XXX + * + * If the arch wants to directly use these hypercalls, simply define macros + * in asm/hypercall.h, eg: + * #define HYPERVISOR_sched_op xencomm_hypercall_sched_op + * + * The arch may also define HYPERVISOR_xxx as a function and do more operations + * before/after doing the hypercall. + * + * Note: because only inline descriptors are created these functions must only + * be called with in kernel memory parameters. + */ + +int +xencomm_hypercall_console_io(int cmd, int count, char *str) +{ + return xencomm_arch_hypercall_console_io + (cmd, count, xencomm_create_inline(str)); +} + +int +xencomm_hypercall_event_channel_op(int cmd, void *op) +{ + return xencomm_arch_hypercall_event_channel_op + (cmd, xencomm_create_inline(op)); +} + +int +xencomm_hypercall_xen_version(int cmd, void *arg) +{ + switch (cmd) { + case XENVER_version: + case XENVER_extraversion: + case XENVER_compile_info: + case XENVER_capabilities: + case XENVER_changeset: + case XENVER_platform_parameters: + case XENVER_pagesize: + case XENVER_get_features: + break; + default: + printk("%s: unknown version cmd %d\n", __func__, cmd); + return -ENOSYS; + } + + return xencomm_arch_hypercall_xen_version + (cmd, xencomm_create_inline(arg)); +} + +int +xencomm_hypercall_physdev_op(int cmd, void *op) +{ + return xencomm_arch_hypercall_physdev_op + (cmd, xencomm_create_inline(op)); +} + +static void * +xencommize_grant_table_op(unsigned int cmd, void *op, unsigned int count) +{ + switch (cmd) { + case GNTTABOP_map_grant_ref: + case GNTTABOP_unmap_grant_ref: + break; + case GNTTABOP_setup_table: + { + struct gnttab_setup_table *setup = op; + struct xencomm_handle *frame_list; + + frame_list = xencomm_create_inline + (xen_guest_handle(setup->frame_list)); + + set_xen_guest_handle(setup->frame_list, (void *)frame_list); + break; + } + case GNTTABOP_dump_table: + case GNTTABOP_transfer: + case GNTTABOP_copy: + break; + default: + printk("%s: unknown grant table op %d\n", __func__, cmd); + BUG(); + } + + return xencomm_create_inline(op); +} + +int +xencomm_hypercall_grant_table_op(unsigned int cmd, void *op, unsigned int count) +{ + void *desc = xencommize_grant_table_op (cmd, op, count); + + return xencomm_arch_hypercall_grant_table_op(cmd, desc, count); +} + +int +xencomm_hypercall_sched_op(int cmd, void *arg) +{ + switch (cmd) { + case SCHEDOP_yield: + case 
SCHEDOP_block: + case SCHEDOP_shutdown: + case SCHEDOP_poll: + case SCHEDOP_remote_shutdown: + break; + default: + printk("%s: unknown sched op %d\n", __func__, cmd); + return -ENOSYS; + } + + return xencomm_arch_hypercall_sched_op(cmd, xencomm_create_inline(arg)); +} + +int +xencomm_hypercall_multicall(void *call_list, int nr_calls) +{ + int i; + multicall_entry_t *mce; + + for (i = 0; i < nr_calls; i++) { + mce = (multicall_entry_t *)call_list + i; + + switch (mce->op) { + case __HYPERVISOR_update_va_mapping: + case __HYPERVISOR_mmu_update: + /* No-op on ia64. */ + break; + case __HYPERVISOR_grant_table_op: + mce->args[1] = (unsigned long)xencommize_grant_table_op + (mce->args[0], (void *)mce->args[1], + mce->args[2]); + break; + case __HYPERVISOR_memory_op: + default: + printk("%s: unhandled multicall op entry op %lu\n", + __func__, mce->op); + return -ENOSYS; + } + } + + return xencomm_arch_hypercall_multicall + (xencomm_create_inline(call_list), nr_calls); +} + +int +xencomm_hypercall_callback_op(int cmd, void *arg) +{ + switch (cmd) + { + case CALLBACKOP_register: + case CALLBACKOP_unregister: + break; + default: + printk("%s: unknown callback op %d\n", __func__, cmd); + return -ENOSYS; + } + + return xencomm_arch_hypercall_callback_op + (cmd, xencomm_create_inline(arg)); +} + +static void +xencommize_memory_reservation (xen_memory_reservation_t *mop) +{ + struct xencomm_handle *desc; + + desc = xencomm_create_inline(xen_guest_handle(mop->extent_start)); + set_xen_guest_handle(mop->extent_start, (void *)desc); +} + +int +xencomm_hypercall_memory_op(unsigned int cmd, void *arg) +{ + switch (cmd) { + case XENMEM_increase_reservation: + case XENMEM_decrease_reservation: + case XENMEM_populate_physmap: + xencommize_memory_reservation((xen_memory_reservation_t *)arg); + break; + + case XENMEM_maximum_ram_page: + break; + + case XENMEM_exchange: + xencommize_memory_reservation + (&((xen_memory_exchange_t *)arg)->in); + xencommize_memory_reservation + (&((xen_memory_exchange_t *)arg)->out); + break; + + default: + printk("%s: unknown memory op %d\n", __func__, cmd); + return -ENOSYS; + } + + return xencomm_arch_hypercall_memory_op + (cmd, xencomm_create_inline(arg)); +} + +unsigned long +xencomm_hypercall_hvm_op(int cmd, void *arg) +{ + switch (cmd) { + case HVMOP_set_param: + case HVMOP_get_param: + break; + default: + printk("%s: unknown hvm op %d\n", __func__, cmd); + return -ENOSYS; + } + + return xencomm_arch_hypercall_hvm_op(cmd, xencomm_create_inline(arg)); +} + +int +xencomm_hypercall_suspend(unsigned long srec) +{ + struct sched_shutdown arg; + + arg.reason = SHUTDOWN_suspend; + + return xencomm_arch_hypercall_suspend(xencomm_create_inline(&arg)); +} + +int +xencomm_mini_hypercall_event_channel_op(int cmd, void *op) +{ + struct xencomm_mini xc_area[2]; + int nbr_area = 2; + struct xencomm_handle *desc; + int rc; + + rc = xencomm_create_mini(xc_area, &nbr_area, + op, sizeof(evtchn_op_t), &desc); + if (rc) + return rc; + + return xencomm_arch_hypercall_event_channel_op(cmd, desc); +} +EXPORT_SYMBOL(xencomm_mini_hypercall_event_channel_op); + +static int +xencommize_mini_grant_table_op(struct xencomm_mini *xc_area, int *nbr_area, + unsigned int cmd, void *op, unsigned int count, + struct xencomm_handle **desc) +{ + struct xencomm_handle *desc1; + unsigned int argsize; + int rc; + + switch (cmd) { + case GNTTABOP_map_grant_ref: + argsize = sizeof(struct gnttab_map_grant_ref); + break; + case GNTTABOP_unmap_grant_ref: + argsize = sizeof(struct gnttab_unmap_grant_ref); + break; + case 
GNTTABOP_setup_table: + { + struct gnttab_setup_table *setup = op; + + argsize = sizeof(*setup); + + if (count != 1) + return -EINVAL; + rc = xencomm_create_mini + (xc_area, nbr_area, + xen_guest_handle(setup->frame_list), + setup->nr_frames + * sizeof(*xen_guest_handle(setup->frame_list)), + &desc1); + if (rc) + return rc; + set_xen_guest_handle(setup->frame_list, (void *)desc1); + break; + } + case GNTTABOP_dump_table: + argsize = sizeof(struct gnttab_dump_table); + break; + case GNTTABOP_transfer: + argsize = sizeof(struct gnttab_transfer); + break; + default: + printk("%s: unknown mini grant table op %d\n", __func__, cmd); + BUG(); + } + + rc = xencomm_create_mini(xc_area, nbr_area, op, count * argsize, desc); + if (rc) + return rc; + + return 0; +} + +int +xencomm_mini_hypercall_grant_table_op(unsigned int cmd, void *op, + unsigned int count) +{ + int rc; + struct xencomm_handle *desc; + int nbr_area = 2; + struct xencomm_mini xc_area[2]; + + rc = xencommize_mini_grant_table_op(xc_area, &nbr_area, + cmd, op, count, &desc); + if (rc) + return rc; + + return xencomm_arch_hypercall_grant_table_op(cmd, desc, count); +} +EXPORT_SYMBOL(xencomm_mini_hypercall_grant_table_op); + +int +xencomm_mini_hypercall_multicall(void *call_list, int nr_calls) +{ + int i; + multicall_entry_t *mce; + int nbr_area = 2 + nr_calls * 3; + struct xencomm_mini xc_area[nbr_area]; + struct xencomm_handle *desc; + int rc; + + for (i = 0; i < nr_calls; i++) { + mce = (multicall_entry_t *)call_list + i; + + switch (mce->op) { + case __HYPERVISOR_update_va_mapping: + case __HYPERVISOR_mmu_update: + /* No-op on ia64. */ + break; + case __HYPERVISOR_grant_table_op: + rc = xencommize_mini_grant_table_op + (xc_area, &nbr_area, + mce->args[0], (void *)mce->args[1], + mce->args[2], &desc); + if (rc) + return rc; + mce->args[1] = (unsigned long)desc; + break; + case __HYPERVISOR_memory_op: + default: + printk("%s: unhandled multicall op entry op %lu\n", + __func__, mce->op); + return -ENOSYS; + } + } + + rc = xencomm_create_mini(xc_area, &nbr_area, call_list, + nr_calls * sizeof(multicall_entry_t), &desc); + if (rc) + return rc; + + return xencomm_arch_hypercall_multicall(desc, nr_calls); +} +EXPORT_SYMBOL(xencomm_mini_hypercall_multicall); + +static int +xencommize_mini_memory_reservation(struct xencomm_mini *area, int *nbr_area, + xen_memory_reservation_t *mop) +{ + struct xencomm_handle *desc; + int rc; + + rc = xencomm_create_mini + (area, nbr_area, + xen_guest_handle(mop->extent_start), + mop->nr_extents + * sizeof(*xen_guest_handle(mop->extent_start)), + &desc); + if (rc) + return rc; + + set_xen_guest_handle(mop->extent_start, (void *)desc); + + return 0; +} + +int +xencomm_mini_hypercall_memory_op(unsigned int cmd, void *arg) +{ + int nbr_area = 4; + struct xencomm_mini xc_area[4]; + struct xencomm_handle *desc; + int rc; + unsigned int argsize; + + switch (cmd) { + case XENMEM_increase_reservation: + case XENMEM_decrease_reservation: + case XENMEM_populate_physmap: + argsize = sizeof(xen_memory_reservation_t); + rc = xencommize_mini_memory_reservation + (xc_area, &nbr_area, (xen_memory_reservation_t *)arg); + if (rc) + return rc; + break; + + case XENMEM_maximum_ram_page: + argsize = 0; + break; + + case XENMEM_exchange: + argsize = sizeof(xen_memory_exchange_t); + rc = xencommize_mini_memory_reservation + (xc_area, &nbr_area, + &((xen_memory_exchange_t *)arg)->in); + if (rc) + return rc; + rc = xencommize_mini_memory_reservation + (xc_area, &nbr_area, + &((xen_memory_exchange_t *)arg)->out); + if (rc) + return rc; 
+ break; + + default: + printk("%s: unknown mini memory op %d\n", __func__, cmd); + return -ENOSYS; + } + + rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc); + if (rc) + return rc; + + return xencomm_arch_hypercall_memory_op(cmd, desc); +} +EXPORT_SYMBOL(xencomm_mini_hypercall_memory_op); diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/xen/xcom_privcmd.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/arch/ia64/xen/xcom_privcmd.c Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,600 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> + * Tristan Gingold <tristan.gingold@xxxxxxxx> + */ +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/gfp.h> +#include <linux/module.h> +#include <xen/interface/xen.h> +#include <xen/interface/dom0_ops.h> +#define __XEN__ +#include <xen/interface/domctl.h> +#include <xen/interface/sysctl.h> +#include <xen/interface/memory.h> +#include <xen/interface/version.h> +#include <xen/interface/event_channel.h> +#include <xen/interface/acm_ops.h> +#include <xen/interface/hvm/params.h> +#include <xen/public/privcmd.h> +#include <asm/hypercall.h> +#include <asm/page.h> +#include <asm/uaccess.h> +#include <asm/xen/xencomm.h> + +#define ROUND_DIV(v,s) (((v) + (s) - 1) / (s)) + +static int +xencomm_privcmd_dom0_op(privcmd_hypercall_t *hypercall) +{ + dom0_op_t kern_op; + dom0_op_t __user *user_op = (dom0_op_t __user *)hypercall->arg[0]; + struct xencomm_handle *op_desc; + struct xencomm_handle *desc = NULL; + int ret = 0; + + if (copy_from_user(&kern_op, user_op, sizeof(dom0_op_t))) + return -EFAULT; + + if (kern_op.interface_version != DOM0_INTERFACE_VERSION) + return -EACCES; + + op_desc = xencomm_create_inline(&kern_op); + + switch (kern_op.cmd) { + default: + printk("%s: unknown dom0 cmd %d\n", __func__, kern_op.cmd); + return -ENOSYS; + } + + if (ret) { + /* error mapping the nested pointer */ + return ret; + } + + ret = xencomm_arch_hypercall_dom0_op(op_desc); + + /* FIXME: should we restore the handle? 
*/ + if (copy_to_user(user_op, &kern_op, sizeof(dom0_op_t))) + ret = -EFAULT; + + if (desc) + xencomm_free(desc); + return ret; +} + +static int +xencomm_privcmd_sysctl(privcmd_hypercall_t *hypercall) +{ + xen_sysctl_t kern_op; + xen_sysctl_t __user *user_op; + struct xencomm_handle *op_desc; + struct xencomm_handle *desc = NULL; + struct xencomm_handle *desc1 = NULL; + int ret = 0; + + user_op = (xen_sysctl_t __user *)hypercall->arg[0]; + + if (copy_from_user(&kern_op, user_op, sizeof(xen_sysctl_t))) + return -EFAULT; + + if (kern_op.interface_version != XEN_SYSCTL_INTERFACE_VERSION) + return -EACCES; + + op_desc = xencomm_create_inline(&kern_op); + + switch (kern_op.cmd) { + case XEN_SYSCTL_readconsole: + ret = xencomm_create( + xen_guest_handle(kern_op.u.readconsole.buffer), + kern_op.u.readconsole.count, + &desc, GFP_KERNEL); + set_xen_guest_handle(kern_op.u.readconsole.buffer, + (void *)desc); + break; + case XEN_SYSCTL_tbuf_op: + case XEN_SYSCTL_physinfo: + case XEN_SYSCTL_sched_id: + break; + case XEN_SYSCTL_perfc_op: + ret = xencomm_create( + xen_guest_handle(kern_op.u.perfc_op.desc), + kern_op.u.perfc_op.nr_counters * + sizeof(xen_sysctl_perfc_desc_t), + &desc, GFP_KERNEL); + if (ret) + return ret; + set_xen_guest_handle(kern_op.u.perfc_op.val, + (void *)desc); + ret = xencomm_create( + xen_guest_handle(kern_op.u.perfc_op.val), + kern_op.u.perfc_op.nr_vals * + sizeof(xen_sysctl_perfc_desc_t), + &desc1, GFP_KERNEL); + if (ret) + xencomm_free(desc); + set_xen_guest_handle(kern_op.u.perfc_op.val, + (void *)desc1); + break; + case XEN_SYSCTL_getdomaininfolist: + ret = xencomm_create( + xen_guest_handle(kern_op.u.getdomaininfolist.buffer), + kern_op.u.getdomaininfolist.max_domains * + sizeof(xen_domctl_getdomaininfo_t), + &desc, GFP_KERNEL); + set_xen_guest_handle(kern_op.u.getdomaininfolist.buffer, + (void *)desc); + break; + default: + printk("%s: unknown sysctl cmd %d\n", __func__, kern_op.cmd); + return -ENOSYS; + } + + if (ret) { + /* error mapping the nested pointer */ + return ret; + } + + ret = xencomm_arch_hypercall_sysctl(op_desc); + + /* FIXME: should we restore the handle? 
*/ + if (copy_to_user(user_op, &kern_op, sizeof(xen_sysctl_t))) + ret = -EFAULT; + + if (desc) + xencomm_free(desc); + if (desc1) + xencomm_free(desc1); + return ret; +} + +static int +xencomm_privcmd_domctl(privcmd_hypercall_t *hypercall) +{ + xen_domctl_t kern_op; + xen_domctl_t __user *user_op; + struct xencomm_handle *op_desc; + struct xencomm_handle *desc = NULL; + int ret = 0; + + user_op = (xen_domctl_t __user *)hypercall->arg[0]; + + if (copy_from_user(&kern_op, user_op, sizeof(xen_domctl_t))) + return -EFAULT; + + if (kern_op.interface_version != XEN_DOMCTL_INTERFACE_VERSION) + return -EACCES; + + op_desc = xencomm_create_inline(&kern_op); + + switch (kern_op.cmd) { + case XEN_DOMCTL_createdomain: + case XEN_DOMCTL_destroydomain: + case XEN_DOMCTL_pausedomain: + case XEN_DOMCTL_unpausedomain: + case XEN_DOMCTL_getdomaininfo: + break; + case XEN_DOMCTL_getmemlist: + { + unsigned long nr_pages = kern_op.u.getmemlist.max_pfns; + + ret = xencomm_create( + xen_guest_handle(kern_op.u.getmemlist.buffer), + nr_pages * sizeof(unsigned long), + &desc, GFP_KERNEL); + set_xen_guest_handle(kern_op.u.getmemlist.buffer, + (void *)desc); + break; + } + case XEN_DOMCTL_getpageframeinfo: + break; + case XEN_DOMCTL_getpageframeinfo2: + ret = xencomm_create( + xen_guest_handle(kern_op.u.getpageframeinfo2.array), + kern_op.u.getpageframeinfo2.num, + &desc, GFP_KERNEL); + set_xen_guest_handle(kern_op.u.getpageframeinfo2.array, + (void *)desc); + break; + case XEN_DOMCTL_shadow_op: + ret = xencomm_create( + xen_guest_handle(kern_op.u.shadow_op.dirty_bitmap), + ROUND_DIV(kern_op.u.shadow_op.pages, 8), + &desc, GFP_KERNEL); + set_xen_guest_handle(kern_op.u.shadow_op.dirty_bitmap, + (void *)desc); + break; + case XEN_DOMCTL_max_mem: + break; + case XEN_DOMCTL_setvcpucontext: + case XEN_DOMCTL_getvcpucontext: + ret = xencomm_create( + xen_guest_handle(kern_op.u.vcpucontext.ctxt), + sizeof(vcpu_guest_context_t), + &desc, GFP_KERNEL); + set_xen_guest_handle(kern_op.u.vcpucontext.ctxt, (void *)desc); + break; + case XEN_DOMCTL_getvcpuinfo: + break; + case XEN_DOMCTL_setvcpuaffinity: + case XEN_DOMCTL_getvcpuaffinity: + ret = xencomm_create( + xen_guest_handle(kern_op.u.vcpuaffinity.cpumap.bitmap), + ROUND_DIV(kern_op.u.vcpuaffinity.cpumap.nr_cpus, 8), + &desc, GFP_KERNEL); + set_xen_guest_handle(kern_op.u.vcpuaffinity.cpumap.bitmap, + (void *)desc); + break; + case XEN_DOMCTL_max_vcpus: + case XEN_DOMCTL_scheduler_op: + case XEN_DOMCTL_setdomainhandle: + case XEN_DOMCTL_setdebugging: + case XEN_DOMCTL_irq_permission: + case XEN_DOMCTL_iomem_permission: + case XEN_DOMCTL_ioport_permission: + case XEN_DOMCTL_hypercall_init: + case XEN_DOMCTL_arch_setup: + case XEN_DOMCTL_settimeoffset: + break; + default: + printk("%s: unknown domctl cmd %d\n", __func__, kern_op.cmd); + return -ENOSYS; + } + + if (ret) { + /* error mapping the nested pointer */ + return ret; + } + + ret = xencomm_arch_hypercall_domctl (op_desc); + + /* FIXME: should we restore the handle? 
*/ + if (copy_to_user(user_op, &kern_op, sizeof(xen_domctl_t))) + ret = -EFAULT; + + if (desc) + xencomm_free(desc); + return ret; +} + +static int +xencomm_privcmd_acm_op(privcmd_hypercall_t *hypercall) +{ + int cmd = hypercall->arg[0]; + void __user *arg = (void __user *)hypercall->arg[1]; + struct xencomm_handle *op_desc; + struct xencomm_handle *desc = NULL; + int ret; + + switch (cmd) { + case ACMOP_getssid: + { + struct acm_getssid kern_arg; + + if (copy_from_user(&kern_arg, arg, sizeof (kern_arg))) + return -EFAULT; + + op_desc = xencomm_create_inline(&kern_arg); + + ret = xencomm_create(xen_guest_handle(kern_arg.ssidbuf), + kern_arg.ssidbuf_size, &desc, GFP_KERNEL); + if (ret) + return ret; + + set_xen_guest_handle(kern_arg.ssidbuf, (void *)desc); + + ret = xencomm_arch_hypercall_acm_op(cmd, op_desc); + + xencomm_free(desc); + + if (copy_to_user(arg, &kern_arg, sizeof (kern_arg))) + return -EFAULT; + + return ret; + } + default: + printk("%s: unknown acm_op cmd %d\n", __func__, cmd); + return -ENOSYS; + } + + return ret; +} + +static int +xencomm_privcmd_memory_op(privcmd_hypercall_t *hypercall) +{ + const unsigned long cmd = hypercall->arg[0]; + int ret = 0; + + switch (cmd) { + case XENMEM_increase_reservation: + case XENMEM_decrease_reservation: + case XENMEM_populate_physmap: + { + xen_memory_reservation_t kern_op; + xen_memory_reservation_t __user *user_op; + struct xencomm_handle *desc = NULL; + struct xencomm_handle *desc_op; + + user_op = (xen_memory_reservation_t __user *)hypercall->arg[1]; + if (copy_from_user(&kern_op, user_op, + sizeof(xen_memory_reservation_t))) + return -EFAULT; + desc_op = xencomm_create_inline(&kern_op); + + if (xen_guest_handle(kern_op.extent_start)) { + void * addr; + + addr = xen_guest_handle(kern_op.extent_start); + ret = xencomm_create + (addr, + kern_op.nr_extents * + sizeof(*xen_guest_handle + (kern_op.extent_start)), + &desc, GFP_KERNEL); + if (ret) + return ret; + set_xen_guest_handle(kern_op.extent_start, + (void *)desc); + } + + ret = xencomm_arch_hypercall_memory_op(cmd, desc_op); + + if (desc) + xencomm_free(desc); + + if (ret != 0) + return ret; + + if (copy_to_user(user_op, &kern_op, + sizeof(xen_memory_reservation_t))) + return -EFAULT; + + return ret; + } + case XENMEM_translate_gpfn_list: + { + xen_translate_gpfn_list_t kern_op; + xen_translate_gpfn_list_t __user *user_op; + struct xencomm_handle *desc_gpfn = NULL; + struct xencomm_handle *desc_mfn = NULL; + struct xencomm_handle *desc_op; + void *addr; + + user_op = (xen_translate_gpfn_list_t __user *) + hypercall->arg[1]; + if (copy_from_user(&kern_op, user_op, + sizeof(xen_translate_gpfn_list_t))) + return -EFAULT; + desc_op = xencomm_create_inline(&kern_op); + + if (kern_op.nr_gpfns) { + /* gpfn_list. */ + addr = xen_guest_handle(kern_op.gpfn_list); + + ret = xencomm_create(addr, kern_op.nr_gpfns * + sizeof(*xen_guest_handle + (kern_op.gpfn_list)), + &desc_gpfn, GFP_KERNEL); + if (ret) + return ret; + set_xen_guest_handle(kern_op.gpfn_list, + (void *)desc_gpfn); + + /* mfn_list. 
*/ + addr = xen_guest_handle(kern_op.mfn_list); + + ret = xencomm_create(addr, kern_op.nr_gpfns * + sizeof(*xen_guest_handle + (kern_op.mfn_list)), + &desc_mfn, GFP_KERNEL); + if (ret) + return ret; + set_xen_guest_handle(kern_op.mfn_list, + (void *)desc_mfn); + } + + ret = xencomm_arch_hypercall_memory_op(cmd, desc_op); + + if (desc_gpfn) + xencomm_free(desc_gpfn); + + if (desc_mfn) + xencomm_free(desc_mfn); + + if (ret != 0) + return ret; + + return ret; + } + default: + printk("%s: unknown memory op %lu\n", __func__, cmd); + ret = -ENOSYS; + } + return ret; +} + +static int +xencomm_privcmd_xen_version(privcmd_hypercall_t *hypercall) +{ + int cmd = hypercall->arg[0]; + void __user *arg = (void __user *)hypercall->arg[1]; + struct xencomm_handle *desc; + size_t argsize; + int rc; + + switch (cmd) { + case XENVER_version: + /* do not actually pass an argument */ + return xencomm_arch_hypercall_xen_version(cmd, 0); + case XENVER_extraversion: + argsize = sizeof(xen_extraversion_t); + break; + case XENVER_compile_info: + argsize = sizeof(xen_compile_info_t); + break; + case XENVER_capabilities: + argsize = sizeof(xen_capabilities_info_t); + break; + case XENVER_changeset: + argsize = sizeof(xen_changeset_info_t); + break; + case XENVER_platform_parameters: + argsize = sizeof(xen_platform_parameters_t); + break; + case XENVER_pagesize: + argsize = (arg == NULL) ? 0 : sizeof(void *); + break; + case XENVER_get_features: + argsize = (arg == NULL) ? 0 : sizeof(xen_feature_info_t); + break; + + default: + printk("%s: unknown version op %d\n", __func__, cmd); + return -ENOSYS; + } + + rc = xencomm_create(arg, argsize, &desc, GFP_KERNEL); + if (rc) + return rc; + + rc = xencomm_arch_hypercall_xen_version(cmd, desc); + + xencomm_free(desc); + + return rc; +} + +static int +xencomm_privcmd_event_channel_op(privcmd_hypercall_t *hypercall) +{ + int cmd = hypercall->arg[0]; + struct xencomm_handle *desc; + unsigned int argsize; + int ret; + + switch (cmd) { + case EVTCHNOP_alloc_unbound: + argsize = sizeof(evtchn_alloc_unbound_t); + break; + + case EVTCHNOP_status: + argsize = sizeof(evtchn_status_t); + break; + + default: + printk("%s: unknown EVTCHNOP %d\n", __func__, cmd); + return -EINVAL; + } + + ret = xencomm_create((void *)hypercall->arg[1], argsize, + &desc, GFP_KERNEL); + if (ret) + return ret; + + ret = xencomm_arch_hypercall_event_channel_op(cmd, desc); + + xencomm_free(desc); + return ret; +} + +static int +xencomm_privcmd_hvm_op(privcmd_hypercall_t *hypercall) +{ + int cmd = hypercall->arg[0]; + struct xencomm_handle *desc; + unsigned int argsize; + int ret; + + switch (cmd) { + case HVMOP_get_param: + case HVMOP_set_param: + argsize = sizeof(xen_hvm_param_t); + break; + default: + printk("%s: unknown HVMOP %d\n", __func__, cmd); + return -EINVAL; + } + + ret = xencomm_create((void *)hypercall->arg[1], argsize, + &desc, GFP_KERNEL); + if (ret) + return ret; + + ret = xencomm_arch_hypercall_hvm_op(cmd, desc); + + xencomm_free(desc); + return ret; +} + +static int +xencomm_privcmd_sched_op(privcmd_hypercall_t *hypercall) +{ + int cmd = hypercall->arg[0]; + struct xencomm_handle *desc; + unsigned int argsize; + int ret; + + switch (cmd) { + case SCHEDOP_remote_shutdown: + argsize = sizeof(sched_remote_shutdown_t); + break; + default: + printk("%s: unknown SCHEDOP %d\n", __func__, cmd); + return -EINVAL; + } + + ret = xencomm_create((void *)hypercall->arg[1], argsize, + &desc, GFP_KERNEL); + if (ret) + return ret; + + ret = xencomm_arch_hypercall_sched_op(cmd, desc); + + xencomm_free(desc); + 
return ret; +} + +int +privcmd_hypercall(privcmd_hypercall_t *hypercall) +{ + switch (hypercall->op) { + case __HYPERVISOR_dom0_op: + return xencomm_privcmd_dom0_op(hypercall); + case __HYPERVISOR_domctl: + return xencomm_privcmd_domctl(hypercall); + case __HYPERVISOR_sysctl: + return xencomm_privcmd_sysctl(hypercall); + case __HYPERVISOR_acm_op: + return xencomm_privcmd_acm_op(hypercall); + case __HYPERVISOR_xen_version: + return xencomm_privcmd_xen_version(hypercall); + case __HYPERVISOR_memory_op: + return xencomm_privcmd_memory_op(hypercall); + case __HYPERVISOR_event_channel_op: + return xencomm_privcmd_event_channel_op(hypercall); + case __HYPERVISOR_hvm_op: + return xencomm_privcmd_hvm_op(hypercall); + case __HYPERVISOR_sched_op: + return xencomm_privcmd_sched_op(hypercall); + default: + printk("%s: unknown hcall (%ld)\n", __func__, hypercall->op); + return -ENOSYS; + } +} + diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/xen/xencomm.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/arch/ia64/xen/xencomm.c Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,244 @@ +/* + * Copyright (C) 2006 Hollis Blanchard <hollisb@xxxxxxxxxx>, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/gfp.h> +#include <linux/mm.h> +#include <asm/page.h> +#include <asm/xen/xencomm.h> +#include <xen/interface/xen.h> + +static int xencomm_debug = 0; + +/* Translate virtual address to physical address. */ +unsigned long +xencomm_vaddr_to_paddr(unsigned long vaddr) +{ + struct page *page; + struct vm_area_struct *vma; + + if (vaddr == 0) + return 0; + +#ifdef __ia64__ + if (REGION_NUMBER(vaddr) == 5) { + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *ptep; + + /* On ia64, TASK_SIZE refers to current. It is not initialized + during boot. + Furthermore the kernel is relocatable and __pa() doesn't + work on addresses. */ + if (vaddr >= KERNEL_START + && vaddr < (KERNEL_START + KERNEL_TR_PAGE_SIZE)) { + extern unsigned long kernel_start_pa; + + return vaddr - kernel_start_pa; + } + + /* In kernel area -- virtually mapped. */ + pgd = pgd_offset_k(vaddr); + if (pgd_none(*pgd) || pgd_bad(*pgd)) + return ~0UL; + + pud = pud_offset(pgd, vaddr); + if (pud_none(*pud) || pud_bad(*pud)) + return ~0UL; + + pmd = pmd_offset(pud, vaddr); + if (pmd_none(*pmd) || pmd_bad(*pmd)) + return ~0UL; + + ptep = pte_offset_kernel(pmd, vaddr); + if (!ptep) + return ~0UL; + + return (pte_val(*ptep) & _PFN_MASK) | (vaddr & ~PAGE_MASK); + } +#endif + + if (vaddr > TASK_SIZE) { + /* kernel address */ + return __pa(vaddr); + } + + /* XXX double-check (lack of) locking */ + vma = find_extend_vma(current->mm, vaddr); + if (!vma) + return ~0UL; + + /* We assume the page is modified. 
*/ + page = follow_page(vma, vaddr, FOLL_WRITE | FOLL_TOUCH); + if (!page) + return ~0UL; + + return (page_to_pfn(page) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK); +} + +static int +xencomm_init(struct xencomm_desc *desc, void *buffer, unsigned long bytes) +{ + unsigned long recorded = 0; + int i = 0; + + BUG_ON((buffer == NULL) && (bytes > 0)); + + /* record the physical pages used */ + if (buffer == NULL) + desc->nr_addrs = 0; + + while ((recorded < bytes) && (i < desc->nr_addrs)) { + unsigned long vaddr = (unsigned long)buffer + recorded; + unsigned long paddr; + int offset; + int chunksz; + + offset = vaddr % PAGE_SIZE; /* handle partial pages */ + chunksz = min(PAGE_SIZE - offset, bytes - recorded); + + paddr = xencomm_vaddr_to_paddr(vaddr); + if (paddr == ~0UL) { + printk("%s: couldn't translate vaddr %lx\n", + __func__, vaddr); + return -EINVAL; + } + + desc->address[i++] = paddr; + recorded += chunksz; + } + + if (recorded < bytes) { + printk("%s: could only translate %ld of %ld bytes\n", + __func__, recorded, bytes); + return -ENOSPC; + } + + /* mark remaining addresses invalid (just for safety) */ + while (i < desc->nr_addrs) + desc->address[i++] = XENCOMM_INVALID; + + desc->magic = XENCOMM_MAGIC; + + return 0; +} + +static struct xencomm_desc * +xencomm_alloc(gfp_t gfp_mask) +{ + struct xencomm_desc *desc; + + desc = (struct xencomm_desc *)__get_free_page(gfp_mask); + if (desc == NULL) + panic("%s: page allocation failed\n", __func__); + + desc->nr_addrs = (PAGE_SIZE - sizeof(struct xencomm_desc)) / + sizeof(*desc->address); + + return desc; +} + +void +xencomm_free(struct xencomm_handle *desc) +{ + if (desc) + free_page((unsigned long)__va(desc)); +} + +int +xencomm_create(void *buffer, unsigned long bytes, + struct xencomm_handle **ret, gfp_t gfp_mask) +{ + struct xencomm_desc *desc; + struct xencomm_handle *handle; + int rc; + + if (xencomm_debug) + printk("%s: %p[%ld]\n", __func__, buffer, bytes); + + if (buffer == NULL || bytes == 0) { + *ret = (struct xencomm_handle *)NULL; + return 0; + } + + desc = xencomm_alloc(gfp_mask); + if (!desc) { + printk("%s failure\n", "xencomm_alloc"); + return -ENOMEM; + } + handle = (struct xencomm_handle *)__pa(desc); + + rc = xencomm_init(desc, buffer, bytes); + if (rc) { + printk("%s failure: %d\n", "xencomm_init", rc); + xencomm_free(handle); + return rc; + } + + *ret = handle; + return 0; +} + +/* "mini" routines, for stack-based communications: */ + +static void * +xencomm_alloc_mini(struct xencomm_mini *area, int *nbr_area) +{ + unsigned long base; + unsigned int pageoffset; + + while (*nbr_area >= 0) { + /* Allocate an area. */ + (*nbr_area)--; + + base = (unsigned long)(area + *nbr_area); + pageoffset = base % PAGE_SIZE; + + /* If the area does not cross a page, use it. */ + if ((PAGE_SIZE - pageoffset) >= sizeof(struct xencomm_mini)) + return &area[*nbr_area]; + } + /* No more area. 
*/ + return NULL; +} + +int +xencomm_create_mini(struct xencomm_mini *area, int *nbr_area, + void *buffer, unsigned long bytes, + struct xencomm_handle **ret) +{ + struct xencomm_desc *desc; + int rc; + unsigned long res; + + desc = xencomm_alloc_mini(area, nbr_area); + if (!desc) + return -ENOMEM; + desc->nr_addrs = XENCOMM_MINI_ADDRS; + + rc = xencomm_init(desc, buffer, bytes); + if (rc) + return rc; + + res = xencomm_vaddr_to_paddr((unsigned long)desc); + if (res == ~0UL) + return -EINVAL; + + *ret = (struct xencomm_handle*)res; + return 0; +} diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/xen/xensetup.S.orig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/arch/ia64/xen/xensetup.S.orig Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,54 @@ +/* + * Support routines for Xen + * + * Copyright (C) 2005 Dan Magenheimer <dan.magenheimer@xxxxxx> + */ + +#include <asm/processor.h> +#include <asm/asmmacro.h> + +#define isBP p3 // are we the Bootstrap Processor? + + .text +GLOBAL_ENTRY(early_xen_setup) + mov r8=ar.rsc // Initialized in head.S +(isBP) movl r9=running_on_xen;; + extr.u r8=r8,2,2;; // Extract pl fields + cmp.eq p7,p0=r8,r0 // p7: !running on xen + mov r8=1 // booleanize. +(p7) br.ret.sptk.many rp;; +(isBP) st4 [r9]=r8 + movl r10=xen_ivt;; + + mov cr.iva=r10 + +#if XSI_BASE != 0xf100000000000000UL + /* Backward compatibility. */ +(isBP) mov r2=0x600 +(isBP) movl r28=XSI_BASE;; +(isBP) break 0x1000;; +#endif + + br.ret.sptk.many rp + ;; +END(early_xen_setup) + +#include <xen/interface/xen.h> + +/* Stub for suspend. + Just force the stacked registers to be written in memory. */ +GLOBAL_ENTRY(HYPERVISOR_suspend) + alloc r20=ar.pfs,0,0,0,0 + mov r14=2 + mov r15=r12 + ;; + /* We don't want to deal with RSE. */ + flushrs + mov r2=__HYPERVISOR_sched_op + st4 [r12]=r14 + ;; + break 0x1000 + ;; + mov ar.pfs=r20 + br.ret.sptk.many b0 +END(HYPERVISOR_suspend) diff -r 47c098fdce14 -r 85a15e585061 drivers/xen/privcmd/privcmd.c.orig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/drivers/xen/privcmd/privcmd.c.orig Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,266 @@ +/****************************************************************************** + * privcmd.c + * + * Interface to privileged domain-0 commands. 
+ * + * Copyright (c) 2002-2004, K A Fraser, B Dragovic + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/swap.h> +#include <linux/smp_lock.h> +#include <linux/highmem.h> +#include <linux/pagemap.h> +#include <linux/seq_file.h> +#include <linux/kthread.h> +#include <asm/hypervisor.h> + +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/uaccess.h> +#include <asm/tlb.h> +#include <asm/hypervisor.h> +#include <xen/public/privcmd.h> +#include <xen/interface/xen.h> +#include <xen/interface/dom0_ops.h> +#include <xen/xen_proc.h> + +static struct proc_dir_entry *privcmd_intf; +static struct proc_dir_entry *capabilities_intf; + +static int privcmd_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long data) +{ + int ret = -ENOSYS; + void __user *udata = (void __user *) data; + + switch (cmd) { + case IOCTL_PRIVCMD_HYPERCALL: { + privcmd_hypercall_t hypercall; + + if (copy_from_user(&hypercall, udata, sizeof(hypercall))) + return -EFAULT; + +#if defined(__i386__) + __asm__ __volatile__ ( + "pushl %%ebx; pushl %%ecx; pushl %%edx; " + "pushl %%esi; pushl %%edi; " + "movl 8(%%eax),%%ebx ;" + "movl 16(%%eax),%%ecx ;" + "movl 24(%%eax),%%edx ;" + "movl 32(%%eax),%%esi ;" + "movl 40(%%eax),%%edi ;" + "movl (%%eax),%%eax ;" + "shll $5,%%eax ;" + "addl $hypercall_page,%%eax ;" + "call *%%eax ;" + "popl %%edi; popl %%esi; popl %%edx; " + "popl %%ecx; popl %%ebx" + : "=a" (ret) : "0" (&hypercall) : "memory" ); +#elif defined (__x86_64__) + { + long ign1, ign2, ign3; + __asm__ __volatile__ ( + "movq %8,%%r10; movq %9,%%r8;" + "shlq $5,%%rax ;" + "addq $hypercall_page,%%rax ;" + "call *%%rax" + : "=a" (ret), "=D" (ign1), + "=S" (ign2), "=d" (ign3) + : "0" ((unsigned long)hypercall.op), + "1" ((unsigned long)hypercall.arg[0]), + "2" ((unsigned long)hypercall.arg[1]), + "3" ((unsigned long)hypercall.arg[2]), + "g" ((unsigned long)hypercall.arg[3]), + "g" ((unsigned long)hypercall.arg[4]) + : "r8", "r10", "memory" ); + } +#elif defined (__ia64__) + __asm__ __volatile__ ( + ";; mov r14=%2; mov r15=%3; " + "mov r16=%4; mov r17=%5; mov r18=%6;" + "mov r2=%1; break 0x1000;; mov %0=r8 ;;" + : "=r" (ret) + : "r" (hypercall.op), + "r" (hypercall.arg[0]), + "r" (hypercall.arg[1]), + "r" (hypercall.arg[2]), + "r" (hypercall.arg[3]), + "r" (hypercall.arg[4]) + : "r14","r15","r16","r17","r18","r2","r8","memory"); +#endif + } + break; + + case IOCTL_PRIVCMD_MMAP: { +#define PRIVCMD_MMAP_SZ 32 + privcmd_mmap_t mmapcmd; + privcmd_mmap_entry_t msg[PRIVCMD_MMAP_SZ]; + privcmd_mmap_entry_t __user *p; + int i, rc; + + if (!is_initial_xendomain()) + return -EPERM; + + if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) + return -EFAULT; + + p = mmapcmd.entry; + + for (i = 0; i < mmapcmd.num; + i += PRIVCMD_MMAP_SZ, p += PRIVCMD_MMAP_SZ) { + int j, n = ((mmapcmd.num-i)>PRIVCMD_MMAP_SZ)? 
+ PRIVCMD_MMAP_SZ:(mmapcmd.num-i); + + if (copy_from_user(&msg, p, + n*sizeof(privcmd_mmap_entry_t))) + return -EFAULT; + + for (j = 0; j < n; j++) { + struct vm_area_struct *vma = + find_vma( current->mm, msg[j].va ); + + if (!vma) + return -EINVAL; + + if (msg[j].va > PAGE_OFFSET) + return -EINVAL; + + if ((msg[j].va + (msg[j].npages << PAGE_SHIFT)) + > vma->vm_end ) + return -EINVAL; + + if ((rc = direct_remap_pfn_range( + vma, + msg[j].va&PAGE_MASK, + msg[j].mfn, + msg[j].npages<<PAGE_SHIFT, + vma->vm_page_prot, + mmapcmd.dom)) < 0) + return rc; + } + } + ret = 0; + } + break; + + case IOCTL_PRIVCMD_MMAPBATCH: { + privcmd_mmapbatch_t m; + struct vm_area_struct *vma = NULL; + xen_pfn_t __user *p; + unsigned long addr, mfn; + int i; + + if (!is_initial_xendomain()) + return -EPERM; + + if (copy_from_user(&m, udata, sizeof(m))) { + ret = -EFAULT; + goto batch_err; + } + + if (m.dom == DOMID_SELF) { + ret = -EINVAL; + goto batch_err; + } + + vma = find_vma(current->mm, m.addr); + if (!vma) { + ret = -EINVAL; + goto batch_err; + } + + if (m.addr > PAGE_OFFSET) { + ret = -EFAULT; + goto batch_err; + } + + if ((m.addr + (m.num<<PAGE_SHIFT)) > vma->vm_end) { + ret = -EFAULT; + goto batch_err; + } + + p = m.arr; + addr = m.addr; + for (i = 0; i < m.num; i++, addr += PAGE_SIZE, p++) { + if (get_user(mfn, p)) + return -EFAULT; + + ret = direct_remap_pfn_range(vma, addr & PAGE_MASK, + mfn, PAGE_SIZE, + vma->vm_page_prot, m.dom); + if (ret < 0) + put_user(0xF0000000 | mfn, p); + } + + ret = 0; + break; + + batch_err: + printk("batch_err ret=%d vma=%p addr=%lx " + "num=%d arr=%p %lx-%lx\n", + ret, vma, (unsigned long)m.addr, m.num, m.arr, + vma ? vma->vm_start : 0, vma ? vma->vm_end : 0); + break; + } + break; + + default: + ret = -EINVAL; + break; + } + + return ret; +} + +#ifndef HAVE_ARCH_PRIVCMD_MMAP +static int privcmd_mmap(struct file * file, struct vm_area_struct * vma) +{ + /* DONTCOPY is essential for Xen as copy_page_range is broken. */ + vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP; + + return 0; +} +#endif + +static struct file_operations privcmd_file_ops = { + .ioctl = privcmd_ioctl, + .mmap = privcmd_mmap, +}; + +static int capabilities_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len = 0; + *page = 0; + + if (is_initial_xendomain()) + len = sprintf( page, "control_d\n" ); + + *eof = 1; + return len; +} + +static int __init privcmd_init(void) +{ + if (!is_running_on_xen()) + return -ENODEV; + + privcmd_intf = create_xen_proc_entry("privcmd", 0400); + if (privcmd_intf != NULL) + privcmd_intf->proc_fops = &privcmd_file_ops; + + capabilities_intf = create_xen_proc_entry("capabilities", 0400 ); + if (capabilities_intf != NULL) + capabilities_intf->read_proc = capabilities_read; + + return 0; +} + +__initcall(privcmd_init); diff -r 47c098fdce14 -r 85a15e585061 include/asm-ia64/hypervisor.h.orig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/include/asm-ia64/hypervisor.h.orig Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,218 @@ +/****************************************************************************** + * hypervisor.h + * + * Linux-specific hypervisor handling. 
+ * + * Copyright (c) 2002-2004, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __HYPERVISOR_H__ +#define __HYPERVISOR_H__ + +#ifdef CONFIG_XEN +extern int running_on_xen; +#define is_running_on_xen() (running_on_xen) +#else /* CONFIG_XEN */ +# ifdef CONFIG_VMX_GUEST +# define is_running_on_xen() (1) +# else /* CONFIG_VMX_GUEST */ +# define is_running_on_xen() (0) +# define HYPERVISOR_ioremap(offset, size) (offset) +# endif /* CONFIG_VMX_GUEST */ +#endif /* CONFIG_XEN */ + +#if defined(CONFIG_XEN) || defined(CONFIG_VMX_GUEST) +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/version.h> +#include <linux/errno.h> +#include <xen/interface/xen.h> +#include <xen/interface/dom0_ops.h> +#include <xen/interface/event_channel.h> +#include <xen/interface/physdev.h> +#include <xen/interface/sched.h> +#include <asm/hypercall.h> +#include <asm/ptrace.h> +#include <asm/page.h> + +extern shared_info_t *HYPERVISOR_shared_info; +extern start_info_t *xen_start_info; + +void force_evtchn_callback(void); + +#ifndef CONFIG_VMX_GUEST +/* Turn jiffies into Xen system time. XXX Implement me. 
*/ +#define jiffies_to_st(j) 0 + +static inline int +HYPERVISOR_yield( + void) +{ + int rc = HYPERVISOR_sched_op(SCHEDOP_yield, NULL); + + if (rc == -ENOSYS) + rc = HYPERVISOR_sched_op_compat(SCHEDOP_yield, 0); + + return rc; +} + +static inline int +HYPERVISOR_block( + void) +{ + int rc = HYPERVISOR_sched_op(SCHEDOP_block, NULL); + + if (rc == -ENOSYS) + rc = HYPERVISOR_sched_op_compat(SCHEDOP_block, 0); + + return rc; +} + +static inline int +HYPERVISOR_shutdown( + unsigned int reason) +{ + struct sched_shutdown sched_shutdown = { + .reason = reason + }; + + int rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown); + + if (rc == -ENOSYS) + rc = HYPERVISOR_sched_op_compat(SCHEDOP_shutdown, reason); + + return rc; +} + +static inline int +HYPERVISOR_poll( + evtchn_port_t *ports, unsigned int nr_ports, u64 timeout) +{ + struct sched_poll sched_poll = { + .nr_ports = nr_ports, + .timeout = jiffies_to_st(timeout) + }; + + int rc; + + set_xen_guest_handle(sched_poll.ports, ports); + rc = HYPERVISOR_sched_op(SCHEDOP_poll, &sched_poll); + if (rc == -ENOSYS) + rc = HYPERVISOR_sched_op_compat(SCHEDOP_yield, 0); + + return rc; +} + +#include <asm/hypercall.h> + +// for drivers/xen/privcmd/privcmd.c +#define machine_to_phys_mapping 0 +struct vm_area_struct; +int direct_remap_pfn_range(struct vm_area_struct *vma, + unsigned long address, + unsigned long mfn, + unsigned long size, + pgprot_t prot, + domid_t domid); +struct file; +int privcmd_mmap(struct file * file, struct vm_area_struct * vma); +#define HAVE_ARCH_PRIVCMD_MMAP + +// for drivers/xen/balloon/balloon.c +#ifdef CONFIG_XEN_SCRUB_PAGES +#define scrub_pages(_p,_n) memset((void *)(_p), 0, (_n) << PAGE_SHIFT) +#else +#define scrub_pages(_p,_n) ((void)0) +#endif +#define pte_mfn(_x) pte_pfn(_x) +#define phys_to_machine_mapping_valid(_x) (1) + +#endif /* !CONFIG_VMX_GUEST */ + +#define __pte_ma(_x) ((pte_t) {(_x)}) /* unmodified use */ +#define pfn_pte_ma(_x,_y) __pte_ma(0) /* unmodified use */ + +#ifndef CONFIG_VMX_GUEST +int __xen_create_contiguous_region(unsigned long vstart, unsigned int order, unsigned int address_bits); +static inline int +xen_create_contiguous_region(unsigned long vstart, + unsigned int order, unsigned int address_bits) +{ + int ret = 0; + if (is_running_on_xen()) { + ret = __xen_create_contiguous_region(vstart, order, + address_bits); + } + return ret; +} + +void __xen_destroy_contiguous_region(unsigned long vstart, unsigned int order); +static inline void +xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) +{ + if (is_running_on_xen()) + __xen_destroy_contiguous_region(vstart, order); +} + +#endif /* !CONFIG_VMX_GUEST */ + +// for netfront.c, netback.c +#define MULTI_UVMFLAGS_INDEX 0 //XXX any value + +static inline void +MULTI_update_va_mapping( + multicall_entry_t *mcl, unsigned long va, + pte_t new_val, unsigned long flags) +{ + mcl->op = __HYPERVISOR_update_va_mapping; + mcl->result = 0; +} + +static inline void +MULTI_grant_table_op(multicall_entry_t *mcl, unsigned int cmd, + void *uop, unsigned int count) +{ + mcl->op = __HYPERVISOR_grant_table_op; + mcl->args[0] = cmd; + mcl->args[1] = (unsigned long)uop; + mcl->args[2] = count; +} + +// for debug +asmlinkage int xprintk(const char *fmt, ...); +#define xprintd(fmt, ...) 
xprintk("%s:%d " fmt, __func__, __LINE__, \ + ##__VA_ARGS__) + +#endif /* CONFIG_XEN || CONFIG_VMX_GUEST */ + +#ifdef CONFIG_XEN_PRIVILEGED_GUEST +#define is_initial_xendomain() (xen_start_info->flags & SIF_INITDOMAIN) +#else +#define is_initial_xendomain() 0 +#endif + +#endif /* __HYPERVISOR_H__ */ diff -r 47c098fdce14 -r 85a15e585061 include/asm-ia64/xen/xcom_hcall.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/include/asm-ia64/xen/xcom_hcall.h Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2006 Tristan Gingold <tristan.gingold@xxxxxxxx>, Bull SAS + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_XENCOMM_HCALL_H_ +#define _LINUX_XENCOMM_HCALL_H_ + +/* These function creates inline descriptor for the parameters and + calls the corresponding xencomm_arch_hypercall_X. + Architectures should defines HYPERVISOR_xxx as xencomm_hypercall_xxx unless + they want to use their own wrapper. */ +extern int xencomm_hypercall_console_io(int cmd, int count, char *str); + +extern int xencomm_hypercall_event_channel_op(int cmd, void *op); + +extern int xencomm_hypercall_xen_version(int cmd, void *arg); + +extern int xencomm_hypercall_physdev_op(int cmd, void *op); + +extern int xencomm_hypercall_grant_table_op(unsigned int cmd, void *op, + unsigned int count); + +extern int xencomm_hypercall_sched_op(int cmd, void *arg); + +extern int xencomm_hypercall_multicall(void *call_list, int nr_calls); + +extern int xencomm_hypercall_callback_op(int cmd, void *arg); + +extern int xencomm_hypercall_memory_op(unsigned int cmd, void *arg); + +extern unsigned long xencomm_hypercall_hvm_op(int cmd, void *arg); + +extern int xencomm_hypercall_suspend(unsigned long srec); + +/* Using mini xencomm. */ +extern int xencomm_mini_hypercall_console_io(int cmd, int count, char *str); + +extern int xencomm_mini_hypercall_event_channel_op(int cmd, void *op); + +extern int xencomm_mini_hypercall_xen_version(int cmd, void *arg); + +extern int xencomm_mini_hypercall_physdev_op(int cmd, void *op); + +extern int xencomm_mini_hypercall_grant_table_op(unsigned int cmd, void *op, + unsigned int count); + +extern int xencomm_mini_hypercall_sched_op(int cmd, void *arg); + +extern int xencomm_mini_hypercall_multicall(void *call_list, int nr_calls); + +extern int xencomm_mini_hypercall_callback_op(int cmd, void *arg); + +extern int xencomm_mini_hypercall_memory_op(unsigned int cmd, void *arg); + +/* For privcmd. Locally declare argument type to avoid include storm. 
+ Type coherency will be checked within privcmd.c */ +struct privcmd_hypercall; +extern int privcmd_hypercall(struct privcmd_hypercall *hypercall); + +#endif /* _LINUX_XENCOMM_HCALL_H_ */ diff -r 47c098fdce14 -r 85a15e585061 include/asm-ia64/xen/xencomm.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/include/asm-ia64/xen/xencomm.h Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2006 Hollis Blanchard <hollisb@xxxxxxxxxx>, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_XENCOMM_H_ +#define _LINUX_XENCOMM_H_ + +#include <xen/interface/xencomm.h> + +#define XENCOMM_MINI_ADDRS 3 +struct xencomm_mini { + struct xencomm_desc _desc; + uint64_t address[XENCOMM_MINI_ADDRS]; +}; + +/* To avoid additionnal virt to phys conversion, an opaque structure is + presented. */ +struct xencomm_handle; + +extern int xencomm_create(void *buffer, unsigned long bytes, + struct xencomm_handle **desc, gfp_t type); +extern void xencomm_free(struct xencomm_handle *desc); + +extern int xencomm_create_mini(struct xencomm_mini *area, int *nbr_area, + void *buffer, unsigned long bytes, + struct xencomm_handle **ret); + +/* Translate virtual address to physical address. */ +extern unsigned long xencomm_vaddr_to_paddr(unsigned long vaddr); + +/* Inline version. To be used only on linear space (kernel space). */ +static inline struct xencomm_handle * +xencomm_create_inline(void *buffer) +{ + unsigned long paddr; + + paddr = xencomm_vaddr_to_paddr((unsigned long)buffer); + return (struct xencomm_handle *)(paddr | XENCOMM_INLINE_FLAG); +} + +#define xen_guest_handle(hnd) ((hnd).p) + +#endif /* _LINUX_XENCOMM_H_ */ diff -r 47c098fdce14 -r 85a15e585061 lib/Makefile.orig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/Makefile.orig Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,68 @@ +# +# Makefile for some libs needed in the kernel. 
+# + +lib-y := errno.o ctype.o string.o vsprintf.o cmdline.o \ + bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \ + idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \ + sha1.o + +lib-$(CONFIG_SMP) += cpumask.o + +lib-y += kobject.o kref.o kobject_uevent.o klist.o + +obj-y += sort.o parser.o halfmd4.o iomap_copy.o debug_locks.o + +ifeq ($(CONFIG_DEBUG_KOBJECT),y) +CFLAGS_kobject.o += -DDEBUG +CFLAGS_kobject_uevent.o += -DDEBUG +endif + +obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o +obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o +lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o +lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o +lib-$(CONFIG_SEMAPHORE_SLEEPERS) += semaphore-sleepers.o +lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o +lib-$(CONFIG_GENERIC_HWEIGHT) += hweight.o +obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o +obj-$(CONFIG_PLIST) += plist.o +obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o + +ifneq ($(CONFIG_HAVE_DEC_LOCK),y) + lib-y += dec_and_lock.o +endif + +obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o +obj-$(CONFIG_CRC16) += crc16.o +obj-$(CONFIG_CRC32) += crc32.o +obj-$(CONFIG_LIBCRC32C) += libcrc32c.o +obj-$(CONFIG_GENERIC_IOMAP) += iomap.o +obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o + +obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ +obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ +obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ + +obj-$(CONFIG_TEXTSEARCH) += textsearch.o +obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o +obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o +obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o +obj-$(CONFIG_SMP) += percpu_counter.o +obj-$(CONFIG_AUDIT_GENERIC) += audit.o + +obj-$(CONFIG_SWIOTLB) += swiotlb.o +ifneq ($(CONFIG_XEN_IA64_DOM0_NON_VP),y) +swiotlb-$(CONFIG_XEN) := ../arch/i386/kernel/swiotlb.o +endif + +hostprogs-y := gen_crc32table +clean-files := crc32table.h + +$(obj)/crc32.o: $(obj)/crc32table.h + +quiet_cmd_crc32 = GEN $@ + cmd_crc32 = $< > $@ + +$(obj)/crc32table.h: $(obj)/gen_crc32table + $(call cmd,crc32)
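
A quick aside before the hypervisor patch (this sketch is not part of either patch, just an illustration): the xencomm changes above all boil down to handing Xen guest-physical addresses rather than kernel virtual ones. The simplest case is xencomm_create_inline() in include/asm-ia64/xen/xencomm.h, where a buffer in the kernel linear mapping is converted with xencomm_vaddr_to_paddr() and tagged with XENCOMM_INLINE_FLAG, so no separate descriptor is needed. Below is a small stand-alone C sketch of that encoding; the flag value and the address conversion are simplified stand-ins for the real definitions in xen/interface/xencomm.h, so treat it purely as an illustration of the handle layout:

/*
 * Stand-alone illustration of the xencomm "inline" handle encoding.
 * DEMO_XENCOMM_INLINE_FLAG and demo_vaddr_to_paddr() are assumed
 * stand-ins for XENCOMM_INLINE_FLAG and xencomm_vaddr_to_paddr().
 */
#include <stdio.h>
#include <stdint.h>

#define DEMO_XENCOMM_INLINE_FLAG (1ULL << 63)	/* assumed flag bit */

/* Stand-in for xencomm_vaddr_to_paddr(): an identity conversion is
 * enough to demonstrate how the handle is built. */
static uint64_t demo_vaddr_to_paddr(const void *vaddr)
{
	return (uint64_t)(uintptr_t)vaddr;
}

/* Same shape as xencomm_create_inline(): physical address | flag. */
static uint64_t demo_create_inline(const void *buffer)
{
	return demo_vaddr_to_paddr(buffer) | DEMO_XENCOMM_INLINE_FLAG;
}

int main(void)
{
	char arg[32] = "hypercall argument";
	uint64_t handle = demo_create_inline(arg);

	/* The receiving side tests the flag and masks it off to recover
	 * the physical address of the contiguous buffer. */
	if (handle & DEMO_XENCOMM_INLINE_FLAG)
		printf("inline handle -> paddr 0x%llx\n",
		       (unsigned long long)(handle & ~DEMO_XENCOMM_INLINE_FLAG));
	return 0;
}

For buffers that can't be passed as one physical range, xencomm_create() builds a descriptor (a list of page addresses), and xencomm_create_mini() builds the same kind of descriptor in a caller-supplied struct xencomm_mini array so no allocation is needed. The hypervisor-side patch follows:
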
--- xen/arch/ia64/Rules.mk Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/Rules.mk Wed Oct 11 16:10:40 2006 -0400 @@ -5,6 +5,8 @@ HAS_VGA := y HAS_VGA := y VALIDATE_VT ?= n no_warns ?= n +xen_ia64_expose_p2m ?= y +xen_ia64_pervcpu_vhpt ?= y ifneq ($(COMPILE_ARCH),$(TARGET_ARCH)) CROSS_COMPILE ?= /usr/local/sp_env/v2.2.5/i686/bin/ia64-unknown-linux- @@ -36,6 +38,12 @@ ifeq ($(VALIDATE_VT),y) ifeq ($(VALIDATE_VT),y) CFLAGS += -DVALIDATE_VT endif +ifeq ($(xen_ia64_expose_p2m),y) +CFLAGS += -DCONFIG_XEN_IA64_EXPOSE_P2M +endif +ifeq ($(xen_ia64_pervcpu_vhpt),y) +CFLAGS += -DCONFIG_XEN_IA64_PERVCPU_VHPT +endif ifeq ($(no_warns),y) CFLAGS += -Wa,--fatal-warnings -Werror -Wno-uninitialized endif --- xen/arch/ia64/asm-offsets.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/asm-offsets.c Wed Oct 11 16:10:40 2006 -0400 @@ -37,6 +37,8 @@ void foo(void) DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET, offsetof (struct ia64_mca_cpu, init_stack)); BLANK(); + DEFINE(VCPU_VTM_OFFSET_OFS, offsetof(struct vcpu, arch.arch_vmx.vtm.vtm_offset)); + DEFINE(VCPU_VRR0_OFS, offsetof(struct vcpu, arch.arch_vmx.vrr[0])); #ifdef VTI_DEBUG DEFINE(IVT_CUR_OFS, offsetof(struct vcpu, arch.arch_vmx.ivt_current)); DEFINE(IVT_DBG_OFS, offsetof(struct vcpu, arch.arch_vmx.ivt_debug)); --- xen/arch/ia64/linux-xen/sal.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/linux-xen/sal.c Wed Oct 11 16:10:40 2006 -0400 @@ -16,8 +16,10 @@ #ifdef XEN #include <linux/smp.h> +#include <asm/hw_irq.h> #include <xen/lib.h> #endif +#include <asm/delay.h> #include <asm/page.h> #include <asm/sal.h> #include <asm/pal.h> @@ -218,6 +220,77 @@ static void __init sal_desc_ap_wakeup(vo static void __init sal_desc_ap_wakeup(void *p) { } #endif +/* + * HP rx5670 firmware polls for interrupts during SAL_CACHE_FLUSH by reading + * cr.ivr, but it never writes cr.eoi. This leaves any interrupt marked as + * "in-service" and masks other interrupts of equal or lower priority. + * + * HP internal defect reports: F1859, F2775, F3031. + */ +static int sal_cache_flush_drops_interrupts; + +static void __init +check_sal_cache_flush (void) +{ + unsigned long flags, itv; + int cpu; + u64 vector; + + cpu = get_cpu(); + local_irq_save(flags); + + /* + * Schedule a timer interrupt, wait until it's reported, and see if + * SAL_CACHE_FLUSH drops it. 
+ */ + itv = ia64_get_itv(); + BUG_ON((itv & (1 << 16)) == 0); + + ia64_set_itv(IA64_TIMER_VECTOR); + ia64_set_itm(ia64_get_itc() + 1000); + + while (!ia64_get_irr(IA64_TIMER_VECTOR)) + cpu_relax(); + + ia64_sal_cache_flush(3); + + if (ia64_get_irr(IA64_TIMER_VECTOR)) { + vector = ia64_get_ivr(); + ia64_eoi(); + } else { + sal_cache_flush_drops_interrupts = 1; + printk(KERN_ERR "SAL: SAL_CACHE_FLUSH drops interrupts; " + "PAL_CACHE_FLUSH will be used instead\n"); + ia64_eoi(); + } + + ia64_set_itv(itv); + local_irq_restore(flags); + put_cpu(); +} + +s64 +ia64_sal_cache_flush (u64 cache_type) +{ + struct ia64_sal_retval isrv; + + if (sal_cache_flush_drops_interrupts) { + unsigned long flags; + u64 progress; + s64 rc; + + progress = 0; + local_irq_save(flags); + rc = ia64_pal_cache_flush(cache_type, + PAL_CACHE_FLUSH_INVALIDATE, &progress, NULL); + local_irq_restore(flags); + return rc; + } + + SAL_CALL(isrv, SAL_CACHE_FLUSH, cache_type, 0, 0, 0, 0, 0, 0); + return isrv.status; +} + void __init ia64_sal_init (struct ia64_sal_systab *systab) { @@ -271,6 +344,8 @@ ia64_sal_init (struct ia64_sal_systab *s } p += SAL_DESC_SIZE(*p); } + + check_sal_cache_flush(); } int --- xen/arch/ia64/linux-xen/unaligned.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/linux-xen/unaligned.c Wed Oct 11 16:10:40 2006 -0400 @@ -304,7 +304,7 @@ set_rse_reg (struct pt_regs *regs, unsig unsigned long *bsp, *bspstore, *addr, *rnat_addr; unsigned long *kbs = (void *) current + IA64_RBS_OFFSET; unsigned long nat_mask; - unsigned long old_rsc,new_rsc; + unsigned long old_rsc, new_rsc, psr; unsigned long rnat; long sof = (regs->cr_ifs) & 0x7f; long sor = 8 * ((regs->cr_ifs >> 14) & 0xf); @@ -321,16 +321,17 @@ set_rse_reg (struct pt_regs *regs, unsig ridx = rotate_reg(sor, rrb_gr, ridx); old_rsc=ia64_get_rsc(); - new_rsc=old_rsc&(~0x3); + /* put RSC to lazy mode, and set loadrs 0 */ + new_rsc = old_rsc & (~0x3fff0003); ia64_set_rsc(new_rsc); - + bsp = kbs + (regs->loadrs >> 19); /* 16 + 3 */ + + addr = ia64_rse_skip_regs(bsp, -sof + ridx); + nat_mask = 1UL << ia64_rse_slot_num(addr); + rnat_addr = ia64_rse_rnat_addr(addr); + + local_irq_save(psr); bspstore = (unsigned long*)ia64_get_bspstore(); - bsp =kbs + (regs->loadrs >> 19);//16+3 - - addr = ia64_rse_skip_regs(bsp, -sof + ridx); - nat_mask = 1UL << ia64_rse_slot_num(addr); - rnat_addr = ia64_rse_rnat_addr(addr); - if(addr >= bspstore){ ia64_flushrs (); @@ -358,6 +359,7 @@ set_rse_reg (struct pt_regs *regs, unsig ia64_set_bspstore (bspstore); ia64_set_rnat(rnat); } + local_irq_restore(psr); ia64_set_rsc(old_rsc); } --- xen/arch/ia64/vmx/Makefile Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/vmx/Makefile Wed Oct 11 16:10:40 2006 -0400 @@ -17,3 +17,4 @@ obj-y += vmx_virt.o obj-y += vmx_virt.o obj-y += vmx_vsa.o obj-y += vtlb.o +obj-y += optvfault.o --- xen/arch/ia64/vmx/mmio.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/vmx/mmio.c Wed Oct 11 16:10:40 2006 -0400 @@ -428,7 +428,7 @@ void emulate_io_inst(VCPU *vcpu, u64 pad IA64_BUNDLE bundle; int slot, dir=0, inst_type; size_t size; - u64 data, value,post_update, slot1a, slot1b, temp; + u64 data, post_update, slot1a, slot1b, temp; INST64 inst; regs=vcpu_regs(vcpu); if (IA64_RETRY == __vmx_get_domain_bundle(regs->cr_iip, &bundle)) { @@ -454,7 +454,6 @@ void emulate_io_inst(VCPU *vcpu, u64 pad vcpu_get_gr_nat(vcpu,inst.M4.r2,&data); }else if((inst.M1.x6>>2)<0xb){ // read dir=IOREQ_READ; - vcpu_get_gr_nat(vcpu,inst.M1.r1,&value); } } // Integer Load + Reg update @@ -462,7 +461,6 @@ void emulate_io_inst(VCPU *vcpu, 
u64 pad inst_type = SL_INTEGER; dir = IOREQ_READ; //write size = (inst.M2.x6&0x3); - vcpu_get_gr_nat(vcpu,inst.M2.r1,&value); vcpu_get_gr_nat(vcpu,inst.M2.r3,&temp); vcpu_get_gr_nat(vcpu,inst.M2.r2,&post_update); temp += post_update; @@ -485,7 +483,6 @@ void emulate_io_inst(VCPU *vcpu, u64 pad }else if((inst.M3.x6>>2)<0xb){ // read dir=IOREQ_READ; - vcpu_get_gr_nat(vcpu,inst.M3.r1,&value); vcpu_get_gr_nat(vcpu,inst.M3.r3,&temp); post_update = (inst.M3.i<<7)+inst.M3.imm7; if(inst.M3.s) @@ -597,13 +594,6 @@ void emulate_io_inst(VCPU *vcpu, u64 pad mmio_access(vcpu, padr, &data, size, ma, dir); }else{ mmio_access(vcpu, padr, &data, size, ma, dir); - if(size==1) - data = (value & 0xffffffffffffff00U) | (data & 0xffU); - else if(size==2) - data = (value & 0xffffffffffff0000U) | (data & 0xffffU); - else if(size==4) - data = (value & 0xffffffff00000000U) | (data & 0xffffffffU); - if(inst_type==SL_INTEGER){ //gp vcpu_set_gr(vcpu,inst.M1.r1,data,0); }else{ --- xen/arch/ia64/vmx/vlsapic.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/vmx/vlsapic.c Wed Oct 11 16:10:40 2006 -0400 @@ -298,7 +298,7 @@ static void update_vhpi(VCPU *vcpu, int // TODO: Add support for XENO if ( VCPU(vcpu,vac).a_int ) { ia64_call_vsa ( PAL_VPS_SET_PENDING_INTERRUPT, - (uint64_t) &(vcpu->arch.privregs), 0, 0,0,0,0,0); + (uint64_t)vcpu->arch.privregs, 0, 0, 0, 0, 0, 0); } } @@ -683,9 +683,5 @@ void vhpi_detection(VCPU *vcpu) void vmx_vexirq(VCPU *vcpu) { - static uint64_t vexirq_count=0; - - vexirq_count ++; - printk("Virtual ex-irq %ld\n", vexirq_count); generate_exirq (vcpu); } --- xen/arch/ia64/vmx/vmmu.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/vmx/vmmu.c Wed Oct 11 16:10:40 2006 -0400 @@ -456,7 +456,15 @@ IA64FAULT vmx_vcpu_itr_d(VCPU *vcpu, u64 } #endif pte &= ~PAGE_FLAGS_RV_MASK; - thash_purge_entries(vcpu, va, ps); + + /* This is a bad workaround + In Linux, region 7 use 16M pagesize and is identity mapped. + VHPT page size is 16K in XEN. If purge VHPT while guest insert 16M, + it will iteratively purge VHPT 1024 times, which makes XEN/IPF very + slow. 
XEN doesn't purge VHPT + */ + if (ps != _PAGE_SIZE_16M) + thash_purge_entries(vcpu, va, ps); gpfn = (pte & _PAGE_PPN_MASK)>> PAGE_SHIFT; if (VMX_DOMAIN(vcpu) && __gpfn_is_io(vcpu->domain, gpfn)) pte |= VTLB_PTE_IO; @@ -637,37 +645,30 @@ IA64FAULT vmx_vcpu_tpa(VCPU *vcpu, UINT6 visr.ei=pt_isr.ei; visr.ir=pt_isr.ir; vpsr.val = VCPU(vcpu, vpsr); - if(vpsr.ic==0){ - visr.ni=1; - } visr.na=1; data = vtlb_lookup(vcpu, vadr, DSIDE_TLB); if(data){ if(data->p==0){ - visr.na=1; vcpu_set_isr(vcpu,visr.val); - page_not_present(vcpu, vadr); + data_page_not_present(vcpu, vadr); return IA64_FAULT; }else if(data->ma == VA_MATTR_NATPAGE){ - visr.na = 1; vcpu_set_isr(vcpu, visr.val); dnat_page_consumption(vcpu, vadr); return IA64_FAULT; }else{ *padr = ((data->ppn >> (data->ps - 12)) << data->ps) | - (vadr & (PSIZE(data->ps) - 1)); + (vadr & (PSIZE(data->ps) - 1)); return IA64_NO_FAULT; } } data = vhpt_lookup(vadr); if(data){ if(data->p==0){ - visr.na=1; vcpu_set_isr(vcpu,visr.val); - page_not_present(vcpu, vadr); + data_page_not_present(vcpu, vadr); return IA64_FAULT; }else if(data->ma == VA_MATTR_NATPAGE){ - visr.na = 1; vcpu_set_isr(vcpu, visr.val); dnat_page_consumption(vcpu, vadr); return IA64_FAULT; --- xen/arch/ia64/vmx/vmx_entry.S Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/vmx/vmx_entry.S Wed Oct 11 16:10:40 2006 -0400 @@ -669,7 +669,7 @@ 1: // re-pin mappings for guest_vhpt - mov r24=IA64_TR_PERVP_VHPT + mov r24=IA64_TR_VHPT movl r25=PAGE_KERNEL ;; or loc5 = r25,loc5 // construct PA | page properties --- xen/arch/ia64/vmx/vmx_init.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/vmx/vmx_init.c Wed Oct 11 16:10:40 2006 -0400 @@ -378,7 +378,8 @@ static void vmx_build_physmap_table(stru for (j = io_ranges[i].start; j < io_ranges[i].start + io_ranges[i].size; j += PAGE_SIZE) - __assign_domain_page(d, j, io_ranges[i].type, ASSIGN_writable); + (void)__assign_domain_page(d, j, io_ranges[i].type, + ASSIGN_writable); } /* Map normal memory below 3G */ --- xen/arch/ia64/vmx/vmx_interrupt.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/vmx/vmx_interrupt.c Wed Oct 11 16:10:40 2006 -0400 @@ -383,14 +383,29 @@ dnat_page_consumption (VCPU *vcpu, uint6 /* Deal with * Page not present vector */ -void -page_not_present(VCPU *vcpu, u64 vadr) +static void +__page_not_present(VCPU *vcpu, u64 vadr) { /* If vPSR.ic, IFA, ITIR */ set_ifa_itir_iha (vcpu, vadr, 1, 1, 0); inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR); } + +void +data_page_not_present(VCPU *vcpu, u64 vadr) +{ + __page_not_present(vcpu, vadr); +} + + +void +inst_page_not_present(VCPU *vcpu, u64 vadr) +{ + __page_not_present(vcpu, vadr); +} + + /* Deal with * Data access rights vector */ --- xen/arch/ia64/vmx/vmx_ivt.S Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/vmx/vmx_ivt.S Wed Oct 11 16:10:40 2006 -0400 @@ -772,12 +772,20 @@ ENTRY(vmx_single_step_trap) VMX_REFLECT(36) END(vmx_single_step_trap) + .global vmx_virtualization_fault_back .org vmx_ia64_ivt+0x6100 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6100 Entry 37 (size 16 bundles) Virtualization Fault ENTRY(vmx_virtualization_fault) // VMX_DBG_FAULT(37) mov r31=pr + ;; + cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24 + cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24 + (p6) br.dptk.many asm_mov_from_ar + (p7) br.dptk.many asm_mov_from_rr + ;; +vmx_virtualization_fault_back: mov r19=37 adds r16 = IA64_VCPU_CAUSE_OFFSET,r21 adds r17 = IA64_VCPU_OPCODE_OFFSET,r21 --- xen/arch/ia64/vmx/vmx_phy_mode.c Tue Oct 10 21:05:50 2006 +0100 +++ 
xen/arch/ia64/vmx/vmx_phy_mode.c Wed Oct 11 16:10:40 2006 -0400 @@ -126,10 +126,16 @@ vmx_init_all_rr(VCPU *vcpu) vmx_init_all_rr(VCPU *vcpu) { VMX(vcpu, vrr[VRN0]) = 0x38; + // enable vhpt in guest physical mode + vcpu->arch.metaphysical_rr0 |= 1; + vcpu->arch.metaphysical_saved_rr0 = vrrtomrr(vcpu, 0x38); VMX(vcpu, vrr[VRN1]) = 0x38; VMX(vcpu, vrr[VRN2]) = 0x38; VMX(vcpu, vrr[VRN3]) = 0x38; VMX(vcpu, vrr[VRN4]) = 0x38; + // enable vhpt in guest physical mode + vcpu->arch.metaphysical_rr4 |= 1; + vcpu->arch.metaphysical_saved_rr4 = vrrtomrr(vcpu, 0x38); VMX(vcpu, vrr[VRN5]) = 0x38; VMX(vcpu, vrr[VRN6]) = 0x38; VMX(vcpu, vrr[VRN7]) = 0x738; @@ -141,10 +147,8 @@ vmx_load_all_rr(VCPU *vcpu) vmx_load_all_rr(VCPU *vcpu) { unsigned long psr; - ia64_rr phy_rr; local_irq_save(psr); - /* WARNING: not allow co-exist of both virtual mode and physical * mode in same region @@ -154,24 +158,16 @@ vmx_load_all_rr(VCPU *vcpu) panic_domain(vcpu_regs(vcpu), "Unexpected domain switch in phy emul\n"); } - phy_rr.rrval = vcpu->arch.metaphysical_rr0; - //phy_rr.ps = PAGE_SHIFT; - phy_rr.ve = 1; - - ia64_set_rr((VRN0 << VRN_SHIFT), phy_rr.rrval); - ia64_dv_serialize_data(); - phy_rr.rrval = vcpu->arch.metaphysical_rr4; - //phy_rr.ps = PAGE_SHIFT; - phy_rr.ve = 1; - - ia64_set_rr((VRN4 << VRN_SHIFT), phy_rr.rrval); + ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0); + ia64_dv_serialize_data(); + ia64_set_rr((VRN4 << VRN_SHIFT), vcpu->arch.metaphysical_rr4); ia64_dv_serialize_data(); } else { ia64_set_rr((VRN0 << VRN_SHIFT), - vrrtomrr(vcpu, VMX(vcpu, vrr[VRN0]))); + vcpu->arch.metaphysical_saved_rr0); ia64_dv_serialize_data(); ia64_set_rr((VRN4 << VRN_SHIFT), - vrrtomrr(vcpu, VMX(vcpu, vrr[VRN4]))); + vcpu->arch.metaphysical_saved_rr4); ia64_dv_serialize_data(); } @@ -209,21 +205,11 @@ switch_to_physical_rid(VCPU *vcpu) switch_to_physical_rid(VCPU *vcpu) { UINT64 psr; - ia64_rr phy_rr, mrr; - /* Save original virtual mode rr[0] and rr[4] */ psr=ia64_clear_ic(); - phy_rr.rrval = vcpu->domain->arch.metaphysical_rr0; - mrr.rrval = ia64_get_rr(VRN0 << VRN_SHIFT); - phy_rr.ps = mrr.ps; - phy_rr.ve = 1; - ia64_set_rr(VRN0<<VRN_SHIFT, phy_rr.rrval); - ia64_srlz_d(); - phy_rr.rrval = vcpu->domain->arch.metaphysical_rr4; - mrr.rrval = ia64_get_rr(VRN4 << VRN_SHIFT); - phy_rr.ps = mrr.ps; - phy_rr.ve = 1; - ia64_set_rr(VRN4<<VRN_SHIFT, phy_rr.rrval); + ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_rr0); + ia64_srlz_d(); + ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_rr4); ia64_srlz_d(); ia64_set_psr(psr); @@ -236,15 +222,10 @@ switch_to_virtual_rid(VCPU *vcpu) switch_to_virtual_rid(VCPU *vcpu) { UINT64 psr; - ia64_rr mrr; - psr=ia64_clear_ic(); - - vcpu_get_rr(vcpu,VRN0<<VRN_SHIFT,&mrr.rrval); - ia64_set_rr(VRN0<<VRN_SHIFT, vrrtomrr(vcpu, mrr.rrval)); - ia64_srlz_d(); - vcpu_get_rr(vcpu,VRN4<<VRN_SHIFT,&mrr.rrval); - ia64_set_rr(VRN4<<VRN_SHIFT, vrrtomrr(vcpu, mrr.rrval)); + ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_saved_rr0); + ia64_srlz_d(); + ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_saved_rr4); ia64_srlz_d(); ia64_set_psr(psr); ia64_srlz_i(); --- xen/arch/ia64/vmx/vmx_process.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/vmx/vmx_process.c Wed Oct 11 16:10:40 2006 -0400 @@ -81,6 +81,7 @@ void vmx_reflect_interruption(UINT64 ifa void vmx_reflect_interruption(UINT64 ifa,UINT64 isr,UINT64 iim, UINT64 vector,REGS *regs) { + UINT64 status; VCPU *vcpu = current; UINT64 vpsr = VCPU(vcpu, vpsr); vector=vec2off[vector]; @@ -89,13 +90,23 @@ void 
vmx_reflect_interruption(UINT64 ifa } else{ // handle fpswa emulation // fp fault - if(vector == IA64_FP_FAULT_VECTOR && !handle_fpu_swa(1, regs, isr)){ - vmx_vcpu_increment_iip(vcpu); - return; + if (vector == IA64_FP_FAULT_VECTOR) { + status = handle_fpu_swa(1, regs, isr); + if (!status) { + vmx_vcpu_increment_iip(vcpu); + return; + } else if (IA64_RETRY == status) + return; } //fp trap - else if(vector == IA64_FP_TRAP_VECTOR && !handle_fpu_swa(0, regs, isr)){ - return; + else if (vector == IA64_FP_TRAP_VECTOR) { + status = handle_fpu_swa(0, regs, isr); + if (!status) + return; + else if (IA64_RETRY == status) { + vmx_vcpu_decrement_iip(vcpu); + return; + } } } VCPU(vcpu,isr)=isr; @@ -187,7 +198,7 @@ void leave_hypervisor_tail(struct pt_reg { struct domain *d = current->domain; struct vcpu *v = current; - int callback_irq; + // FIXME: Will this work properly if doing an RFI??? if (!is_idle_domain(d) ) { // always comes from guest // struct pt_regs *user_regs = vcpu_regs(current); @@ -215,11 +226,14 @@ void leave_hypervisor_tail(struct pt_reg // v->arch.irq_new_pending = 1; // } - callback_irq = d->arch.hvm_domain.params[HVM_PARAM_CALLBACK_IRQ]; - if (callback_irq != 0 && local_events_need_delivery()) { - /*inject para-device call back irq*/ - v->vcpu_info->evtchn_upcall_mask = 1; - vmx_vcpu_pend_interrupt(v, callback_irq); + if (v->vcpu_id == 0) { + int callback_irq = + d->arch.hvm_domain.params[HVM_PARAM_CALLBACK_IRQ]; + if (callback_irq != 0 && local_events_need_delivery()) { + /*inject para-device call back irq*/ + v->vcpu_info->evtchn_upcall_mask = 1; + vmx_vcpu_pend_interrupt(v, callback_irq); + } } if ( v->arch.irq_new_pending ) { @@ -252,18 +266,20 @@ vmx_hpw_miss(u64 vadr , u64 vec, REGS* r vmx_hpw_miss(u64 vadr , u64 vec, REGS* regs) { IA64_PSR vpsr; - int type=ISIDE_TLB; + int type; u64 vhpt_adr, gppa, pteval, rr, itir; ISR misr; -// REGS *regs; thash_data_t *data; VCPU *v = current; -#ifdef VTLB_DEBUG - check_vtlb_sanity(vtlb); - dump_vtlb(vtlb); -#endif vpsr.val = VCPU(v, vpsr); misr.val=VMX(v,cr_isr); + + if (vec == 1) + type = ISIDE_TLB; + else if (vec == 2) + type = DSIDE_TLB; + else + panic_domain(regs, "wrong vec:%lx\n", vec); if(is_physical_mode(v)&&(!(vadr<<1>>62))){ if(vec==2){ @@ -275,11 +291,6 @@ vmx_hpw_miss(u64 vadr , u64 vec, REGS* r physical_tlb_miss(v, vadr); return IA64_FAULT; } - if(vec == 1) type = ISIDE_TLB; - else if(vec == 2) type = DSIDE_TLB; - else panic_domain(regs,"wrong vec:%lx\n",vec); - -// prepare_if_physical_mode(v); if((data=vtlb_lookup(v, vadr,type))!=0){ if (v->domain != dom0 && type == DSIDE_TLB) { @@ -298,46 +309,44 @@ vmx_hpw_miss(u64 vadr , u64 vec, REGS* r thash_vhpt_insert(v,data->page_flags, data->itir ,vadr); }else if(type == DSIDE_TLB){ + if (misr.sp) return vmx_handle_lds(regs); + if(!vhpt_enabled(v, vadr, misr.rs?RSE_REF:DATA_REF)){ if(vpsr.ic){ vcpu_set_isr(v, misr.val); alt_dtlb(v, vadr); return IA64_FAULT; } else{ - if(misr.sp){ - //TODO lds emulation - //panic("Don't support speculation load"); - return vmx_handle_lds(regs); - }else{ - nested_dtlb(v); - return IA64_FAULT; - } + nested_dtlb(v); + return IA64_FAULT; } } else{ vmx_vcpu_thash(v, vadr, &vhpt_adr); if(!guest_vhpt_lookup(vhpt_adr, &pteval)){ - if ((pteval & _PAGE_P) && - ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST)) { + if (!(pteval & _PAGE_P)) { + if (vpsr.ic) { + vcpu_set_isr(v, misr.val); + data_page_not_present(v, vadr); + return IA64_FAULT; + } else { + nested_dtlb(v); + return IA64_FAULT; + } + } + else if ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST) { vcpu_get_rr(v, 
vadr, &rr); itir = rr&(RR_RID_MASK | RR_PS_MASK); thash_purge_and_insert(v, pteval, itir, vadr, DSIDE_TLB); return IA64_NO_FAULT; - } - if(vpsr.ic){ + } else if (vpsr.ic) { vcpu_set_isr(v, misr.val); dtlb_fault(v, vadr); return IA64_FAULT; }else{ - if(misr.sp){ - //TODO lds emulation - //panic("Don't support speculation load"); - return vmx_handle_lds(regs); - }else{ - nested_dtlb(v); - return IA64_FAULT; - } + nested_dtlb(v); + return IA64_FAULT; } }else{ if(vpsr.ic){ @@ -345,22 +354,16 @@ vmx_hpw_miss(u64 vadr , u64 vec, REGS* r dvhpt_fault(v, vadr); return IA64_FAULT; }else{ - if(misr.sp){ - //TODO lds emulation - //panic("Don't support speculation load"); - return vmx_handle_lds(regs); - }else{ - nested_dtlb(v); - return IA64_FAULT; - } + nested_dtlb(v); + return IA64_FAULT; } } } }else if(type == ISIDE_TLB){ + + if (!vpsr.ic) + misr.ni = 1; if(!vhpt_enabled(v, vadr, misr.rs?RSE_REF:DATA_REF)){ - if(!vpsr.ic){ - misr.ni=1; - } vcpu_set_isr(v, misr.val); alt_itlb(v, vadr); return IA64_FAULT; @@ -372,17 +375,12 @@ vmx_hpw_miss(u64 vadr , u64 vec, REGS* r itir = rr&(RR_RID_MASK | RR_PS_MASK); thash_purge_and_insert(v, pteval, itir, vadr, ISIDE_TLB); return IA64_NO_FAULT; - } - if(!vpsr.ic){ - misr.ni=1; - } - vcpu_set_isr(v, misr.val); - itlb_fault(v, vadr); - return IA64_FAULT; + } else { + vcpu_set_isr(v, misr.val); + inst_page_not_present(v, vadr); + return IA64_FAULT; + } }else{ - if(!vpsr.ic){ - misr.ni=1; - } vcpu_set_isr(v, misr.val); ivhpt_fault(v, vadr); return IA64_FAULT; --- xen/arch/ia64/vmx/vmx_vcpu.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/vmx/vmx_vcpu.c Wed Oct 11 16:10:40 2006 -0400 @@ -172,6 +172,21 @@ IA64FAULT vmx_vcpu_increment_iip(VCPU *v } +IA64FAULT vmx_vcpu_decrement_iip(VCPU *vcpu) +{ + REGS *regs = vcpu_regs(vcpu); + IA64_PSR *ipsr = (IA64_PSR *)®s->cr_ipsr; + + if (ipsr->ri == 0) { + ipsr->ri = 2; + regs->cr_iip -= 16; + } else { + ipsr->ri--; + } + return (IA64_NO_FAULT); +} + + IA64FAULT vmx_vcpu_cover(VCPU *vcpu) { REGS *regs = vcpu_regs(vcpu); @@ -197,19 +212,32 @@ IA64FAULT vmx_vcpu_set_rr(VCPU *vcpu, UI { ia64_rr oldrr,newrr; extern void * pal_vaddr; + u64 rrval; vcpu_get_rr(vcpu, reg, &oldrr.rrval); newrr.rrval=val; if (newrr.rid >= (1 << vcpu->domain->arch.rid_bits)) panic_domain (NULL, "use of invalid rid %x\n", newrr.rid); - VMX(vcpu,vrr[reg>>61]) = val; - switch((u64)(reg>>61)) { + VMX(vcpu,vrr[reg>>VRN_SHIFT]) = val; + switch((u64)(reg>>VRN_SHIFT)) { case VRN7: vmx_switch_rr7(vrrtomrr(vcpu,val),vcpu->domain->shared_info, (void *)vcpu->arch.privregs, (void *)vcpu->arch.vhpt.hash, pal_vaddr ); break; + case VRN4: + rrval = vrrtomrr(vcpu,val); + vcpu->arch.metaphysical_saved_rr4 = rrval; + if (!is_physical_mode(vcpu)) + ia64_set_rr(reg,rrval); + break; + case VRN0: + rrval = vrrtomrr(vcpu,val); + vcpu->arch.metaphysical_saved_rr0 = rrval; + if (!is_physical_mode(vcpu)) + ia64_set_rr(reg,rrval); + break; default: ia64_set_rr(reg,vrrtomrr(vcpu,val)); break; --- xen/arch/ia64/xen/Makefile Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/Makefile Wed Oct 11 16:10:40 2006 -0400 @@ -25,5 +25,7 @@ obj-y += xentime.o obj-y += xentime.o obj-y += flushd.o obj-y += privop_stat.o +obj-y += xenpatch.o +obj-y += xencomm.o obj-$(crash_debug) += gdbstub.o --- xen/arch/ia64/xen/dom0_ops.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/dom0_ops.c Wed Oct 11 16:10:40 2006 -0400 @@ -256,6 +256,7 @@ do_dom0vp_op(unsigned long cmd, } else { ret = (ret & _PFN_MASK) >> PAGE_SHIFT;//XXX pte_pfn() } + perfc_incrc(dom0vp_phystomach); break; case 
IA64_DOM0VP_machtophys: if (!mfn_valid(arg0)) { @@ -263,6 +264,7 @@ do_dom0vp_op(unsigned long cmd, break; } ret = get_gpfn_from_mfn(arg0); + perfc_incrc(dom0vp_machtophys); break; case IA64_DOM0VP_zap_physmap: ret = dom0vp_zap_physmap(d, arg0, (unsigned int)arg1); @@ -270,6 +272,9 @@ do_dom0vp_op(unsigned long cmd, case IA64_DOM0VP_add_physmap: ret = dom0vp_add_physmap(d, arg0, arg1, (unsigned int)arg2, (domid_t)arg3); + break; + case IA64_DOM0VP_expose_p2m: + ret = dom0vp_expose_p2m(d, arg0, arg1, arg2, arg3); break; default: ret = -1; --- xen/arch/ia64/xen/domain.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/domain.c Wed Oct 11 16:10:40 2006 -0400 @@ -46,6 +46,7 @@ #include <asm/regionreg.h> #include <asm/dom_fw.h> #include <asm/shadow.h> +#include <xen/guest_access.h> unsigned long dom0_size = 512*1024*1024; unsigned long dom0_align = 64*1024*1024; @@ -58,13 +59,8 @@ extern unsigned long running_on_sim; extern char dom0_command_line[]; -/* FIXME: where these declarations should be there ? */ -extern void serial_input_init(void); +/* forward declaration */ static void init_switch_stack(struct vcpu *v); -extern void vmx_do_launch(struct vcpu *); - -/* this belongs in include/asm, but there doesn't seem to be a suitable place */ -extern struct vcpu *ia64_switch_to (struct vcpu *next_task); /* Address of vpsr.i (in fact evtchn_upcall_mask) of current vcpu. This is a Xen virtual address. */ @@ -72,6 +68,16 @@ DEFINE_PER_CPU(int *, current_psr_ic_add DEFINE_PER_CPU(int *, current_psr_ic_addr); #include <xen/sched-if.h> + +static void +ia64_disable_vhpt_walker(void) +{ + // disable VHPT. ia64_new_rr7() might cause VHPT + // fault without this because it flushes dtr[IA64_TR_VHPT] + // (VHPT_SIZE_LOG2 << 2) is just for avoid + // Reserved Register/Field fault. + ia64_set_pta(VHPT_SIZE_LOG2 << 2); +} static void flush_vtlb_for_context_switch(struct vcpu* vcpu) { @@ -96,10 +102,13 @@ static void flush_vtlb_for_context_switc if (VMX_DOMAIN(vcpu)) { // currently vTLB for vt-i domian is per vcpu. // so any flushing isn't needed. + } else if (HAS_PERVCPU_VHPT(vcpu->domain)) { + // nothing to do } else { - vhpt_flush(); + local_vhpt_flush(); } local_flush_tlb_all(); + perfc_incrc(flush_vtlb_for_context_switch); } } @@ -114,9 +123,9 @@ void schedule_tail(struct vcpu *prev) current->processor); } else { ia64_set_iva(&ia64_ivt); - ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | - VHPT_ENABLED); + ia64_disable_vhpt_walker(); load_region_regs(current); + ia64_set_pta(vcpu_pta(current)); vcpu_load_kernel_regs(current); __ia64_per_cpu_var(current_psr_i_addr) = ¤t->domain-> shared_info->vcpu_info[current->vcpu_id].evtchn_upcall_mask; @@ -130,7 +139,6 @@ void context_switch(struct vcpu *prev, s void context_switch(struct vcpu *prev, struct vcpu *next) { uint64_t spsr; - uint64_t pta; local_irq_save(spsr); @@ -167,9 +175,9 @@ void context_switch(struct vcpu *prev, s nd = current->domain; if (!is_idle_domain(nd)) { - ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | - VHPT_ENABLED); + ia64_disable_vhpt_walker(); load_region_regs(current); + ia64_set_pta(vcpu_pta(current)); vcpu_load_kernel_regs(current); vcpu_set_next_timer(current); if (vcpu_timer_expired(current)) @@ -183,14 +191,13 @@ void context_switch(struct vcpu *prev, s * walker. Then all accesses happen within idle context will * be handled by TR mapping and identity mapping. 
*/ - pta = ia64_get_pta(); - ia64_set_pta(pta & ~VHPT_ENABLED); + ia64_disable_vhpt_walker(); __ia64_per_cpu_var(current_psr_i_addr) = NULL; __ia64_per_cpu_var(current_psr_ic_addr) = NULL; } } + local_irq_restore(spsr); flush_vtlb_for_context_switch(current); - local_irq_restore(spsr); context_saved(prev); } @@ -273,6 +280,13 @@ struct vcpu *alloc_vcpu_struct(struct do if (!d->arch.is_vti) { int order; int i; + // vti domain has its own vhpt policy. + if (HAS_PERVCPU_VHPT(d)) { + if (pervcpu_vhpt_alloc(v) < 0) { + free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER); + return NULL; + } + } /* Create privregs page only if not VTi. */ order = get_order_from_shift(XMAPPEDREGS_SHIFT); @@ -315,6 +329,8 @@ struct vcpu *alloc_vcpu_struct(struct do void relinquish_vcpu_resources(struct vcpu *v) { + if (HAS_PERVCPU_VHPT(v->domain)) + pervcpu_vhpt_free(v); if (v->arch.privregs != NULL) { free_xenheap_pages(v->arch.privregs, get_order_from_shift(XMAPPEDREGS_SHIFT)); @@ -350,6 +366,11 @@ static void init_switch_stack(struct vcp memset(v->arch._thread.fph,0,sizeof(struct ia64_fpreg)*96); } +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT +static int opt_pervcpu_vhpt = 1; +integer_param("pervcpu_vhpt", opt_pervcpu_vhpt); +#endif + int arch_domain_create(struct domain *d) { int i; @@ -364,6 +385,11 @@ int arch_domain_create(struct domain *d) if (is_idle_domain(d)) return 0; +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT + d->arch.has_pervcpu_vhpt = opt_pervcpu_vhpt; + DPRINTK("%s:%d domain %d pervcpu_vhpt %d\n", + __func__, __LINE__, d->domain_id, d->arch.has_pervcpu_vhpt); +#endif d->shared_info = alloc_xenheap_pages(get_order_from_shift(XSI_SHIFT)); if (d->shared_info == NULL) goto fail_nomem; @@ -1101,9 +1127,6 @@ int construct_dom0(struct domain *d, physdev_init_dom0(d); - // FIXME: Hack for keyboard input - //serial_input_init(); - return 0; } --- xen/arch/ia64/xen/faults.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/faults.c Wed Oct 11 16:10:40 2006 -0400 @@ -228,10 +228,10 @@ void ia64_do_page_fault (unsigned long a // indicate a bad xen pointer printk("*** xen_handle_domain_access: exception table" " lookup failed, iip=0x%lx, addr=0x%lx, spinning...\n", - iip, address); + iip, address); panic_domain(regs,"*** xen_handle_domain_access: exception table" - " lookup failed, iip=0x%lx, addr=0x%lx, spinning...\n", - iip, address); + " lookup failed, iip=0x%lx, addr=0x%lx, spinning...\n", + iip, address); } return; } --- xen/arch/ia64/xen/fw_emul.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/fw_emul.c Wed Oct 11 16:10:40 2006 -0400 @@ -16,7 +16,6 @@ * */ #include <xen/config.h> -#include <xen/console.h> #include <asm/system.h> #include <asm/pgalloc.h> @@ -29,6 +28,7 @@ #include <asm/vcpu.h> #include <asm/dom_fw.h> #include <asm/uaccess.h> +#include <xen/console.h> extern unsigned long running_on_sim; --- xen/arch/ia64/xen/hypercall.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/hypercall.c Wed Oct 11 16:10:40 2006 -0400 @@ -32,7 +32,6 @@ #include <xen/event.h> #include <xen/perfc.h> -static long do_physdev_op_compat(XEN_GUEST_HANDLE(physdev_op_t) uop); static long do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg); static long do_callback_op(int cmd, XEN_GUEST_HANDLE(void) arg); @@ -54,10 +53,10 @@ const hypercall_t ia64_hypercall_table[N (hypercall_t)do_multicall, (hypercall_t)do_ni_hypercall, /* do_update_va_mapping */ (hypercall_t)do_ni_hypercall, /* do_set_timer_op */ /* 15 */ - (hypercall_t)do_event_channel_op_compat, + (hypercall_t)do_ni_hypercall, (hypercall_t)do_xen_version, 
(hypercall_t)do_console_io, - (hypercall_t)do_physdev_op_compat, + (hypercall_t)do_ni_hypercall, (hypercall_t)do_grant_table_op, /* 20 */ (hypercall_t)do_ni_hypercall, /* do_vm_assist */ (hypercall_t)do_ni_hypercall, /* do_update_va_mapping_othe */ @@ -108,19 +107,6 @@ xen_hypercall (struct pt_regs *regs) xen_hypercall (struct pt_regs *regs) { uint32_t cmd = (uint32_t)regs->r2; - struct vcpu *v = current; - - if (cmd == __HYPERVISOR_grant_table_op) { - XEN_GUEST_HANDLE(void) uop; - - v->arch.hypercall_param.va = regs->r15; - v->arch.hypercall_param.pa1 = regs->r17; - v->arch.hypercall_param.pa2 = regs->r18; - set_xen_guest_handle(uop, (void *)regs->r15); - regs->r8 = do_grant_table_op(regs->r14, uop, regs->r16); - v->arch.hypercall_param.va = 0; - return IA64_NO_FAULT; - } if (cmd < NR_hypercalls) { perfc_incra(hypercalls, cmd); @@ -133,7 +119,21 @@ xen_hypercall (struct pt_regs *regs) regs->r19); } else regs->r8 = -ENOSYS; - + + return IA64_NO_FAULT; +} + +static IA64FAULT +xen_fast_hypercall (struct pt_regs *regs) +{ + uint32_t cmd = (uint32_t)regs->r2; + switch (cmd) { + case __HYPERVISOR_ia64_fast_eoi: + regs->r8 = pirq_guest_eoi(current->domain, regs->r14); + break; + default: + regs->r8 = -ENOSYS; + } return IA64_NO_FAULT; } @@ -201,8 +201,8 @@ fw_hypercall_fpswa (struct vcpu *v) return PSCBX(v, fpswa_ret); } -static IA64FAULT -fw_hypercall (struct pt_regs *regs) +IA64FAULT +ia64_hypercall(struct pt_regs *regs) { struct vcpu *v = current; struct sal_ret_values x; @@ -213,7 +213,13 @@ fw_hypercall (struct pt_regs *regs) perfc_incra(fw_hypercall, index >> 8); switch (index) { - case FW_HYPERCALL_PAL_CALL: + case FW_HYPERCALL_XEN: + return xen_hypercall(regs); + + case FW_HYPERCALL_XEN_FAST: + return xen_fast_hypercall(regs); + + case FW_HYPERCALL_PAL_CALL: //printf("*** PAL hypercall: index=%d\n",regs->r28); //FIXME: This should call a C routine #if 0 @@ -264,7 +270,7 @@ fw_hypercall (struct pt_regs *regs) regs->r10 = y.v1; regs->r11 = y.v2; } break; - case FW_HYPERCALL_SAL_CALL: + case FW_HYPERCALL_SAL_CALL: x = sal_emulator(vcpu_get_gr(v,32),vcpu_get_gr(v,33), vcpu_get_gr(v,34),vcpu_get_gr(v,35), vcpu_get_gr(v,36),vcpu_get_gr(v,37), @@ -272,44 +278,33 @@ fw_hypercall (struct pt_regs *regs) regs->r8 = x.r8; regs->r9 = x.r9; regs->r10 = x.r10; regs->r11 = x.r11; break; - case FW_HYPERCALL_SAL_RETURN: + case FW_HYPERCALL_SAL_RETURN: if ( !test_and_set_bit(_VCPUF_down, &v->vcpu_flags) ) vcpu_sleep_nosync(v); break; - case FW_HYPERCALL_EFI_CALL: + case FW_HYPERCALL_EFI_CALL: efi_ret_value = efi_emulator (regs, &fault); if (fault != IA64_NO_FAULT) return fault; regs->r8 = efi_ret_value; break; - case FW_HYPERCALL_IPI: + case FW_HYPERCALL_IPI: fw_hypercall_ipi (regs); break; - case FW_HYPERCALL_SET_SHARED_INFO_VA: + case FW_HYPERCALL_SET_SHARED_INFO_VA: regs->r8 = domain_set_shared_info_va (regs->r28); break; - case FW_HYPERCALL_FPSWA: + case FW_HYPERCALL_FPSWA: fpswa_ret = fw_hypercall_fpswa (v); regs->r8 = fpswa_ret.status; regs->r9 = fpswa_ret.err0; regs->r10 = fpswa_ret.err1; regs->r11 = fpswa_ret.err2; break; - default: + default: printf("unknown ia64 fw hypercall %lx\n", regs->r2); regs->r8 = do_ni_hypercall(); } return IA64_NO_FAULT; -} - -IA64FAULT -ia64_hypercall (struct pt_regs *regs) -{ - unsigned long index = regs->r2; - - if (index >= FW_HYPERCALL_FIRST_ARCH) - return fw_hypercall (regs); - else - return xen_hypercall (regs); } unsigned long hypercall_create_continuation( @@ -465,28 +460,6 @@ static long do_physdev_op(int cmd, XEN_G return ret; } -/* Legacy hypercall (as of 
0x00030202). */ -static long do_physdev_op_compat(XEN_GUEST_HANDLE(physdev_op_t) uop) -{ - struct physdev_op op; - - if ( unlikely(copy_from_guest(&op, uop, 1) != 0) ) - return -EFAULT; - - return do_physdev_op(op.cmd, guest_handle_from_ptr(&uop.p->u, void)); -} - -/* Legacy hypercall (as of 0x00030202). */ -long do_event_channel_op_compat(XEN_GUEST_HANDLE(evtchn_op_t) uop) -{ - struct evtchn_op op; - - if ( unlikely(copy_from_guest(&op, uop, 1) != 0) ) - return -EFAULT; - - return do_event_channel_op(op.cmd, guest_handle_from_ptr(&uop.p->u, void)); -} - static long register_guest_callback(struct callback_register *reg) { long ret = 0; --- xen/arch/ia64/xen/mm.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/mm.c Wed Oct 11 16:10:40 2006 -0400 @@ -396,6 +396,12 @@ gmfn_to_mfn_foreign(struct domain *d, un { unsigned long pte; + // This function may be called from __gnttab_copy() + // during destruction of VT-i domain with PV-on-HVM driver. + if (unlikely(d->arch.mm.pgd == NULL)) { + if (VMX_DOMAIN(d->vcpu[0])) + return INVALID_MFN; + } pte = lookup_domain_mpa(d,gpfn << PAGE_SHIFT, NULL); if (!pte) { panic("gmfn_to_mfn_foreign: bad gpfn. spinning...\n"); @@ -702,6 +708,22 @@ void *domain_mpa_to_imva(struct domain * } #endif +unsigned long +xencomm_paddr_to_maddr(unsigned long paddr) +{ + struct vcpu *v = current; + struct domain *d = v->domain; + u64 pa; + + pa = ____lookup_domain_mpa(d, paddr); + if (pa == INVALID_MFN) { + printf("%s: called with bad memory address: 0x%lx - iip=%lx\n", + __func__, paddr, vcpu_regs(v)->cr_iip); + return 0; + } + return __va_ul((pa & _PFN_MASK) | (paddr & ~PAGE_MASK)); +} + /* Allocate a new page for domain and map it to the specified metaphysical address. */ static struct page_info * @@ -784,7 +806,7 @@ flags_to_prot (unsigned long flags) // flags: currently only ASSIGN_readonly, ASSIGN_nocache // This is called by assign_domain_mmio_page(). // So accessing to pte is racy. -void +int __assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr, unsigned long flags) @@ -800,8 +822,25 @@ __assign_domain_page(struct domain *d, old_pte = __pte(0); new_pte = pfn_pte(physaddr >> PAGE_SHIFT, __pgprot(prot)); ret_pte = ptep_cmpxchg_rel(&d->arch.mm, mpaddr, pte, old_pte, new_pte); - if (pte_val(ret_pte) == pte_val(old_pte)) + if (pte_val(ret_pte) == pte_val(old_pte)) { smp_mb(); + return 0; + } + + // dom0 tries to map real machine's I/O region, but failed. + // It is very likely that dom0 doesn't boot correctly because + // it can't access I/O. So complain here. + if ((flags & ASSIGN_nocache) && + (pte_pfn(ret_pte) != (physaddr >> PAGE_SHIFT) || + !(pte_val(ret_pte) & _PAGE_MA_UC))) + printk("%s:%d WARNING can't assign page domain 0x%p id %d\n" + "\talready assigned pte_val 0x%016lx\n" + "\tmpaddr 0x%016lx physaddr 0x%016lx flags 0x%lx\n", + __func__, __LINE__, + d, d->domain_id, pte_val(ret_pte), + mpaddr, physaddr, flags); + + return -EAGAIN; } /* get_page() and map a physical address to the specified metaphysical addr */ @@ -818,7 +857,7 @@ assign_domain_page(struct domain *d, set_gpfn_from_mfn(physaddr >> PAGE_SHIFT, mpaddr >> PAGE_SHIFT); // because __assign_domain_page() uses set_pte_rel() which has // release semantics, smp_mb() isn't needed. 
- __assign_domain_page(d, mpaddr, physaddr, ASSIGN_writable); + (void)__assign_domain_page(d, mpaddr, physaddr, ASSIGN_writable); } int @@ -841,8 +880,8 @@ ioports_permit_access(struct domain *d, lp_offset = PAGE_ALIGN(IO_SPACE_SPARSE_ENCODING(lp)); for (off = fp_offset; off <= lp_offset; off += PAGE_SIZE) - __assign_domain_page(d, IO_PORTS_PADDR + off, - __pa(ia64_iobase) + off, ASSIGN_nocache); + (void)__assign_domain_page(d, IO_PORTS_PADDR + off, + __pa(ia64_iobase) + off, ASSIGN_nocache); return 0; } @@ -911,7 +950,7 @@ assign_domain_same_page(struct domain *d //XXX optimization unsigned long end = PAGE_ALIGN(mpaddr + size); for (mpaddr &= PAGE_MASK; mpaddr < end; mpaddr += PAGE_SIZE) { - __assign_domain_page(d, mpaddr, mpaddr, flags); + (void)__assign_domain_page(d, mpaddr, mpaddr, flags); } } @@ -1035,6 +1074,7 @@ assign_domain_page_replace(struct domain put_page(old_page); } } + perfc_incrc(assign_domain_page_replace); } // caller must get_page(new_page) before @@ -1095,6 +1135,7 @@ assign_domain_page_cmpxchg_rel(struct do domain_page_flush(d, mpaddr, old_mfn, new_mfn); put_page(old_page); + perfc_incrc(assign_domain_pge_cmpxchg_rel); return 0; } @@ -1167,6 +1208,7 @@ zap_domain_page_one(struct domain *d, un try_to_clear_PGC_allocate(d, page); } put_page(page); + perfc_incrc(zap_dcomain_page_one); } unsigned long @@ -1179,6 +1221,7 @@ dom0vp_zap_physmap(struct domain *d, uns } zap_domain_page_one(d, gpfn << PAGE_SHIFT, INVALID_MFN); + perfc_incrc(dom0vp_zap_physmap); return 0; } @@ -1224,10 +1267,131 @@ dom0vp_add_physmap(struct domain* d, uns get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY); assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, flags); //don't update p2m table because this page belongs to rd, not d. + perfc_incrc(dom0vp_add_physmap); out1: put_domain(rd); return error; } + +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M +static struct page_info* p2m_pte_zero_page = NULL; + +void +expose_p2m_init(void) +{ + pte_t* pte; + + pte = pte_alloc_one_kernel(NULL, 0); + BUG_ON(pte == NULL); + smp_mb();// make contents of the page visible. + p2m_pte_zero_page = virt_to_page(pte); +} + +static int +expose_p2m_page(struct domain* d, unsigned long mpaddr, struct page_info* page) +{ + // we can't get_page(page) here. + // pte page is allocated form xen heap.(see pte_alloc_one_kernel().) + // so that the page has NULL page owner and it's reference count + // is useless. + // see also relinquish_pte()'s page_get_owner() == NULL check. + BUG_ON(page_get_owner(page) != NULL); + + return __assign_domain_page(d, mpaddr, page_to_maddr(page), + ASSIGN_readonly); +} + +// It is possible to optimize loop, But this isn't performance critical. 
+unsigned long +dom0vp_expose_p2m(struct domain* d, + unsigned long conv_start_gpfn, + unsigned long assign_start_gpfn, + unsigned long expose_size, unsigned long granule_pfn) +{ + unsigned long expose_num_pfn = expose_size >> PAGE_SHIFT; + unsigned long i; + volatile pte_t* conv_pte; + volatile pte_t* assign_pte; + + if ((expose_size % PAGE_SIZE) != 0 || + (granule_pfn % PTRS_PER_PTE) != 0 || + (expose_num_pfn % PTRS_PER_PTE) != 0 || + (conv_start_gpfn % granule_pfn) != 0 || + (assign_start_gpfn % granule_pfn) != 0 || + (expose_num_pfn % granule_pfn) != 0) { + DPRINTK("%s conv_start_gpfn 0x%016lx assign_start_gpfn 0x%016lx " + "expose_size 0x%016lx granulte_pfn 0x%016lx\n", __func__, + conv_start_gpfn, assign_start_gpfn, expose_size, granule_pfn); + return -EINVAL; + } + + if (granule_pfn != PTRS_PER_PTE) { + DPRINTK("%s granule_pfn 0x%016lx PTRS_PER_PTE 0x%016lx\n", + __func__, granule_pfn, PTRS_PER_PTE); + return -ENOSYS; + } + + // allocate pgd, pmd. + i = conv_start_gpfn; + while (i < expose_num_pfn) { + conv_pte = lookup_noalloc_domain_pte(d, (conv_start_gpfn + i) << + PAGE_SHIFT); + if (conv_pte == NULL) { + i++; + continue; + } + + assign_pte = lookup_alloc_domain_pte(d, (assign_start_gpfn << + PAGE_SHIFT) + i * sizeof(pte_t)); + if (assign_pte == NULL) { + DPRINTK("%s failed to allocate pte page\n", __func__); + return -ENOMEM; + } + + // skip to next pte page + i += PTRS_PER_PTE; + i &= ~(PTRS_PER_PTE - 1); + } + + // expose pte page + i = 0; + while (i < expose_num_pfn) { + conv_pte = lookup_noalloc_domain_pte(d, (conv_start_gpfn + i) << + PAGE_SHIFT); + if (conv_pte == NULL) { + i++; + continue; + } + + if (expose_p2m_page(d, (assign_start_gpfn << PAGE_SHIFT) + + i * sizeof(pte_t), virt_to_page(conv_pte)) < 0) { + DPRINTK("%s failed to assign page\n", __func__); + return -EAGAIN; + } + + // skip to next pte page + i += PTRS_PER_PTE; + i &= ~(PTRS_PER_PTE - 1); + } + + // expose p2m_pte_zero_page + for (i = 0; i < expose_num_pfn / PTRS_PER_PTE + 1; i++) { + assign_pte = lookup_noalloc_domain_pte(d, (assign_start_gpfn + i) << + PAGE_SHIFT); + BUG_ON(assign_pte == NULL); + if (pte_present(*assign_pte)) { + continue; + } + if (expose_p2m_page(d, (assign_start_gpfn + i) << PAGE_SHIFT, + p2m_pte_zero_page) < 0) { + DPRINTK("%s failed to assign zero-pte page\n", __func__); + return -EAGAIN; + } + } + + return 0; +} +#endif // grant table host mapping // mpaddr: host_addr: pseudo physical address @@ -1255,6 +1419,7 @@ create_grant_host_mapping(unsigned long get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY); assign_domain_page_replace(d, gpaddr, mfn, (flags & GNTMAP_readonly)? ASSIGN_readonly: ASSIGN_writable); + perfc_incrc(create_grant_host_mapping); return GNTST_okay; } @@ -1314,6 +1479,7 @@ destroy_grant_host_mapping(unsigned long BUG_ON(page_get_owner(page) == d);//try_to_clear_PGC_allocate(d, page) is not needed. 
put_page(page); + perfc_incrc(destroy_grant_host_mapping); return GNTST_okay; } @@ -1374,6 +1540,7 @@ steal_page(struct domain *d, struct page free_domheap_page(new); return -1; } + perfc_incrc(steal_page_refcount); } spin_lock(&d->page_alloc_lock); @@ -1443,6 +1610,7 @@ steal_page(struct domain *d, struct page list_del(&page->list); spin_unlock(&d->page_alloc_lock); + perfc_incrc(steal_page); return 0; } @@ -1460,6 +1628,8 @@ guest_physmap_add_page(struct domain *d, assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, ASSIGN_writable); //BUG_ON(mfn != ((lookup_domain_mpa(d, gpfn << PAGE_SHIFT) & _PFN_MASK) >> PAGE_SHIFT)); + + perfc_incrc(guest_physmap_add_page); } void @@ -1468,6 +1638,7 @@ guest_physmap_remove_page(struct domain { BUG_ON(mfn == 0);//XXX zap_domain_page_one(d, gpfn << PAGE_SHIFT, mfn); + perfc_incrc(guest_physmap_remove_page); } //XXX sledgehammer. @@ -1480,6 +1651,7 @@ domain_page_flush(struct domain* d, unsi shadow_mark_page_dirty(d, mpaddr >> PAGE_SHIFT); domain_flush_vtlb_all(); + perfc_incrc(domain_page_flush); } int --- xen/arch/ia64/xen/regionreg.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/regionreg.c Wed Oct 11 16:10:40 2006 -0400 @@ -260,7 +260,7 @@ int set_one_rr(unsigned long rr, unsigne } else if (rreg == 7) { ia64_new_rr7(vmMangleRID(newrrv.rrval),v->domain->shared_info, v->arch.privregs, v->domain->arch.shared_info_va, - __get_cpu_var(vhpt_paddr)); + vcpu_vhpt_maddr(v)); } else { set_rr(rr,newrrv.rrval); } --- xen/arch/ia64/xen/vcpu.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/vcpu.c Wed Oct 11 16:10:40 2006 -0400 @@ -1314,12 +1314,21 @@ static inline void static inline void check_xen_space_overlap (const char *func, u64 base, u64 page_size) { + /* Overlaps can occur only in region 7. + (This is an optimization to bypass all the checks). */ + if (REGION_NUMBER(base) != 7) + return; + /* Mask LSBs of base. */ base &= ~(page_size - 1); /* FIXME: ideally an MCA should be generated... */ if (range_overlap (HYPERVISOR_VIRT_START, HYPERVISOR_VIRT_END, - base, base + page_size)) + base, base + page_size) + || range_overlap(current->domain->arch.shared_info_va, + current->domain->arch.shared_info_va + + XSI_SIZE + XMAPPEDREGS_SIZE, + base, base + page_size)) panic_domain (NULL, "%s on Xen virtual space (%lx)\n", func, base); } @@ -2217,28 +2226,3 @@ IA64FAULT vcpu_ptr_i(VCPU *vcpu,UINT64 v return IA64_NO_FAULT; } - -int ia64_map_hypercall_param(void) -{ - struct vcpu *v = current; - struct domain *d = current->domain; - u64 vaddr = v->arch.hypercall_param.va & PAGE_MASK; - volatile pte_t* pte; - - if (v->arch.hypercall_param.va == 0) - return FALSE; - pte = lookup_noalloc_domain_pte(d, v->arch.hypercall_param.pa1); - if (!pte || !pte_present(*pte)) - return FALSE; - vcpu_itc_no_srlz(v, 2, vaddr, pte_val(*pte), -1UL, PAGE_SHIFT); - if (v->arch.hypercall_param.pa2) { - vaddr += PAGE_SIZE; - pte = lookup_noalloc_domain_pte(d, v->arch.hypercall_param.pa2); - if (pte && pte_present(*pte)) { - vcpu_itc_no_srlz(v, 2, vaddr, pte_val(*pte), - -1UL, PAGE_SHIFT); - } - } - ia64_srlz_d(); - return TRUE; -} --- xen/arch/ia64/xen/vhpt.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/vhpt.c Wed Oct 11 16:10:40 2006 -0400 @@ -3,6 +3,10 @@ * * Copyright (C) 2004 Hewlett-Packard Co * Dan Magenheimer <dan.magenheimer@xxxxxx> + * + * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. 
+ * per vcpu vhpt support */ #include <linux/config.h> #include <linux/kernel.h> @@ -24,18 +28,32 @@ DEFINE_PER_CPU (unsigned long, vhpt_padd DEFINE_PER_CPU (unsigned long, vhpt_paddr); DEFINE_PER_CPU (unsigned long, vhpt_pend); -void vhpt_flush(void) -{ - struct vhpt_lf_entry *v = __va(__ia64_per_cpu_var(vhpt_paddr)); +static void + __vhpt_flush(unsigned long vhpt_maddr) +{ + struct vhpt_lf_entry *v = (struct vhpt_lf_entry*)__va(vhpt_maddr); int i; for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) v->ti_tag = INVALID_TI_TAG; } -static void vhpt_erase(void) -{ - struct vhpt_lf_entry *v = (struct vhpt_lf_entry *)VHPT_ADDR; +void +local_vhpt_flush(void) +{ + __vhpt_flush(__ia64_per_cpu_var(vhpt_paddr)); +} + +static void +vcpu_vhpt_flush(struct vcpu* v) +{ + __vhpt_flush(vcpu_vhpt_maddr(v)); +} + +static void +vhpt_erase(unsigned long vhpt_maddr) +{ + struct vhpt_lf_entry *v = (struct vhpt_lf_entry*)__va(vhpt_maddr); int i; for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) { @@ -45,17 +63,6 @@ static void vhpt_erase(void) v->ti_tag = INVALID_TI_TAG; } // initialize cache too??? -} - - -static void vhpt_map(unsigned long pte) -{ - unsigned long psr; - - psr = ia64_clear_ic(); - ia64_itr(0x2, IA64_TR_VHPT, VHPT_ADDR, pte, VHPT_SIZE_LOG2); - ia64_set_psr(psr); - ia64_srlz_i(); } void vhpt_insert (unsigned long vadr, unsigned long pte, unsigned long logps) @@ -102,7 +109,7 @@ void vhpt_multiple_insert(unsigned long void vhpt_init(void) { - unsigned long paddr, pte; + unsigned long paddr; struct page_info *page; #if !VHPT_ENABLED return; @@ -122,14 +129,51 @@ void vhpt_init(void) __get_cpu_var(vhpt_pend) = paddr + (1 << VHPT_SIZE_LOG2) - 1; printf("vhpt_init: vhpt paddr=0x%lx, end=0x%lx\n", paddr, __get_cpu_var(vhpt_pend)); - pte = pte_val(pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL)); - vhpt_map(pte); - ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | - VHPT_ENABLED); - vhpt_erase(); -} - - + vhpt_erase(paddr); + // we don't enable VHPT here. + // context_switch() or schedule_tail() does it. +} + +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT +int +pervcpu_vhpt_alloc(struct vcpu *v) +{ + unsigned long vhpt_size_log2 = VHPT_SIZE_LOG2; + + v->arch.vhpt_entries = + (1UL << vhpt_size_log2) / sizeof(struct vhpt_lf_entry); + v->arch.vhpt_page = + alloc_domheap_pages(NULL, vhpt_size_log2 - PAGE_SHIFT, 0); + if (!v->arch.vhpt_page) + return -ENOMEM; + + v->arch.vhpt_maddr = page_to_maddr(v->arch.vhpt_page); + if (v->arch.vhpt_maddr & ((1 << VHPT_SIZE_LOG2) - 1)) + panic("pervcpu_vhpt_init: bad VHPT alignment!\n"); + + v->arch.pta.val = 0; // to zero reserved bits + v->arch.pta.ve = 1; // enable vhpt + v->arch.pta.size = VHPT_SIZE_LOG2; + v->arch.pta.vf = 1; // long format + //v->arch.pta.base = __va(v->arch.vhpt_maddr) >> 15; + v->arch.pta.base = VHPT_ADDR >> 15; + + vhpt_erase(v->arch.vhpt_maddr); + smp_mb(); // per vcpu vhpt may be used by another physical cpu. + return 0; +} + +void +pervcpu_vhpt_free(struct vcpu *v) +{ + free_domheap_pages(v->arch.vhpt_page, VHPT_SIZE_LOG2 - PAGE_SHIFT); +} +#endif + +// SMP: we can't assume v == current, vcpu might move to another physical cpu. +// So memory barrier is necessary. +// if we can guranttee that vcpu can run on only this physical cpu +// (e.g. vcpu == current), smp_mb() is unnecessary. void vcpu_flush_vtlb_all(struct vcpu *v) { if (VMX_DOMAIN(v)) { @@ -144,9 +188,14 @@ void vcpu_flush_vtlb_all(struct vcpu *v) /* First VCPU tlb. */ vcpu_purge_tr_entry(&PSCBX(v,dtlb)); vcpu_purge_tr_entry(&PSCBX(v,itlb)); + smp_mb(); /* Then VHPT. 
*/ - vhpt_flush(); + if (HAS_PERVCPU_VHPT(v->domain)) + vcpu_vhpt_flush(v); + else + local_vhpt_flush(); + smp_mb(); /* Then mTLB. */ local_flush_tlb_all(); @@ -155,6 +204,8 @@ void vcpu_flush_vtlb_all(struct vcpu *v) /* We could clear bit in d->domain_dirty_cpumask only if domain d in not running on this processor. There is currently no easy way to check this. */ + + perfc_incrc(vcpu_flush_vtlb_all); } static void __vcpu_flush_vtlb_all(void *vcpu) @@ -174,32 +225,59 @@ void domain_flush_vtlb_all (void) if (v->processor == cpu) vcpu_flush_vtlb_all(v); else + // SMP: it is racy to reference v->processor. + // vcpu scheduler may move this vcpu to another + // physicall processor, and change the value + // using plain store. + // We may be seeing the old value of it. + // In such case, flush_vtlb_for_context_switch() + // takes care of mTLB flush. smp_call_function_single(v->processor, __vcpu_flush_vtlb_all, v, 1, 1); } -} - -static void cpu_flush_vhpt_range (int cpu, u64 vadr, u64 addr_range) -{ - void *vhpt_base = __va(per_cpu(vhpt_paddr, cpu)); + perfc_incrc(domain_flush_vtlb_all); +} + +// Callers may need to call smp_mb() before/after calling this. +// Be carefull. +static void +__flush_vhpt_range(unsigned long vhpt_maddr, u64 vadr, u64 addr_range) +{ + void *vhpt_base = __va(vhpt_maddr); while ((long)addr_range > 0) { /* Get the VHPT entry. */ unsigned int off = ia64_thash(vadr) - VHPT_ADDR; - volatile struct vhpt_lf_entry *v; - v = vhpt_base + off; + struct vhpt_lf_entry *v = vhpt_base + off; v->ti_tag = INVALID_TI_TAG; addr_range -= PAGE_SIZE; vadr += PAGE_SIZE; } } +static void +cpu_flush_vhpt_range(int cpu, u64 vadr, u64 addr_range) +{ + __flush_vhpt_range(per_cpu(vhpt_paddr, cpu), vadr, addr_range); +} + +static void +vcpu_flush_vhpt_range(struct vcpu* v, u64 vadr, u64 addr_range) +{ + __flush_vhpt_range(vcpu_vhpt_maddr(v), vadr, addr_range); +} + void vcpu_flush_tlb_vhpt_range (u64 vadr, u64 log_range) { - cpu_flush_vhpt_range (current->processor, vadr, 1UL << log_range); + if (HAS_PERVCPU_VHPT(current->domain)) + vcpu_flush_vhpt_range(current, vadr, 1UL << log_range); + else + cpu_flush_vhpt_range(current->processor, + vadr, 1UL << log_range); ia64_ptcl(vadr, log_range << 2); ia64_srlz_i(); + perfc_incrc(vcpu_flush_tlb_vhpt_range); } void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range) @@ -229,19 +307,30 @@ void domain_flush_vtlb_range (struct dom if (!test_bit(_VCPUF_initialised, &v->vcpu_flags)) continue; - /* Invalidate VHPT entries. */ - cpu_flush_vhpt_range (v->processor, vadr, addr_range); + if (HAS_PERVCPU_VHPT(d)) { + vcpu_flush_vhpt_range(v, vadr, addr_range); + } else { + // SMP: it is racy to reference v->processor. + // vcpu scheduler may move this vcpu to another + // physicall processor, and change the value + // using plain store. + // We may be seeing the old value of it. + // In such case, flush_vtlb_for_context_switch() + /* Invalidate VHPT entries. */ + cpu_flush_vhpt_range(v->processor, vadr, addr_range); + } } // ptc.ga has release semantics. /* ptc.ga */ ia64_global_tlb_purge(vadr,vadr+addr_range,PAGE_SHIFT); + perfc_incrc(domain_flush_vtlb_range); } static void flush_tlb_vhpt_all (struct domain *d) { /* First VHPT. */ - vhpt_flush (); + local_vhpt_flush (); /* Then mTLB. */ local_flush_tlb_all (); @@ -250,7 +339,10 @@ void domain_flush_tlb_vhpt(struct domain void domain_flush_tlb_vhpt(struct domain *d) { /* Very heavy... 
*/ - on_each_cpu ((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1); + if (HAS_PERVCPU_VHPT(d) /* || VMX_DOMAIN(v) */) + on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1); + else + on_each_cpu((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1); cpus_clear (d->domain_dirty_cpumask); } --- xen/arch/ia64/xen/xen.lds.S Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/xen.lds.S Wed Oct 11 16:10:40 2006 -0400 @@ -172,6 +172,9 @@ SECTIONS . = ALIGN(PAGE_SIZE); /* make sure the gate page doesn't expose * kernel data */ + + .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) + { *(.data.read_mostly) } .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { *(.data.cacheline_aligned) } --- xen/arch/ia64/xen/xenmem.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/xenmem.c Wed Oct 11 16:10:40 2006 -0400 @@ -17,10 +17,19 @@ #include <linux/efi.h> #include <asm/pgalloc.h> -extern pgd_t frametable_pg_dir[]; - -#define frametable_pgd_offset(addr) \ - (frametable_pg_dir + (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))) +extern unsigned long frametable_pg_dir[]; + +#define FRAMETABLE_PGD_OFFSET(ADDR) \ + (frametable_pg_dir + (((ADDR) >> PGDIR_SHIFT) & \ + ((1UL << (PAGE_SHIFT - 3)) - 1))) + +#define FRAMETABLE_PMD_OFFSET(PGD, ADDR) \ + __va((unsigned long *)(PGD) + (((ADDR) >> PMD_SHIFT) & \ + ((1UL << (PAGE_SHIFT - 3)) - 1))) + +#define FRAMETABLE_PTE_OFFSET(PMD, ADDR) \ + (pte_t *)__va((unsigned long *)(PMD) + (((ADDR) >> PAGE_SHIFT) & \ + ((1UL << (PAGE_SHIFT - 3)) - 1))) static unsigned long table_size; static int opt_contig_mem = 0; @@ -29,13 +38,13 @@ boolean_param("contig_mem", opt_contig_m #define opt_contig_mem 1 #endif -struct page_info *frame_table; +struct page_info *frame_table __read_mostly; unsigned long max_page; /* * Set up the page tables. 
*/ -volatile unsigned long *mpt_table; +volatile unsigned long *mpt_table __read_mostly; void paging_init (void) @@ -72,7 +81,7 @@ paging_init (void) #ifdef CONFIG_VIRTUAL_FRAME_TABLE -static inline void * +static unsigned long alloc_dir_page(void) { unsigned long mfn = alloc_boot_pages(1, 1); @@ -82,7 +91,7 @@ alloc_dir_page(void) ++table_size; dir = mfn << PAGE_SHIFT; memset(__va(dir), 0, PAGE_SIZE); - return (void *)dir; + return dir; } static inline unsigned long @@ -100,15 +109,33 @@ alloc_table_page(unsigned long fill) return mfn; } +static void +create_page_table(unsigned long start_page, unsigned long end_page, + unsigned long fill) +{ + unsigned long address; + unsigned long *dir; + pte_t *pteptr; + + for (address = start_page; address < end_page; address += PAGE_SIZE) { + dir = FRAMETABLE_PGD_OFFSET(address); + if (!*dir) + *dir = alloc_dir_page(); + dir = FRAMETABLE_PMD_OFFSET(*dir, address); + if (!*dir) + *dir = alloc_dir_page(); + pteptr = FRAMETABLE_PTE_OFFSET(*dir, address); + if (pte_none(*pteptr)) + set_pte(pteptr, pfn_pte(alloc_table_page(fill), + PAGE_KERNEL)); + } +} + static int create_frametable_page_table (u64 start, u64 end, void *arg) { - unsigned long address, start_page, end_page; struct page_info *map_start, *map_end; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; + unsigned long start_page, end_page; map_start = frame_table + (__pa(start) >> PAGE_SHIFT); map_end = frame_table + (__pa(end) >> PAGE_SHIFT); @@ -116,23 +143,7 @@ create_frametable_page_table (u64 start, start_page = (unsigned long) map_start & PAGE_MASK; end_page = PAGE_ALIGN((unsigned long) map_end); - for (address = start_page; address < end_page; address += PAGE_SIZE) { - pgd = frametable_pgd_offset(address); - if (pgd_none(*pgd)) - pgd_populate(NULL, pgd, alloc_dir_page()); - pud = pud_offset(pgd, address); - - if (pud_none(*pud)) - pud_populate(NULL, pud, alloc_dir_page()); - pmd = pmd_offset(pud, address); - - if (pmd_none(*pmd)) - pmd_populate_kernel(NULL, pmd, alloc_dir_page()); - pte = pte_offset_kernel(pmd, address); - - if (pte_none(*pte)) - set_pte(pte, pfn_pte(alloc_table_page(0), PAGE_KERNEL)); - } + create_page_table(start_page, end_page, 0L); return 0; } @@ -140,11 +151,7 @@ create_mpttable_page_table (u64 start, u create_mpttable_page_table (u64 start, u64 end, void *arg) { unsigned long map_start, map_end; - unsigned long address, start_page, end_page; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; + unsigned long start_page, end_page; map_start = (unsigned long)(mpt_table + (__pa(start) >> PAGE_SHIFT)); map_end = (unsigned long)(mpt_table + (__pa(end) >> PAGE_SHIFT)); @@ -152,23 +159,7 @@ create_mpttable_page_table (u64 start, u start_page = map_start & PAGE_MASK; end_page = PAGE_ALIGN(map_end); - for (address = start_page; address < end_page; address += PAGE_SIZE) { - pgd = frametable_pgd_offset(address); - if (pgd_none(*pgd)) - pgd_populate(NULL, pgd, alloc_dir_page()); - pud = pud_offset(pgd, address); - - if (pud_none(*pud)) - pud_populate(NULL, pud, alloc_dir_page()); - pmd = pmd_offset(pud, address); - - if (pmd_none(*pmd)) - pmd_populate_kernel(NULL, pmd, alloc_dir_page()); - pte = pte_offset_kernel(pmd, address); - - if (pte_none(*pte)) - set_pte(pte, pfn_pte(alloc_table_page(INVALID_M2P_ENTRY), PAGE_KERNEL)); - } + create_page_table(start_page, end_page, INVALID_M2P_ENTRY); return 0; } --- xen/arch/ia64/xen/xensetup.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/xensetup.c Wed Oct 11 16:10:40 2006 -0400 @@ -48,6 +48,7 @@ extern void 
mem_init(void); extern void mem_init(void); extern void init_IRQ(void); extern void trap_init(void); +extern void xen_patch_kernel(void); /* opt_nosmp: If true, secondary processors are ignored. */ static int opt_nosmp = 0; @@ -81,6 +82,7 @@ unsigned long xenheap_size = XENHEAP_DEF unsigned long xenheap_size = XENHEAP_DEFAULT_SIZE; extern long running_on_sim; unsigned long xen_pstart; +void *xen_heap_start __read_mostly; static int xen_count_pages(u64 start, u64 end, void *arg) @@ -184,8 +186,8 @@ efi_print(void) for (i = 0, p = efi_map_start; p < efi_map_end; ++i, p += efi_desc_size) { md = p; - printk("mem%02u: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n", - i, md->type, md->attribute, md->phys_addr, + printk("mem%02u: type=%2u, attr=0x%016lx, range=[0x%016lx-0x%016lx) " + "(%luMB)\n", i, md->type, md->attribute, md->phys_addr, md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), md->num_pages >> (20 - EFI_PAGE_SHIFT)); } @@ -242,7 +244,6 @@ void start_kernel(void) void start_kernel(void) { char *cmdline; - void *heap_start; unsigned long nr_pages; unsigned long dom0_memory_start, dom0_memory_size; unsigned long dom0_initrd_start, dom0_initrd_size; @@ -292,6 +293,8 @@ void start_kernel(void) xenheap_phys_end = xen_pstart + xenheap_size; printk("xen image pstart: 0x%lx, xenheap pend: 0x%lx\n", xen_pstart, xenheap_phys_end); + + xen_patch_kernel(); kern_md = md = efi_get_md(xen_pstart); md_end = __pa(ia64_imva(&_end)); @@ -389,10 +392,10 @@ void start_kernel(void) printf("find_memory: efi_memmap_walk returns max_page=%lx\n",max_page); efi_print(); - heap_start = memguard_init(ia64_imva(&_end)); - printf("Before heap_start: %p\n", heap_start); - heap_start = __va(init_boot_allocator(__pa(heap_start))); - printf("After heap_start: %p\n", heap_start); + xen_heap_start = memguard_init(ia64_imva(&_end)); + printf("Before xen_heap_start: %p\n", xen_heap_start); + xen_heap_start = __va(init_boot_allocator(__pa(xen_heap_start))); + printf("After xen_heap_start: %p\n", xen_heap_start); efi_memmap_walk(filter_rsvd_memory, init_boot_pages); efi_memmap_walk(xen_count_pages, &nr_pages); @@ -410,10 +413,10 @@ void start_kernel(void) end_boot_allocator(); - init_xenheap_pages(__pa(heap_start), xenheap_phys_end); + init_xenheap_pages(__pa(xen_heap_start), xenheap_phys_end); printk("Xen heap: %luMB (%lukB)\n", - (xenheap_phys_end-__pa(heap_start)) >> 20, - (xenheap_phys_end-__pa(heap_start)) >> 10); + (xenheap_phys_end-__pa(xen_heap_start)) >> 20, + (xenheap_phys_end-__pa(xen_heap_start)) >> 10); late_setup_arch(&cmdline); @@ -495,6 +498,8 @@ printk("num_online_cpus=%d, max_cpus=%d\ /* Hide the HCDP table from dom0 */ efi.hcdp = NULL; } + + expose_p2m_init(); /* Create initial domain 0. */ dom0 = domain_create(0); --- xen/arch/ia64/xen/xentime.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/xentime.c Wed Oct 11 16:10:40 2006 -0400 @@ -39,7 +39,7 @@ seqlock_t xtime_lock __cacheline_aligned #define TIME_KEEPER_ID 0 unsigned long domain0_ready = 0; static s_time_t stime_irq = 0x0; /* System time at last 'time update' */ -unsigned long itc_scale, ns_scale; +unsigned long itc_scale __read_mostly, ns_scale __read_mostly; unsigned long itc_at_irq; /* We don't expect an absolute cycle value here, since then no way --- xen/include/asm-ia64/dom_fw.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/dom_fw.h Wed Oct 11 16:10:40 2006 -0400 @@ -38,6 +38,13 @@ The high part is the class (xen/pal/sal/efi). 
*/ #define FW_HYPERCALL_NUM_MASK_HIGH ~0xffUL #define FW_HYPERCALL_NUM_MASK_LOW 0xffUL + +/* Xen hypercalls are 0-63. */ +#define FW_HYPERCALL_XEN 0x0000UL + +/* Define some faster and lighter hypercalls. + See definitions in arch-ia64.h */ +#define FW_HYPERCALL_XEN_FAST 0x0200UL /* * PAL can be called in physical or virtual mode simply by --- xen/include/asm-ia64/domain.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/domain.h Wed Oct 11 16:10:40 2006 -0400 @@ -87,6 +87,9 @@ struct arch_domain { unsigned long flags; struct { unsigned int is_vti : 1; +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT + unsigned int has_pervcpu_vhpt : 1; +#endif }; }; @@ -142,11 +145,12 @@ struct arch_domain { (sizeof(vcpu_info_t) * (v)->vcpu_id + \ offsetof(vcpu_info_t, evtchn_upcall_mask)) -struct hypercall_param { - unsigned long va; - unsigned long pa1; - unsigned long pa2; -}; +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT +#define HAS_PERVCPU_VHPT(d) ((d)->arch.has_pervcpu_vhpt) +#else +#define HAS_PERVCPU_VHPT(d) (0) +#endif + struct arch_vcpu { /* Save the state of vcpu. @@ -192,14 +196,19 @@ struct arch_vcpu { char irq_new_condition; // vpsr.i/vtpr change, check for pending VHPI char hypercall_continuation; - struct hypercall_param hypercall_param; // used to remap a hypercall param - //for phycial emulation unsigned long old_rsc; int mode_flags; fpswa_ret_t fpswa_ret; /* save return values of FPSWA emulation */ struct timer hlt_timer; struct arch_vmx_struct arch_vmx; /* Virtual Machine Extensions */ + +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT + PTA pta; + unsigned long vhpt_maddr; + struct page_info* vhpt_page; + unsigned long vhpt_entries; +#endif #define INVALID_PROCESSOR INT_MAX int last_processor; --- xen/include/asm-ia64/guest_access.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/guest_access.h Wed Oct 11 16:10:40 2006 -0400 @@ -1,91 +1,107 @@ -/****************************************************************************** - * guest_access.h - * - * Copyright (c) 2006, K A Fraser +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) IBM Corp. 2006 + * + * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> + * Tristan Gingold <tristan.gingold@xxxxxxxx> */ -#ifndef __ASM_IA64_GUEST_ACCESS_H__ -#define __ASM_IA64_GUEST_ACCESS_H__ +#ifndef __ASM_GUEST_ACCESS_H__ +#define __ASM_GUEST_ACCESS_H__ -#include <asm/uaccess.h> +extern unsigned long xencomm_copy_to_guest(void *to, const void *from, + unsigned int len, unsigned int skip); +extern unsigned long xencomm_copy_from_guest(void *to, const void *from, + unsigned int len, unsigned int skip); +extern void *xencomm_add_offset(void *handle, unsigned int bytes); +extern int xencomm_handle_is_null(void *ptr); + /* Is the guest handle a NULL reference? 
*/ -#define guest_handle_is_null(hnd) ((hnd).p == NULL) +#define guest_handle_is_null(hnd) \ + ((hnd).p == NULL || xencomm_handle_is_null((hnd).p)) /* Offset the given guest handle into the array it refers to. */ -#define guest_handle_add_offset(hnd, nr) ((hnd).p += (nr)) +#define guest_handle_add_offset(hnd, nr) ({ \ + const typeof((hnd).p) _ptr = (hnd).p; \ + (hnd).p = xencomm_add_offset(_ptr, nr * sizeof(*_ptr)); \ +}) /* Cast a guest handle to the specified type of handle. */ -#define guest_handle_cast(hnd, type) ({ \ - type *_x = (hnd).p; \ - (XEN_GUEST_HANDLE(type)) { _x }; \ +#define guest_handle_cast(hnd, type) ({ \ + type *_x = (hnd).p; \ + XEN_GUEST_HANDLE(type) _y; \ + set_xen_guest_handle(_y, _x); \ + _y; \ }) -#define guest_handle_from_ptr(ptr, type) ((XEN_GUEST_HANDLE(type)) { (type *)ptr }) + +/* Since we run in real mode, we can safely access all addresses. That also + * means our __routines are identical to our "normal" routines. */ +#define guest_handle_okay(hnd, nr) 1 /* - * Copy an array of objects to guest context via a guest handle, - * specifying an offset into the guest array. + * Copy an array of objects to guest context via a guest handle. + * Optionally specify an offset into the guest array. */ -#define copy_to_guest_offset(hnd, off, ptr, nr) ({ \ - const typeof(ptr) _x = (hnd).p; \ - const typeof(ptr) _y = (ptr); \ - copy_to_user(_x+(off), _y, sizeof(*_x)*(nr)); \ +#define copy_to_guest_offset(hnd, idx, ptr, nr) \ + __copy_to_guest_offset(hnd, idx, ptr, nr) + +/* Copy sub-field of a structure to guest context via a guest handle. */ +#define copy_field_to_guest(hnd, ptr, field) \ + __copy_field_to_guest(hnd, ptr, field) + +/* + * Copy an array of objects from guest context via a guest handle. + * Optionally specify an offset into the guest array. + */ +#define copy_from_guest_offset(ptr, hnd, idx, nr) \ + __copy_from_guest_offset(ptr, hnd, idx, nr) + +/* Copy sub-field of a structure from guest context via a guest handle. */ +#define copy_field_from_guest(ptr, hnd, field) \ + __copy_field_from_guest(ptr, hnd, field) + +#define __copy_to_guest_offset(hnd, idx, ptr, nr) ({ \ + const typeof(ptr) _d = (hnd).p; \ + const typeof(ptr) _s = (ptr); \ + xencomm_copy_to_guest(_d, _s, sizeof(*_s)*(nr), sizeof(*_s)*(idx)); \ }) -/* - * Copy an array of objects from guest context via a guest handle, - * specifying an offset into the guest array. - */ -#define copy_from_guest_offset(ptr, hnd, off, nr) ({ \ - const typeof(ptr) _x = (hnd).p; \ - const typeof(ptr) _y = (ptr); \ - copy_from_user(_y, _x+(off), sizeof(*_x)*(nr)); \ +#define __copy_field_to_guest(hnd, ptr, field) ({ \ + const int _off = offsetof(typeof(*ptr), field); \ + const typeof(ptr) _d = (hnd).p; \ + const typeof(&(ptr)->field) _s = &(ptr)->field; \ + xencomm_copy_to_guest(_d, _s, sizeof(*_s), _off); \ }) -/* Copy sub-field of a structure to guest context via a guest handle. */ -#define copy_field_to_guest(hnd, ptr, field) ({ \ - const typeof(&(ptr)->field) _x = &(hnd).p->field; \ - const typeof(&(ptr)->field) _y = &(ptr)->field; \ - copy_to_user(_x, _y, sizeof(*_x)); \ +#define __copy_from_guest_offset(ptr, hnd, idx, nr) ({ \ + const typeof(ptr) _s = (hnd).p; \ + const typeof(ptr) _d = (ptr); \ + xencomm_copy_from_guest(_d, _s, sizeof(*_s)*(nr), sizeof(*_s)*(idx)); \ }) -/* Copy sub-field of a structure from guest context via a guest handle. 
*/ -#define copy_field_from_guest(ptr, hnd, field) ({ \ - const typeof(&(ptr)->field) _x = &(hnd).p->field; \ - const typeof(&(ptr)->field) _y = &(ptr)->field; \ - copy_from_user(_y, _x, sizeof(*_x)); \ +#define __copy_field_from_guest(ptr, hnd, field) ({ \ + const int _off = offsetof(typeof(*ptr), field); \ + const typeof(ptr) _s = (hnd).p; \ + const typeof(&(ptr)->field) _d = &(ptr)->field; \ + xencomm_copy_from_guest(_d, _s, sizeof(*_d), _off); \ }) -/* - * Pre-validate a guest handle. - * Allows use of faster __copy_* functions. - */ -#define guest_handle_okay(hnd, nr) \ - array_access_ok((hnd).p, (nr), sizeof(*(hnd).p)) +/* Internal use only: returns 0 in case of bad address. */ +extern unsigned long xencomm_paddr_to_maddr(unsigned long paddr); -#define __copy_to_guest_offset(hnd, off, ptr, nr) ({ \ - const typeof(ptr) _x = (hnd).p; \ - const typeof(ptr) _y = (ptr); \ - __copy_to_user(_x+(off), _y, sizeof(*_x)*(nr)); \ -}) - -#define __copy_from_guest_offset(ptr, hnd, off, nr) ({ \ - const typeof(ptr) _x = (hnd).p; \ - const typeof(ptr) _y = (ptr); \ - __copy_from_user(_y, _x+(off), sizeof(*_x)*(nr)); \ -}) - -#define __copy_field_to_guest(hnd, ptr, field) ({ \ - const typeof(&(ptr)->field) _x = &(hnd).p->field; \ - const typeof(&(ptr)->field) _y = &(ptr)->field; \ - __copy_to_user(_x, _y, sizeof(*_x)); \ -}) - -#define __copy_field_from_guest(ptr, hnd, field) ({ \ - const typeof(&(ptr)->field) _x = &(hnd).p->field; \ - const typeof(&(ptr)->field) _y = &(ptr)->field; \ - __copy_from_user(_y, _x, sizeof(*_x)); \ -}) - -#endif /* __ASM_IA64_GUEST_ACCESS_H__ */ +#endif /* __ASM_GUEST_ACCESS_H__ */ --- xen/include/asm-ia64/ia64_int.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/ia64_int.h Wed Oct 11 16:10:40 2006 -0400 @@ -36,7 +36,9 @@ #define IA64_NO_FAULT 0x0000 #define IA64_FAULT 0x0001 #define IA64_RFI_IN_PROGRESS 0x0002 -#define IA64_RETRY 0x0003 +// To avoid conflicting with return value of handle_fpu_swa() +// set IA64_RETRY to -0x000f +#define IA64_RETRY (-0x000f) #define IA64_FORCED_IFA 0x0004 #define IA64_USE_TLB 0x0005 #define IA64_ILLOP_FAULT (IA64_GENEX_VECTOR | 0x00) --- xen/include/asm-ia64/linux-xen/asm/cache.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/linux-xen/asm/cache.h Wed Oct 11 16:10:40 2006 -0400 @@ -32,6 +32,6 @@ #endif #endif -#define __read_mostly +#define __read_mostly __attribute__((__section__(".data.read_mostly"))) #endif /* _ASM_IA64_CACHE_H */ --- xen/include/asm-ia64/linux-xen/asm/pgtable.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/linux-xen/asm/pgtable.h Wed Oct 11 16:10:40 2006 -0400 @@ -68,6 +68,20 @@ #ifdef XEN #define _PAGE_VIRT_D (__IA64_UL(1) << 53) /* Virtual dirty bit */ #define _PAGE_PROTNONE 0 + +/* domVTI */ +#define GPFN_MEM (0UL << 60) /* Guest pfn is normal mem */ +#define GPFN_FRAME_BUFFER (1UL << 60) /* VGA framebuffer */ +#define GPFN_LOW_MMIO (2UL << 60) /* Low MMIO range */ +#define GPFN_PIB (3UL << 60) /* PIB base */ +#define GPFN_IOSAPIC (4UL << 60) /* IOSAPIC base */ +#define GPFN_LEGACY_IO (5UL << 60) /* Legacy I/O base */ +#define GPFN_GFW (6UL << 60) /* Guest Firmware */ +#define GPFN_HIGH_MMIO (7UL << 60) /* High MMIO range */ + +#define GPFN_IO_MASK (7UL << 60) /* Guest pfn is I/O type */ +#define GPFN_INV_MASK (1UL << 63) /* Guest pfn is invalid */ + #else #define _PAGE_PROTNONE (__IA64_UL(1) << 63) #endif --- xen/include/asm-ia64/linux-xen/asm/processor.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/linux-xen/asm/processor.h Wed Oct 11 16:10:40 2006 -0400 @@ -89,6 +89,7 @@ 
#ifdef XEN #include <asm/xenprocessor.h> +#include <xen/bitops.h> #else /* like above but expressed as bitfields for more efficient access: */ struct ia64_psr { @@ -571,6 +572,23 @@ ia64_eoi (void) #define cpu_relax() ia64_hint(ia64_hint_pause) +static inline int +ia64_get_irr(unsigned int vector) +{ + unsigned int reg = vector / 64; + unsigned int bit = vector % 64; + u64 irr; + + switch (reg) { + case 0: irr = ia64_getreg(_IA64_REG_CR_IRR0); break; + case 1: irr = ia64_getreg(_IA64_REG_CR_IRR1); break; + case 2: irr = ia64_getreg(_IA64_REG_CR_IRR2); break; + case 3: irr = ia64_getreg(_IA64_REG_CR_IRR3); break; + } + + return test_bit(bit, &irr); +} + static inline void ia64_set_lrr0 (unsigned long val) { --- xen/include/asm-ia64/linux-xen/asm/system.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/linux-xen/asm/system.h Wed Oct 11 16:10:40 2006 -0400 @@ -189,6 +189,7 @@ do { \ #ifdef XEN #define local_irq_is_enabled() (!irqs_disabled()) +extern struct vcpu *ia64_switch_to(struct vcpu *next_task); #else #ifdef __KERNEL__ --- xen/include/asm-ia64/linux/asm/sal.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/linux/asm/sal.h Wed Oct 11 16:10:40 2006 -0400 @@ -657,15 +657,7 @@ ia64_sal_freq_base (unsigned long which, return isrv.status; } -/* Flush all the processor and platform level instruction and/or data caches */ -static inline s64 -ia64_sal_cache_flush (u64 cache_type) -{ - struct ia64_sal_retval isrv; - SAL_CALL(isrv, SAL_CACHE_FLUSH, cache_type, 0, 0, 0, 0, 0, 0); - return isrv.status; -} - +extern s64 ia64_sal_cache_flush (u64 cache_type); /* Initialize all the processor and platform level instruction and data caches */ static inline s64 --- xen/include/asm-ia64/mm.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/mm.h Wed Oct 11 16:10:40 2006 -0400 @@ -117,10 +117,14 @@ struct page_info #define IS_XEN_HEAP_FRAME(_pfn) ((page_to_maddr(_pfn) < xenheap_phys_end) \ && (page_to_maddr(_pfn) >= xen_pstart)) -static inline struct domain *unpickle_domptr(u32 _d) -{ return (_d == 0) ? NULL : __va(_d); } +extern void *xen_heap_start; +#define __pickle(a) ((unsigned long)a - (unsigned long)xen_heap_start) +#define __unpickle(a) (void *)(a + xen_heap_start) + +static inline struct domain *unpickle_domptr(u64 _d) +{ return (_d == 0) ? NULL : __unpickle(_d); } static inline u32 pickle_domptr(struct domain *_d) -{ return (_d == NULL) ? 0 : (u32)__pa(_d); } +{ return (_d == NULL) ? 
0 : (u32)__pickle(_d); } #define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain)) #define page_set_owner(_p, _d) ((_p)->u.inuse._domain = pickle_domptr(_d)) @@ -420,7 +424,7 @@ extern void relinquish_mm(struct domain* extern void relinquish_mm(struct domain* d); extern struct page_info * assign_new_domain_page(struct domain *d, unsigned long mpaddr); extern void assign_new_domain0_page(struct domain *d, unsigned long mpaddr); -extern void __assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr, unsigned long flags); +extern int __assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr, unsigned long flags); extern void assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr); extern void assign_domain_io_page(struct domain *d, unsigned long mpaddr, unsigned long flags); struct p2m_entry; @@ -435,6 +439,13 @@ extern unsigned long do_dom0vp_op(unsign extern unsigned long do_dom0vp_op(unsigned long cmd, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3); extern unsigned long dom0vp_zap_physmap(struct domain *d, unsigned long gpfn, unsigned int extent_order); extern unsigned long dom0vp_add_physmap(struct domain* d, unsigned long gpfn, unsigned long mfn, unsigned long flags, domid_t domid); +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M +extern void expose_p2m_init(void); +extern unsigned long dom0vp_expose_p2m(struct domain* d, unsigned long conv_start_gpfn, unsigned long assign_start_gpfn, unsigned long expose_size, unsigned long granule_pfn); +#else +#define expose_p2m_init() do { } while (0) +#define dom0vp_expose_p2m(d, conv_start_gpfn, assign_start_gpfn, expose_size, granule_pfn) (-ENOSYS) +#endif extern volatile unsigned long *mpt_table; extern unsigned long gmfn_to_mfn_foreign(struct domain *d, unsigned long gpfn); --- xen/include/asm-ia64/perfc_defn.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/perfc_defn.h Wed Oct 11 16:10:40 2006 -0400 @@ -107,3 +107,30 @@ PERFPRIVOPADDR(get_ifa) PERFPRIVOPADDR(get_ifa) PERFPRIVOPADDR(thash) #endif + +// vhpt.c +PERFCOUNTER_CPU(vcpu_flush_vtlb_all, "vcpu_flush_vtlb_all") +PERFCOUNTER_CPU(domain_flush_vtlb_all, "domain_flush_vtlb_all") +PERFCOUNTER_CPU(vcpu_flush_tlb_vhpt_range, "vcpu_flush_tlb_vhpt_range") +PERFCOUNTER_CPU(domain_flush_vtlb_range, "domain_flush_vtlb_range") + +// domain.c +PERFCOUNTER_CPU(flush_vtlb_for_context_switch, "flush_vtlb_for_context_switch") + +// mm.c +PERFCOUNTER_CPU(assign_domain_page_replace, "assign_domain_page_replace") +PERFCOUNTER_CPU(assign_domain_pge_cmpxchg_rel, "assign_domain_pge_cmpxchg_rel") +PERFCOUNTER_CPU(zap_dcomain_page_one, "zap_dcomain_page_one") +PERFCOUNTER_CPU(dom0vp_zap_physmap, "dom0vp_zap_physmap") +PERFCOUNTER_CPU(dom0vp_add_physmap, "dom0vp_add_physmap") +PERFCOUNTER_CPU(create_grant_host_mapping, "create_grant_host_mapping") +PERFCOUNTER_CPU(destroy_grant_host_mapping, "destroy_grant_host_mapping") +PERFCOUNTER_CPU(steal_page_refcount, "steal_page_refcount") +PERFCOUNTER_CPU(steal_page, "steal_page") +PERFCOUNTER_CPU(guest_physmap_add_page, "guest_physmap_add_page") +PERFCOUNTER_CPU(guest_physmap_remove_page, "guest_physmap_remove_page") +PERFCOUNTER_CPU(domain_page_flush, "domain_page_flush") + +// dom0vp +PERFCOUNTER_CPU(dom0vp_phystomach, "dom0vp_phystomach") +PERFCOUNTER_CPU(dom0vp_machtophys, "dom0vp_machtophys") --- xen/include/asm-ia64/uaccess.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/uaccess.h Wed Oct 11 16:10:40 2006 -0400 @@ -211,30 +211,16 @@ extern 
unsigned long __must_check __copy extern unsigned long __must_check __copy_user (void __user *to, const void __user *from, unsigned long count); -extern int ia64_map_hypercall_param(void); - static inline unsigned long __copy_to_user (void __user *to, const void *from, unsigned long count) { - unsigned long len; - len = __copy_user(to, (void __user *)from, count); - if (len == 0) - return 0; - if (ia64_map_hypercall_param()) - len = __copy_user(to, (void __user *)from, count); /* retry */ - return len; + return __copy_user(to, (void __user *)from, count); } static inline unsigned long __copy_from_user (void *to, const void __user *from, unsigned long count) { - unsigned long len; - len = __copy_user((void __user *)to, from, count); - if (len == 0) - return 0; - if (ia64_map_hypercall_param()) - len = __copy_user((void __user *) to, from, count); /* retry */ - return len; + return __copy_user((void __user *)to, from, count); } #define __copy_to_user_inatomic __copy_to_user --- xen/include/asm-ia64/vhpt.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/vhpt.h Wed Oct 11 16:10:40 2006 -0400 @@ -37,11 +37,46 @@ extern void vhpt_multiple_insert(unsigne unsigned long logps); extern void vhpt_insert (unsigned long vadr, unsigned long pte, unsigned long logps); -void vhpt_flush(void); +void local_vhpt_flush(void); /* Currently the VHPT is allocated per CPU. */ DECLARE_PER_CPU (unsigned long, vhpt_paddr); DECLARE_PER_CPU (unsigned long, vhpt_pend); +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT +#if !VHPT_ENABLED +#error "VHPT_ENABLED must be set for CONFIG_XEN_IA64_PERVCPU_VHPT" +#endif +#endif + +#include <xen/sched.h> +int pervcpu_vhpt_alloc(struct vcpu *v); +void pervcpu_vhpt_free(struct vcpu *v); +static inline unsigned long +vcpu_vhpt_maddr(struct vcpu* v) +{ +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT + if (HAS_PERVCPU_VHPT(v->domain)) + return v->arch.vhpt_maddr; +#endif + +#if 0 + // referencecing v->processor is racy. 
+ return per_cpu(vhpt_paddr, v->processor); +#endif + BUG_ON(v != current); + return __get_cpu_var(vhpt_paddr); +} + +static inline unsigned long +vcpu_pta(struct vcpu* v) +{ +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT + if (HAS_PERVCPU_VHPT(v->domain)) + return v->arch.pta.val; +#endif + return VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | VHPT_ENABLED; +} + #endif /* !__ASSEMBLY */ #endif --- xen/include/asm-ia64/vmx.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/vmx.h Wed Oct 11 16:10:40 2006 -0400 @@ -35,6 +35,7 @@ extern void vmx_save_state(struct vcpu * extern void vmx_save_state(struct vcpu *v); extern void vmx_load_state(struct vcpu *v); extern void vmx_setup_platform(struct domain *d); +extern void vmx_do_launch(struct vcpu *v); extern void vmx_io_assist(struct vcpu *v); extern int ia64_hypercall (struct pt_regs *regs); extern void vmx_save_state(struct vcpu *v); --- xen/include/asm-ia64/vmx_vcpu.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/vmx_vcpu.h Wed Oct 11 16:10:40 2006 -0400 @@ -114,6 +114,7 @@ extern void memwrite_p(VCPU *vcpu, u64 * extern void memwrite_p(VCPU *vcpu, u64 *src, u64 *dest, size_t s); extern void vcpu_load_kernel_regs(VCPU *vcpu); extern IA64FAULT vmx_vcpu_increment_iip(VCPU *vcpu); +extern IA64FAULT vmx_vcpu_decrement_iip(VCPU *vcpu); extern void vmx_switch_rr7(unsigned long ,shared_info_t*,void *,void *,void *); extern void dtlb_fault (VCPU *vcpu, u64 vadr); @@ -121,7 +122,8 @@ extern void alt_dtlb (VCPU *vcpu, u64 va extern void alt_dtlb (VCPU *vcpu, u64 vadr); extern void dvhpt_fault (VCPU *vcpu, u64 vadr); extern void dnat_page_consumption (VCPU *vcpu, uint64_t vadr); -extern void page_not_present(VCPU *vcpu, u64 vadr); +extern void data_page_not_present(VCPU *vcpu, u64 vadr); +extern void inst_page_not_present(VCPU *vcpu, u64 vadr); extern void data_access_rights(VCPU *vcpu, u64 vadr); /************************************************************************** --- xen/include/asm-ia64/xenkregs.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/xenkregs.h Wed Oct 11 16:10:40 2006 -0400 @@ -7,8 +7,7 @@ #define IA64_TR_SHARED_INFO 3 /* dtr3: page shared with domain */ #define IA64_TR_VHPT 4 /* dtr4: vhpt */ #define IA64_TR_MAPPED_REGS 5 /* dtr5: vcpu mapped regs */ -#define IA64_TR_PERVP_VHPT 6 -#define IA64_DTR_GUEST_KERNEL 7 +#define IA64_DTR_GUEST_KERNEL 6 #define IA64_ITR_GUEST_KERNEL 2 /* Processor status register bits: */ #define IA64_PSR_VM_BIT 46 --- xen/include/public/arch-ia64.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/public/arch-ia64.h Wed Oct 11 16:10:40 2006 -0400 @@ -47,18 +47,6 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); #ifndef __ASSEMBLY__ typedef unsigned long xen_ulong_t; - -#define GPFN_MEM (0UL << 56) /* Guest pfn is normal mem */ -#define GPFN_FRAME_BUFFER (1UL << 56) /* VGA framebuffer */ -#define GPFN_LOW_MMIO (2UL << 56) /* Low MMIO range */ -#define GPFN_PIB (3UL << 56) /* PIB base */ -#define GPFN_IOSAPIC (4UL << 56) /* IOSAPIC base */ -#define GPFN_LEGACY_IO (5UL << 56) /* Legacy I/O base */ -#define GPFN_GFW (6UL << 56) /* Guest Firmware */ -#define GPFN_HIGH_MMIO (7UL << 56) /* High MMIO range */ - -#define GPFN_IO_MASK (7UL << 56) /* Guest pfn is I/O type */ -#define GPFN_INV_MASK (31UL << 59) /* Guest pfn is invalid */ #define INVALID_MFN (~0UL) @@ -336,33 +324,33 @@ typedef struct vcpu_guest_context vcpu_g typedef struct vcpu_guest_context vcpu_guest_context_t; DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); -// dom0 vp op +/* dom0 vp op */ #define __HYPERVISOR_ia64_dom0vp_op __HYPERVISOR_arch_0 
-#define IA64_DOM0VP_ioremap 0 // map io space in machine - // address to dom0 physical - // address space. - // currently physical - // assignedg address equals to - // machine address -#define IA64_DOM0VP_phystomach 1 // convert a pseudo physical - // page frame number - // to the corresponding - // machine page frame number. - // if no page is assigned, - // INVALID_MFN or GPFN_INV_MASK - // is returned depending on - // domain's non-vti/vti mode. -#define IA64_DOM0VP_machtophys 3 // convert a machine page - // frame number - // to the corresponding - // pseudo physical page frame - // number of the caller domain -#define IA64_DOM0VP_zap_physmap 17 // unmap and free pages - // contained in the specified - // pseudo physical region -#define IA64_DOM0VP_add_physmap 18 // assigne machine page frane - // to dom0's pseudo physical - // address space. +/* Map io space in machine address to dom0 physical address space. + Currently physical assigned address equals to machine address. */ +#define IA64_DOM0VP_ioremap 0 + +/* Convert a pseudo physical page frame number to the corresponding + machine page frame number. If no page is assigned, INVALID_MFN or + GPFN_INV_MASK is returned depending on domain's non-vti/vti mode. */ +#define IA64_DOM0VP_phystomach 1 + +/* Convert a machine page frame number to the corresponding pseudo physical + page frame number of the caller domain. */ +#define IA64_DOM0VP_machtophys 3 + +/* Reserved for future use. */ +#define IA64_DOM0VP_iounmap 4 + +/* Unmap and free pages contained in the specified pseudo physical region. */ +#define IA64_DOM0VP_zap_physmap 5 + +/* Assign machine page frame to dom0's pseudo physical address space. */ +#define IA64_DOM0VP_add_physmap 6 + +/* expose the p2m table into domain */ +#define IA64_DOM0VP_expose_p2m 7 + // flags for page assignement to pseudo physical address space #define _ASSIGN_readonly 0 #define ASSIGN_readonly (1UL << _ASSIGN_readonly) @@ -395,15 +383,12 @@ struct xen_ia64_boot_param { #endif /* !__ASSEMBLY__ */ -/* Address of shared_info in domain virtual space. - This is the default address, for compatibility only. */ -#define XSI_BASE 0xf100000000000000 - /* Size of the shared_info area (this is not related to page size). */ #define XSI_SHIFT 14 #define XSI_SIZE (1 << XSI_SHIFT) /* Log size of mapped_regs area (64 KB - only 4KB is used). */ #define XMAPPEDREGS_SHIFT 12 +#define XMAPPEDREGS_SIZE (1 << XMAPPEDREGS_SHIFT) /* Offset of XASI (Xen arch shared info) wrt XSI_BASE. */ #define XMAPPEDREGS_OFS XSI_SIZE @@ -435,6 +420,17 @@ struct xen_ia64_boot_param { #define HYPERPRIVOP_GET_PSR 0x19 #define HYPERPRIVOP_MAX 0x19 +/* Fast and light hypercalls. */ +#define __HYPERVISOR_ia64_fast_eoi 0x0200 + +/* Xencomm macros. */ +#define XENCOMM_INLINE_MASK 0xf800000000000000UL +#define XENCOMM_INLINE_FLAG 0x8000000000000000UL + +#define XENCOMM_IS_INLINE(addr) \ + (((unsigned long)(addr) & XENCOMM_INLINE_MASK) == XENCOMM_INLINE_FLAG) +#define XENCOMM_INLINE_ADDR(addr) \ + ((unsigned long)(addr) & ~XENCOMM_INLINE_MASK) #endif /* __HYPERVISOR_IF_IA64_H__ */ /* --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ xen/arch/ia64/tools/p2m_expose/Makefile Wed Oct 11 16:10:40 2006 -0400 @@ -0,0 +1,28 @@ +ifneq ($(KERNELRELEASE),) +obj-m += expose_p2m.o +else +PWD := $(shell pwd) +TOPDIR ?= $(abspath $(PWD)/../../../../..) 
+KDIR ?= $(TOPDIR)/linux-$(shell awk '/^LINUX_VER\>/{print $$3}' $(TOPDIR)/buildconfigs/mk.linux-2.6-xen)-xen +#CROSS_COMPILE ?= ia64-unknown-linux- +#ARCH ?= ia64 + +ifneq ($(O),) +OPT_O := O=$(realpath $(O)) +endif + +ifneq ($(V),) +OPT_V := V=$(V) +endif + +ifneq ($(ARCH),) +OPT_ARCH := ARCH=$(ARCH) +endif + +ifneq ($(CROSS_COMPILE),) +OPT_CORSS_COMPILE := CROSS_COMPILE=$(CROSS_COMPILE) +endif + +default: + $(MAKE) -C $(KDIR) $(OPT_O) $(OPT_V) $(OPT_CORSS_COMPILE) $(OPT_ARCH) M=$(PWD) +endif --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ xen/arch/ia64/tools/p2m_expose/README.p2m_expose Wed Oct 11 16:10:40 2006 -0400 @@ -0,0 +1,12 @@ +This directory contains Linux kernel module for p2m exposure test/benchmark. + +1. build kernel module + - At fist build, linux-xen as usual + - then type just 'make' in this directory, then you'll have expose_p2m.ko. + See Makefile for details. + +2. test, benchmark. + - type 'insmod expose_p2m.ko' on the system. + Then the result is printed out to your console. + insmod fails with EINVAL so that you don't have to execute rmmod. + --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ xen/arch/ia64/tools/p2m_expose/expose_p2m.c Wed Oct 11 16:10:40 2006 -0400 @@ -0,0 +1,185 @@ +/****************************************************************************** + * arch/ia64/xen/expose_p2m.c + * + * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/hypercall.h> +#include <asm/hypervisor.h> + +#define printd(fmt, ...) printk("%s:%d " fmt, __func__, __LINE__, \ + ##__VA_ARGS__) + +// copied from arch/ia64/mm/tlb.c. it isn't exported. 
+void +local_flush_tlb_all (void) +{ + unsigned long i, j, flags, count0, count1, stride0, stride1, addr; + + addr = local_cpu_data->ptce_base; + count0 = local_cpu_data->ptce_count[0]; + count1 = local_cpu_data->ptce_count[1]; + stride0 = local_cpu_data->ptce_stride[0]; + stride1 = local_cpu_data->ptce_stride[1]; + + local_irq_save(flags); + for (i = 0; i < count0; ++i) { + for (j = 0; j < count1; ++j) { + ia64_ptce(addr); + addr += stride1; + } + addr += stride0; + } + local_irq_restore(flags); + ia64_srlz_i(); /* srlz.i implies srlz.d */ +} + +static void +do_p2m(unsigned long (*conv)(unsigned long), + const char* msg, const char* prefix, + unsigned long start_gpfn, unsigned end_gpfn, unsigned long stride) +{ + struct timeval before_tv; + struct timeval after_tv; + unsigned long gpfn; + unsigned long mfn; + unsigned long count; + nsec_t nsec; + + count = 0; + do_gettimeofday(&before_tv); + for (gpfn = start_gpfn; gpfn < end_gpfn; gpfn += stride) { + mfn = (*conv)(gpfn); + count++; + } + do_gettimeofday(&after_tv); + nsec = timeval_to_ns(&after_tv) - timeval_to_ns(&before_tv); + printk("%s stride %4ld %s: %9ld / %6ld = %5ld nsec\n", + msg, stride, prefix, + nsec, count, nsec/count); +} + + +static void +do_with_hypercall(const char* msg, + unsigned long start_gpfn, unsigned long end_gpfn, + unsigned long stride) +{ + do_p2m(&HYPERVISOR_phystomach, msg, "hypercall", + start_gpfn, end_gpfn, stride); +} + +static void +do_with_table(const char* msg, + unsigned long start_gpfn, unsigned long end_gpfn, + unsigned long stride) +{ + do_p2m(&p2m_phystomach, msg, "p2m table", + start_gpfn, end_gpfn, stride); +} + +static int __init +expose_p2m_init(void) +{ + unsigned long gpfn; + unsigned long mfn; + unsigned long p2m_mfn; + + int error_count = 0; + + const int strides[] = { + PTRS_PER_PTE, PTRS_PER_PTE/2, PTRS_PER_PTE/3, PTRS_PER_PTE/4, + L1_CACHE_BYTES/sizeof(pte_t), 1 + }; + int i; + + +#if 0 + printd("about to call p2m_expose_init()\n"); + if (p2m_expose_init() < 0) { + printd("p2m_expose_init() failed\n"); + return -EINVAL; + } + printd("p2m_expose_init() success\n"); +#else + if (!p2m_initialized) { + printd("p2m exposure isn't initialized\n"); + return -EINVAL; + } +#endif + + printd("p2m expose test begins\n"); + for (gpfn = p2m_min_low_pfn; gpfn < p2m_max_low_pfn; gpfn++) { + mfn = HYPERVISOR_phystomach(gpfn); + p2m_mfn = p2m_phystomach(gpfn); + if (mfn != p2m_mfn) { + printd("gpfn 0x%016lx " + "mfn 0x%016lx p2m_mfn 0x%016lx\n", + gpfn, mfn, p2m_mfn); + printd("mpaddr 0x%016lx " + "maddr 0x%016lx p2m_maddr 0x%016lx\n", + gpfn << PAGE_SHIFT, + mfn << PAGE_SHIFT, p2m_mfn << PAGE_SHIFT); + + error_count++; + if (error_count > 16) { + printk("too many errors\n"); + return -EINVAL; + } + } + } + printd("p2m expose test done!\n"); + + printk("type " + "stride " + "type : " + " nsec / count = " + "nsec per conv\n"); + for (i = 0; i < sizeof(strides)/sizeof(strides[0]); i++) { + int stride = strides[i]; + local_flush_tlb_all(); + do_with_hypercall("cold tlb", + p2m_min_low_pfn, p2m_max_low_pfn, stride); + do_with_hypercall("warm tlb", + p2m_min_low_pfn, p2m_max_low_pfn, stride); + + local_flush_tlb_all(); + do_with_table("cold tlb", + p2m_min_low_pfn, p2m_max_low_pfn, stride); + do_with_table("warm tlb", + p2m_min_low_pfn, p2m_max_low_pfn, stride); + } + + return -EINVAL; +} + +static void __exit +expose_p2m_cleanup(void) +{ +} + +module_init(expose_p2m_init); +module_exit(expose_p2m_cleanup); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Isaku Yamahata <yamahata@xxxxxxxxxxxxx>"); --- /dev/null Thu 
Jan 01 00:00:00 1970 +0000 +++ xen/arch/ia64/vmx/optvfault.S Wed Oct 11 16:10:40 2006 -0400 @@ -0,0 +1,518 @@ +/* + * arch/ia64/vmx/optvfault.S + * optimize virtualization fault handler + * + * Copyright (C) 2006 Intel Co + * Xuefei Xu (Anthony Xu) <anthony.xu@xxxxxxxxx> + */ + +#include <linux/config.h> +#include <asm/asmmacro.h> +#include <asm/kregs.h> +#include <asm/offsets.h> +#include <asm/percpu.h> +#include <asm/processor.h> +#include <asm/vmx_vpd.h> +#include <asm/vmx_pal_vsa.h> +#include <asm/asm-offsets.h> + +#define ACCE_MOV_FROM_AR +#define ACCE_MOV_FROM_RR + +//mov r1=ar3 +GLOBAL_ENTRY(asm_mov_from_ar) +#ifndef ACCE_MOV_FROM_AR + br.many vmx_vitualization_fault_back +#endif + add r18=VCPU_VTM_OFFSET_OFS,r21 + mov r19=ar.itc + extr.u r17=r25,6,7 + ;; + ld8 r18=[r18] + movl r20=asm_mov_to_reg + ;; + adds r30=vmx_resume_to_guest-asm_mov_to_reg,r20 + shladd r17=r17,4,r20 + mov r24=b0 + ;; + add r19=r19,r18 + mov b0=r17 + br.sptk.few b0 + ;; +END(asm_mov_from_ar) + + +// mov r1=rr[r3] +GLOBAL_ENTRY(asm_mov_from_rr) +#ifndef ACCE_MOV_FROM_RR + br.many vmx_vitualization_fault_back +#endif + extr.u r16=r25,20,7 + extr.u r17=r25,6,7 + movl r20=asm_mov_from_reg + ;; + adds r30=asm_mov_from_rr_back_1-asm_mov_from_reg,r20 + shladd r16=r16,4,r20 + mov r24=b0 + ;; + add r27=VCPU_VRR0_OFS,r21 + mov b0=r16 + br.many b0 + ;; +asm_mov_from_rr_back_1: + adds r30=vmx_resume_to_guest-asm_mov_from_reg,r20 + adds r22=asm_mov_to_reg-asm_mov_from_reg,r20 + shr.u r26=r19,61 + ;; + shladd r17=r17,4,r22 + shladd r27=r26,3,r27 + ;; + ld8 r19=[r27] + mov b0=r17 + br.many b0 +END(asm_mov_from_rr) + + +#define MOV_TO_REG0 \ +{; \ + nop.b 0x0; \ + nop.b 0x0; \ + nop.b 0x0; \ + ;; \ +}; + + +#define MOV_TO_REG(n) \ +{; \ + mov r##n##=r19; \ + mov b0=r30; \ + br.sptk.many b0; \ + ;; \ +}; + + +#define MOV_FROM_REG(n) \ +{; \ + mov r19=r##n##; \ + mov b0=r30; \ + br.sptk.many b0; \ + ;; \ +}; + + +#define MOV_TO_BANK0_REG(n) \ +ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##); \ +{; \ + mov r26=r2; \ + mov r2=r19; \ + bsw.1; \ + ;; \ +}; \ +{; \ + mov r##n##=r2; \ + nop.b 0x0; \ + bsw.0; \ + ;; \ +}; \ +{; \ + mov r2=r26; \ + mov b0=r30; \ + br.sptk.many b0; \ + ;; \ +}; \ +END(asm_mov_to_bank0_reg##n##) + + +#define MOV_FROM_BANK0_REG(n) \ +ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##); \ +{; \ + mov r26=r2; \ + nop.b 0x0; \ + bsw.1; \ + ;; \ +}; \ +{; \ + mov r2=r##n##; \ + nop.b 0x0; \ + bsw.0; \ + ;; \ +}; \ +{; \ + mov r19=r2; \ + mov r2=r26; \ + mov b0=r30; \ +}; \ +{; \ + nop.b 0x0; \ + nop.b 0x0; \ + br.sptk.many b0; \ + ;; \ +}; \ +END(asm_mov_from_bank0_reg##n##) + + +#define JMP_TO_MOV_TO_BANK0_REG(n) \ +{; \ + nop.b 0x0; \ + nop.b 0x0; \ + br.sptk.many asm_mov_to_bank0_reg##n##; \ + ;; \ +} + + +#define JMP_TO_MOV_FROM_BANK0_REG(n) \ +{; \ + nop.b 0x0; \ + nop.b 0x0; \ + br.sptk.many asm_mov_from_bank0_reg##n##; \ + ;; \ +} + + +MOV_FROM_BANK0_REG(16) +MOV_FROM_BANK0_REG(17) +MOV_FROM_BANK0_REG(18) +MOV_FROM_BANK0_REG(19) +MOV_FROM_BANK0_REG(20) +MOV_FROM_BANK0_REG(21) +MOV_FROM_BANK0_REG(22) +MOV_FROM_BANK0_REG(23) +MOV_FROM_BANK0_REG(24) +MOV_FROM_BANK0_REG(25) +MOV_FROM_BANK0_REG(26) +MOV_FROM_BANK0_REG(27) +MOV_FROM_BANK0_REG(28) +MOV_FROM_BANK0_REG(29) +MOV_FROM_BANK0_REG(30) +MOV_FROM_BANK0_REG(31) + + +// mov from reg table +ENTRY(asm_mov_from_reg) + MOV_FROM_REG(0) + MOV_FROM_REG(1) + MOV_FROM_REG(2) + MOV_FROM_REG(3) + MOV_FROM_REG(4) + MOV_FROM_REG(5) + MOV_FROM_REG(6) + MOV_FROM_REG(7) + MOV_FROM_REG(8) + MOV_FROM_REG(9) + MOV_FROM_REG(10) + MOV_FROM_REG(11) + MOV_FROM_REG(12) + MOV_FROM_REG(13) 
+ MOV_FROM_REG(14) + MOV_FROM_REG(15) + JMP_TO_MOV_FROM_BANK0_REG(16) + JMP_TO_MOV_FROM_BANK0_REG(17) + JMP_TO_MOV_FROM_BANK0_REG(18) + JMP_TO_MOV_FROM_BANK0_REG(19) + JMP_TO_MOV_FROM_BANK0_REG(20) + JMP_TO_MOV_FROM_BANK0_REG(21) + JMP_TO_MOV_FROM_BANK0_REG(22) + JMP_TO_MOV_FROM_BANK0_REG(23) + JMP_TO_MOV_FROM_BANK0_REG(24) + JMP_TO_MOV_FROM_BANK0_REG(25) + JMP_TO_MOV_FROM_BANK0_REG(26) + JMP_TO_MOV_FROM_BANK0_REG(27) + JMP_TO_MOV_FROM_BANK0_REG(28) + JMP_TO_MOV_FROM_BANK0_REG(29) + JMP_TO_MOV_FROM_BANK0_REG(30) + JMP_TO_MOV_FROM_BANK0_REG(31) + MOV_FROM_REG(32) + MOV_FROM_REG(33) + MOV_FROM_REG(34) + MOV_FROM_REG(35) + MOV_FROM_REG(36) + MOV_FROM_REG(37) + MOV_FROM_REG(38) + MOV_FROM_REG(39) + MOV_FROM_REG(40) + MOV_FROM_REG(41) + MOV_FROM_REG(42) + MOV_FROM_REG(43) + MOV_FROM_REG(44) + MOV_FROM_REG(45) + MOV_FROM_REG(46) + MOV_FROM_REG(47) + MOV_FROM_REG(48) + MOV_FROM_REG(49) + MOV_FROM_REG(50) + MOV_FROM_REG(51) + MOV_FROM_REG(52) + MOV_FROM_REG(53) + MOV_FROM_REG(54) + MOV_FROM_REG(55) + MOV_FROM_REG(56) + MOV_FROM_REG(57) + MOV_FROM_REG(58) + MOV_FROM_REG(59) + MOV_FROM_REG(60) + MOV_FROM_REG(61) + MOV_FROM_REG(62) + MOV_FROM_REG(63) + MOV_FROM_REG(64) + MOV_FROM_REG(65) + MOV_FROM_REG(66) + MOV_FROM_REG(67) + MOV_FROM_REG(68) + MOV_FROM_REG(69) + MOV_FROM_REG(70) + MOV_FROM_REG(71) + MOV_FROM_REG(72) + MOV_FROM_REG(73) + MOV_FROM_REG(74) + MOV_FROM_REG(75) + MOV_FROM_REG(76) + MOV_FROM_REG(77) + MOV_FROM_REG(78) + MOV_FROM_REG(79) + MOV_FROM_REG(80) + MOV_FROM_REG(81) + MOV_FROM_REG(82) + MOV_FROM_REG(83) + MOV_FROM_REG(84) + MOV_FROM_REG(85) + MOV_FROM_REG(86) + MOV_FROM_REG(87) + MOV_FROM_REG(88) + MOV_FROM_REG(89) + MOV_FROM_REG(90) + MOV_FROM_REG(91) + MOV_FROM_REG(92) + MOV_FROM_REG(93) + MOV_FROM_REG(94) + MOV_FROM_REG(95) + MOV_FROM_REG(96) + MOV_FROM_REG(97) + MOV_FROM_REG(98) + MOV_FROM_REG(99) + MOV_FROM_REG(100) + MOV_FROM_REG(101) + MOV_FROM_REG(102) + MOV_FROM_REG(103) + MOV_FROM_REG(104) + MOV_FROM_REG(105) + MOV_FROM_REG(106) + MOV_FROM_REG(107) + MOV_FROM_REG(108) + MOV_FROM_REG(109) + MOV_FROM_REG(110) + MOV_FROM_REG(111) + MOV_FROM_REG(112) + MOV_FROM_REG(113) + MOV_FROM_REG(114) + MOV_FROM_REG(115) + MOV_FROM_REG(116) + MOV_FROM_REG(117) + MOV_FROM_REG(118) + MOV_FROM_REG(119) + MOV_FROM_REG(120) + MOV_FROM_REG(121) + MOV_FROM_REG(122) + MOV_FROM_REG(123) + MOV_FROM_REG(124) + MOV_FROM_REG(125) + MOV_FROM_REG(126) + MOV_FROM_REG(127) +END(asm_mov_from_reg) + + +/* must be in bank 0 + * parameter: + * r31: pr + * r24: b0 + */ +ENTRY(vmx_resume_to_guest) + mov r16=cr.ipsr + movl r20=__vsa_base + ;; + ld8 r20=[r20] + adds r19=IA64_VPD_BASE_OFFSET,r21 + ;; + ld8 r25=[r19] + extr.u r17=r16,IA64_PSR_RI_BIT,2 + tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1 + ;; + (p6) mov r18=cr.iip + (p6) mov r17=r0 + ;; + (p6) add r18=0x10,r18 + (p7) add r17=1,r17 + ;; + (p6) mov cr.iip=r18 + dep r16=r17,r16,IA64_PSR_RI_BIT,2 + ;; + mov cr.ipsr=r16 + mov r17=cr.isr + adds r19= VPD_VPSR_START_OFFSET,r25 + ld8 r26=[r25] + add r29=PAL_VPS_RESUME_NORMAL,r20 + add r28=PAL_VPS_RESUME_HANDLER,r20 + ;; + ld8 r19=[r19] + mov b0=r29 + cmp.ne p6,p7 = r0,r0 + ;; + tbit.nz.or.andcm p6,p7 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic + tbit.nz.or.andcm p6,p7 = r17,IA64_ISR_IR_BIT //p1=cr.isr.ir + ;; + (p6) mov b0=r29 + (p7) mov b0=r28 + mov pr=r31,-2 + br.sptk.many b0 // call pal service + ;; +END(vmx_resume_to_guest) + + +MOV_TO_BANK0_REG(16) +MOV_TO_BANK0_REG(17) +MOV_TO_BANK0_REG(18) +MOV_TO_BANK0_REG(19) +MOV_TO_BANK0_REG(20) +MOV_TO_BANK0_REG(21) +MOV_TO_BANK0_REG(22) +MOV_TO_BANK0_REG(23) +MOV_TO_BANK0_REG(24) 
+MOV_TO_BANK0_REG(25) +MOV_TO_BANK0_REG(26) +MOV_TO_BANK0_REG(27) +MOV_TO_BANK0_REG(28) +MOV_TO_BANK0_REG(29) +MOV_TO_BANK0_REG(30) +MOV_TO_BANK0_REG(31) + + +// mov to reg table +ENTRY(asm_mov_to_reg) + MOV_TO_REG0 + MOV_TO_REG(1) + MOV_TO_REG(2) + MOV_TO_REG(3) + MOV_TO_REG(4) + MOV_TO_REG(5) + MOV_TO_REG(6) + MOV_TO_REG(7) + MOV_TO_REG(8) + MOV_TO_REG(9) + MOV_TO_REG(10) + MOV_TO_REG(11) + MOV_TO_REG(12) + MOV_TO_REG(13) + MOV_TO_REG(14) + MOV_TO_REG(15) + JMP_TO_MOV_TO_BANK0_REG(16) + JMP_TO_MOV_TO_BANK0_REG(17) + JMP_TO_MOV_TO_BANK0_REG(18) + JMP_TO_MOV_TO_BANK0_REG(19) + JMP_TO_MOV_TO_BANK0_REG(20) + JMP_TO_MOV_TO_BANK0_REG(21) + JMP_TO_MOV_TO_BANK0_REG(22) + JMP_TO_MOV_TO_BANK0_REG(23) + JMP_TO_MOV_TO_BANK0_REG(24) + JMP_TO_MOV_TO_BANK0_REG(25) + JMP_TO_MOV_TO_BANK0_REG(26) + JMP_TO_MOV_TO_BANK0_REG(27) + JMP_TO_MOV_TO_BANK0_REG(28) + JMP_TO_MOV_TO_BANK0_REG(29) + JMP_TO_MOV_TO_BANK0_REG(30) + JMP_TO_MOV_TO_BANK0_REG(31) + MOV_TO_REG(32) + MOV_TO_REG(33) + MOV_TO_REG(34) + MOV_TO_REG(35) + MOV_TO_REG(36) + MOV_TO_REG(37) + MOV_TO_REG(38) + MOV_TO_REG(39) + MOV_TO_REG(40) + MOV_TO_REG(41) + MOV_TO_REG(42) + MOV_TO_REG(43) + MOV_TO_REG(44) + MOV_TO_REG(45) + MOV_TO_REG(46) + MOV_TO_REG(47) + MOV_TO_REG(48) + MOV_TO_REG(49) + MOV_TO_REG(50) + MOV_TO_REG(51) + MOV_TO_REG(52) + MOV_TO_REG(53) + MOV_TO_REG(54) + MOV_TO_REG(55) + MOV_TO_REG(56) + MOV_TO_REG(57) + MOV_TO_REG(58) + MOV_TO_REG(59) + MOV_TO_REG(60) + MOV_TO_REG(61) + MOV_TO_REG(62) + MOV_TO_REG(63) + MOV_TO_REG(64) + MOV_TO_REG(65) + MOV_TO_REG(66) + MOV_TO_REG(67) + MOV_TO_REG(68) + MOV_TO_REG(69) + MOV_TO_REG(70) + MOV_TO_REG(71) + MOV_TO_REG(72) + MOV_TO_REG(73) + MOV_TO_REG(74) + MOV_TO_REG(75) + MOV_TO_REG(76) + MOV_TO_REG(77) + MOV_TO_REG(78) + MOV_TO_REG(79) + MOV_TO_REG(80) + MOV_TO_REG(81) + MOV_TO_REG(82) + MOV_TO_REG(83) + MOV_TO_REG(84) + MOV_TO_REG(85) + MOV_TO_REG(86) + MOV_TO_REG(87) + MOV_TO_REG(88) + MOV_TO_REG(89) + MOV_TO_REG(90) + MOV_TO_REG(91) + MOV_TO_REG(92) + MOV_TO_REG(93) + MOV_TO_REG(94) + MOV_TO_REG(95) + MOV_TO_REG(96) + MOV_TO_REG(97) + MOV_TO_REG(98) + MOV_TO_REG(99) + MOV_TO_REG(100) + MOV_TO_REG(101) + MOV_TO_REG(102) + MOV_TO_REG(103) + MOV_TO_REG(104) + MOV_TO_REG(105) + MOV_TO_REG(106) + MOV_TO_REG(107) + MOV_TO_REG(108) + MOV_TO_REG(109) + MOV_TO_REG(110) + MOV_TO_REG(111) + MOV_TO_REG(112) + MOV_TO_REG(113) + MOV_TO_REG(114) + MOV_TO_REG(115) + MOV_TO_REG(116) + MOV_TO_REG(117) + MOV_TO_REG(118) + MOV_TO_REG(119) + MOV_TO_REG(120) + MOV_TO_REG(121) + MOV_TO_REG(122) + MOV_TO_REG(123) + MOV_TO_REG(124) + MOV_TO_REG(125) + MOV_TO_REG(126) + MOV_TO_REG(127) +END(asm_mov_to_reg) --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ xen/arch/ia64/xen/xencomm.c Wed Oct 11 16:10:40 2006 -0400 @@ -0,0 +1,380 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) IBM Corp. 
2006 + * + * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> + * Tristan Gingold <tristan.gingold@xxxxxxxx> + */ + +#include <xen/config.h> +#include <xen/mm.h> +#include <xen/sched.h> +#include <asm/current.h> +#include <asm/guest_access.h> +#include <public/xen.h> +#include <public/xencomm.h> +#include <xen/errno.h> + +#undef DEBUG +#ifdef DEBUG +static int xencomm_debug = 1; /* extremely verbose */ +#else +#define xencomm_debug 0 +#endif + +static int +xencomm_copy_chunk_from( + unsigned long to, + unsigned long paddr, + unsigned int len) +{ + unsigned long maddr; + struct page_info *page; + + while (1) { + maddr = xencomm_paddr_to_maddr(paddr); + if (xencomm_debug > 1) + printk("%lx[%d] -> %lx\n", maddr, len, to); + if (maddr == 0) + return -EFAULT; + + page = virt_to_page(maddr); + if (get_page(page, current->domain) == 0) { + if (page_get_owner(page) != current->domain) { + /* This page might be a page granted by another domain */ + panic_domain(NULL, "copy_from_guest from foreign domain\n"); + } + /* Try again. */ + continue; + } + memcpy((void *)to, (void *)maddr, len); + put_page(page); + return 0; + } +} + +/** + * xencomm_copy_from_guest: Copy a block of data from domain space. + * @to: Machine address. + * @from: Physical address to a xencomm buffer descriptor. + * @n: Number of bytes to copy. + * @skip: Number of bytes from the start to skip. + * + * Copy data from domain to hypervisor. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + */ +unsigned long +xencomm_copy_from_guest( + void *to, + const void *from, + unsigned int n, + unsigned int skip) +{ + struct xencomm_desc *desc; + unsigned long desc_addr; + unsigned int from_pos = 0; + unsigned int to_pos = 0; + unsigned int i = 0; + + if (xencomm_debug) + printf("xencomm_copy_from_guest: from=%lx+%u n=%u\n", + (unsigned long)from, skip, n); + + if (XENCOMM_IS_INLINE(from)) { + unsigned long src_paddr = XENCOMM_INLINE_ADDR(from); + + src_paddr += skip; + + while (n > 0) { + unsigned int chunksz; + unsigned int bytes; + int res; + + chunksz = PAGE_SIZE - (src_paddr % PAGE_SIZE); + + bytes = min(chunksz, n); + + res = xencomm_copy_chunk_from((unsigned long)to, src_paddr, bytes); + if (res != 0) + return -EFAULT; + src_paddr += bytes; + to += bytes; + n -= bytes; + } + + /* Always successful. 
*/ + return 0; + } + + /* first we need to access the descriptor */ + desc_addr = xencomm_paddr_to_maddr((unsigned long)from); + if (desc_addr == 0) + return -EFAULT; + + desc = (struct xencomm_desc *)desc_addr; + if (desc->magic != XENCOMM_MAGIC) { + printk("%s: error: %p magic was 0x%x\n", + __func__, desc, desc->magic); + return -EFAULT; + } + + /* iterate through the descriptor, copying up to a page at a time */ + while ((to_pos < n) && (i < desc->nr_addrs)) { + unsigned long src_paddr = desc->address[i]; + unsigned int pgoffset; + unsigned int chunksz; + unsigned int chunk_skip; + + if (src_paddr == XENCOMM_INVALID) { + i++; + continue; + } + + pgoffset = src_paddr % PAGE_SIZE; + chunksz = PAGE_SIZE - pgoffset; + + chunk_skip = min(chunksz, skip); + from_pos += chunk_skip; + chunksz -= chunk_skip; + skip -= chunk_skip; + + if (skip == 0) { + unsigned int bytes = min(chunksz, n - to_pos); + int res; + + if (xencomm_debug > 1) + printf ("src_paddr=%lx i=%d, skip=%d\n", + src_paddr, i, chunk_skip); + + res = xencomm_copy_chunk_from((unsigned long)to + to_pos, + src_paddr + chunk_skip, bytes); + if (res != 0) + return -EFAULT; + + from_pos += bytes; + to_pos += bytes; + } + + i++; + } + + return n - to_pos; +} + +static int +xencomm_copy_chunk_to( + unsigned long paddr, + unsigned long from, + unsigned int len) +{ + unsigned long maddr; + struct page_info *page; + + while (1) { + maddr = xencomm_paddr_to_maddr(paddr); + if (xencomm_debug > 1) + printk("%lx[%d] -> %lx\n", from, len, maddr); + if (maddr == 0) + return -EFAULT; + + page = virt_to_page(maddr); + if (get_page(page, current->domain) == 0) { + if (page_get_owner(page) != current->domain) { + /* This page might be a page granted by another domain */ + panic_domain(NULL, "copy_to_guest to foreign domain\n"); + } + /* Try again. */ + continue; + } + memcpy((void *)maddr, (void *)from, len); + put_page(page); + return 0; + } +} + +/** + * xencomm_copy_to_guest: Copy a block of data to domain space. + * @to: Physical address to xencomm buffer descriptor. + * @from: Machine address. + * @n: Number of bytes to copy. + * @skip: Number of bytes from the start to skip. + * + * Copy data from hypervisor to domain. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + */ +unsigned long +xencomm_copy_to_guest( + void *to, + const void *from, + unsigned int n, + unsigned int skip) +{ + struct xencomm_desc *desc; + unsigned long desc_addr; + unsigned int from_pos = 0; + unsigned int to_pos = 0; + unsigned int i = 0; + + if (xencomm_debug) + printf ("xencomm_copy_to_guest: to=%lx+%u n=%u\n", + (unsigned long)to, skip, n); + + if (XENCOMM_IS_INLINE(to)) { + unsigned long dest_paddr = XENCOMM_INLINE_ADDR(to); + + dest_paddr += skip; + + while (n > 0) { + unsigned int chunksz; + unsigned int bytes; + int res; + + chunksz = PAGE_SIZE - (dest_paddr % PAGE_SIZE); + + bytes = min(chunksz, n); + + res = xencomm_copy_chunk_to(dest_paddr, (unsigned long)from, bytes); + if (res != 0) + return res; + + dest_paddr += bytes; + from += bytes; + n -= bytes; + } + + /* Always successful. 
*/ + return 0; + } + + /* first we need to access the descriptor */ + desc_addr = xencomm_paddr_to_maddr((unsigned long)to); + if (desc_addr == 0) + return -EFAULT; + + desc = (struct xencomm_desc *)desc_addr; + if (desc->magic != XENCOMM_MAGIC) { + printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic); + return -EFAULT; + } + + /* iterate through the descriptor, copying up to a page at a time */ + while ((from_pos < n) && (i < desc->nr_addrs)) { + unsigned long dest_paddr = desc->address[i]; + unsigned int pgoffset; + unsigned int chunksz; + unsigned int chunk_skip; + + if (dest_paddr == XENCOMM_INVALID) { + i++; + continue; + } + + pgoffset = dest_paddr % PAGE_SIZE; + chunksz = PAGE_SIZE - pgoffset; + + chunk_skip = min(chunksz, skip); + to_pos += chunk_skip; + chunksz -= chunk_skip; + skip -= chunk_skip; + dest_paddr += chunk_skip; + + if (skip == 0) { + unsigned int bytes = min(chunksz, n - from_pos); + int res; + + res = xencomm_copy_chunk_to(dest_paddr, + (unsigned long)from + from_pos, bytes); + if (res != 0) + return res; + + from_pos += bytes; + to_pos += bytes; + } + + i++; + } + return n - from_pos; +} + +/* Offset page addresses in 'handle' to skip 'bytes' bytes. Set completely + * exhausted pages to XENCOMM_INVALID. */ +void * +xencomm_add_offset( + void *handle, + unsigned int bytes) +{ + struct xencomm_desc *desc; + unsigned long desc_addr; + int i = 0; + + if (XENCOMM_IS_INLINE(handle)) + return (void *)((unsigned long)handle + bytes); + + /* first we need to access the descriptor */ + desc_addr = xencomm_paddr_to_maddr((unsigned long)handle); + if (desc_addr == 0) + return NULL; + + desc = (struct xencomm_desc *)desc_addr; + if (desc->magic != XENCOMM_MAGIC) { + printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic); + return NULL; + } + + /* iterate through the descriptor incrementing addresses */ + while ((bytes > 0) && (i < desc->nr_addrs)) { + unsigned long dest_paddr = desc->address[i]; + unsigned int pgoffset; + unsigned int chunksz; + unsigned int chunk_skip; + + pgoffset = dest_paddr % PAGE_SIZE; + chunksz = PAGE_SIZE - pgoffset; + + chunk_skip = min(chunksz, bytes); + if (chunk_skip == chunksz) { + /* exhausted this page */ + desc->address[i] = XENCOMM_INVALID; + } else { + desc->address[i] += chunk_skip; + } + bytes -= chunk_skip; + } + return handle; +} + +int +xencomm_handle_is_null( + void *ptr) +{ + if (XENCOMM_IS_INLINE(ptr)) + return XENCOMM_INLINE_ADDR(ptr) == 0; + else { + struct xencomm_desc *desc; + unsigned long desc_addr; + + desc_addr = xencomm_paddr_to_maddr((unsigned long)ptr); + if (desc_addr == 0) + return 1; + + desc = (struct xencomm_desc *)desc_addr; + return (desc->address[0] == XENCOMM_INVALID); + } +} --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ xen/arch/ia64/xen/xenpatch.c Wed Oct 11 16:10:40 2006 -0400 @@ -0,0 +1,122 @@ +/****************************************************************************** + * xenpatch.c + * Copyright (c) 2006 Silicon Graphics Inc. + * Jes Sorensen <jes@xxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Parts of this based on code from arch/ia64/kernel/patch.c + */ + +#include <xen/config.h> +#include <xen/lib.h> +#include <asm/xensystem.h> +#include <asm/intrinsics.h> + +/* + * This was adapted from code written by Tony Luck: + * + * The 64-bit value in a "movl reg=value" is scattered between the two words of the bundle + * like this: + * + * 6 6 5 4 3 2 1 + * 3210987654321098765432109876543210987654321098765432109876543210 + * ABBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCDEEEEEFFFFFFFFFGGGGGGG + * + * CCCCCCCCCCCCCCCCCCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + * xxxxAFFFFFFFFFEEEEEDxGGGGGGGxxxxxxxxxxxxxBBBBBBBBBBBBBBBBBBBBBBB + */ +static u64 +get_imm64 (u64 insn_addr) +{ + u64 *p = (u64 *) (insn_addr & -16); /* mask out slot number */ + + return ( (p[1] & 0x0800000000000000UL) << 4) | /*A*/ + ((p[1] & 0x00000000007fffffUL) << 40) | /*B*/ + ((p[0] & 0xffffc00000000000UL) >> 24) | /*C*/ + ((p[1] & 0x0000100000000000UL) >> 23) | /*D*/ + ((p[1] & 0x0003e00000000000UL) >> 29) | /*E*/ + ((p[1] & 0x07fc000000000000UL) >> 43) | /*F*/ + ((p[1] & 0x000007f000000000UL) >> 36); /*G*/ +} + +/* Patch instruction with "val" where "mask" has 1 bits. */ +void +ia64_patch (u64 insn_addr, u64 mask, u64 val) +{ + u64 m0, m1, v0, v1, b0, b1, *b = (u64 *) (insn_addr & -16); +#define insn_mask ((1UL << 41) - 1) + unsigned long shift; + + b0 = b[0]; b1 = b[1]; + /* 5 bits of template, then 3 x 41-bit instructions */ + shift = 5 + 41 * (insn_addr % 16); + if (shift >= 64) { + m1 = mask << (shift - 64); + v1 = val << (shift - 64); + } else { + m0 = mask << shift; m1 = mask >> (64 - shift); + v0 = val << shift; v1 = val >> (64 - shift); + b[0] = (b0 & ~m0) | (v0 & m0); + } + b[1] = (b1 & ~m1) | (v1 & m1); +} + +void +ia64_patch_imm64 (u64 insn_addr, u64 val) +{ + /* The assembler may generate offset pointing to either slot 1 + or slot 2 for a long (2-slot) instruction, occupying slots 1 + and 2. */ + insn_addr &= -16UL; + ia64_patch(insn_addr + 2, 0x01fffefe000UL, + (((val & 0x8000000000000000UL) >> 27) | /* bit 63 -> 36 */ + ((val & 0x0000000000200000UL) << 0) | /* bit 21 -> 21 */ + ((val & 0x00000000001f0000UL) << 6) | /* bit 16 -> 22 */ + ((val & 0x000000000000ff80UL) << 20) | /* bit 7 -> 27 */ + ((val & 0x000000000000007fUL) << 13) /* bit 0 -> 13 */)); + ia64_patch(insn_addr + 1, 0x1ffffffffffUL, val >> 22); +} + +extern char frametable_miss; +extern unsigned long xen_pstart; + +/* + * Add more patch points in seperate functions as appropriate + */ + +static void xen_patch_frametable_miss(u64 offset) +{ + u64 addr, val; + + addr = (u64)&frametable_miss; + val = get_imm64(addr) + offset; + ia64_patch_imm64(addr, val); +} + + +void xen_patch_kernel(void) +{ + unsigned long patch_offset; + + patch_offset = xen_pstart - (KERNEL_START - PAGE_OFFSET); + + printk("Xen patching physical address access by offset: " + "0x%lx\n", patch_offset); + + xen_patch_frametable_miss(patch_offset); + + ia64_sync_i(); + ia64_srlz_i(); +}
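
In case it helps while reviewing, here is a small, self-contained model of the descriptor walk done by xencomm_copy_from_guest() in the new xen/arch/ia64/xen/xencomm.c above: the guest hands the hypervisor a list of physical chunk addresses, the loop burns off 'skip' bytes, then copies at most a page per entry and returns how many bytes it could not copy. This is an illustration only, not part of the patch; the struct layout, page size, and names below are simplified assumptions rather than the definitions from public/xencomm.h, and the real code additionally does the paddr-to-maddr translation and page refcounting shown in the diff.

/*
 * Illustration only -- a user-space model of the xencomm descriptor walk.
 * Simplified assumptions: a tiny "page" size so the chunking is visible,
 * and a fixed-size address[] array instead of the real flexible layout.
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define SKETCH_PAGE_SIZE 16          /* tiny "page" so the walk is visible */
#define SKETCH_INVALID   (~0UL)      /* stands in for XENCOMM_INVALID */

struct sketch_desc {
    uint32_t nr_addrs;               /* number of entries in address[] */
    unsigned long address[8];        /* "physical" chunk addresses */
};

/* Model of the per-chunk copy; the real code translates paddr to maddr and
 * takes a page reference before the memcpy. */
static void copy_chunk(char *to, const char *mem, unsigned long paddr,
                       unsigned int len)
{
    memcpy(to, mem + paddr, len);
}

/* Same shape as the loop in xencomm_copy_from_guest():
 * returns the number of bytes NOT copied (zero on success). */
static unsigned int copy_from_desc(char *to, const char *mem,
                                   const struct sketch_desc *desc,
                                   unsigned int n, unsigned int skip)
{
    unsigned int to_pos = 0, i = 0;

    while (to_pos < n && i < desc->nr_addrs) {
        unsigned long src = desc->address[i];
        unsigned int chunksz, chunk_skip;

        if (src == SKETCH_INVALID) { i++; continue; }

        /* each entry covers at most the rest of its page */
        chunksz = SKETCH_PAGE_SIZE - (src % SKETCH_PAGE_SIZE);
        chunk_skip = chunksz < skip ? chunksz : skip;
        chunksz -= chunk_skip;
        skip -= chunk_skip;

        if (skip == 0) {
            unsigned int bytes = chunksz < n - to_pos ? chunksz : n - to_pos;
            copy_chunk(to + to_pos, mem, src + chunk_skip, bytes);
            to_pos += bytes;
        }
        i++;
    }
    return n - to_pos;
}

int main(void)
{
    char mem[64];                              /* fake "guest physical memory" */
    char out[32] = { 0 };
    struct sketch_desc d = { 2, { 4, 32 } };   /* two chunks: bytes 4..15 and 32..47 */
    int i;

    for (i = 0; i < 64; i++)
        mem[i] = 'a' + (i % 26);

    /* copy 20 bytes, skipping the first 3 of the described region */
    printf("left=%u out=%.20s\n", copy_from_desc(out, mem, &d, 20, 3), out);
    return 0;
}

The point of the layout is the same one the patch relies on: because each address[] entry never crosses a page boundary, the hypervisor can validate and reference one page at a time instead of trusting a guest virtual pointer.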