Hi Juan,

There are some significant fixes in xen-ia64-unstable.hg right now,
especially the xencomm patches. All of the patches are ia64-only.
Since it's unlikely that Keir will pull xen-ia64 again prior to the
3.0.3 release, how would you like to handle getting these fixes into
your tree? I've done it two different ways so far:

1. Clone xen-3.0.3-testing, clone xen-ia64-unstable. Pull
   xen-ia64-unstable into xen-3.0.3-testing, hg merge, then use
   pull-xen-unstable as normal. The problem with this is that your
   changeset id will reflect the extra changesets pulled in from
   ia64-land. (A rough command sketch of this is in the P.S. below.)

   OR

2. Start with the same clones. Create two patches and apply them in
   the spec-file.

Since method #1 is obvious and simple, I documented method #2 below.
Here are the steps I used to generate the patches attached to this
mail:

# 1. Clone upstream repos and create temporary trees
hg clone http://xenbits.xensource.com/ext/xen-ia64-unstable.hg
hg clone http://xenbits.xensource.com/xen-3.0.3-testing.hg
hg clone http://free.linux.hp.com/~agriffis/pull-xen-unstable.hg
hg clone http://hg.et.redhat.com/kernel/linux-2.6-xen-fedora
cp -al xen-3.0.3-testing.hg xen-3.0.3-testing.hg+ia64
cp -al linux-2.6-xen-fedora linux-2.6-xen-fedora+ia64

# 2. Remember tip changesets
ia64_cset=$(hg -R xen-ia64-unstable.hg parents | \
    awk -F'[ :]' '/^changeset:/{print $(NF-1);exit}')
testing_cset=$(hg -R xen-3.0.3-testing.hg parents | \
    awk -F'[ :]' '/^changeset:/{print $(NF-1);exit}')

# 3. Merge ia64 into testing
cd xen-3.0.3-testing.hg+ia64
hg pull ../xen-ia64-unstable.hg
hg merge
hg ci -m "merge xen-ia64-unstable.hg"
cd ..

# 4. Generate hypervisor patch
hg -R xen-3.0.3-testing.hg+ia64 di -r $testing_cset -r tip | \
    filterdiff -p1 -i 'xen/*' --strip=1 > ../xen-ia64-$ia64_cset.patch

# 5. Generate linux patch
cd linux-2.6-xen-fedora+ia64
hg -R ../xen-3.0.3-testing.hg+ia64 di -r $testing_cset -r tip | \
    ../pull-xen-unstable.hg/pull-xen-unstable --filter | \
    patch -p1
hg add
hg remove --after
hg ci -m "update to xen-ia64-unstable-$ia64_cset"
hg export tip > ../linux-2.6-xen-ia64-$ia64_cset.patch

Which method do you prefer?

Thanks,
Aron
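P.S. In case it's useful, here is a rough, untested sketch of the
commands for method #1, assuming the same clone URLs as in step 1
above:

# Method #1 sketch (untested): merge the ia64 changesets directly
# into a 3.0.3-testing clone, then use pull-xen-unstable as normal.
hg clone http://xenbits.xensource.com/xen-3.0.3-testing.hg
hg clone http://xenbits.xensource.com/ext/xen-ia64-unstable.hg
hg clone http://free.linux.hp.com/~agriffis/pull-xen-unstable.hg
cd xen-3.0.3-testing.hg
hg pull ../xen-ia64-unstable.hg
hg merge
hg ci -m "merge xen-ia64-unstable.hg"
cd ..
# Then run pull-xen-unstable against this merged tree as usual.
# Note: the resulting changeset id will include the extra ia64
# changesets, which is the drawback mentioned above.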
# HG changeset patch # User agriffis@xxxxxxxxxxxxxxxxxxxxxxxxx # Date 1160612360 14400 # Node ID 85a15e585061cc195e3eb9888179b3cb18c69d5e # Parent 47c098fdce14af6def37c12074d6271cb5c13b10 update to xen-ia64-unstable-11745 diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/Kconfig --- a/arch/ia64/Kconfig Wed Sep 20 15:35:23 2006 +0200 +++ b/arch/ia64/Kconfig Wed Oct 11 20:19:20 2006 -0400 @@ -71,6 +71,20 @@ config XEN_IA64_VDSO_PARAVIRT default y help vDSO paravirtualization + +config XEN_IA64_EXPOSE_P2M + bool "Xen/IA64 exposure p2m table" + depends on XEN + default y + help + expose p2m from xen + +config XEN_IA64_EXPOSE_P2M_USE_DTR + bool "Xen/IA64 map p2m table with dtr" + depends on XEN_IA64_EXPOSE_P2M + default y + help + use dtr to map the exposed p2m table config SCHED_NO_NO_OMIT_FRAME_POINTER bool diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/kernel/setup.c --- a/arch/ia64/kernel/setup.c Wed Sep 20 15:35:23 2006 +0200 +++ b/arch/ia64/kernel/setup.c Wed Oct 11 20:19:20 2006 -0400 @@ -75,6 +75,8 @@ EXPORT_SYMBOL(__per_cpu_offset); #endif #ifdef CONFIG_XEN +unsigned long kernel_start_pa; + static int xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -490,6 +492,7 @@ setup_arch (char **cmdline_p) #ifdef CONFIG_XEN if (is_running_on_xen()) { + kernel_start_pa = KERNEL_START - ia64_tpa(KERNEL_START); setup_xen_features(); /* Register a call for panic conditions. */ atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/xen/Makefile --- a/arch/ia64/xen/Makefile Wed Sep 20 15:35:23 2006 +0200 +++ b/arch/ia64/xen/Makefile Wed Oct 11 20:19:20 2006 -0400 @@ -3,6 +3,7 @@ # obj-y := hypercall.o xenivt.o xenentry.o xensetup.o xenpal.o xenhpski.o \ - hypervisor.o pci-dma-xen.o util.o + hypervisor.o pci-dma-xen.o util.o xencomm.o xcom_hcall.o \ + xcom_privcmd.o pci-dma-xen-y := ../../i386/kernel/pci-dma-xen.o diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/xen/hypervisor.c --- a/arch/ia64/xen/hypervisor.c Wed Sep 20 15:35:23 2006 +0200 +++ b/arch/ia64/xen/hypervisor.c Wed Oct 11 20:19:20 2006 -0400 @@ -40,59 +40,11 @@ int running_on_xen; int running_on_xen; EXPORT_SYMBOL(running_on_xen); -//XXX xen/ia64 copy_from_guest() is broken. -// This is a temporal work around until it is fixed. -// used by balloon.c netfront.c - -// get_xen_guest_handle is defined only when __XEN_TOOLS__ is defined -// if the definition in arch-ia64.h is changed, this must be updated. 
-#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) - -int -ia64_xenmem_reservation_op(unsigned long op, - struct xen_memory_reservation* reservation__) -{ - struct xen_memory_reservation reservation = *reservation__; - unsigned long* frame_list; - unsigned long nr_extents = reservation__->nr_extents; - int ret = 0; - get_xen_guest_handle(frame_list, reservation__->extent_start); - - BUG_ON(op != XENMEM_increase_reservation && - op != XENMEM_decrease_reservation && - op != XENMEM_populate_physmap); - - while (nr_extents > 0) { - int tmp_ret; - volatile unsigned long dummy; - - set_xen_guest_handle(reservation.extent_start, frame_list); - reservation.nr_extents = nr_extents; - - dummy = frame_list[0];// re-install tlb entry before hypercall - tmp_ret = ____HYPERVISOR_memory_op(op, &reservation); - if (tmp_ret < 0) { - if (ret == 0) { - ret = tmp_ret; - } - break; - } - if (tmp_ret == 0) { - //XXX dirty work around for skbuff_ctor() - // of a non-privileged domain, - if ((op == XENMEM_increase_reservation || - op == XENMEM_populate_physmap) && - !is_initial_xendomain() && - reservation.extent_order > 0) - return ret; - } - frame_list += tmp_ret; - nr_extents -= tmp_ret; - ret += tmp_ret; - } - return ret; -} -EXPORT_SYMBOL(ia64_xenmem_reservation_op); +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M +static int p2m_expose_init(void); +#else +#define p2m_expose_init() (-ENOSYS) +#endif //XXX same as i386, x86_64 contiguous_bitmap_set(), contiguous_bitmap_clear() // move those to lib/contiguous_bitmap? @@ -371,8 +323,6 @@ int int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count) { - __u64 va1, va2, pa1, pa2; - if (cmd == GNTTABOP_map_grant_ref) { unsigned int i; for (i = 0; i < count; i++) { @@ -380,29 +330,7 @@ HYPERVISOR_grant_table_op(unsigned int c (struct gnttab_map_grant_ref*)uop + i); } } - va1 = (__u64)uop & PAGE_MASK; - pa1 = pa2 = 0; - if ((REGION_NUMBER(va1) == 5) && - ((va1 - KERNEL_START) >= KERNEL_TR_PAGE_SIZE)) { - pa1 = ia64_tpa(va1); - if (cmd <= GNTTABOP_transfer) { - static uint32_t uop_size[GNTTABOP_transfer + 1] = { - sizeof(struct gnttab_map_grant_ref), - sizeof(struct gnttab_unmap_grant_ref), - sizeof(struct gnttab_setup_table), - sizeof(struct gnttab_dump_table), - sizeof(struct gnttab_transfer), - }; - va2 = (__u64)uop + (uop_size[cmd] * count) - 1; - va2 &= PAGE_MASK; - if (va1 != va2) { - /* maximum size of uop is 2pages */ - BUG_ON(va2 > va1 + PAGE_SIZE); - pa2 = ia64_tpa(va2); - } - } - } - return ____HYPERVISOR_grant_table_op(cmd, uop, count, pa1, pa2); + return xencomm_mini_hypercall_grant_table_op(cmd, uop, count); } EXPORT_SYMBOL(HYPERVISOR_grant_table_op); @@ -526,6 +454,10 @@ out: privcmd_resource_min, privcmd_resource_max, (privcmd_resource_max - privcmd_resource_min) >> 20); BUG_ON(privcmd_resource_min >= privcmd_resource_max); + + // XXX this should be somewhere appropriate + (void)p2m_expose_init(); + return 0; } late_initcall(xen_ia64_privcmd_init); @@ -831,3 +763,276 @@ time_resume(void) /* Just trigger a tick. 
*/ ia64_cpu_local_tick(); } + +/////////////////////////////////////////////////////////////////////////// +// expose p2m table +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M +#include <linux/cpu.h> +#include <asm/uaccess.h> + +int p2m_initialized __read_mostly = 0; + +unsigned long p2m_min_low_pfn __read_mostly; +unsigned long p2m_max_low_pfn __read_mostly; +unsigned long p2m_convert_min_pfn __read_mostly; +unsigned long p2m_convert_max_pfn __read_mostly; + +static struct resource p2m_resource = { + .name = "Xen p2m table", + .flags = IORESOURCE_MEM, +}; +static unsigned long p2m_assign_start_pfn __read_mostly; +static unsigned long p2m_assign_end_pfn __read_mostly; +volatile const pte_t* p2m_pte __read_mostly; + +#define GRNULE_PFN PTRS_PER_PTE +static unsigned long p2m_granule_pfn __read_mostly = GRNULE_PFN; + +#define ROUNDDOWN(x, y) ((x) & ~((y) - 1)) +#define ROUNDUP(x, y) (((x) + (y) - 1) & ~((y) - 1)) + +#define P2M_PREFIX "Xen p2m: " + +static int xen_ia64_p2m_expose __read_mostly = 1; +module_param(xen_ia64_p2m_expose, int, 0); +MODULE_PARM_DESC(xen_ia64_p2m_expose, + "enable/disable xen/ia64 p2m exposure optimization\n"); + +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR +static int xen_ia64_p2m_expose_use_dtr __read_mostly = 1; +module_param(xen_ia64_p2m_expose_use_dtr, int, 0); +MODULE_PARM_DESC(xen_ia64_p2m_expose_use_dtr, + "use/unuse dtr to map exposed p2m table\n"); + +static const int p2m_page_shifts[] = { + _PAGE_SIZE_4K, + _PAGE_SIZE_8K, + _PAGE_SIZE_16K, + _PAGE_SIZE_64K, + _PAGE_SIZE_256K, + _PAGE_SIZE_1M, + _PAGE_SIZE_4M, + _PAGE_SIZE_16M, + _PAGE_SIZE_64M, + _PAGE_SIZE_256M, +}; + +struct p2m_itr_arg { + unsigned long vaddr; + unsigned long pteval; + unsigned long log_page_size; +}; +static struct p2m_itr_arg p2m_itr_arg __read_mostly; + +// This should be in asm-ia64/kregs.h +#define IA64_TR_P2M_TABLE 3 + +static void +p2m_itr(void* info) +{ + struct p2m_itr_arg* arg = (struct p2m_itr_arg*)info; + ia64_itr(0x2, IA64_TR_P2M_TABLE, + arg->vaddr, arg->pteval, arg->log_page_size); + ia64_srlz_d(); +} + +static int +p2m_expose_dtr_call(struct notifier_block *self, + unsigned long event, void* ptr) +{ + unsigned int cpu = (unsigned int)(long)ptr; + if (event != CPU_ONLINE) + return 0; + if (!(p2m_initialized && xen_ia64_p2m_expose_use_dtr)) + smp_call_function_single(cpu, &p2m_itr, &p2m_itr_arg, 1, 1); + return 0; +} + +static struct notifier_block p2m_expose_dtr_hotplug_notifier = { + .notifier_call = p2m_expose_dtr_call, + .next = NULL, + .priority = 0 +}; +#endif + +static int +p2m_expose_init(void) +{ + unsigned long num_pfn; + unsigned long size = 0; + unsigned long p2m_size = 0; + unsigned long align = ~0UL; + int error = 0; +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR + int i; + unsigned long page_size; + unsigned long log_page_size = 0; +#endif + + if (!xen_ia64_p2m_expose) + return -ENOSYS; + if (p2m_initialized) + return 0; + +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR + error = register_cpu_notifier(&p2m_expose_dtr_hotplug_notifier); + if (error < 0) + return error; +#endif + + lock_cpu_hotplug(); + if (p2m_initialized) + goto out; + +#ifdef CONFIG_DISCONTIGMEM + p2m_min_low_pfn = min_low_pfn; + p2m_max_low_pfn = max_low_pfn; +#else + p2m_min_low_pfn = 0; + p2m_max_low_pfn = max_pfn; +#endif + +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR + if (xen_ia64_p2m_expose_use_dtr) { + unsigned long granule_pfn = 0; + p2m_size = p2m_max_low_pfn - p2m_min_low_pfn; + for (i = 0; + i < sizeof(p2m_page_shifts)/sizeof(p2m_page_shifts[0]); + i++) { + log_page_size = p2m_page_shifts[i]; + page_size 
= 1UL << log_page_size; + if (page_size < p2m_size) + continue; + + granule_pfn = max(page_size >> PAGE_SHIFT, + p2m_granule_pfn); + p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn, + granule_pfn); + p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn, + granule_pfn); + num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn; + size = num_pfn << PAGE_SHIFT; + p2m_size = num_pfn / PTRS_PER_PTE; + p2m_size = ROUNDUP(p2m_size, granule_pfn << PAGE_SHIFT); + if (p2m_size == page_size) + break; + } + if (p2m_size != page_size) { + printk(KERN_ERR "p2m_size != page_size\n"); + error = -EINVAL; + goto out; + } + align = max(privcmd_resource_align, granule_pfn << PAGE_SHIFT); + } else +#endif + { + BUG_ON(p2m_granule_pfn & (p2m_granule_pfn - 1)); + p2m_convert_min_pfn = ROUNDDOWN(p2m_min_low_pfn, + p2m_granule_pfn); + p2m_convert_max_pfn = ROUNDUP(p2m_max_low_pfn, p2m_granule_pfn); + num_pfn = p2m_convert_max_pfn - p2m_convert_min_pfn; + size = num_pfn << PAGE_SHIFT; + p2m_size = num_pfn / PTRS_PER_PTE; + p2m_size = ROUNDUP(p2m_size, p2m_granule_pfn << PAGE_SHIFT); + align = max(privcmd_resource_align, + p2m_granule_pfn << PAGE_SHIFT); + } + + // use privcmd region + error = allocate_resource(&iomem_resource, &p2m_resource, p2m_size, + privcmd_resource_min, privcmd_resource_max, + align, NULL, NULL); + if (error) { + printk(KERN_ERR P2M_PREFIX + "can't allocate region for p2m exposure " + "[0x%016lx, 0x%016lx) 0x%016lx\n", + p2m_convert_min_pfn, p2m_convert_max_pfn, p2m_size); + goto out; + } + + p2m_assign_start_pfn = p2m_resource.start >> PAGE_SHIFT; + p2m_assign_end_pfn = p2m_resource.end >> PAGE_SHIFT; + + error = HYPERVISOR_expose_p2m(p2m_convert_min_pfn, + p2m_assign_start_pfn, + size, p2m_granule_pfn); + if (error) { + printk(KERN_ERR P2M_PREFIX "failed expose p2m hypercall %d\n", + error); + printk(KERN_ERR P2M_PREFIX "conv 0x%016lx assign 0x%016lx " + "size 0x%016lx granule 0x%016lx\n", + p2m_convert_min_pfn, p2m_assign_start_pfn, + size, p2m_granule_pfn);; + release_resource(&p2m_resource); + goto out; + } + p2m_pte = (volatile const pte_t*)pfn_to_kaddr(p2m_assign_start_pfn); +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR + if (xen_ia64_p2m_expose_use_dtr) { + p2m_itr_arg.vaddr = (unsigned long)__va(p2m_assign_start_pfn + << PAGE_SHIFT); + p2m_itr_arg.pteval = pte_val(pfn_pte(p2m_assign_start_pfn, + PAGE_KERNEL)); + p2m_itr_arg.log_page_size = log_page_size; + smp_mb(); + smp_call_function(&p2m_itr, &p2m_itr_arg, 1, 1); + p2m_itr(&p2m_itr_arg); + } +#endif + smp_mb(); + p2m_initialized = 1; + printk(P2M_PREFIX "assign p2m table of [0x%016lx, 0x%016lx)\n", + p2m_convert_min_pfn << PAGE_SHIFT, + p2m_convert_max_pfn << PAGE_SHIFT); + printk(P2M_PREFIX "to [0x%016lx, 0x%016lx) (%ld KBytes)\n", + p2m_assign_start_pfn << PAGE_SHIFT, + p2m_assign_end_pfn << PAGE_SHIFT, + p2m_size / 1024); +out: + unlock_cpu_hotplug(); + return error; +} + +#ifdef notyet +void +p2m_expose_cleanup(void) +{ + BUG_ON(!p2m_initialized); +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M_USE_DTR + unregister_cpu_notifier(&p2m_expose_dtr_hotplug_notifier); +#endif + release_resource(&p2m_resource); +} +#endif + +//XXX inlinize? 
+unsigned long +p2m_phystomach(unsigned long gpfn) +{ + volatile const pte_t* pte; + unsigned long mfn; + unsigned long pteval; + + if (!p2m_initialized || + gpfn < p2m_min_low_pfn || gpfn > p2m_max_low_pfn + /* || !pfn_valid(gpfn) */) + return INVALID_MFN; + pte = p2m_pte + (gpfn - p2m_convert_min_pfn); + + mfn = INVALID_MFN; + if (likely(__get_user(pteval, (unsigned long __user *)pte) == 0 && + pte_present(__pte(pteval)) && + pte_pfn(__pte(pteval)) != (INVALID_MFN >> PAGE_SHIFT))) + mfn = (pteval & _PFN_MASK) >> PAGE_SHIFT; + + return mfn; +} + +EXPORT_SYMBOL_GPL(p2m_initialized); +EXPORT_SYMBOL_GPL(p2m_min_low_pfn); +EXPORT_SYMBOL_GPL(p2m_max_low_pfn); +EXPORT_SYMBOL_GPL(p2m_convert_min_pfn); +EXPORT_SYMBOL_GPL(p2m_convert_max_pfn); +EXPORT_SYMBOL_GPL(p2m_pte); +EXPORT_SYMBOL_GPL(p2m_phystomach); +#endif diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/xen/util.c --- a/arch/ia64/xen/util.c Wed Sep 20 15:35:23 2006 +0200 +++ b/arch/ia64/xen/util.c Wed Oct 11 20:19:20 2006 -0400 @@ -27,6 +27,8 @@ #include <linux/vmalloc.h> #include <asm/uaccess.h> #include <xen/driver_util.h> +#include <xen/interface/memory.h> +#include <asm/hypercall.h> struct vm_struct *alloc_vm_area(unsigned long size) { diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/xen/xensetup.S --- a/arch/ia64/xen/xensetup.S Wed Sep 20 15:35:23 2006 +0200 +++ b/arch/ia64/xen/xensetup.S Wed Oct 11 20:19:20 2006 -0400 @@ -22,12 +22,11 @@ GLOBAL_ENTRY(early_xen_setup) mov cr.iva=r10 -#if XSI_BASE != 0xf100000000000000UL - /* Backward compatibility. */ -(isBP) mov r2=0x600 + /* Set xsi base. */ +#define FW_HYPERCALL_SET_SHARED_INFO_VA 0x600 +(isBP) mov r2=FW_HYPERCALL_SET_SHARED_INFO_VA (isBP) movl r28=XSI_BASE;; (isBP) break 0x1000;; -#endif br.ret.sptk.many rp ;; @@ -37,18 +36,18 @@ END(early_xen_setup) /* Stub for suspend. Just force the stacked registers to be written in memory. */ -GLOBAL_ENTRY(HYPERVISOR_suspend) +GLOBAL_ENTRY(xencomm_arch_hypercall_suspend) + mov r15=r32 + ;; alloc r20=ar.pfs,0,0,0,0 - mov r14=2 - mov r15=r12 - ;; + mov r2=__HYPERVISOR_sched_op + ;; /* We don't want to deal with RSE. */ flushrs - mov r2=__HYPERVISOR_sched_op - st4 [r12]=r14 + mov r14=2 // SCHEDOP_shutdown ;; break 0x1000 ;; mov ar.pfs=r20 br.ret.sptk.many b0 -END(HYPERVISOR_suspend) +END(xencomm_arch_hypercall_suspend) diff -r 47c098fdce14 -r 85a15e585061 drivers/xen/privcmd/privcmd.c --- a/drivers/xen/privcmd/privcmd.c Wed Sep 20 15:35:23 2006 +0200 +++ b/drivers/xen/privcmd/privcmd.c Wed Oct 11 20:19:20 2006 -0400 @@ -82,18 +82,7 @@ static int privcmd_ioctl(struct inode *i : "r8", "r10", "memory" ); } #elif defined (__ia64__) - __asm__ __volatile__ ( - ";; mov r14=%2; mov r15=%3; " - "mov r16=%4; mov r17=%5; mov r18=%6;" - "mov r2=%1; break 0x1000;; mov %0=r8 ;;" - : "=r" (ret) - : "r" (hypercall.op), - "r" (hypercall.arg[0]), - "r" (hypercall.arg[1]), - "r" (hypercall.arg[2]), - "r" (hypercall.arg[3]), - "r" (hypercall.arg[4]) - : "r14","r15","r16","r17","r18","r2","r8","memory"); + ret = privcmd_hypercall(&hypercall); #endif } break; diff -r 47c098fdce14 -r 85a15e585061 include/asm-ia64/hypercall.h --- a/include/asm-ia64/hypercall.h Wed Sep 20 15:35:23 2006 +0200 +++ b/include/asm-ia64/hypercall.h Wed Oct 11 20:19:20 2006 -0400 @@ -33,11 +33,12 @@ #ifndef __HYPERCALL_H__ #define __HYPERCALL_H__ -#include <linux/string.h> /* memcpy() */ - #ifndef __HYPERVISOR_H__ # error "please don't include this file directly" #endif + +#include <asm/xen/xcom_hcall.h> +struct xencomm_handle; /* * Assembler stubs for hyper-calls. 
@@ -157,157 +158,117 @@ (type)__res; \ }) -static inline int -HYPERVISOR_sched_op_compat( - int cmd, unsigned long arg) -{ - return _hypercall2(int, sched_op_compat, cmd, arg); -} - -static inline int -HYPERVISOR_sched_op( - int cmd, void *arg) + +static inline int +xencomm_arch_hypercall_sched_op(int cmd, struct xencomm_handle *arg) { return _hypercall2(int, sched_op, cmd, arg); } static inline long -HYPERVISOR_set_timer_op( - u64 timeout) -{ - unsigned long timeout_hi = (unsigned long)(timeout>>32); - unsigned long timeout_lo = (unsigned long)timeout; - return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); -} - -static inline int -HYPERVISOR_dom0_op( - dom0_op_t *dom0_op) -{ - dom0_op->interface_version = DOM0_INTERFACE_VERSION; - return _hypercall1(int, dom0_op, dom0_op); -} - -static inline int -HYPERVISOR_multicall( - void *call_list, int nr_calls) -{ - return _hypercall2(int, multicall, call_list, nr_calls); -} - -//XXX xen/ia64 copy_from_guest() is broken. -// This is a temporal work around until it is fixed. -static inline int -____HYPERVISOR_memory_op( - unsigned int cmd, void *arg) -{ - return _hypercall2(int, memory_op, cmd, arg); -} - -#include <xen/interface/memory.h> -#ifdef CONFIG_VMX_GUEST -# define ia64_xenmem_reservation_op(op, xmr) (0) -#else -int ia64_xenmem_reservation_op(unsigned long op, - struct xen_memory_reservation* reservation__); -#endif -static inline int -HYPERVISOR_memory_op( - unsigned int cmd, void *arg) -{ - switch (cmd) { - case XENMEM_increase_reservation: - case XENMEM_decrease_reservation: - case XENMEM_populate_physmap: - return ia64_xenmem_reservation_op(cmd, - (struct xen_memory_reservation*)arg); - default: - return ____HYPERVISOR_memory_op(cmd, arg); - } - /* NOTREACHED */ -} - -static inline int -HYPERVISOR_event_channel_op( - int cmd, void *arg) -{ - int rc = _hypercall2(int, event_channel_op, cmd, arg); - if (unlikely(rc == -ENOSYS)) { - struct evtchn_op op; - op.cmd = cmd; - memcpy(&op.u, arg, sizeof(op.u)); - rc = _hypercall1(int, event_channel_op_compat, &op); - } - return rc; -} - -static inline int -HYPERVISOR_acm_op( - unsigned int cmd, void *arg) -{ - return _hypercall2(int, acm_op, cmd, arg); -} - -static inline int -HYPERVISOR_xen_version( - int cmd, void *arg) -{ - return _hypercall2(int, xen_version, cmd, arg); -} - -static inline int -HYPERVISOR_console_io( - int cmd, int count, char *str) -{ - return _hypercall3(int, console_io, cmd, count, str); -} - -static inline int -HYPERVISOR_physdev_op( - int cmd, void *arg) -{ - int rc = _hypercall2(int, physdev_op, cmd, arg); - if (unlikely(rc == -ENOSYS)) { - struct physdev_op op; - op.cmd = cmd; - memcpy(&op.u, arg, sizeof(op.u)); - rc = _hypercall1(int, physdev_op_compat, &op); - } - return rc; -} - -//XXX __HYPERVISOR_grant_table_op is used for this hypercall constant. 
-static inline int -____HYPERVISOR_grant_table_op( - unsigned int cmd, void *uop, unsigned int count, - unsigned long pa1, unsigned long pa2) -{ - return _hypercall5(int, grant_table_op, cmd, uop, count, pa1, pa2); +HYPERVISOR_set_timer_op(u64 timeout) +{ + unsigned long timeout_hi = (unsigned long)(timeout >> 32); + unsigned long timeout_lo = (unsigned long)timeout; + return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); +} + +static inline int +xencomm_arch_hypercall_dom0_op(struct xencomm_handle *op) +{ + return _hypercall1(int, dom0_op, op); +} + +static inline int +xencomm_arch_hypercall_sysctl(struct xencomm_handle *op) +{ + return _hypercall1(int, sysctl, op); +} + +static inline int +xencomm_arch_hypercall_domctl(struct xencomm_handle *op) +{ + return _hypercall1(int, domctl, op); +} + +static inline int +xencomm_arch_hypercall_multicall(struct xencomm_handle *call_list, + int nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); +} + +static inline int +xencomm_arch_hypercall_memory_op(unsigned int cmd, struct xencomm_handle *arg) +{ + return _hypercall2(int, memory_op, cmd, arg); +} + +static inline int +xencomm_arch_hypercall_event_channel_op(int cmd, struct xencomm_handle *arg) +{ + return _hypercall2(int, event_channel_op, cmd, arg); +} + +static inline int +xencomm_arch_hypercall_acm_op(unsigned int cmd, struct xencomm_handle *arg) +{ + return _hypercall2(int, acm_op, cmd, arg); +} + +static inline int +xencomm_arch_hypercall_xen_version(int cmd, struct xencomm_handle *arg) +{ + return _hypercall2(int, xen_version, cmd, arg); +} + +static inline int +xencomm_arch_hypercall_console_io(int cmd, int count, + struct xencomm_handle *str) +{ + return _hypercall3(int, console_io, cmd, count, str); +} + +static inline int +xencomm_arch_hypercall_physdev_op(int cmd, struct xencomm_handle *arg) +{ + return _hypercall2(int, physdev_op, cmd, arg); +} + +static inline int +xencomm_arch_hypercall_grant_table_op(unsigned int cmd, + struct xencomm_handle *uop, + unsigned int count) +{ + return _hypercall3(int, grant_table_op, cmd, uop, count); } int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count); -static inline int -HYPERVISOR_vcpu_op( - int cmd, int vcpuid, void *extra_args) -{ - return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); -} - -extern int HYPERVISOR_suspend(unsigned long srec); - -static inline unsigned long -HYPERVISOR_hvm_op( - int cmd, void *arg) +extern int xencomm_arch_hypercall_suspend(struct xencomm_handle *arg); + +static inline int +xencomm_arch_hypercall_callback_op(int cmd, struct xencomm_handle *arg) +{ + return _hypercall2(int, callback_op, cmd, arg); +} + +static inline unsigned long +xencomm_arch_hypercall_hvm_op(int cmd, void *arg) { return _hypercall2(unsigned long, hvm_op, cmd, arg); } static inline int -HYPERVISOR_callback_op( - int cmd, void *arg) -{ - return _hypercall2(int, callback_op, cmd, arg); +HYPERVISOR_physdev_op(int cmd, void *arg) +{ + switch (cmd) { + case PHYSDEVOP_eoi: + return _hypercall1(int, ia64_fast_eoi, + ((struct physdev_eoi *)arg)->irq); + default: + return xencomm_hypercall_physdev_op(cmd, arg); + } } extern fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs); @@ -417,7 +378,46 @@ HYPERVISOR_add_physmap(unsigned long gpf return ret; } +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M +static inline unsigned long +HYPERVISOR_expose_p2m(unsigned long conv_start_gpfn, + unsigned long assign_start_gpfn, + unsigned long expose_size, unsigned long granule_pfn) +{ + return 
_hypercall5(unsigned long, ia64_dom0vp_op, + IA64_DOM0VP_expose_p2m, conv_start_gpfn, + assign_start_gpfn, expose_size, granule_pfn); +} +#endif + // for balloon driver #define HYPERVISOR_update_va_mapping(va, new_val, flags) (0) +/* Use xencomm to do hypercalls. */ +#ifdef MODULE +#define HYPERVISOR_sched_op xencomm_mini_hypercall_sched_op +#define HYPERVISOR_event_channel_op xencomm_mini_hypercall_event_channel_op +#define HYPERVISOR_callback_op xencomm_mini_hypercall_callback_op +#define HYPERVISOR_multicall xencomm_mini_hypercall_multicall +#define HYPERVISOR_xen_version xencomm_mini_hypercall_xen_version +#define HYPERVISOR_console_io xencomm_mini_hypercall_console_io +#define HYPERVISOR_hvm_op xencomm_mini_hypercall_hvm_op +#ifdef CONFIG_VMX_GUEST +#define HYPERVISOR_memory_op 0 +#else +#define HYPERVISOR_memory_op xencomm_mini_hypercall_memory_op +#endif +#else +#define HYPERVISOR_sched_op xencomm_hypercall_sched_op +#define HYPERVISOR_event_channel_op xencomm_hypercall_event_channel_op +#define HYPERVISOR_callback_op xencomm_hypercall_callback_op +#define HYPERVISOR_multicall xencomm_hypercall_multicall +#define HYPERVISOR_xen_version xencomm_hypercall_xen_version +#define HYPERVISOR_console_io xencomm_hypercall_console_io +#define HYPERVISOR_hvm_op xencomm_hypercall_hvm_op +#define HYPERVISOR_memory_op xencomm_hypercall_memory_op +#endif + +#define HYPERVISOR_suspend xencomm_hypercall_suspend + #endif /* __HYPERCALL_H__ */ diff -r 47c098fdce14 -r 85a15e585061 include/asm-ia64/hypervisor.h --- a/include/asm-ia64/hypervisor.h Wed Sep 20 15:35:23 2006 +0200 +++ b/include/asm-ia64/hypervisor.h Wed Oct 11 20:19:20 2006 -0400 @@ -74,9 +74,6 @@ HYPERVISOR_yield( { int rc = HYPERVISOR_sched_op(SCHEDOP_yield, NULL); - if (rc == -ENOSYS) - rc = HYPERVISOR_sched_op_compat(SCHEDOP_yield, 0); - return rc; } @@ -85,9 +82,6 @@ HYPERVISOR_block( void) { int rc = HYPERVISOR_sched_op(SCHEDOP_block, NULL); - - if (rc == -ENOSYS) - rc = HYPERVISOR_sched_op_compat(SCHEDOP_block, 0); return rc; } @@ -101,9 +95,6 @@ HYPERVISOR_shutdown( }; int rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown); - - if (rc == -ENOSYS) - rc = HYPERVISOR_sched_op_compat(SCHEDOP_shutdown, reason); return rc; } @@ -121,8 +112,6 @@ HYPERVISOR_poll( set_xen_guest_handle(sched_poll.ports, ports); rc = HYPERVISOR_sched_op(SCHEDOP_poll, &sched_poll); - if (rc == -ENOSYS) - rc = HYPERVISOR_sched_op_compat(SCHEDOP_yield, 0); return rc; } diff -r 47c098fdce14 -r 85a15e585061 include/asm-ia64/maddr.h --- a/include/asm-ia64/maddr.h Wed Sep 20 15:35:23 2006 +0200 +++ b/include/asm-ia64/maddr.h Wed Oct 11 20:19:20 2006 -0400 @@ -10,11 +10,26 @@ #define INVALID_P2M_ENTRY (~0UL) +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M +extern int p2m_initialized; +extern unsigned long p2m_min_low_pfn; +extern unsigned long p2m_max_low_pfn; +extern unsigned long p2m_convert_min_pfn; +extern unsigned long p2m_convert_max_pfn; +extern volatile const pte_t* p2m_pte; +unsigned long p2m_phystomach(unsigned long gpfn); +#else +#define p2m_initialized (0) +#define p2m_phystomach(gpfn) INVALID_MFN +#endif + /* XXX xen page size != page size */ static inline unsigned long pfn_to_mfn_for_dma(unsigned long pfn) { unsigned long mfn; + if (p2m_initialized) + return p2m_phystomach(pfn); mfn = HYPERVISOR_phystomach(pfn); BUG_ON(mfn == 0); // XXX BUG_ON(mfn == INVALID_P2M_ENTRY); // XXX diff -r 47c098fdce14 -r 85a15e585061 include/asm-ia64/xen/privop.h --- a/include/asm-ia64/xen/privop.h Wed Sep 20 15:35:23 2006 +0200 +++ b/include/asm-ia64/xen/privop.h Wed Oct 11 
20:19:20 2006 -0400 @@ -14,12 +14,9 @@ #define IA64_PARAVIRTUALIZED -#if 0 -#undef XSI_BASE /* At 1 MB, before per-cpu space but still addressable using addl instead of movl. */ #define XSI_BASE 0xfffffffffff00000 -#endif /* Address of mapped regs. */ #define XMAPPEDREGS_BASE (XSI_BASE + XSI_SIZE) diff -r 47c098fdce14 -r 85a15e585061 include/xen/interface/arch-ia64.h --- a/include/xen/interface/arch-ia64.h Wed Sep 20 15:35:23 2006 +0200 +++ b/include/xen/interface/arch-ia64.h Wed Oct 11 20:19:20 2006 -0400 @@ -47,18 +47,6 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); #ifndef __ASSEMBLY__ typedef unsigned long xen_ulong_t; - -#define GPFN_MEM (0UL << 56) /* Guest pfn is normal mem */ -#define GPFN_FRAME_BUFFER (1UL << 56) /* VGA framebuffer */ -#define GPFN_LOW_MMIO (2UL << 56) /* Low MMIO range */ -#define GPFN_PIB (3UL << 56) /* PIB base */ -#define GPFN_IOSAPIC (4UL << 56) /* IOSAPIC base */ -#define GPFN_LEGACY_IO (5UL << 56) /* Legacy I/O base */ -#define GPFN_GFW (6UL << 56) /* Guest Firmware */ -#define GPFN_HIGH_MMIO (7UL << 56) /* High MMIO range */ - -#define GPFN_IO_MASK (7UL << 56) /* Guest pfn is I/O type */ -#define GPFN_INV_MASK (31UL << 59) /* Guest pfn is invalid */ #define INVALID_MFN (~0UL) @@ -336,33 +324,33 @@ typedef struct vcpu_guest_context vcpu_g typedef struct vcpu_guest_context vcpu_guest_context_t; DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); -// dom0 vp op +/* dom0 vp op */ #define __HYPERVISOR_ia64_dom0vp_op __HYPERVISOR_arch_0 -#define IA64_DOM0VP_ioremap 0 // map io space in machine - // address to dom0 physical - // address space. - // currently physical - // assignedg address equals to - // machine address -#define IA64_DOM0VP_phystomach 1 // convert a pseudo physical - // page frame number - // to the corresponding - // machine page frame number. - // if no page is assigned, - // INVALID_MFN or GPFN_INV_MASK - // is returned depending on - // domain's non-vti/vti mode. -#define IA64_DOM0VP_machtophys 3 // convert a machine page - // frame number - // to the corresponding - // pseudo physical page frame - // number of the caller domain -#define IA64_DOM0VP_zap_physmap 17 // unmap and free pages - // contained in the specified - // pseudo physical region -#define IA64_DOM0VP_add_physmap 18 // assigne machine page frane - // to dom0's pseudo physical - // address space. +/* Map io space in machine address to dom0 physical address space. + Currently physical assigned address equals to machine address. */ +#define IA64_DOM0VP_ioremap 0 + +/* Convert a pseudo physical page frame number to the corresponding + machine page frame number. If no page is assigned, INVALID_MFN or + GPFN_INV_MASK is returned depending on domain's non-vti/vti mode. */ +#define IA64_DOM0VP_phystomach 1 + +/* Convert a machine page frame number to the corresponding pseudo physical + page frame number of the caller domain. */ +#define IA64_DOM0VP_machtophys 3 + +/* Reserved for future use. */ +#define IA64_DOM0VP_iounmap 4 + +/* Unmap and free pages contained in the specified pseudo physical region. */ +#define IA64_DOM0VP_zap_physmap 5 + +/* Assign machine page frame to dom0's pseudo physical address space. */ +#define IA64_DOM0VP_add_physmap 6 + +/* expose the p2m table into domain */ +#define IA64_DOM0VP_expose_p2m 7 + // flags for page assignement to pseudo physical address space #define _ASSIGN_readonly 0 #define ASSIGN_readonly (1UL << _ASSIGN_readonly) @@ -395,15 +383,12 @@ struct xen_ia64_boot_param { #endif /* !__ASSEMBLY__ */ -/* Address of shared_info in domain virtual space. 
- This is the default address, for compatibility only. */ -#define XSI_BASE 0xf100000000000000 - /* Size of the shared_info area (this is not related to page size). */ #define XSI_SHIFT 14 #define XSI_SIZE (1 << XSI_SHIFT) /* Log size of mapped_regs area (64 KB - only 4KB is used). */ #define XMAPPEDREGS_SHIFT 12 +#define XMAPPEDREGS_SIZE (1 << XMAPPEDREGS_SHIFT) /* Offset of XASI (Xen arch shared info) wrt XSI_BASE. */ #define XMAPPEDREGS_OFS XSI_SIZE @@ -435,6 +420,17 @@ struct xen_ia64_boot_param { #define HYPERPRIVOP_GET_PSR 0x19 #define HYPERPRIVOP_MAX 0x19 +/* Fast and light hypercalls. */ +#define __HYPERVISOR_ia64_fast_eoi 0x0200 + +/* Xencomm macros. */ +#define XENCOMM_INLINE_MASK 0xf800000000000000UL +#define XENCOMM_INLINE_FLAG 0x8000000000000000UL + +#define XENCOMM_IS_INLINE(addr) \ + (((unsigned long)(addr) & XENCOMM_INLINE_MASK) == XENCOMM_INLINE_FLAG) +#define XENCOMM_INLINE_ADDR(addr) \ + ((unsigned long)(addr) & ~XENCOMM_INLINE_MASK) #endif /* __HYPERVISOR_IF_IA64_H__ */ /* diff -r 47c098fdce14 -r 85a15e585061 lib/Makefile --- a/lib/Makefile Wed Sep 20 15:35:23 2006 +0200 +++ b/lib/Makefile Wed Oct 11 20:19:20 2006 -0400 @@ -52,9 +52,7 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_SWIOTLB) += swiotlb.o -ifneq ($(CONFIG_XEN_IA64_DOM0_NON_VP),y) swiotlb-$(CONFIG_XEN) := ../arch/i386/kernel/swiotlb.o -endif hostprogs-y := gen_crc32table clean-files := crc32table.h diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/Kconfig.orig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/arch/ia64/Kconfig.orig Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,590 @@ +# +# For a description of the syntax of this configuration file, +# see Documentation/kbuild/kconfig-language.txt. +# + +mainmenu "IA-64 Linux Kernel Configuration" + +source "init/Kconfig" + +menu "Processor type and features" + +config IA64 + bool + default y + help + The Itanium Processor Family is Intel's 64-bit successor to + the 32-bit X86 line. The IA-64 Linux project has a home + page at <http://www.linuxia64.org/> and a mailing list at + <linux-ia64@xxxxxxxxxxxxxxx>. + +config 64BIT + bool + default y + +config MMU + bool + default y + +config SWIOTLB + bool + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + default y + +config GENERIC_FIND_NEXT_BIT + bool + default y + +config GENERIC_CALIBRATE_DELAY + bool + default y + +config TIME_INTERPOLATION + bool + default y + +config DMI + bool + default y + +config EFI + bool + default y + +config GENERIC_IOMAP + bool + default y + +config XEN + bool "Xen hypervisor support" + default y + help + Enable Xen hypervisor support. Resulting kernel runs + both as a guest OS on Xen and natively on hardware. + +config XEN_IA64_VDSO_PARAVIRT + bool + depends on XEN && !ITANIUM + default y + help + vDSO paravirtualization + +config SCHED_NO_NO_OMIT_FRAME_POINTER + bool + default y + +config IA64_UNCACHED_ALLOCATOR + bool + select GENERIC_ALLOCATOR + +config DMA_IS_DMA32 + bool + default y + +config DMA_IS_NORMAL + bool + depends on IA64_SGI_SN2 + default y + +config AUDIT_ARCH + bool + default y + +choice + prompt "System type" + default IA64_GENERIC + +config IA64_GENERIC + bool "generic" + select ACPI + select PCI + select NUMA + select ACPI_NUMA + help + This selects the system type of your hardware. A "generic" kernel + will run on any supported IA-64 system. However, if you configure + a kernel for your specific system, it will be faster and smaller. 
+ + generic For any supported IA-64 system + DIG-compliant For DIG ("Developer's Interface Guide") compliant systems + HP-zx1/sx1000 For HP systems + HP-zx1/sx1000+swiotlb For HP systems with (broken) DMA-constrained devices. + SGI-SN2 For SGI Altix systems + Ski-simulator For the HP simulator <http://www.hpl.hp.com/research/linux/ski/> + + If you don't know what to do, choose "generic". + +config IA64_DIG + bool "DIG-compliant" + +config IA64_HP_ZX1 + bool "HP-zx1/sx1000" + help + Build a kernel that runs on HP zx1 and sx1000 systems. This adds + support for the HP I/O MMU. + +config IA64_HP_ZX1_SWIOTLB + bool "HP-zx1/sx1000 with software I/O TLB" + help + Build a kernel that runs on HP zx1 and sx1000 systems even when they + have broken PCI devices which cannot DMA to full 32 bits. Apart + from support for the HP I/O MMU, this includes support for the software + I/O TLB, which allows supporting the broken devices at the expense of + wasting some kernel memory (about 2MB by default). + +config IA64_SGI_SN2 + bool "SGI-SN2" + help + Selecting this option will optimize the kernel for use on sn2 based + systems, but the resulting kernel binary will not run on other + types of ia64 systems. If you have an SGI Altix system, it's safe + to select this option. If in doubt, select ia64 generic support + instead. + +config IA64_HP_SIM + bool "Ski-simulator" + +endchoice + +choice + prompt "Processor type" + default ITANIUM + +config ITANIUM + bool "Itanium" + help + Select your IA-64 processor type. The default is Itanium. + This choice is safe for all IA-64 systems, but may not perform + optimally on systems with, say, Itanium 2 or newer processors. + +config MCKINLEY + bool "Itanium 2" + help + Select this to configure for an Itanium 2 (McKinley) processor. + +endchoice + +choice + prompt "Kernel page size" + default IA64_PAGE_SIZE_16KB + +config IA64_PAGE_SIZE_4KB + bool "4KB" + help + This lets you select the page size of the kernel. For best IA-64 + performance, a page size of 8KB or 16KB is recommended. For best + IA-32 compatibility, a page size of 4KB should be selected (the vast + majority of IA-32 binaries work perfectly fine with a larger page + size). For Itanium 2 or newer systems, a page size of 64KB can also + be selected. + + 4KB For best IA-32 compatibility + 8KB For best IA-64 performance + 16KB For best IA-64 performance + 64KB Requires Itanium 2 or newer processor. + + If you don't know what to do, choose 16KB. + +config IA64_PAGE_SIZE_8KB + bool "8KB" + +config IA64_PAGE_SIZE_16KB + bool "16KB" + +config IA64_PAGE_SIZE_64KB + depends on !ITANIUM + bool "64KB" + +endchoice + +choice + prompt "Page Table Levels" + default PGTABLE_3 + +config PGTABLE_3 + bool "3 Levels" + +config PGTABLE_4 + depends on !IA64_PAGE_SIZE_64KB + bool "4 Levels" + +endchoice + +source kernel/Kconfig.hz + +config IA64_BRL_EMU + bool + depends on ITANIUM + default y + +# align cache-sensitive data to 128 bytes +config IA64_L1_CACHE_SHIFT + int + default "7" if MCKINLEY + default "6" if ITANIUM + +config IA64_CYCLONE + bool "Cyclone (EXA) Time Source support" + help + Say Y here to enable support for IBM EXA Cyclone time source. + If you're unsure, answer N. 
+ +config IOSAPIC + bool + depends on !IA64_HP_SIM + default y + +config IA64_SGI_SN_XP + tristate "Support communication between SGI SSIs" + depends on IA64_GENERIC || IA64_SGI_SN2 + select IA64_UNCACHED_ALLOCATOR + help + An SGI machine can be divided into multiple Single System + Images which act independently of each other and have + hardware based memory protection from the others. Enabling + this feature will allow for direct communication between SSIs + based on a network adapter and DMA messaging. + +config FORCE_MAX_ZONEORDER + int "MAX_ORDER (11 - 17)" if !HUGETLB_PAGE + range 11 17 if !HUGETLB_PAGE + default "17" if HUGETLB_PAGE + default "11" + +config SMP + bool "Symmetric multi-processing support" + help + This enables support for systems with more than one CPU. If you have + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. + + If you say N here, the kernel will run on single and multiprocessor + systems, but will use only one CPU of a multiprocessor system. If + you say Y here, the kernel will run on many, but not all, + single processor systems. On a single processor system, the kernel + will run faster if you say N here. + + See also the <file:Documentation/smp.txt> and the SMP-HOWTO + available at <http://www.tldp.org/docs.html#howto>. + + If you don't know what to do here, say N. + +config NR_CPUS + int "Maximum number of CPUs (2-1024)" + range 2 1024 + depends on SMP + default "1024" + help + You should set this to the number of CPUs in your system, but + keep in mind that a kernel compiled for, e.g., 2 CPUs will boot but + only use 2 CPUs on a >2 CPU system. Setting this to a value larger + than 64 will cause the use of a CPU mask array, causing a small + performance hit. + +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" + depends on SMP && EXPERIMENTAL + select HOTPLUG + default n + ---help--- + Say Y here to experiment with turning CPUs off and on. CPUs + can be controlled through /sys/devices/system/cpu/cpu#. + Say N if you want to disable CPU hotplug. + +config ARCH_ENABLE_MEMORY_HOTPLUG + def_bool y + +config SCHED_SMT + bool "SMT scheduler support" + depends on SMP + help + Improves the CPU scheduler's decision making when dealing with + Intel IA64 chips with MultiThreading at a cost of slightly increased + overhead in some places. If unsure say N here. + +config PERMIT_BSP_REMOVE + bool "Support removal of Bootstrap Processor" + depends on HOTPLUG_CPU + default n + ---help--- + Say Y here if your platform SAL will support removal of BSP with HOTPLUG_CPU + support. + +config FORCE_CPEI_RETARGET + bool "Force assumption that CPEI can be re-targetted" + depends on PERMIT_BSP_REMOVE + default n + ---help--- + Say Y if you need to force the assumption that CPEI can be re-targetted to + any cpu in the system. This hint is available via ACPI 3.0 specifications. + Tiger4 systems are capable of re-directing CPEI to any CPU other than BSP. + This option it useful to enable this feature on older BIOS's as well. + You can also enable this by using boot command line option force_cpei=1. + +config PREEMPT + bool "Preemptible Kernel" + help + This option reduces the latency of the kernel when reacting to + real-time or interactive events by allowing a low priority process to + be preempted even if it is in kernel mode executing a system call. + This allows applications to run more reliably even when the system is + under load. 
+ + Say Y here if you are building a kernel for a desktop, embedded + or real-time system. Say N if you are unsure. + +source "mm/Kconfig" + +config ARCH_SELECT_MEMORY_MODEL + def_bool y + +config ARCH_DISCONTIGMEM_ENABLE + def_bool y + help + Say Y to support efficient handling of discontiguous physical memory, + for architectures which are either NUMA (Non-Uniform Memory Access) + or have huge holes in the physical address space for other reasons. + See <file:Documentation/vm/numa> for more. + +config ARCH_FLATMEM_ENABLE + def_bool y + +config ARCH_SPARSEMEM_ENABLE + def_bool y + depends on ARCH_DISCONTIGMEM_ENABLE + +config ARCH_DISCONTIGMEM_DEFAULT + def_bool y if (IA64_SGI_SN2 || IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB) + depends on ARCH_DISCONTIGMEM_ENABLE + +config NUMA + bool "NUMA support" + depends on !IA64_HP_SIM && !FLATMEM + default y if IA64_SGI_SN2 + help + Say Y to compile the kernel to support NUMA (Non-Uniform Memory + Access). This option is for configuring high-end multiprocessor + server systems. If in doubt, say N. + +config NODES_SHIFT + int "Max num nodes shift(3-10)" + range 3 10 + default "10" + depends on NEED_MULTIPLE_NODES + help + This option specifies the maximum number of nodes in your SSI system. + MAX_NUMNODES will be 2^(This value). + If in doubt, use the default. + +# VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent. +# VIRTUAL_MEM_MAP has been retained for historical reasons. +config VIRTUAL_MEM_MAP + bool "Virtual mem map" + depends on !SPARSEMEM + default y if !IA64_HP_SIM + help + Say Y to compile the kernel with support for a virtual mem map. + This code also only takes effect if a memory hole of greater than + 1 Gb is found during boot. You must turn this option on if you + require the DISCONTIGMEM option for your machine. If you are + unsure, say Y. + +config HOLES_IN_ZONE + bool + default y if VIRTUAL_MEM_MAP + +config HAVE_ARCH_EARLY_PFN_TO_NID + def_bool y + depends on NEED_MULTIPLE_NODES + +config HAVE_ARCH_NODEDATA_EXTENSION + def_bool y + depends on NUMA + +config IA32_SUPPORT + bool "Support for Linux/x86 binaries" + help + IA-64 processors can execute IA-32 (X86) instructions. By + saying Y here, the kernel will include IA-32 system call + emulation support which makes it possible to transparently + run IA-32 Linux binaries on an IA-64 Linux system. + If in doubt, say Y. + +config COMPAT + bool + depends on IA32_SUPPORT + default y + +config IA64_MCA_RECOVERY + tristate "MCA recovery from errors other than TLB." + +config PERFMON + bool "Performance monitor support" + help + Selects whether support for the IA-64 performance monitor hardware + is included in the kernel. This makes some kernel data-structures a + little bigger and slows down execution a bit, but it is generally + a good idea to turn this on. If you're unsure, say Y. + +config IA64_PALINFO + tristate "/proc/pal support" + help + If you say Y here, you are able to get PAL (Processor Abstraction + Layer) information in /proc/pal. This contains useful information + about the processors in your systems, such as cache and TLB sizes + and the PAL firmware version in use. + + To use this option, you have to ensure that the "/proc file system + support" (CONFIG_PROC_FS) is enabled, too. 
+ +config SGI_SN + def_bool y if (IA64_SGI_SN2 || IA64_GENERIC) + +source "drivers/sn/Kconfig" + +source "drivers/firmware/Kconfig" + +source "fs/Kconfig.binfmt" + +endmenu + +menu "Power management and ACPI" + +source "kernel/power/Kconfig" + +source "drivers/acpi/Kconfig" + +if PM + +source "arch/ia64/kernel/cpufreq/Kconfig" + +endif + +endmenu + +if !IA64_HP_SIM + +menu "Bus options (PCI, PCMCIA)" + +config PCI + bool "PCI support" + help + Real IA-64 machines all have PCI/PCI-X/PCI Express busses. Say Y + here unless you are using a simulator without PCI support. + +config PCI_DOMAINS + bool + default PCI + +config XEN_PCIDEV_FRONTEND + bool "Xen PCI Frontend" + depends on PCI && XEN + default y + help + The PCI device frontend driver allows the kernel to import arbitrary + PCI devices from a PCI backend to support PCI driver domains. + +config XEN_PCIDEV_FE_DEBUG + bool "Xen PCI Frontend Debugging" + depends on XEN_PCIDEV_FRONTEND + default n + help + Enables some debug statements within the PCI Frontend. + +source "drivers/pci/pcie/Kconfig" + +source "drivers/pci/Kconfig" + +source "drivers/pci/hotplug/Kconfig" + +source "drivers/pcmcia/Kconfig" + +endmenu + +endif + +source "net/Kconfig" + +source "drivers/Kconfig" + +source "fs/Kconfig" + +source "lib/Kconfig" + +# +# Use the generic interrupt handling code in kernel/irq/: +# +config GENERIC_HARDIRQS + bool + default y + +config GENERIC_IRQ_PROBE + bool + default y + +config GENERIC_PENDING_IRQ + bool + depends on GENERIC_HARDIRQS && SMP + default y + +config IRQ_PER_CPU + bool + default y + +source "arch/ia64/hp/sim/Kconfig" + +menu "Instrumentation Support" + depends on EXPERIMENTAL + +source "arch/ia64/oprofile/Kconfig" + +config KPROBES + bool "Kprobes (EXPERIMENTAL)" + depends on EXPERIMENTAL && MODULES + help + Kprobes allows you to trap at almost any kernel address and + execute a callback function. register_kprobe() establishes + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". +endmenu + +source "arch/ia64/Kconfig.debug" + +source "security/Kconfig" + +source "crypto/Kconfig" + +# +# override default values of drivers/xen/Kconfig +# +if XEN +config XEN_UTIL + default n + +config HAVE_ARCH_ALLOC_SKB + default y + +config HAVE_ARCH_DEV_ALLOC_SKB + default y + +config XEN_BALLOON + default y + +config XEN_SKBUFF + default y + +config XEN_DEVMEM + default n + +config XEN_REBOOT + default y + +config XEN_SMPBOOT + default n +endif + +source "drivers/xen/Kconfig" diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/kernel/setup.c.orig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/arch/ia64/kernel/setup.c.orig Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,1020 @@ +/* + * Architecture-specific setup. + * + * Copyright (C) 1998-2001, 2003-2004 Hewlett-Packard Co + * David Mosberger-Tang <davidm@xxxxxxxxxx> + * Stephane Eranian <eranian@xxxxxxxxxx> + * Copyright (C) 2000, 2004 Intel Corp + * Rohit Seth <rohit.seth@xxxxxxxxx> + * Suresh Siddha <suresh.b.siddha@xxxxxxxxx> + * Gordon Jin <gordon.jin@xxxxxxxxx> + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond <drummond@xxxxxxxxxxx> + * + * 12/26/04 S.Siddha, G.Jin, R.Seth + * Add multi-threading and multi-core detection + * 11/12/01 D.Mosberger Convert get_cpuinfo() to seq_file based show_cpuinfo(). 
+ * 04/04/00 D.Mosberger renamed cpu_initialized to cpu_online_map + * 03/31/00 R.Seth cpu_initialized and current->processor fixes + * 02/04/00 D.Mosberger some more get_cpuinfo fixes... + * 02/01/00 R.Seth fixed get_cpuinfo for SMP + * 01/07/99 S.Eranian added the support for command line argument + * 06/24/99 W.Drummond added boot_cpu_data. + * 05/28/05 Z. Menyhart Dynamic stride size for "flush_icache_range()" + */ +#include <linux/module.h> +#include <linux/init.h> + +#include <linux/acpi.h> +#include <linux/bootmem.h> +#include <linux/console.h> +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/reboot.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <linux/string.h> +#include <linux/threads.h> +#include <linux/screen_info.h> +#include <linux/dmi.h> +#include <linux/serial.h> +#include <linux/serial_core.h> +#include <linux/efi.h> +#include <linux/initrd.h> +#include <linux/pm.h> +#include <linux/cpufreq.h> + +#include <asm/ia32.h> +#include <asm/machvec.h> +#include <asm/mca.h> +#include <asm/meminit.h> +#include <asm/page.h> +#include <asm/patch.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/sal.h> +#include <asm/sections.h> +#include <asm/serial.h> +#include <asm/setup.h> +#include <asm/smp.h> +#include <asm/system.h> +#include <asm/unistd.h> +#include <asm/system.h> +#ifdef CONFIG_XEN +#include <asm/hypervisor.h> +#endif +#include <linux/dma-mapping.h> + +#if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE) +# error "struct cpuinfo_ia64 too big!" +#endif + +#ifdef CONFIG_SMP +unsigned long __per_cpu_offset[NR_CPUS]; +EXPORT_SYMBOL(__per_cpu_offset); +#endif + +#ifdef CONFIG_XEN +static int +xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + HYPERVISOR_shutdown(SHUTDOWN_crash); + /* we're never actually going to get here... */ + return NOTIFY_DONE; +} + +static struct notifier_block xen_panic_block = { + .notifier_call = xen_panic_event, + .next = NULL, + .priority = 0 /* try to go last */ +}; +#endif + +extern void ia64_setup_printk_clock(void); + +DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info); +DEFINE_PER_CPU(unsigned long, local_per_cpu_offset); +DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8); +unsigned long ia64_cycles_per_usec; +struct ia64_boot_param *ia64_boot_param; +struct screen_info screen_info; +unsigned long vga_console_iobase; +unsigned long vga_console_membase; + +static struct resource data_resource = { + .name = "Kernel data", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +static struct resource code_resource = { + .name = "Kernel code", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; +extern void efi_initialize_iomem_resources(struct resource *, + struct resource *); +extern char _text[], _end[], _etext[]; + +unsigned long ia64_max_cacheline_size; + +int dma_get_cache_alignment(void) +{ + return ia64_max_cacheline_size; +} +EXPORT_SYMBOL(dma_get_cache_alignment); + +unsigned long ia64_iobase; /* virtual address for I/O accesses */ +EXPORT_SYMBOL(ia64_iobase); +struct io_space io_space[MAX_IO_SPACES]; +EXPORT_SYMBOL(io_space); +unsigned int num_io_spaces; + +/* + * "flush_icache_range()" needs to know what processor dependent stride size to use + * when it makes i-cache(s) coherent with d-caches. + */ +#define I_CACHE_STRIDE_SHIFT 5 /* Safest way to go: 32 bytes by 32 bytes */ +unsigned long ia64_i_cache_stride_shift = ~0; + +/* + * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1). 
This + * mask specifies a mask of address bits that must be 0 in order for two buffers to be + * mergeable by the I/O MMU (i.e., the end address of the first buffer and the start + * address of the second buffer must be aligned to (merge_mask+1) in order to be + * mergeable). By default, we assume there is no I/O MMU which can merge physically + * discontiguous buffers, so we set the merge_mask to ~0UL, which corresponds to a iommu + * page-size of 2^64. + */ +unsigned long ia64_max_iommu_merge_mask = ~0UL; +EXPORT_SYMBOL(ia64_max_iommu_merge_mask); + +/* + * We use a special marker for the end of memory and it uses the extra (+1) slot + */ +struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1] __initdata; +int num_rsvd_regions __initdata; + + +/* + * Filter incoming memory segments based on the primitive map created from the boot + * parameters. Segments contained in the map are removed from the memory ranges. A + * caller-specified function is called with the memory ranges that remain after filtering. + * This routine does not assume the incoming segments are sorted. + */ +int __init +filter_rsvd_memory (unsigned long start, unsigned long end, void *arg) +{ + unsigned long range_start, range_end, prev_start; + void (*func)(unsigned long, unsigned long, int); + int i; + +#if IGNORE_PFN0 + if (start == PAGE_OFFSET) { + printk(KERN_WARNING "warning: skipping physical page 0\n"); + start += PAGE_SIZE; + if (start >= end) return 0; + } +#endif + /* + * lowest possible address(walker uses virtual) + */ + prev_start = PAGE_OFFSET; + func = arg; + + for (i = 0; i < num_rsvd_regions; ++i) { + range_start = max(start, prev_start); + range_end = min(end, rsvd_region[i].start); + + if (range_start < range_end) + call_pernode_memory(__pa(range_start), range_end - range_start, func); + + /* nothing more available in this segment */ + if (range_end == end) return 0; + + prev_start = rsvd_region[i].end; + } + /* end of memory marker allows full processing inside loop body */ + return 0; +} + +static int __init +rsvd_region_cmp(struct rsvd_region *lhs, struct rsvd_region *rhs) +{ + if (lhs->start > rhs->start) + return 1; + if (lhs->start < rhs->start) + return -1; + + if (lhs->end > rhs->end) + return 1; + if (lhs->end < rhs->end) + return -1; + + return 0; +} + +static void __init +sort_regions (struct rsvd_region *rsvd_region, int max) +{ + int num = max; + int j; + + /* simple bubble sorting */ + while (max--) { + for (j = 0; j < max; ++j) { + if (rsvd_region_cmp(&rsvd_region[j], + &rsvd_region[j + 1]) > 0) { + struct rsvd_region tmp; + tmp = rsvd_region[j]; + rsvd_region[j] = rsvd_region[j + 1]; + rsvd_region[j + 1] = tmp; + } + } + } + + for (j = 0; j < num - 1; j++) { + int k; + unsigned long start = rsvd_region[j].start; + unsigned long end = rsvd_region[j].end; + int collapsed; + + for (k = j + 1; k < num; k++) { + BUG_ON(start > rsvd_region[k].start); + if (end < rsvd_region[k].start) { + k--; + break; + } + end = max(end, rsvd_region[k].end); + } + if (k == num) + k--; + rsvd_region[j].end = end; + collapsed = k - j; + num -= collapsed; + for (k = j + 1; k < num; k++) { + rsvd_region[k] = rsvd_region[k + collapsed]; + } + } + + num_rsvd_regions = num; + for (j = 0; j < num; j++) { + printk("rsvd_region[%d]: [0x%016lx, 0x%06lx)\n", + j, rsvd_region[j].start, rsvd_region[j].end); + } +} + +/* + * Request address space for all standard resources + */ +static int __init register_memory(void) +{ + code_resource.start = ia64_tpa(_text); + code_resource.end = ia64_tpa(_etext) - 1; + 
data_resource.start = ia64_tpa(_etext); + data_resource.end = ia64_tpa(_end) - 1; + efi_initialize_iomem_resources(&code_resource, &data_resource); + + return 0; +} + +__initcall(register_memory); + +/** + * reserve_memory - setup reserved memory areas + * + * Setup the reserved memory areas set aside for the boot parameters, + * initrd, etc. There are currently %IA64_MAX_RSVD_REGIONS defined, + * see include/asm-ia64/meminit.h if you need to define more. + */ +void __init +reserve_memory (void) +{ + int n = 0; + + /* + * none of the entries in this table overlap + */ + rsvd_region[n].start = (unsigned long) ia64_boot_param; + rsvd_region[n].end = rsvd_region[n].start + sizeof(*ia64_boot_param); + n++; + + rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->efi_memmap); + rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->efi_memmap_size; + n++; + + rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->command_line); + rsvd_region[n].end = (rsvd_region[n].start + + strlen(__va(ia64_boot_param->command_line)) + 1); + n++; + + rsvd_region[n].start = (unsigned long) ia64_imva((void *)KERNEL_START); + rsvd_region[n].end = (unsigned long) ia64_imva(_end); + n++; + +#ifdef CONFIG_XEN + if (is_running_on_xen()) { + rsvd_region[n].start = (unsigned long)__va((HYPERVISOR_shared_info->arch.start_info_pfn << PAGE_SHIFT)); + rsvd_region[n].end = rsvd_region[n].start + PAGE_SIZE; + n++; + } +#endif + +#ifdef CONFIG_BLK_DEV_INITRD + if (ia64_boot_param->initrd_start) { + rsvd_region[n].start = (unsigned long)__va(ia64_boot_param->initrd_start); + rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->initrd_size; + n++; + } +#endif + + efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end); + n++; + + /* end of memory marker */ + rsvd_region[n].start = ~0UL; + rsvd_region[n].end = ~0UL; + n++; + + num_rsvd_regions = n; + BUG_ON(IA64_MAX_RSVD_REGIONS + 1 < n); + + sort_regions(rsvd_region, num_rsvd_regions); +} + +/** + * find_initrd - get initrd parameters from the boot parameter structure + * + * Grab the initrd start and end from the boot parameter struct given us by + * the boot loader. + */ +void __init +find_initrd (void) +{ +#ifdef CONFIG_BLK_DEV_INITRD + if (ia64_boot_param->initrd_start) { + initrd_start = (unsigned long)__va(ia64_boot_param->initrd_start); + initrd_end = initrd_start+ia64_boot_param->initrd_size; + + printk(KERN_INFO "Initial ramdisk at: 0x%lx (%lu bytes)\n", + initrd_start, ia64_boot_param->initrd_size); + } +#endif +} + +static void __init +io_port_init (void) +{ + unsigned long phys_iobase; + + /* + * Set `iobase' based on the EFI memory map or, failing that, the + * value firmware left in ar.k0. + * + * Note that in ia32 mode, IN/OUT instructions use ar.k0 to compute + * the port's virtual address, so ia32_load_state() loads it with a + * user virtual address. But in ia64 mode, glibc uses the + * *physical* address in ar.k0 to mmap the appropriate area from + * /dev/mem, and the inX()/outX() interfaces use MMIO. In both + * cases, user-mode can only use the legacy 0-64K I/O port space. + * + * ar.k0 is not involved in kernel I/O port accesses, which can use + * any of the I/O port spaces and are done via MMIO using the + * virtual mmio_base from the appropriate io_space[]. 
+ */ + phys_iobase = efi_get_iobase(); + if (!phys_iobase) { + phys_iobase = ia64_get_kr(IA64_KR_IO_BASE); + printk(KERN_INFO "No I/O port range found in EFI memory map, " + "falling back to AR.KR0 (0x%lx)\n", phys_iobase); + } + ia64_iobase = (unsigned long) ioremap(phys_iobase, 0); + ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase)); + + /* setup legacy IO port space */ + io_space[0].mmio_base = ia64_iobase; + io_space[0].sparse = 1; + num_io_spaces = 1; +} + +/** + * early_console_setup - setup debugging console + * + * Consoles started here require little enough setup that we can start using + * them very early in the boot process, either right after the machine + * vector initialization, or even before if the drivers can detect their hw. + * + * Returns non-zero if a console couldn't be setup. + */ +static inline int __init +early_console_setup (char *cmdline) +{ + int earlycons = 0; + +#ifdef CONFIG_XEN +#ifndef CONFIG_IA64_HP_SIM + if (is_running_on_xen()) { + extern struct console hpsim_cons; + hpsim_cons.flags |= CON_BOOT; + register_console(&hpsim_cons); + earlycons++; + } +#endif +#endif +#ifdef CONFIG_SERIAL_SGI_L1_CONSOLE + { + extern int sn_serial_console_early_setup(void); + if (!sn_serial_console_early_setup()) + earlycons++; + } +#endif +#ifdef CONFIG_EFI_PCDP + if (!efi_setup_pcdp_console(cmdline)) + earlycons++; +#endif +#ifdef CONFIG_SERIAL_8250_CONSOLE + if (!early_serial_console_init(cmdline)) + earlycons++; +#endif + + return (earlycons) ? 0 : -1; +} + +static inline void +mark_bsp_online (void) +{ +#ifdef CONFIG_SMP + /* If we register an early console, allow CPU 0 to printk */ + cpu_set(smp_processor_id(), cpu_online_map); +#endif +} + +#ifdef CONFIG_SMP +static void __init +check_for_logical_procs (void) +{ + pal_logical_to_physical_t info; + s64 status; + + status = ia64_pal_logical_to_phys(0, &info); + if (status == -1) { + printk(KERN_INFO "No logical to physical processor mapping " + "available\n"); + return; + } + if (status) { + printk(KERN_ERR "ia64_pal_logical_to_phys failed with %ld\n", + status); + return; + } + /* + * Total number of siblings that BSP has. Though not all of them + * may have booted successfully. The correct number of siblings + * booted is in info.overview_num_log. + */ + smp_num_siblings = info.overview_tpc; + smp_num_cpucores = info.overview_cpp; +} +#endif + +static __initdata int nomca; +static __init int setup_nomca(char *s) +{ + nomca = 1; + return 0; +} +early_param("nomca", setup_nomca); + +void __init +setup_arch (char **cmdline_p) +{ + unw_init(); + +#ifdef CONFIG_XEN + if (is_running_on_xen()) { + setup_xen_features(); + /* Register a call for panic conditions. 
*/ + atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); + } +#endif + + ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist); + + *cmdline_p = __va(ia64_boot_param->command_line); + strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE); + + efi_init(); + io_port_init(); + + parse_early_param(); + +#ifdef CONFIG_IA64_GENERIC + machvec_init(NULL); +#endif + + if (early_console_setup(*cmdline_p) == 0) + mark_bsp_online(); + +#ifdef CONFIG_ACPI + /* Initialize the ACPI boot-time table parser */ + acpi_table_init(); +# ifdef CONFIG_ACPI_NUMA + acpi_numa_init(); +# endif +#else +# ifdef CONFIG_SMP + smp_build_cpu_map(); /* happens, e.g., with the Ski simulator */ +# endif +#endif /* CONFIG_APCI_BOOT */ + + find_memory(); + + /* process SAL system table: */ + ia64_sal_init(__va(efi.sal_systab)); + + ia64_setup_printk_clock(); + +#ifdef CONFIG_SMP + cpu_physical_id(0) = hard_smp_processor_id(); + + cpu_set(0, cpu_sibling_map[0]); + cpu_set(0, cpu_core_map[0]); + + check_for_logical_procs(); + if (smp_num_cpucores > 1) + printk(KERN_INFO + "cpu package is Multi-Core capable: number of cores=%d\n", + smp_num_cpucores); + if (smp_num_siblings > 1) + printk(KERN_INFO + "cpu package is Multi-Threading capable: number of siblings=%d\n", + smp_num_siblings); +#endif + + cpu_init(); /* initialize the bootstrap CPU */ + mmu_context_init(); /* initialize context_id bitmap */ + +#ifdef CONFIG_ACPI + acpi_boot_init(); +#endif + +#ifdef CONFIG_VT + if (!conswitchp) { +# if defined(CONFIG_DUMMY_CONSOLE) + conswitchp = &dummy_con; +# endif +# if defined(CONFIG_VGA_CONSOLE) + /* + * Non-legacy systems may route legacy VGA MMIO range to system + * memory. vga_con probes the MMIO hole, so memory looks like + * a VGA device to it. The EFI memory map can tell us if it's + * memory so we can avoid this problem. + */ + if (efi_mem_type(0xA0000) != EFI_CONVENTIONAL_MEMORY) + conswitchp = &vga_con; +# endif + } +#ifdef CONFIG_XEN + if (is_running_on_xen()) { + shared_info_t *s = HYPERVISOR_shared_info; + + xen_start_info = __va(s->arch.start_info_pfn << PAGE_SHIFT); + + printk("Running on Xen! start_info_pfn=0x%lx nr_pages=%ld " + "flags=0x%x\n", s->arch.start_info_pfn, + xen_start_info->nr_pages, xen_start_info->flags); + + if (!is_initial_xendomain()) { + extern int console_use_vt; + conswitchp = NULL; + console_use_vt = 0; + } + } +#endif +#endif + + /* enable IA-64 Machine Check Abort Handling unless disabled */ + if (!nomca) + ia64_mca_init(); + + platform_setup(cmdline_p); + paging_init(); +#ifdef CONFIG_XEN + contiguous_bitmap_init(max_pfn); +#endif +} + +/* + * Display cpu info for all cpu's. 
+ */ +static int +show_cpuinfo (struct seq_file *m, void *v) +{ +#ifdef CONFIG_SMP +# define lpj c->loops_per_jiffy +# define cpunum c->cpu +#else +# define lpj loops_per_jiffy +# define cpunum 0 +#endif + static struct { + unsigned long mask; + const char *feature_name; + } feature_bits[] = { + { 1UL << 0, "branchlong" }, + { 1UL << 1, "spontaneous deferral"}, + { 1UL << 2, "16-byte atomic ops" } + }; + char family[32], features[128], *cp, sep; + struct cpuinfo_ia64 *c = v; + unsigned long mask; + unsigned long proc_freq; + int i; + + mask = c->features; + + switch (c->family) { + case 0x07: memcpy(family, "Itanium", 8); break; + case 0x1f: memcpy(family, "Itanium 2", 10); break; + default: sprintf(family, "%u", c->family); break; + } + + /* build the feature string: */ + memcpy(features, " standard", 10); + cp = features; + sep = 0; + for (i = 0; i < (int) ARRAY_SIZE(feature_bits); ++i) { + if (mask & feature_bits[i].mask) { + if (sep) + *cp++ = sep; + sep = ','; + *cp++ = ' '; + strcpy(cp, feature_bits[i].feature_name); + cp += strlen(feature_bits[i].feature_name); + mask &= ~feature_bits[i].mask; + } + } + if (mask) { + /* print unknown features as a hex value: */ + if (sep) + *cp++ = sep; + sprintf(cp, " 0x%lx", mask); + } + + proc_freq = cpufreq_quick_get(cpunum); + if (!proc_freq) + proc_freq = c->proc_freq / 1000; + + seq_printf(m, + "processor : %d\n" + "vendor : %s\n" + "arch : IA-64\n" + "family : %s\n" + "model : %u\n" + "revision : %u\n" + "archrev : %u\n" + "features :%s\n" /* don't change this---it _is_ right! */ + "cpu number : %lu\n" + "cpu regs : %u\n" + "cpu MHz : %lu.%06lu\n" + "itc MHz : %lu.%06lu\n" + "BogoMIPS : %lu.%02lu\n", + cpunum, c->vendor, family, c->model, c->revision, c->archrev, + features, c->ppn, c->number, + proc_freq / 1000, proc_freq % 1000, + c->itc_freq / 1000000, c->itc_freq % 1000000, + lpj*HZ/500000, (lpj*HZ/5000) % 100); +#ifdef CONFIG_SMP + seq_printf(m, "siblings : %u\n", cpus_weight(cpu_core_map[cpunum])); + if (c->threads_per_core > 1 || c->cores_per_socket > 1) + seq_printf(m, + "physical id: %u\n" + "core id : %u\n" + "thread id : %u\n", + c->socket_id, c->core_id, c->thread_id); +#endif + seq_printf(m,"\n"); + + return 0; +} + +static void * +c_start (struct seq_file *m, loff_t *pos) +{ +#ifdef CONFIG_SMP + while (*pos < NR_CPUS && !cpu_isset(*pos, cpu_online_map)) + ++*pos; +#endif + return *pos < NR_CPUS ? 
cpu_data(*pos) : NULL; +} + +static void * +c_next (struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} + +static void +c_stop (struct seq_file *m, void *v) +{ +} + +struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo +}; + +static void __cpuinit +identify_cpu (struct cpuinfo_ia64 *c) +{ + union { + unsigned long bits[5]; + struct { + /* id 0 & 1: */ + char vendor[16]; + + /* id 2 */ + u64 ppn; /* processor serial number */ + + /* id 3: */ + unsigned number : 8; + unsigned revision : 8; + unsigned model : 8; + unsigned family : 8; + unsigned archrev : 8; + unsigned reserved : 24; + + /* id 4: */ + u64 features; + } field; + } cpuid; + pal_vm_info_1_u_t vm1; + pal_vm_info_2_u_t vm2; + pal_status_t status; + unsigned long impl_va_msb = 50, phys_addr_size = 44; /* Itanium defaults */ + int i; + + for (i = 0; i < 5; ++i) + cpuid.bits[i] = ia64_get_cpuid(i); + + memcpy(c->vendor, cpuid.field.vendor, 16); +#ifdef CONFIG_SMP + c->cpu = smp_processor_id(); + + /* below default values will be overwritten by identify_siblings() + * for Multi-Threading/Multi-Core capable cpu's + */ + c->threads_per_core = c->cores_per_socket = c->num_log = 1; + c->socket_id = -1; + + identify_siblings(c); +#endif + c->ppn = cpuid.field.ppn; + c->number = cpuid.field.number; + c->revision = cpuid.field.revision; + c->model = cpuid.field.model; + c->family = cpuid.field.family; + c->archrev = cpuid.field.archrev; + c->features = cpuid.field.features; + + status = ia64_pal_vm_summary(&vm1, &vm2); + if (status == PAL_STATUS_SUCCESS) { + impl_va_msb = vm2.pal_vm_info_2_s.impl_va_msb; + phys_addr_size = vm1.pal_vm_info_1_s.phys_add_size; + } + c->unimpl_va_mask = ~((7L<<61) | ((1L << (impl_va_msb + 1)) - 1)); + c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1)); +} + +void +setup_per_cpu_areas (void) +{ + /* start_kernel() requires this... */ +#ifdef CONFIG_ACPI_HOTPLUG_CPU + prefill_possible_map(); +#endif +} + +/* + * Calculate the max. cache line size. + * + * In addition, the minimum of the i-cache stride sizes is calculated for + * "flush_icache_range()". 
+ */ +static void __cpuinit +get_max_cacheline_size (void) +{ + unsigned long line_size, max = 1; + unsigned int cache_size = 0; + u64 l, levels, unique_caches; + pal_cache_config_info_t cci; + s64 status; + + status = ia64_pal_cache_summary(&levels, &unique_caches); + if (status != 0) { + printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n", + __FUNCTION__, status); + max = SMP_CACHE_BYTES; + /* Safest setup for "flush_icache_range()" */ + ia64_i_cache_stride_shift = I_CACHE_STRIDE_SHIFT; + goto out; + } + + for (l = 0; l < levels; ++l) { + status = ia64_pal_cache_config_info(l, /* cache_type (data_or_unified)= */ 2, + &cci); + if (status != 0) { + printk(KERN_ERR + "%s: ia64_pal_cache_config_info(l=%lu, 2) failed (status=%ld)\n", + __FUNCTION__, l, status); + max = SMP_CACHE_BYTES; + /* The safest setup for "flush_icache_range()" */ + cci.pcci_stride = I_CACHE_STRIDE_SHIFT; + cci.pcci_unified = 1; + } + line_size = 1 << cci.pcci_line_size; + if (line_size > max) + max = line_size; + if (cache_size < cci.pcci_cache_size) + cache_size = cci.pcci_cache_size; + if (!cci.pcci_unified) { + status = ia64_pal_cache_config_info(l, + /* cache_type (instruction)= */ 1, + &cci); + if (status != 0) { + printk(KERN_ERR + "%s: ia64_pal_cache_config_info(l=%lu, 1) failed (status=%ld)\n", + __FUNCTION__, l, status); + /* The safest setup for "flush_icache_range()" */ + cci.pcci_stride = I_CACHE_STRIDE_SHIFT; + } + } + if (cci.pcci_stride < ia64_i_cache_stride_shift) + ia64_i_cache_stride_shift = cci.pcci_stride; + } + out: +#ifdef CONFIG_SMP + max_cache_size = max(max_cache_size, cache_size); +#endif + if (max > ia64_max_cacheline_size) + ia64_max_cacheline_size = max; +} + +/* + * cpu_init() initializes state that is per-CPU. This function acts + * as a 'CPU state barrier', nothing should get across. + */ +void __cpuinit +cpu_init (void) +{ + extern void __cpuinit ia64_mmu_init (void *); + unsigned long num_phys_stacked; + pal_vm_info_2_u_t vmi; + unsigned int max_ctx; + struct cpuinfo_ia64 *cpu_info; + void *cpu_data; + + cpu_data = per_cpu_init(); + + /* + * We set ar.k3 so that assembly code in MCA handler can compute + * physical addresses of per cpu variables with a simple: + * phys = ar.k3 + &per_cpu_var + */ + ia64_set_kr(IA64_KR_PER_CPU_DATA, + ia64_tpa(cpu_data) - (long) __per_cpu_start); + + get_max_cacheline_size(); + + /* + * We can't pass "local_cpu_data" to identify_cpu() because we haven't called + * ia64_mmu_init() yet. And we can't call ia64_mmu_init() first because it + * depends on the data returned by identify_cpu(). We break the dependency by + * accessing cpu_data() through the canonical per-CPU address. + */ + cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start); + identify_cpu(cpu_info); + +#ifdef CONFIG_MCKINLEY + { +# define FEATURE_SET 16 + struct ia64_pal_retval iprv; + + if (cpu_info->family == 0x1f) { + PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, FEATURE_SET, 0); + if ((iprv.status == 0) && (iprv.v0 & 0x80) && (iprv.v2 & 0x80)) + PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES, + (iprv.v1 | 0x80), FEATURE_SET, 0); + } + } +#endif + + /* Clear the stack memory reserved for pt_regs: */ + memset(task_pt_regs(current), 0, sizeof(struct pt_regs)); + + ia64_set_kr(IA64_KR_FPU_OWNER, 0); + + /* + * Initialize the page-table base register to a global + * directory with all zeroes. This ensure that we can handle + * TLB-misses to user address-space even before we created the + * first user address-space. 
This may happen, e.g., due to + * aggressive use of lfetch.fault. + */ + ia64_set_kr(IA64_KR_PT_BASE, __pa(ia64_imva(empty_zero_page))); + + /* + * Initialize default control register to defer speculative faults except + * for those arising from TLB misses, which are not deferred. The + * kernel MUST NOT depend on a particular setting of these bits (in other words, + * the kernel must have recovery code for all speculative accesses). Turn on + * dcr.lc as per recommendation by the architecture team. Most IA-32 apps + * shouldn't be affected by this (moral: keep your ia32 locks aligned and you'll + * be fine). + */ + ia64_setreg(_IA64_REG_CR_DCR, ( IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR + | IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC)); + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + if (current->mm) + BUG(); + + ia64_mmu_init(ia64_imva(cpu_data)); + ia64_mca_cpu_init(ia64_imva(cpu_data)); + +#ifdef CONFIG_IA32_SUPPORT + ia32_cpu_init(); +#endif + + /* Clear ITC to eliminiate sched_clock() overflows in human time. */ + ia64_set_itc(0); + + /* disable all local interrupt sources: */ + ia64_set_itv(1 << 16); + ia64_set_lrr0(1 << 16); + ia64_set_lrr1(1 << 16); + ia64_setreg(_IA64_REG_CR_PMV, 1 << 16); + ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16); + + /* clear TPR & XTP to enable all interrupt classes: */ + ia64_setreg(_IA64_REG_CR_TPR, 0); +#ifdef CONFIG_SMP + normal_xtp(); +#endif + + /* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */ + if (ia64_pal_vm_summary(NULL, &vmi) == 0) + max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1; + else { + printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n"); + max_ctx = (1U << 15) - 1; /* use architected minimum */ + } + while (max_ctx < ia64_ctx.max_ctx) { + unsigned int old = ia64_ctx.max_ctx; + if (cmpxchg(&ia64_ctx.max_ctx, old, max_ctx) == old) + break; + } + + if (ia64_pal_rse_info(&num_phys_stacked, NULL) != 0) { + printk(KERN_WARNING "cpu_init: PAL RSE info failed; assuming 96 physical " + "stacked regs\n"); + num_phys_stacked = 96; + } + /* size of physical stacked register partition plus 8 bytes: */ + __get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8; + platform_cpu_init(); + +#ifdef CONFIG_XEN + /* Need to be moved into platform_cpu_init later */ + if (is_running_on_xen()) { + extern void xen_smp_intr_init(void); + xen_smp_intr_init(); + } +#endif + + pm_idle = default_idle; +} + +/* + * On SMP systems, when the scheduler does migration-cost autodetection, + * it needs a way to flush as much of the CPU's caches as possible. + */ +void sched_cacheflush(void) +{ + ia64_sal_cache_flush(3); +} + +void __init +check_bugs (void) +{ + ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles, + (unsigned long) __end___mckinley_e9_bundles); +} + +static int __init run_dmi_scan(void) +{ + dmi_scan_machine(); + return 0; +} +core_initcall(run_dmi_scan); diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/xen/util.c.orig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/arch/ia64/xen/util.c.orig Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,115 @@ +/****************************************************************************** + * arch/ia64/xen/util.c + * This file is the ia64 counterpart of drivers/xen/util.c + * + * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <asm/uaccess.h> +#include <xen/driver_util.h> + +struct vm_struct *alloc_vm_area(unsigned long size) +{ + int order; + unsigned long virt; + unsigned long nr_pages; + struct vm_struct* area; + + order = get_order(size); + virt = __get_free_pages(GFP_KERNEL, order); + if (virt == 0) { + goto err0; + } + nr_pages = 1 << order; + scrub_pages(virt, nr_pages); + + area = kmalloc(sizeof(*area), GFP_KERNEL); + if (area == NULL) { + goto err1; + } + + area->flags = VM_IOREMAP;//XXX + area->addr = (void*)virt; + area->size = size; + area->pages = NULL; //XXX + area->nr_pages = nr_pages; + area->phys_addr = 0; /* xenbus_map_ring_valloc uses this field! */ + + return area; + +err1: + free_pages(virt, order); +err0: + return NULL; + +} +EXPORT_SYMBOL_GPL(alloc_vm_area); + +void free_vm_area(struct vm_struct *area) +{ + unsigned int order = get_order(area->size); + unsigned long i; + unsigned long phys_addr = __pa(area->addr); + + // This area is used for foreign page mappping. + // So underlying machine page may not be assigned. + for (i = 0; i < (1 << order); i++) { + unsigned long ret; + unsigned long gpfn = (phys_addr >> PAGE_SHIFT) + i; + struct xen_memory_reservation reservation = { + .nr_extents = 1, + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + set_xen_guest_handle(reservation.extent_start, &gpfn); + ret = HYPERVISOR_memory_op(XENMEM_populate_physmap, + &reservation); + BUG_ON(ret != 1); + } + free_pages((unsigned long)area->addr, order); + kfree(area); +} +EXPORT_SYMBOL_GPL(free_vm_area); + +void lock_vm_area(struct vm_struct *area) +{ + // nothing +} +EXPORT_SYMBOL_GPL(lock_vm_area); + +void unlock_vm_area(struct vm_struct *area) +{ + // nothing +} +EXPORT_SYMBOL_GPL(unlock_vm_area); + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/xen/xcom_hcall.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/arch/ia64/xen/xcom_hcall.c Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,469 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Tristan Gingold <tristan.gingold@xxxxxxxx> + */ +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/gfp.h> +#include <linux/module.h> +#include <xen/interface/xen.h> +#include <xen/interface/dom0_ops.h> +#include <xen/interface/memory.h> +#include <xen/interface/xencomm.h> +#include <xen/interface/version.h> +#include <xen/interface/sched.h> +#include <xen/interface/event_channel.h> +#include <xen/interface/physdev.h> +#include <xen/interface/grant_table.h> +#include <xen/interface/callback.h> +#include <xen/interface/acm_ops.h> +#include <xen/interface/hvm/params.h> +#include <xen/public/privcmd.h> +#include <asm/hypercall.h> +#include <asm/page.h> +#include <asm/uaccess.h> +#include <asm/xen/xencomm.h> + +/* Xencomm notes: + * This file defines hypercalls to be used by xencomm. The hypercalls simply + * create inlines descriptors for pointers and then call the raw arch hypercall + * xencomm_arch_hypercall_XXX + * + * If the arch wants to directly use these hypercalls, simply define macros + * in asm/hypercall.h, eg: + * #define HYPERVISOR_sched_op xencomm_hypercall_sched_op + * + * The arch may also define HYPERVISOR_xxx as a function and do more operations + * before/after doing the hypercall. + * + * Note: because only inline descriptors are created these functions must only + * be called with in kernel memory parameters. + */ + +int +xencomm_hypercall_console_io(int cmd, int count, char *str) +{ + return xencomm_arch_hypercall_console_io + (cmd, count, xencomm_create_inline(str)); +} + +int +xencomm_hypercall_event_channel_op(int cmd, void *op) +{ + return xencomm_arch_hypercall_event_channel_op + (cmd, xencomm_create_inline(op)); +} + +int +xencomm_hypercall_xen_version(int cmd, void *arg) +{ + switch (cmd) { + case XENVER_version: + case XENVER_extraversion: + case XENVER_compile_info: + case XENVER_capabilities: + case XENVER_changeset: + case XENVER_platform_parameters: + case XENVER_pagesize: + case XENVER_get_features: + break; + default: + printk("%s: unknown version cmd %d\n", __func__, cmd); + return -ENOSYS; + } + + return xencomm_arch_hypercall_xen_version + (cmd, xencomm_create_inline(arg)); +} + +int +xencomm_hypercall_physdev_op(int cmd, void *op) +{ + return xencomm_arch_hypercall_physdev_op + (cmd, xencomm_create_inline(op)); +} + +static void * +xencommize_grant_table_op(unsigned int cmd, void *op, unsigned int count) +{ + switch (cmd) { + case GNTTABOP_map_grant_ref: + case GNTTABOP_unmap_grant_ref: + break; + case GNTTABOP_setup_table: + { + struct gnttab_setup_table *setup = op; + struct xencomm_handle *frame_list; + + frame_list = xencomm_create_inline + (xen_guest_handle(setup->frame_list)); + + set_xen_guest_handle(setup->frame_list, (void *)frame_list); + break; + } + case GNTTABOP_dump_table: + case GNTTABOP_transfer: + case GNTTABOP_copy: + break; + default: + printk("%s: unknown grant table op %d\n", __func__, cmd); + BUG(); + } + + return xencomm_create_inline(op); +} + +int +xencomm_hypercall_grant_table_op(unsigned int cmd, void *op, unsigned int count) +{ + void *desc = xencommize_grant_table_op (cmd, op, count); + + return xencomm_arch_hypercall_grant_table_op(cmd, desc, count); +} + +int +xencomm_hypercall_sched_op(int cmd, void *arg) +{ + switch (cmd) { + case SCHEDOP_yield: + case 
SCHEDOP_block: + case SCHEDOP_shutdown: + case SCHEDOP_poll: + case SCHEDOP_remote_shutdown: + break; + default: + printk("%s: unknown sched op %d\n", __func__, cmd); + return -ENOSYS; + } + + return xencomm_arch_hypercall_sched_op(cmd, xencomm_create_inline(arg)); +} + +int +xencomm_hypercall_multicall(void *call_list, int nr_calls) +{ + int i; + multicall_entry_t *mce; + + for (i = 0; i < nr_calls; i++) { + mce = (multicall_entry_t *)call_list + i; + + switch (mce->op) { + case __HYPERVISOR_update_va_mapping: + case __HYPERVISOR_mmu_update: + /* No-op on ia64. */ + break; + case __HYPERVISOR_grant_table_op: + mce->args[1] = (unsigned long)xencommize_grant_table_op + (mce->args[0], (void *)mce->args[1], + mce->args[2]); + break; + case __HYPERVISOR_memory_op: + default: + printk("%s: unhandled multicall op entry op %lu\n", + __func__, mce->op); + return -ENOSYS; + } + } + + return xencomm_arch_hypercall_multicall + (xencomm_create_inline(call_list), nr_calls); +} + +int +xencomm_hypercall_callback_op(int cmd, void *arg) +{ + switch (cmd) + { + case CALLBACKOP_register: + case CALLBACKOP_unregister: + break; + default: + printk("%s: unknown callback op %d\n", __func__, cmd); + return -ENOSYS; + } + + return xencomm_arch_hypercall_callback_op + (cmd, xencomm_create_inline(arg)); +} + +static void +xencommize_memory_reservation (xen_memory_reservation_t *mop) +{ + struct xencomm_handle *desc; + + desc = xencomm_create_inline(xen_guest_handle(mop->extent_start)); + set_xen_guest_handle(mop->extent_start, (void *)desc); +} + +int +xencomm_hypercall_memory_op(unsigned int cmd, void *arg) +{ + switch (cmd) { + case XENMEM_increase_reservation: + case XENMEM_decrease_reservation: + case XENMEM_populate_physmap: + xencommize_memory_reservation((xen_memory_reservation_t *)arg); + break; + + case XENMEM_maximum_ram_page: + break; + + case XENMEM_exchange: + xencommize_memory_reservation + (&((xen_memory_exchange_t *)arg)->in); + xencommize_memory_reservation + (&((xen_memory_exchange_t *)arg)->out); + break; + + default: + printk("%s: unknown memory op %d\n", __func__, cmd); + return -ENOSYS; + } + + return xencomm_arch_hypercall_memory_op + (cmd, xencomm_create_inline(arg)); +} + +unsigned long +xencomm_hypercall_hvm_op(int cmd, void *arg) +{ + switch (cmd) { + case HVMOP_set_param: + case HVMOP_get_param: + break; + default: + printk("%s: unknown hvm op %d\n", __func__, cmd); + return -ENOSYS; + } + + return xencomm_arch_hypercall_hvm_op(cmd, xencomm_create_inline(arg)); +} + +int +xencomm_hypercall_suspend(unsigned long srec) +{ + struct sched_shutdown arg; + + arg.reason = SHUTDOWN_suspend; + + return xencomm_arch_hypercall_suspend(xencomm_create_inline(&arg)); +} + +int +xencomm_mini_hypercall_event_channel_op(int cmd, void *op) +{ + struct xencomm_mini xc_area[2]; + int nbr_area = 2; + struct xencomm_handle *desc; + int rc; + + rc = xencomm_create_mini(xc_area, &nbr_area, + op, sizeof(evtchn_op_t), &desc); + if (rc) + return rc; + + return xencomm_arch_hypercall_event_channel_op(cmd, desc); +} +EXPORT_SYMBOL(xencomm_mini_hypercall_event_channel_op); + +static int +xencommize_mini_grant_table_op(struct xencomm_mini *xc_area, int *nbr_area, + unsigned int cmd, void *op, unsigned int count, + struct xencomm_handle **desc) +{ + struct xencomm_handle *desc1; + unsigned int argsize; + int rc; + + switch (cmd) { + case GNTTABOP_map_grant_ref: + argsize = sizeof(struct gnttab_map_grant_ref); + break; + case GNTTABOP_unmap_grant_ref: + argsize = sizeof(struct gnttab_unmap_grant_ref); + break; + case 
GNTTABOP_setup_table: + { + struct gnttab_setup_table *setup = op; + + argsize = sizeof(*setup); + + if (count != 1) + return -EINVAL; + rc = xencomm_create_mini + (xc_area, nbr_area, + xen_guest_handle(setup->frame_list), + setup->nr_frames + * sizeof(*xen_guest_handle(setup->frame_list)), + &desc1); + if (rc) + return rc; + set_xen_guest_handle(setup->frame_list, (void *)desc1); + break; + } + case GNTTABOP_dump_table: + argsize = sizeof(struct gnttab_dump_table); + break; + case GNTTABOP_transfer: + argsize = sizeof(struct gnttab_transfer); + break; + default: + printk("%s: unknown mini grant table op %d\n", __func__, cmd); + BUG(); + } + + rc = xencomm_create_mini(xc_area, nbr_area, op, count * argsize, desc); + if (rc) + return rc; + + return 0; +} + +int +xencomm_mini_hypercall_grant_table_op(unsigned int cmd, void *op, + unsigned int count) +{ + int rc; + struct xencomm_handle *desc; + int nbr_area = 2; + struct xencomm_mini xc_area[2]; + + rc = xencommize_mini_grant_table_op(xc_area, &nbr_area, + cmd, op, count, &desc); + if (rc) + return rc; + + return xencomm_arch_hypercall_grant_table_op(cmd, desc, count); +} +EXPORT_SYMBOL(xencomm_mini_hypercall_grant_table_op); + +int +xencomm_mini_hypercall_multicall(void *call_list, int nr_calls) +{ + int i; + multicall_entry_t *mce; + int nbr_area = 2 + nr_calls * 3; + struct xencomm_mini xc_area[nbr_area]; + struct xencomm_handle *desc; + int rc; + + for (i = 0; i < nr_calls; i++) { + mce = (multicall_entry_t *)call_list + i; + + switch (mce->op) { + case __HYPERVISOR_update_va_mapping: + case __HYPERVISOR_mmu_update: + /* No-op on ia64. */ + break; + case __HYPERVISOR_grant_table_op: + rc = xencommize_mini_grant_table_op + (xc_area, &nbr_area, + mce->args[0], (void *)mce->args[1], + mce->args[2], &desc); + if (rc) + return rc; + mce->args[1] = (unsigned long)desc; + break; + case __HYPERVISOR_memory_op: + default: + printk("%s: unhandled multicall op entry op %lu\n", + __func__, mce->op); + return -ENOSYS; + } + } + + rc = xencomm_create_mini(xc_area, &nbr_area, call_list, + nr_calls * sizeof(multicall_entry_t), &desc); + if (rc) + return rc; + + return xencomm_arch_hypercall_multicall(desc, nr_calls); +} +EXPORT_SYMBOL(xencomm_mini_hypercall_multicall); + +static int +xencommize_mini_memory_reservation(struct xencomm_mini *area, int *nbr_area, + xen_memory_reservation_t *mop) +{ + struct xencomm_handle *desc; + int rc; + + rc = xencomm_create_mini + (area, nbr_area, + xen_guest_handle(mop->extent_start), + mop->nr_extents + * sizeof(*xen_guest_handle(mop->extent_start)), + &desc); + if (rc) + return rc; + + set_xen_guest_handle(mop->extent_start, (void *)desc); + + return 0; +} + +int +xencomm_mini_hypercall_memory_op(unsigned int cmd, void *arg) +{ + int nbr_area = 4; + struct xencomm_mini xc_area[4]; + struct xencomm_handle *desc; + int rc; + unsigned int argsize; + + switch (cmd) { + case XENMEM_increase_reservation: + case XENMEM_decrease_reservation: + case XENMEM_populate_physmap: + argsize = sizeof(xen_memory_reservation_t); + rc = xencommize_mini_memory_reservation + (xc_area, &nbr_area, (xen_memory_reservation_t *)arg); + if (rc) + return rc; + break; + + case XENMEM_maximum_ram_page: + argsize = 0; + break; + + case XENMEM_exchange: + argsize = sizeof(xen_memory_exchange_t); + rc = xencommize_mini_memory_reservation + (xc_area, &nbr_area, + &((xen_memory_exchange_t *)arg)->in); + if (rc) + return rc; + rc = xencommize_mini_memory_reservation + (xc_area, &nbr_area, + &((xen_memory_exchange_t *)arg)->out); + if (rc) + return rc; 
+ break; + + default: + printk("%s: unknown mini memory op %d\n", __func__, cmd); + return -ENOSYS; + } + + rc = xencomm_create_mini(xc_area, &nbr_area, arg, argsize, &desc); + if (rc) + return rc; + + return xencomm_arch_hypercall_memory_op(cmd, desc); +} +EXPORT_SYMBOL(xencomm_mini_hypercall_memory_op); diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/xen/xcom_privcmd.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/arch/ia64/xen/xcom_privcmd.c Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,600 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> + * Tristan Gingold <tristan.gingold@xxxxxxxx> + */ +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/gfp.h> +#include <linux/module.h> +#include <xen/interface/xen.h> +#include <xen/interface/dom0_ops.h> +#define __XEN__ +#include <xen/interface/domctl.h> +#include <xen/interface/sysctl.h> +#include <xen/interface/memory.h> +#include <xen/interface/version.h> +#include <xen/interface/event_channel.h> +#include <xen/interface/acm_ops.h> +#include <xen/interface/hvm/params.h> +#include <xen/public/privcmd.h> +#include <asm/hypercall.h> +#include <asm/page.h> +#include <asm/uaccess.h> +#include <asm/xen/xencomm.h> + +#define ROUND_DIV(v,s) (((v) + (s) - 1) / (s)) + +static int +xencomm_privcmd_dom0_op(privcmd_hypercall_t *hypercall) +{ + dom0_op_t kern_op; + dom0_op_t __user *user_op = (dom0_op_t __user *)hypercall->arg[0]; + struct xencomm_handle *op_desc; + struct xencomm_handle *desc = NULL; + int ret = 0; + + if (copy_from_user(&kern_op, user_op, sizeof(dom0_op_t))) + return -EFAULT; + + if (kern_op.interface_version != DOM0_INTERFACE_VERSION) + return -EACCES; + + op_desc = xencomm_create_inline(&kern_op); + + switch (kern_op.cmd) { + default: + printk("%s: unknown dom0 cmd %d\n", __func__, kern_op.cmd); + return -ENOSYS; + } + + if (ret) { + /* error mapping the nested pointer */ + return ret; + } + + ret = xencomm_arch_hypercall_dom0_op(op_desc); + + /* FIXME: should we restore the handle? 
*/ + if (copy_to_user(user_op, &kern_op, sizeof(dom0_op_t))) + ret = -EFAULT; + + if (desc) + xencomm_free(desc); + return ret; +} + +static int +xencomm_privcmd_sysctl(privcmd_hypercall_t *hypercall) +{ + xen_sysctl_t kern_op; + xen_sysctl_t __user *user_op; + struct xencomm_handle *op_desc; + struct xencomm_handle *desc = NULL; + struct xencomm_handle *desc1 = NULL; + int ret = 0; + + user_op = (xen_sysctl_t __user *)hypercall->arg[0]; + + if (copy_from_user(&kern_op, user_op, sizeof(xen_sysctl_t))) + return -EFAULT; + + if (kern_op.interface_version != XEN_SYSCTL_INTERFACE_VERSION) + return -EACCES; + + op_desc = xencomm_create_inline(&kern_op); + + switch (kern_op.cmd) { + case XEN_SYSCTL_readconsole: + ret = xencomm_create( + xen_guest_handle(kern_op.u.readconsole.buffer), + kern_op.u.readconsole.count, + &desc, GFP_KERNEL); + set_xen_guest_handle(kern_op.u.readconsole.buffer, + (void *)desc); + break; + case XEN_SYSCTL_tbuf_op: + case XEN_SYSCTL_physinfo: + case XEN_SYSCTL_sched_id: + break; + case XEN_SYSCTL_perfc_op: + ret = xencomm_create( + xen_guest_handle(kern_op.u.perfc_op.desc), + kern_op.u.perfc_op.nr_counters * + sizeof(xen_sysctl_perfc_desc_t), + &desc, GFP_KERNEL); + if (ret) + return ret; + set_xen_guest_handle(kern_op.u.perfc_op.val, + (void *)desc); + ret = xencomm_create( + xen_guest_handle(kern_op.u.perfc_op.val), + kern_op.u.perfc_op.nr_vals * + sizeof(xen_sysctl_perfc_desc_t), + &desc1, GFP_KERNEL); + if (ret) + xencomm_free(desc); + set_xen_guest_handle(kern_op.u.perfc_op.val, + (void *)desc1); + break; + case XEN_SYSCTL_getdomaininfolist: + ret = xencomm_create( + xen_guest_handle(kern_op.u.getdomaininfolist.buffer), + kern_op.u.getdomaininfolist.max_domains * + sizeof(xen_domctl_getdomaininfo_t), + &desc, GFP_KERNEL); + set_xen_guest_handle(kern_op.u.getdomaininfolist.buffer, + (void *)desc); + break; + default: + printk("%s: unknown sysctl cmd %d\n", __func__, kern_op.cmd); + return -ENOSYS; + } + + if (ret) { + /* error mapping the nested pointer */ + return ret; + } + + ret = xencomm_arch_hypercall_sysctl(op_desc); + + /* FIXME: should we restore the handle? 
*/ + if (copy_to_user(user_op, &kern_op, sizeof(xen_sysctl_t))) + ret = -EFAULT; + + if (desc) + xencomm_free(desc); + if (desc1) + xencomm_free(desc1); + return ret; +} + +static int +xencomm_privcmd_domctl(privcmd_hypercall_t *hypercall) +{ + xen_domctl_t kern_op; + xen_domctl_t __user *user_op; + struct xencomm_handle *op_desc; + struct xencomm_handle *desc = NULL; + int ret = 0; + + user_op = (xen_domctl_t __user *)hypercall->arg[0]; + + if (copy_from_user(&kern_op, user_op, sizeof(xen_domctl_t))) + return -EFAULT; + + if (kern_op.interface_version != XEN_DOMCTL_INTERFACE_VERSION) + return -EACCES; + + op_desc = xencomm_create_inline(&kern_op); + + switch (kern_op.cmd) { + case XEN_DOMCTL_createdomain: + case XEN_DOMCTL_destroydomain: + case XEN_DOMCTL_pausedomain: + case XEN_DOMCTL_unpausedomain: + case XEN_DOMCTL_getdomaininfo: + break; + case XEN_DOMCTL_getmemlist: + { + unsigned long nr_pages = kern_op.u.getmemlist.max_pfns; + + ret = xencomm_create( + xen_guest_handle(kern_op.u.getmemlist.buffer), + nr_pages * sizeof(unsigned long), + &desc, GFP_KERNEL); + set_xen_guest_handle(kern_op.u.getmemlist.buffer, + (void *)desc); + break; + } + case XEN_DOMCTL_getpageframeinfo: + break; + case XEN_DOMCTL_getpageframeinfo2: + ret = xencomm_create( + xen_guest_handle(kern_op.u.getpageframeinfo2.array), + kern_op.u.getpageframeinfo2.num, + &desc, GFP_KERNEL); + set_xen_guest_handle(kern_op.u.getpageframeinfo2.array, + (void *)desc); + break; + case XEN_DOMCTL_shadow_op: + ret = xencomm_create( + xen_guest_handle(kern_op.u.shadow_op.dirty_bitmap), + ROUND_DIV(kern_op.u.shadow_op.pages, 8), + &desc, GFP_KERNEL); + set_xen_guest_handle(kern_op.u.shadow_op.dirty_bitmap, + (void *)desc); + break; + case XEN_DOMCTL_max_mem: + break; + case XEN_DOMCTL_setvcpucontext: + case XEN_DOMCTL_getvcpucontext: + ret = xencomm_create( + xen_guest_handle(kern_op.u.vcpucontext.ctxt), + sizeof(vcpu_guest_context_t), + &desc, GFP_KERNEL); + set_xen_guest_handle(kern_op.u.vcpucontext.ctxt, (void *)desc); + break; + case XEN_DOMCTL_getvcpuinfo: + break; + case XEN_DOMCTL_setvcpuaffinity: + case XEN_DOMCTL_getvcpuaffinity: + ret = xencomm_create( + xen_guest_handle(kern_op.u.vcpuaffinity.cpumap.bitmap), + ROUND_DIV(kern_op.u.vcpuaffinity.cpumap.nr_cpus, 8), + &desc, GFP_KERNEL); + set_xen_guest_handle(kern_op.u.vcpuaffinity.cpumap.bitmap, + (void *)desc); + break; + case XEN_DOMCTL_max_vcpus: + case XEN_DOMCTL_scheduler_op: + case XEN_DOMCTL_setdomainhandle: + case XEN_DOMCTL_setdebugging: + case XEN_DOMCTL_irq_permission: + case XEN_DOMCTL_iomem_permission: + case XEN_DOMCTL_ioport_permission: + case XEN_DOMCTL_hypercall_init: + case XEN_DOMCTL_arch_setup: + case XEN_DOMCTL_settimeoffset: + break; + default: + printk("%s: unknown domctl cmd %d\n", __func__, kern_op.cmd); + return -ENOSYS; + } + + if (ret) { + /* error mapping the nested pointer */ + return ret; + } + + ret = xencomm_arch_hypercall_domctl (op_desc); + + /* FIXME: should we restore the handle? 
*/ + if (copy_to_user(user_op, &kern_op, sizeof(xen_domctl_t))) + ret = -EFAULT; + + if (desc) + xencomm_free(desc); + return ret; +} + +static int +xencomm_privcmd_acm_op(privcmd_hypercall_t *hypercall) +{ + int cmd = hypercall->arg[0]; + void __user *arg = (void __user *)hypercall->arg[1]; + struct xencomm_handle *op_desc; + struct xencomm_handle *desc = NULL; + int ret; + + switch (cmd) { + case ACMOP_getssid: + { + struct acm_getssid kern_arg; + + if (copy_from_user(&kern_arg, arg, sizeof (kern_arg))) + return -EFAULT; + + op_desc = xencomm_create_inline(&kern_arg); + + ret = xencomm_create(xen_guest_handle(kern_arg.ssidbuf), + kern_arg.ssidbuf_size, &desc, GFP_KERNEL); + if (ret) + return ret; + + set_xen_guest_handle(kern_arg.ssidbuf, (void *)desc); + + ret = xencomm_arch_hypercall_acm_op(cmd, op_desc); + + xencomm_free(desc); + + if (copy_to_user(arg, &kern_arg, sizeof (kern_arg))) + return -EFAULT; + + return ret; + } + default: + printk("%s: unknown acm_op cmd %d\n", __func__, cmd); + return -ENOSYS; + } + + return ret; +} + +static int +xencomm_privcmd_memory_op(privcmd_hypercall_t *hypercall) +{ + const unsigned long cmd = hypercall->arg[0]; + int ret = 0; + + switch (cmd) { + case XENMEM_increase_reservation: + case XENMEM_decrease_reservation: + case XENMEM_populate_physmap: + { + xen_memory_reservation_t kern_op; + xen_memory_reservation_t __user *user_op; + struct xencomm_handle *desc = NULL; + struct xencomm_handle *desc_op; + + user_op = (xen_memory_reservation_t __user *)hypercall->arg[1]; + if (copy_from_user(&kern_op, user_op, + sizeof(xen_memory_reservation_t))) + return -EFAULT; + desc_op = xencomm_create_inline(&kern_op); + + if (xen_guest_handle(kern_op.extent_start)) { + void * addr; + + addr = xen_guest_handle(kern_op.extent_start); + ret = xencomm_create + (addr, + kern_op.nr_extents * + sizeof(*xen_guest_handle + (kern_op.extent_start)), + &desc, GFP_KERNEL); + if (ret) + return ret; + set_xen_guest_handle(kern_op.extent_start, + (void *)desc); + } + + ret = xencomm_arch_hypercall_memory_op(cmd, desc_op); + + if (desc) + xencomm_free(desc); + + if (ret != 0) + return ret; + + if (copy_to_user(user_op, &kern_op, + sizeof(xen_memory_reservation_t))) + return -EFAULT; + + return ret; + } + case XENMEM_translate_gpfn_list: + { + xen_translate_gpfn_list_t kern_op; + xen_translate_gpfn_list_t __user *user_op; + struct xencomm_handle *desc_gpfn = NULL; + struct xencomm_handle *desc_mfn = NULL; + struct xencomm_handle *desc_op; + void *addr; + + user_op = (xen_translate_gpfn_list_t __user *) + hypercall->arg[1]; + if (copy_from_user(&kern_op, user_op, + sizeof(xen_translate_gpfn_list_t))) + return -EFAULT; + desc_op = xencomm_create_inline(&kern_op); + + if (kern_op.nr_gpfns) { + /* gpfn_list. */ + addr = xen_guest_handle(kern_op.gpfn_list); + + ret = xencomm_create(addr, kern_op.nr_gpfns * + sizeof(*xen_guest_handle + (kern_op.gpfn_list)), + &desc_gpfn, GFP_KERNEL); + if (ret) + return ret; + set_xen_guest_handle(kern_op.gpfn_list, + (void *)desc_gpfn); + + /* mfn_list. 
*/ + addr = xen_guest_handle(kern_op.mfn_list); + + ret = xencomm_create(addr, kern_op.nr_gpfns * + sizeof(*xen_guest_handle + (kern_op.mfn_list)), + &desc_mfn, GFP_KERNEL); + if (ret) + return ret; + set_xen_guest_handle(kern_op.mfn_list, + (void *)desc_mfn); + } + + ret = xencomm_arch_hypercall_memory_op(cmd, desc_op); + + if (desc_gpfn) + xencomm_free(desc_gpfn); + + if (desc_mfn) + xencomm_free(desc_mfn); + + if (ret != 0) + return ret; + + return ret; + } + default: + printk("%s: unknown memory op %lu\n", __func__, cmd); + ret = -ENOSYS; + } + return ret; +} + +static int +xencomm_privcmd_xen_version(privcmd_hypercall_t *hypercall) +{ + int cmd = hypercall->arg[0]; + void __user *arg = (void __user *)hypercall->arg[1]; + struct xencomm_handle *desc; + size_t argsize; + int rc; + + switch (cmd) { + case XENVER_version: + /* do not actually pass an argument */ + return xencomm_arch_hypercall_xen_version(cmd, 0); + case XENVER_extraversion: + argsize = sizeof(xen_extraversion_t); + break; + case XENVER_compile_info: + argsize = sizeof(xen_compile_info_t); + break; + case XENVER_capabilities: + argsize = sizeof(xen_capabilities_info_t); + break; + case XENVER_changeset: + argsize = sizeof(xen_changeset_info_t); + break; + case XENVER_platform_parameters: + argsize = sizeof(xen_platform_parameters_t); + break; + case XENVER_pagesize: + argsize = (arg == NULL) ? 0 : sizeof(void *); + break; + case XENVER_get_features: + argsize = (arg == NULL) ? 0 : sizeof(xen_feature_info_t); + break; + + default: + printk("%s: unknown version op %d\n", __func__, cmd); + return -ENOSYS; + } + + rc = xencomm_create(arg, argsize, &desc, GFP_KERNEL); + if (rc) + return rc; + + rc = xencomm_arch_hypercall_xen_version(cmd, desc); + + xencomm_free(desc); + + return rc; +} + +static int +xencomm_privcmd_event_channel_op(privcmd_hypercall_t *hypercall) +{ + int cmd = hypercall->arg[0]; + struct xencomm_handle *desc; + unsigned int argsize; + int ret; + + switch (cmd) { + case EVTCHNOP_alloc_unbound: + argsize = sizeof(evtchn_alloc_unbound_t); + break; + + case EVTCHNOP_status: + argsize = sizeof(evtchn_status_t); + break; + + default: + printk("%s: unknown EVTCHNOP %d\n", __func__, cmd); + return -EINVAL; + } + + ret = xencomm_create((void *)hypercall->arg[1], argsize, + &desc, GFP_KERNEL); + if (ret) + return ret; + + ret = xencomm_arch_hypercall_event_channel_op(cmd, desc); + + xencomm_free(desc); + return ret; +} + +static int +xencomm_privcmd_hvm_op(privcmd_hypercall_t *hypercall) +{ + int cmd = hypercall->arg[0]; + struct xencomm_handle *desc; + unsigned int argsize; + int ret; + + switch (cmd) { + case HVMOP_get_param: + case HVMOP_set_param: + argsize = sizeof(xen_hvm_param_t); + break; + default: + printk("%s: unknown HVMOP %d\n", __func__, cmd); + return -EINVAL; + } + + ret = xencomm_create((void *)hypercall->arg[1], argsize, + &desc, GFP_KERNEL); + if (ret) + return ret; + + ret = xencomm_arch_hypercall_hvm_op(cmd, desc); + + xencomm_free(desc); + return ret; +} + +static int +xencomm_privcmd_sched_op(privcmd_hypercall_t *hypercall) +{ + int cmd = hypercall->arg[0]; + struct xencomm_handle *desc; + unsigned int argsize; + int ret; + + switch (cmd) { + case SCHEDOP_remote_shutdown: + argsize = sizeof(sched_remote_shutdown_t); + break; + default: + printk("%s: unknown SCHEDOP %d\n", __func__, cmd); + return -EINVAL; + } + + ret = xencomm_create((void *)hypercall->arg[1], argsize, + &desc, GFP_KERNEL); + if (ret) + return ret; + + ret = xencomm_arch_hypercall_sched_op(cmd, desc); + + xencomm_free(desc); + 
return ret; +} + +int +privcmd_hypercall(privcmd_hypercall_t *hypercall) +{ + switch (hypercall->op) { + case __HYPERVISOR_dom0_op: + return xencomm_privcmd_dom0_op(hypercall); + case __HYPERVISOR_domctl: + return xencomm_privcmd_domctl(hypercall); + case __HYPERVISOR_sysctl: + return xencomm_privcmd_sysctl(hypercall); + case __HYPERVISOR_acm_op: + return xencomm_privcmd_acm_op(hypercall); + case __HYPERVISOR_xen_version: + return xencomm_privcmd_xen_version(hypercall); + case __HYPERVISOR_memory_op: + return xencomm_privcmd_memory_op(hypercall); + case __HYPERVISOR_event_channel_op: + return xencomm_privcmd_event_channel_op(hypercall); + case __HYPERVISOR_hvm_op: + return xencomm_privcmd_hvm_op(hypercall); + case __HYPERVISOR_sched_op: + return xencomm_privcmd_sched_op(hypercall); + default: + printk("%s: unknown hcall (%ld)\n", __func__, hypercall->op); + return -ENOSYS; + } +} + diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/xen/xencomm.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/arch/ia64/xen/xencomm.c Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,244 @@ +/* + * Copyright (C) 2006 Hollis Blanchard <hollisb@xxxxxxxxxx>, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/gfp.h> +#include <linux/mm.h> +#include <asm/page.h> +#include <asm/xen/xencomm.h> +#include <xen/interface/xen.h> + +static int xencomm_debug = 0; + +/* Translate virtual address to physical address. */ +unsigned long +xencomm_vaddr_to_paddr(unsigned long vaddr) +{ + struct page *page; + struct vm_area_struct *vma; + + if (vaddr == 0) + return 0; + +#ifdef __ia64__ + if (REGION_NUMBER(vaddr) == 5) { + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *ptep; + + /* On ia64, TASK_SIZE refers to current. It is not initialized + during boot. + Furthermore the kernel is relocatable and __pa() doesn't + work on addresses. */ + if (vaddr >= KERNEL_START + && vaddr < (KERNEL_START + KERNEL_TR_PAGE_SIZE)) { + extern unsigned long kernel_start_pa; + + return vaddr - kernel_start_pa; + } + + /* In kernel area -- virtually mapped. */ + pgd = pgd_offset_k(vaddr); + if (pgd_none(*pgd) || pgd_bad(*pgd)) + return ~0UL; + + pud = pud_offset(pgd, vaddr); + if (pud_none(*pud) || pud_bad(*pud)) + return ~0UL; + + pmd = pmd_offset(pud, vaddr); + if (pmd_none(*pmd) || pmd_bad(*pmd)) + return ~0UL; + + ptep = pte_offset_kernel(pmd, vaddr); + if (!ptep) + return ~0UL; + + return (pte_val(*ptep) & _PFN_MASK) | (vaddr & ~PAGE_MASK); + } +#endif + + if (vaddr > TASK_SIZE) { + /* kernel address */ + return __pa(vaddr); + } + + /* XXX double-check (lack of) locking */ + vma = find_extend_vma(current->mm, vaddr); + if (!vma) + return ~0UL; + + /* We assume the page is modified. 
*/ + page = follow_page(vma, vaddr, FOLL_WRITE | FOLL_TOUCH); + if (!page) + return ~0UL; + + return (page_to_pfn(page) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK); +} + +static int +xencomm_init(struct xencomm_desc *desc, void *buffer, unsigned long bytes) +{ + unsigned long recorded = 0; + int i = 0; + + BUG_ON((buffer == NULL) && (bytes > 0)); + + /* record the physical pages used */ + if (buffer == NULL) + desc->nr_addrs = 0; + + while ((recorded < bytes) && (i < desc->nr_addrs)) { + unsigned long vaddr = (unsigned long)buffer + recorded; + unsigned long paddr; + int offset; + int chunksz; + + offset = vaddr % PAGE_SIZE; /* handle partial pages */ + chunksz = min(PAGE_SIZE - offset, bytes - recorded); + + paddr = xencomm_vaddr_to_paddr(vaddr); + if (paddr == ~0UL) { + printk("%s: couldn't translate vaddr %lx\n", + __func__, vaddr); + return -EINVAL; + } + + desc->address[i++] = paddr; + recorded += chunksz; + } + + if (recorded < bytes) { + printk("%s: could only translate %ld of %ld bytes\n", + __func__, recorded, bytes); + return -ENOSPC; + } + + /* mark remaining addresses invalid (just for safety) */ + while (i < desc->nr_addrs) + desc->address[i++] = XENCOMM_INVALID; + + desc->magic = XENCOMM_MAGIC; + + return 0; +} + +static struct xencomm_desc * +xencomm_alloc(gfp_t gfp_mask) +{ + struct xencomm_desc *desc; + + desc = (struct xencomm_desc *)__get_free_page(gfp_mask); + if (desc == NULL) + panic("%s: page allocation failed\n", __func__); + + desc->nr_addrs = (PAGE_SIZE - sizeof(struct xencomm_desc)) / + sizeof(*desc->address); + + return desc; +} + +void +xencomm_free(struct xencomm_handle *desc) +{ + if (desc) + free_page((unsigned long)__va(desc)); +} + +int +xencomm_create(void *buffer, unsigned long bytes, + struct xencomm_handle **ret, gfp_t gfp_mask) +{ + struct xencomm_desc *desc; + struct xencomm_handle *handle; + int rc; + + if (xencomm_debug) + printk("%s: %p[%ld]\n", __func__, buffer, bytes); + + if (buffer == NULL || bytes == 0) { + *ret = (struct xencomm_handle *)NULL; + return 0; + } + + desc = xencomm_alloc(gfp_mask); + if (!desc) { + printk("%s failure\n", "xencomm_alloc"); + return -ENOMEM; + } + handle = (struct xencomm_handle *)__pa(desc); + + rc = xencomm_init(desc, buffer, bytes); + if (rc) { + printk("%s failure: %d\n", "xencomm_init", rc); + xencomm_free(handle); + return rc; + } + + *ret = handle; + return 0; +} + +/* "mini" routines, for stack-based communications: */ + +static void * +xencomm_alloc_mini(struct xencomm_mini *area, int *nbr_area) +{ + unsigned long base; + unsigned int pageoffset; + + while (*nbr_area >= 0) { + /* Allocate an area. */ + (*nbr_area)--; + + base = (unsigned long)(area + *nbr_area); + pageoffset = base % PAGE_SIZE; + + /* If the area does not cross a page, use it. */ + if ((PAGE_SIZE - pageoffset) >= sizeof(struct xencomm_mini)) + return &area[*nbr_area]; + } + /* No more area. 
*/ + return NULL; +} + +int +xencomm_create_mini(struct xencomm_mini *area, int *nbr_area, + void *buffer, unsigned long bytes, + struct xencomm_handle **ret) +{ + struct xencomm_desc *desc; + int rc; + unsigned long res; + + desc = xencomm_alloc_mini(area, nbr_area); + if (!desc) + return -ENOMEM; + desc->nr_addrs = XENCOMM_MINI_ADDRS; + + rc = xencomm_init(desc, buffer, bytes); + if (rc) + return rc; + + res = xencomm_vaddr_to_paddr((unsigned long)desc); + if (res == ~0UL) + return -EINVAL; + + *ret = (struct xencomm_handle*)res; + return 0; +} diff -r 47c098fdce14 -r 85a15e585061 arch/ia64/xen/xensetup.S.orig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/arch/ia64/xen/xensetup.S.orig Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,54 @@ +/* + * Support routines for Xen + * + * Copyright (C) 2005 Dan Magenheimer <dan.magenheimer@xxxxxx> + */ + +#include <asm/processor.h> +#include <asm/asmmacro.h> + +#define isBP p3 // are we the Bootstrap Processor? + + .text +GLOBAL_ENTRY(early_xen_setup) + mov r8=ar.rsc // Initialized in head.S +(isBP) movl r9=running_on_xen;; + extr.u r8=r8,2,2;; // Extract pl fields + cmp.eq p7,p0=r8,r0 // p7: !running on xen + mov r8=1 // booleanize. +(p7) br.ret.sptk.many rp;; +(isBP) st4 [r9]=r8 + movl r10=xen_ivt;; + + mov cr.iva=r10 + +#if XSI_BASE != 0xf100000000000000UL + /* Backward compatibility. */ +(isBP) mov r2=0x600 +(isBP) movl r28=XSI_BASE;; +(isBP) break 0x1000;; +#endif + + br.ret.sptk.many rp + ;; +END(early_xen_setup) + +#include <xen/interface/xen.h> + +/* Stub for suspend. + Just force the stacked registers to be written in memory. */ +GLOBAL_ENTRY(HYPERVISOR_suspend) + alloc r20=ar.pfs,0,0,0,0 + mov r14=2 + mov r15=r12 + ;; + /* We don't want to deal with RSE. */ + flushrs + mov r2=__HYPERVISOR_sched_op + st4 [r12]=r14 + ;; + break 0x1000 + ;; + mov ar.pfs=r20 + br.ret.sptk.many b0 +END(HYPERVISOR_suspend) diff -r 47c098fdce14 -r 85a15e585061 drivers/xen/privcmd/privcmd.c.orig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/drivers/xen/privcmd/privcmd.c.orig Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,266 @@ +/****************************************************************************** + * privcmd.c + * + * Interface to privileged domain-0 commands. 
+ * + * Copyright (c) 2002-2004, K A Fraser, B Dragovic + */ + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/swap.h> +#include <linux/smp_lock.h> +#include <linux/highmem.h> +#include <linux/pagemap.h> +#include <linux/seq_file.h> +#include <linux/kthread.h> +#include <asm/hypervisor.h> + +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/uaccess.h> +#include <asm/tlb.h> +#include <asm/hypervisor.h> +#include <xen/public/privcmd.h> +#include <xen/interface/xen.h> +#include <xen/interface/dom0_ops.h> +#include <xen/xen_proc.h> + +static struct proc_dir_entry *privcmd_intf; +static struct proc_dir_entry *capabilities_intf; + +static int privcmd_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long data) +{ + int ret = -ENOSYS; + void __user *udata = (void __user *) data; + + switch (cmd) { + case IOCTL_PRIVCMD_HYPERCALL: { + privcmd_hypercall_t hypercall; + + if (copy_from_user(&hypercall, udata, sizeof(hypercall))) + return -EFAULT; + +#if defined(__i386__) + __asm__ __volatile__ ( + "pushl %%ebx; pushl %%ecx; pushl %%edx; " + "pushl %%esi; pushl %%edi; " + "movl 8(%%eax),%%ebx ;" + "movl 16(%%eax),%%ecx ;" + "movl 24(%%eax),%%edx ;" + "movl 32(%%eax),%%esi ;" + "movl 40(%%eax),%%edi ;" + "movl (%%eax),%%eax ;" + "shll $5,%%eax ;" + "addl $hypercall_page,%%eax ;" + "call *%%eax ;" + "popl %%edi; popl %%esi; popl %%edx; " + "popl %%ecx; popl %%ebx" + : "=a" (ret) : "0" (&hypercall) : "memory" ); +#elif defined (__x86_64__) + { + long ign1, ign2, ign3; + __asm__ __volatile__ ( + "movq %8,%%r10; movq %9,%%r8;" + "shlq $5,%%rax ;" + "addq $hypercall_page,%%rax ;" + "call *%%rax" + : "=a" (ret), "=D" (ign1), + "=S" (ign2), "=d" (ign3) + : "0" ((unsigned long)hypercall.op), + "1" ((unsigned long)hypercall.arg[0]), + "2" ((unsigned long)hypercall.arg[1]), + "3" ((unsigned long)hypercall.arg[2]), + "g" ((unsigned long)hypercall.arg[3]), + "g" ((unsigned long)hypercall.arg[4]) + : "r8", "r10", "memory" ); + } +#elif defined (__ia64__) + __asm__ __volatile__ ( + ";; mov r14=%2; mov r15=%3; " + "mov r16=%4; mov r17=%5; mov r18=%6;" + "mov r2=%1; break 0x1000;; mov %0=r8 ;;" + : "=r" (ret) + : "r" (hypercall.op), + "r" (hypercall.arg[0]), + "r" (hypercall.arg[1]), + "r" (hypercall.arg[2]), + "r" (hypercall.arg[3]), + "r" (hypercall.arg[4]) + : "r14","r15","r16","r17","r18","r2","r8","memory"); +#endif + } + break; + + case IOCTL_PRIVCMD_MMAP: { +#define PRIVCMD_MMAP_SZ 32 + privcmd_mmap_t mmapcmd; + privcmd_mmap_entry_t msg[PRIVCMD_MMAP_SZ]; + privcmd_mmap_entry_t __user *p; + int i, rc; + + if (!is_initial_xendomain()) + return -EPERM; + + if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) + return -EFAULT; + + p = mmapcmd.entry; + + for (i = 0; i < mmapcmd.num; + i += PRIVCMD_MMAP_SZ, p += PRIVCMD_MMAP_SZ) { + int j, n = ((mmapcmd.num-i)>PRIVCMD_MMAP_SZ)? 
+ PRIVCMD_MMAP_SZ:(mmapcmd.num-i); + + if (copy_from_user(&msg, p, + n*sizeof(privcmd_mmap_entry_t))) + return -EFAULT; + + for (j = 0; j < n; j++) { + struct vm_area_struct *vma = + find_vma( current->mm, msg[j].va ); + + if (!vma) + return -EINVAL; + + if (msg[j].va > PAGE_OFFSET) + return -EINVAL; + + if ((msg[j].va + (msg[j].npages << PAGE_SHIFT)) + > vma->vm_end ) + return -EINVAL; + + if ((rc = direct_remap_pfn_range( + vma, + msg[j].va&PAGE_MASK, + msg[j].mfn, + msg[j].npages<<PAGE_SHIFT, + vma->vm_page_prot, + mmapcmd.dom)) < 0) + return rc; + } + } + ret = 0; + } + break; + + case IOCTL_PRIVCMD_MMAPBATCH: { + privcmd_mmapbatch_t m; + struct vm_area_struct *vma = NULL; + xen_pfn_t __user *p; + unsigned long addr, mfn; + int i; + + if (!is_initial_xendomain()) + return -EPERM; + + if (copy_from_user(&m, udata, sizeof(m))) { + ret = -EFAULT; + goto batch_err; + } + + if (m.dom == DOMID_SELF) { + ret = -EINVAL; + goto batch_err; + } + + vma = find_vma(current->mm, m.addr); + if (!vma) { + ret = -EINVAL; + goto batch_err; + } + + if (m.addr > PAGE_OFFSET) { + ret = -EFAULT; + goto batch_err; + } + + if ((m.addr + (m.num<<PAGE_SHIFT)) > vma->vm_end) { + ret = -EFAULT; + goto batch_err; + } + + p = m.arr; + addr = m.addr; + for (i = 0; i < m.num; i++, addr += PAGE_SIZE, p++) { + if (get_user(mfn, p)) + return -EFAULT; + + ret = direct_remap_pfn_range(vma, addr & PAGE_MASK, + mfn, PAGE_SIZE, + vma->vm_page_prot, m.dom); + if (ret < 0) + put_user(0xF0000000 | mfn, p); + } + + ret = 0; + break; + + batch_err: + printk("batch_err ret=%d vma=%p addr=%lx " + "num=%d arr=%p %lx-%lx\n", + ret, vma, (unsigned long)m.addr, m.num, m.arr, + vma ? vma->vm_start : 0, vma ? vma->vm_end : 0); + break; + } + break; + + default: + ret = -EINVAL; + break; + } + + return ret; +} + +#ifndef HAVE_ARCH_PRIVCMD_MMAP +static int privcmd_mmap(struct file * file, struct vm_area_struct * vma) +{ + /* DONTCOPY is essential for Xen as copy_page_range is broken. */ + vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP; + + return 0; +} +#endif + +static struct file_operations privcmd_file_ops = { + .ioctl = privcmd_ioctl, + .mmap = privcmd_mmap, +}; + +static int capabilities_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len = 0; + *page = 0; + + if (is_initial_xendomain()) + len = sprintf( page, "control_d\n" ); + + *eof = 1; + return len; +} + +static int __init privcmd_init(void) +{ + if (!is_running_on_xen()) + return -ENODEV; + + privcmd_intf = create_xen_proc_entry("privcmd", 0400); + if (privcmd_intf != NULL) + privcmd_intf->proc_fops = &privcmd_file_ops; + + capabilities_intf = create_xen_proc_entry("capabilities", 0400 ); + if (capabilities_intf != NULL) + capabilities_intf->read_proc = capabilities_read; + + return 0; +} + +__initcall(privcmd_init); diff -r 47c098fdce14 -r 85a15e585061 include/asm-ia64/hypervisor.h.orig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/include/asm-ia64/hypervisor.h.orig Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,218 @@ +/****************************************************************************** + * hypervisor.h + * + * Linux-specific hypervisor handling. 
+ * + * Copyright (c) 2002-2004, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __HYPERVISOR_H__ +#define __HYPERVISOR_H__ + +#ifdef CONFIG_XEN +extern int running_on_xen; +#define is_running_on_xen() (running_on_xen) +#else /* CONFIG_XEN */ +# ifdef CONFIG_VMX_GUEST +# define is_running_on_xen() (1) +# else /* CONFIG_VMX_GUEST */ +# define is_running_on_xen() (0) +# define HYPERVISOR_ioremap(offset, size) (offset) +# endif /* CONFIG_VMX_GUEST */ +#endif /* CONFIG_XEN */ + +#if defined(CONFIG_XEN) || defined(CONFIG_VMX_GUEST) +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/version.h> +#include <linux/errno.h> +#include <xen/interface/xen.h> +#include <xen/interface/dom0_ops.h> +#include <xen/interface/event_channel.h> +#include <xen/interface/physdev.h> +#include <xen/interface/sched.h> +#include <asm/hypercall.h> +#include <asm/ptrace.h> +#include <asm/page.h> + +extern shared_info_t *HYPERVISOR_shared_info; +extern start_info_t *xen_start_info; + +void force_evtchn_callback(void); + +#ifndef CONFIG_VMX_GUEST +/* Turn jiffies into Xen system time. XXX Implement me. 
*/ +#define jiffies_to_st(j) 0 + +static inline int +HYPERVISOR_yield( + void) +{ + int rc = HYPERVISOR_sched_op(SCHEDOP_yield, NULL); + + if (rc == -ENOSYS) + rc = HYPERVISOR_sched_op_compat(SCHEDOP_yield, 0); + + return rc; +} + +static inline int +HYPERVISOR_block( + void) +{ + int rc = HYPERVISOR_sched_op(SCHEDOP_block, NULL); + + if (rc == -ENOSYS) + rc = HYPERVISOR_sched_op_compat(SCHEDOP_block, 0); + + return rc; +} + +static inline int +HYPERVISOR_shutdown( + unsigned int reason) +{ + struct sched_shutdown sched_shutdown = { + .reason = reason + }; + + int rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown); + + if (rc == -ENOSYS) + rc = HYPERVISOR_sched_op_compat(SCHEDOP_shutdown, reason); + + return rc; +} + +static inline int +HYPERVISOR_poll( + evtchn_port_t *ports, unsigned int nr_ports, u64 timeout) +{ + struct sched_poll sched_poll = { + .nr_ports = nr_ports, + .timeout = jiffies_to_st(timeout) + }; + + int rc; + + set_xen_guest_handle(sched_poll.ports, ports); + rc = HYPERVISOR_sched_op(SCHEDOP_poll, &sched_poll); + if (rc == -ENOSYS) + rc = HYPERVISOR_sched_op_compat(SCHEDOP_yield, 0); + + return rc; +} + +#include <asm/hypercall.h> + +// for drivers/xen/privcmd/privcmd.c +#define machine_to_phys_mapping 0 +struct vm_area_struct; +int direct_remap_pfn_range(struct vm_area_struct *vma, + unsigned long address, + unsigned long mfn, + unsigned long size, + pgprot_t prot, + domid_t domid); +struct file; +int privcmd_mmap(struct file * file, struct vm_area_struct * vma); +#define HAVE_ARCH_PRIVCMD_MMAP + +// for drivers/xen/balloon/balloon.c +#ifdef CONFIG_XEN_SCRUB_PAGES +#define scrub_pages(_p,_n) memset((void *)(_p), 0, (_n) << PAGE_SHIFT) +#else +#define scrub_pages(_p,_n) ((void)0) +#endif +#define pte_mfn(_x) pte_pfn(_x) +#define phys_to_machine_mapping_valid(_x) (1) + +#endif /* !CONFIG_VMX_GUEST */ + +#define __pte_ma(_x) ((pte_t) {(_x)}) /* unmodified use */ +#define pfn_pte_ma(_x,_y) __pte_ma(0) /* unmodified use */ + +#ifndef CONFIG_VMX_GUEST +int __xen_create_contiguous_region(unsigned long vstart, unsigned int order, unsigned int address_bits); +static inline int +xen_create_contiguous_region(unsigned long vstart, + unsigned int order, unsigned int address_bits) +{ + int ret = 0; + if (is_running_on_xen()) { + ret = __xen_create_contiguous_region(vstart, order, + address_bits); + } + return ret; +} + +void __xen_destroy_contiguous_region(unsigned long vstart, unsigned int order); +static inline void +xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) +{ + if (is_running_on_xen()) + __xen_destroy_contiguous_region(vstart, order); +} + +#endif /* !CONFIG_VMX_GUEST */ + +// for netfront.c, netback.c +#define MULTI_UVMFLAGS_INDEX 0 //XXX any value + +static inline void +MULTI_update_va_mapping( + multicall_entry_t *mcl, unsigned long va, + pte_t new_val, unsigned long flags) +{ + mcl->op = __HYPERVISOR_update_va_mapping; + mcl->result = 0; +} + +static inline void +MULTI_grant_table_op(multicall_entry_t *mcl, unsigned int cmd, + void *uop, unsigned int count) +{ + mcl->op = __HYPERVISOR_grant_table_op; + mcl->args[0] = cmd; + mcl->args[1] = (unsigned long)uop; + mcl->args[2] = count; +} + +// for debug +asmlinkage int xprintk(const char *fmt, ...); +#define xprintd(fmt, ...) 
xprintk("%s:%d " fmt, __func__, __LINE__, \ + ##__VA_ARGS__) + +#endif /* CONFIG_XEN || CONFIG_VMX_GUEST */ + +#ifdef CONFIG_XEN_PRIVILEGED_GUEST +#define is_initial_xendomain() (xen_start_info->flags & SIF_INITDOMAIN) +#else +#define is_initial_xendomain() 0 +#endif + +#endif /* __HYPERVISOR_H__ */ diff -r 47c098fdce14 -r 85a15e585061 include/asm-ia64/xen/xcom_hcall.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/include/asm-ia64/xen/xcom_hcall.h Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2006 Tristan Gingold <tristan.gingold@xxxxxxxx>, Bull SAS + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_XENCOMM_HCALL_H_ +#define _LINUX_XENCOMM_HCALL_H_ + +/* These function creates inline descriptor for the parameters and + calls the corresponding xencomm_arch_hypercall_X. + Architectures should defines HYPERVISOR_xxx as xencomm_hypercall_xxx unless + they want to use their own wrapper. */ +extern int xencomm_hypercall_console_io(int cmd, int count, char *str); + +extern int xencomm_hypercall_event_channel_op(int cmd, void *op); + +extern int xencomm_hypercall_xen_version(int cmd, void *arg); + +extern int xencomm_hypercall_physdev_op(int cmd, void *op); + +extern int xencomm_hypercall_grant_table_op(unsigned int cmd, void *op, + unsigned int count); + +extern int xencomm_hypercall_sched_op(int cmd, void *arg); + +extern int xencomm_hypercall_multicall(void *call_list, int nr_calls); + +extern int xencomm_hypercall_callback_op(int cmd, void *arg); + +extern int xencomm_hypercall_memory_op(unsigned int cmd, void *arg); + +extern unsigned long xencomm_hypercall_hvm_op(int cmd, void *arg); + +extern int xencomm_hypercall_suspend(unsigned long srec); + +/* Using mini xencomm. */ +extern int xencomm_mini_hypercall_console_io(int cmd, int count, char *str); + +extern int xencomm_mini_hypercall_event_channel_op(int cmd, void *op); + +extern int xencomm_mini_hypercall_xen_version(int cmd, void *arg); + +extern int xencomm_mini_hypercall_physdev_op(int cmd, void *op); + +extern int xencomm_mini_hypercall_grant_table_op(unsigned int cmd, void *op, + unsigned int count); + +extern int xencomm_mini_hypercall_sched_op(int cmd, void *arg); + +extern int xencomm_mini_hypercall_multicall(void *call_list, int nr_calls); + +extern int xencomm_mini_hypercall_callback_op(int cmd, void *arg); + +extern int xencomm_mini_hypercall_memory_op(unsigned int cmd, void *arg); + +/* For privcmd. Locally declare argument type to avoid include storm. 
+ Type coherency will be checked within privcmd.c */ +struct privcmd_hypercall; +extern int privcmd_hypercall(struct privcmd_hypercall *hypercall); + +#endif /* _LINUX_XENCOMM_HCALL_H_ */ diff -r 47c098fdce14 -r 85a15e585061 include/asm-ia64/xen/xencomm.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/include/asm-ia64/xen/xencomm.h Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2006 Hollis Blanchard <hollisb@xxxxxxxxxx>, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_XENCOMM_H_ +#define _LINUX_XENCOMM_H_ + +#include <xen/interface/xencomm.h> + +#define XENCOMM_MINI_ADDRS 3 +struct xencomm_mini { + struct xencomm_desc _desc; + uint64_t address[XENCOMM_MINI_ADDRS]; +}; + +/* To avoid additionnal virt to phys conversion, an opaque structure is + presented. */ +struct xencomm_handle; + +extern int xencomm_create(void *buffer, unsigned long bytes, + struct xencomm_handle **desc, gfp_t type); +extern void xencomm_free(struct xencomm_handle *desc); + +extern int xencomm_create_mini(struct xencomm_mini *area, int *nbr_area, + void *buffer, unsigned long bytes, + struct xencomm_handle **ret); + +/* Translate virtual address to physical address. */ +extern unsigned long xencomm_vaddr_to_paddr(unsigned long vaddr); + +/* Inline version. To be used only on linear space (kernel space). */ +static inline struct xencomm_handle * +xencomm_create_inline(void *buffer) +{ + unsigned long paddr; + + paddr = xencomm_vaddr_to_paddr((unsigned long)buffer); + return (struct xencomm_handle *)(paddr | XENCOMM_INLINE_FLAG); +} + +#define xen_guest_handle(hnd) ((hnd).p) + +#endif /* _LINUX_XENCOMM_H_ */ diff -r 47c098fdce14 -r 85a15e585061 lib/Makefile.orig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/Makefile.orig Wed Oct 11 20:19:20 2006 -0400 @@ -0,0 +1,68 @@ +# +# Makefile for some libs needed in the kernel. 
+# + +lib-y := errno.o ctype.o string.o vsprintf.o cmdline.o \ + bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \ + idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \ + sha1.o + +lib-$(CONFIG_SMP) += cpumask.o + +lib-y += kobject.o kref.o kobject_uevent.o klist.o + +obj-y += sort.o parser.o halfmd4.o iomap_copy.o debug_locks.o + +ifeq ($(CONFIG_DEBUG_KOBJECT),y) +CFLAGS_kobject.o += -DDEBUG +CFLAGS_kobject_uevent.o += -DDEBUG +endif + +obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o +obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o +lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o +lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o +lib-$(CONFIG_SEMAPHORE_SLEEPERS) += semaphore-sleepers.o +lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o +lib-$(CONFIG_GENERIC_HWEIGHT) += hweight.o +obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o +obj-$(CONFIG_PLIST) += plist.o +obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o + +ifneq ($(CONFIG_HAVE_DEC_LOCK),y) + lib-y += dec_and_lock.o +endif + +obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o +obj-$(CONFIG_CRC16) += crc16.o +obj-$(CONFIG_CRC32) += crc32.o +obj-$(CONFIG_LIBCRC32C) += libcrc32c.o +obj-$(CONFIG_GENERIC_IOMAP) += iomap.o +obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o + +obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ +obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ +obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ + +obj-$(CONFIG_TEXTSEARCH) += textsearch.o +obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o +obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o +obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o +obj-$(CONFIG_SMP) += percpu_counter.o +obj-$(CONFIG_AUDIT_GENERIC) += audit.o + +obj-$(CONFIG_SWIOTLB) += swiotlb.o +ifneq ($(CONFIG_XEN_IA64_DOM0_NON_VP),y) +swiotlb-$(CONFIG_XEN) := ../arch/i386/kernel/swiotlb.o +endif + +hostprogs-y := gen_crc32table +clean-files := crc32table.h + +$(obj)/crc32.o: $(obj)/crc32table.h + +quiet_cmd_crc32 = GEN $@ + cmd_crc32 = $< > $@ + +$(obj)/crc32table.h: $(obj)/gen_crc32table + $(call cmd,crc32)
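
A quick aside before the hypervisor patch (this sketch is not part of either patch, just an illustration): the xencomm changes above all boil down to handing Xen guest-physical addresses rather than kernel virtual ones. The simplest case is xencomm_create_inline() in include/asm-ia64/xen/xencomm.h, where a buffer in the kernel linear mapping is converted with xencomm_vaddr_to_paddr() and tagged with XENCOMM_INLINE_FLAG, so no separate descriptor is needed. Below is a small stand-alone C sketch of that encoding; the flag value and the address conversion are simplified stand-ins for the real definitions in xen/interface/xencomm.h, so treat it purely as an illustration of the handle layout:

/*
 * Stand-alone illustration of the xencomm "inline" handle encoding.
 * DEMO_XENCOMM_INLINE_FLAG and demo_vaddr_to_paddr() are assumed
 * stand-ins for XENCOMM_INLINE_FLAG and xencomm_vaddr_to_paddr().
 */
#include <stdio.h>
#include <stdint.h>

#define DEMO_XENCOMM_INLINE_FLAG (1ULL << 63)	/* assumed flag bit */

/* Stand-in for xencomm_vaddr_to_paddr(): an identity conversion is
 * enough to demonstrate how the handle is built. */
static uint64_t demo_vaddr_to_paddr(const void *vaddr)
{
	return (uint64_t)(uintptr_t)vaddr;
}

/* Same shape as xencomm_create_inline(): physical address | flag. */
static uint64_t demo_create_inline(const void *buffer)
{
	return demo_vaddr_to_paddr(buffer) | DEMO_XENCOMM_INLINE_FLAG;
}

int main(void)
{
	char arg[32] = "hypercall argument";
	uint64_t handle = demo_create_inline(arg);

	/* The receiving side tests the flag and masks it off to recover
	 * the physical address of the contiguous buffer. */
	if (handle & DEMO_XENCOMM_INLINE_FLAG)
		printf("inline handle -> paddr 0x%llx\n",
		       (unsigned long long)(handle & ~DEMO_XENCOMM_INLINE_FLAG));
	return 0;
}

For buffers that can't be passed as one physical range, xencomm_create() builds a descriptor (a list of page addresses), and xencomm_create_mini() builds the same kind of descriptor in a caller-supplied struct xencomm_mini array so no allocation is needed. The hypervisor-side patch follows:
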
--- xen/arch/ia64/Rules.mk Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/Rules.mk Wed Oct 11 16:10:40 2006 -0400 @@ -5,6 +5,8 @@ HAS_VGA := y HAS_VGA := y VALIDATE_VT ?= n no_warns ?= n +xen_ia64_expose_p2m ?= y +xen_ia64_pervcpu_vhpt ?= y ifneq ($(COMPILE_ARCH),$(TARGET_ARCH)) CROSS_COMPILE ?= /usr/local/sp_env/v2.2.5/i686/bin/ia64-unknown-linux- @@ -36,6 +38,12 @@ ifeq ($(VALIDATE_VT),y) ifeq ($(VALIDATE_VT),y) CFLAGS += -DVALIDATE_VT endif +ifeq ($(xen_ia64_expose_p2m),y) +CFLAGS += -DCONFIG_XEN_IA64_EXPOSE_P2M +endif +ifeq ($(xen_ia64_pervcpu_vhpt),y) +CFLAGS += -DCONFIG_XEN_IA64_PERVCPU_VHPT +endif ifeq ($(no_warns),y) CFLAGS += -Wa,--fatal-warnings -Werror -Wno-uninitialized endif --- xen/arch/ia64/asm-offsets.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/asm-offsets.c Wed Oct 11 16:10:40 2006 -0400 @@ -37,6 +37,8 @@ void foo(void) DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET, offsetof (struct ia64_mca_cpu, init_stack)); BLANK(); + DEFINE(VCPU_VTM_OFFSET_OFS, offsetof(struct vcpu, arch.arch_vmx.vtm.vtm_offset)); + DEFINE(VCPU_VRR0_OFS, offsetof(struct vcpu, arch.arch_vmx.vrr[0])); #ifdef VTI_DEBUG DEFINE(IVT_CUR_OFS, offsetof(struct vcpu, arch.arch_vmx.ivt_current)); DEFINE(IVT_DBG_OFS, offsetof(struct vcpu, arch.arch_vmx.ivt_debug)); --- xen/arch/ia64/linux-xen/sal.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/linux-xen/sal.c Wed Oct 11 16:10:40 2006 -0400 @@ -16,8 +16,10 @@ #ifdef XEN #include <linux/smp.h> +#include <asm/hw_irq.h> #include <xen/lib.h> #endif +#include <asm/delay.h> #include <asm/page.h> #include <asm/sal.h> #include <asm/pal.h> @@ -218,6 +220,77 @@ static void __init sal_desc_ap_wakeup(vo static void __init sal_desc_ap_wakeup(void *p) { } #endif +/* + * HP rx5670 firmware polls for interrupts during SAL_CACHE_FLUSH by reading + * cr.ivr, but it never writes cr.eoi. This leaves any interrupt marked as + * "in-service" and masks other interrupts of equal or lower priority. + * + * HP internal defect reports: F1859, F2775, F3031. + */ +static int sal_cache_flush_drops_interrupts; + +static void __init +check_sal_cache_flush (void) +{ + unsigned long flags, itv; + int cpu; + u64 vector; + + cpu = get_cpu(); + local_irq_save(flags); + + /* + * Schedule a timer interrupt, wait until it's reported, and see if + * SAL_CACHE_FLUSH drops it. 
+ */ + itv = ia64_get_itv(); + BUG_ON((itv & (1 << 16)) == 0); + + ia64_set_itv(IA64_TIMER_VECTOR); + ia64_set_itm(ia64_get_itc() + 1000); + + while (!ia64_get_irr(IA64_TIMER_VECTOR)) + cpu_relax(); + + ia64_sal_cache_flush(3); + + if (ia64_get_irr(IA64_TIMER_VECTOR)) { + vector = ia64_get_ivr(); + ia64_eoi(); + } else { + sal_cache_flush_drops_interrupts = 1; + printk(KERN_ERR "SAL: SAL_CACHE_FLUSH drops interrupts; " + "PAL_CACHE_FLUSH will be used instead\n"); + ia64_eoi(); + } + + ia64_set_itv(itv); + local_irq_restore(flags); + put_cpu(); +} + +s64 +ia64_sal_cache_flush (u64 cache_type) +{ + struct ia64_sal_retval isrv; + + if (sal_cache_flush_drops_interrupts) { + unsigned long flags; + u64 progress; + s64 rc; + + progress = 0; + local_irq_save(flags); + rc = ia64_pal_cache_flush(cache_type, + PAL_CACHE_FLUSH_INVALIDATE, &progress, NULL); + local_irq_restore(flags); + return rc; + } + + SAL_CALL(isrv, SAL_CACHE_FLUSH, cache_type, 0, 0, 0, 0, 0, 0); + return isrv.status; +} + void __init ia64_sal_init (struct ia64_sal_systab *systab) { @@ -271,6 +344,8 @@ ia64_sal_init (struct ia64_sal_systab *s } p += SAL_DESC_SIZE(*p); } + + check_sal_cache_flush(); } int --- xen/arch/ia64/linux-xen/unaligned.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/linux-xen/unaligned.c Wed Oct 11 16:10:40 2006 -0400 @@ -304,7 +304,7 @@ set_rse_reg (struct pt_regs *regs, unsig unsigned long *bsp, *bspstore, *addr, *rnat_addr; unsigned long *kbs = (void *) current + IA64_RBS_OFFSET; unsigned long nat_mask; - unsigned long old_rsc,new_rsc; + unsigned long old_rsc, new_rsc, psr; unsigned long rnat; long sof = (regs->cr_ifs) & 0x7f; long sor = 8 * ((regs->cr_ifs >> 14) & 0xf); @@ -321,16 +321,17 @@ set_rse_reg (struct pt_regs *regs, unsig ridx = rotate_reg(sor, rrb_gr, ridx); old_rsc=ia64_get_rsc(); - new_rsc=old_rsc&(~0x3); + /* put RSC to lazy mode, and set loadrs 0 */ + new_rsc = old_rsc & (~0x3fff0003); ia64_set_rsc(new_rsc); - + bsp = kbs + (regs->loadrs >> 19); /* 16 + 3 */ + + addr = ia64_rse_skip_regs(bsp, -sof + ridx); + nat_mask = 1UL << ia64_rse_slot_num(addr); + rnat_addr = ia64_rse_rnat_addr(addr); + + local_irq_save(psr); bspstore = (unsigned long*)ia64_get_bspstore(); - bsp =kbs + (regs->loadrs >> 19);//16+3 - - addr = ia64_rse_skip_regs(bsp, -sof + ridx); - nat_mask = 1UL << ia64_rse_slot_num(addr); - rnat_addr = ia64_rse_rnat_addr(addr); - if(addr >= bspstore){ ia64_flushrs (); @@ -358,6 +359,7 @@ set_rse_reg (struct pt_regs *regs, unsig ia64_set_bspstore (bspstore); ia64_set_rnat(rnat); } + local_irq_restore(psr); ia64_set_rsc(old_rsc); } --- xen/arch/ia64/vmx/Makefile Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/vmx/Makefile Wed Oct 11 16:10:40 2006 -0400 @@ -17,3 +17,4 @@ obj-y += vmx_virt.o obj-y += vmx_virt.o obj-y += vmx_vsa.o obj-y += vtlb.o +obj-y += optvfault.o --- xen/arch/ia64/vmx/mmio.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/vmx/mmio.c Wed Oct 11 16:10:40 2006 -0400 @@ -428,7 +428,7 @@ void emulate_io_inst(VCPU *vcpu, u64 pad IA64_BUNDLE bundle; int slot, dir=0, inst_type; size_t size; - u64 data, value,post_update, slot1a, slot1b, temp; + u64 data, post_update, slot1a, slot1b, temp; INST64 inst; regs=vcpu_regs(vcpu); if (IA64_RETRY == __vmx_get_domain_bundle(regs->cr_iip, &bundle)) { @@ -454,7 +454,6 @@ void emulate_io_inst(VCPU *vcpu, u64 pad vcpu_get_gr_nat(vcpu,inst.M4.r2,&data); }else if((inst.M1.x6>>2)<0xb){ // read dir=IOREQ_READ; - vcpu_get_gr_nat(vcpu,inst.M1.r1,&value); } } // Integer Load + Reg update @@ -462,7 +461,6 @@ void emulate_io_inst(VCPU *vcpu, 
u64 pad inst_type = SL_INTEGER; dir = IOREQ_READ; //write size = (inst.M2.x6&0x3); - vcpu_get_gr_nat(vcpu,inst.M2.r1,&value); vcpu_get_gr_nat(vcpu,inst.M2.r3,&temp); vcpu_get_gr_nat(vcpu,inst.M2.r2,&post_update); temp += post_update; @@ -485,7 +483,6 @@ void emulate_io_inst(VCPU *vcpu, u64 pad }else if((inst.M3.x6>>2)<0xb){ // read dir=IOREQ_READ; - vcpu_get_gr_nat(vcpu,inst.M3.r1,&value); vcpu_get_gr_nat(vcpu,inst.M3.r3,&temp); post_update = (inst.M3.i<<7)+inst.M3.imm7; if(inst.M3.s) @@ -597,13 +594,6 @@ void emulate_io_inst(VCPU *vcpu, u64 pad mmio_access(vcpu, padr, &data, size, ma, dir); }else{ mmio_access(vcpu, padr, &data, size, ma, dir); - if(size==1) - data = (value & 0xffffffffffffff00U) | (data & 0xffU); - else if(size==2) - data = (value & 0xffffffffffff0000U) | (data & 0xffffU); - else if(size==4) - data = (value & 0xffffffff00000000U) | (data & 0xffffffffU); - if(inst_type==SL_INTEGER){ //gp vcpu_set_gr(vcpu,inst.M1.r1,data,0); }else{ --- xen/arch/ia64/vmx/vlsapic.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/vmx/vlsapic.c Wed Oct 11 16:10:40 2006 -0400 @@ -298,7 +298,7 @@ static void update_vhpi(VCPU *vcpu, int // TODO: Add support for XENO if ( VCPU(vcpu,vac).a_int ) { ia64_call_vsa ( PAL_VPS_SET_PENDING_INTERRUPT, - (uint64_t) &(vcpu->arch.privregs), 0, 0,0,0,0,0); + (uint64_t)vcpu->arch.privregs, 0, 0, 0, 0, 0, 0); } } @@ -683,9 +683,5 @@ void vhpi_detection(VCPU *vcpu) void vmx_vexirq(VCPU *vcpu) { - static uint64_t vexirq_count=0; - - vexirq_count ++; - printk("Virtual ex-irq %ld\n", vexirq_count); generate_exirq (vcpu); } --- xen/arch/ia64/vmx/vmmu.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/vmx/vmmu.c Wed Oct 11 16:10:40 2006 -0400 @@ -456,7 +456,15 @@ IA64FAULT vmx_vcpu_itr_d(VCPU *vcpu, u64 } #endif pte &= ~PAGE_FLAGS_RV_MASK; - thash_purge_entries(vcpu, va, ps); + + /* This is a bad workaround + In Linux, region 7 use 16M pagesize and is identity mapped. + VHPT page size is 16K in XEN. If purge VHPT while guest insert 16M, + it will iteratively purge VHPT 1024 times, which makes XEN/IPF very + slow. 
XEN doesn't purge VHPT + */ + if (ps != _PAGE_SIZE_16M) + thash_purge_entries(vcpu, va, ps); gpfn = (pte & _PAGE_PPN_MASK)>> PAGE_SHIFT; if (VMX_DOMAIN(vcpu) && __gpfn_is_io(vcpu->domain, gpfn)) pte |= VTLB_PTE_IO; @@ -637,37 +645,30 @@ IA64FAULT vmx_vcpu_tpa(VCPU *vcpu, UINT6 visr.ei=pt_isr.ei; visr.ir=pt_isr.ir; vpsr.val = VCPU(vcpu, vpsr); - if(vpsr.ic==0){ - visr.ni=1; - } visr.na=1; data = vtlb_lookup(vcpu, vadr, DSIDE_TLB); if(data){ if(data->p==0){ - visr.na=1; vcpu_set_isr(vcpu,visr.val); - page_not_present(vcpu, vadr); + data_page_not_present(vcpu, vadr); return IA64_FAULT; }else if(data->ma == VA_MATTR_NATPAGE){ - visr.na = 1; vcpu_set_isr(vcpu, visr.val); dnat_page_consumption(vcpu, vadr); return IA64_FAULT; }else{ *padr = ((data->ppn >> (data->ps - 12)) << data->ps) | - (vadr & (PSIZE(data->ps) - 1)); + (vadr & (PSIZE(data->ps) - 1)); return IA64_NO_FAULT; } } data = vhpt_lookup(vadr); if(data){ if(data->p==0){ - visr.na=1; vcpu_set_isr(vcpu,visr.val); - page_not_present(vcpu, vadr); + data_page_not_present(vcpu, vadr); return IA64_FAULT; }else if(data->ma == VA_MATTR_NATPAGE){ - visr.na = 1; vcpu_set_isr(vcpu, visr.val); dnat_page_consumption(vcpu, vadr); return IA64_FAULT; --- xen/arch/ia64/vmx/vmx_entry.S Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/vmx/vmx_entry.S Wed Oct 11 16:10:40 2006 -0400 @@ -669,7 +669,7 @@ 1: // re-pin mappings for guest_vhpt - mov r24=IA64_TR_PERVP_VHPT + mov r24=IA64_TR_VHPT movl r25=PAGE_KERNEL ;; or loc5 = r25,loc5 // construct PA | page properties --- xen/arch/ia64/vmx/vmx_init.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/vmx/vmx_init.c Wed Oct 11 16:10:40 2006 -0400 @@ -378,7 +378,8 @@ static void vmx_build_physmap_table(stru for (j = io_ranges[i].start; j < io_ranges[i].start + io_ranges[i].size; j += PAGE_SIZE) - __assign_domain_page(d, j, io_ranges[i].type, ASSIGN_writable); + (void)__assign_domain_page(d, j, io_ranges[i].type, + ASSIGN_writable); } /* Map normal memory below 3G */ --- xen/arch/ia64/vmx/vmx_interrupt.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/vmx/vmx_interrupt.c Wed Oct 11 16:10:40 2006 -0400 @@ -383,14 +383,29 @@ dnat_page_consumption (VCPU *vcpu, uint6 /* Deal with * Page not present vector */ -void -page_not_present(VCPU *vcpu, u64 vadr) +static void +__page_not_present(VCPU *vcpu, u64 vadr) { /* If vPSR.ic, IFA, ITIR */ set_ifa_itir_iha (vcpu, vadr, 1, 1, 0); inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR); } + +void +data_page_not_present(VCPU *vcpu, u64 vadr) +{ + __page_not_present(vcpu, vadr); +} + + +void +inst_page_not_present(VCPU *vcpu, u64 vadr) +{ + __page_not_present(vcpu, vadr); +} + + /* Deal with * Data access rights vector */ --- xen/arch/ia64/vmx/vmx_ivt.S Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/vmx/vmx_ivt.S Wed Oct 11 16:10:40 2006 -0400 @@ -772,12 +772,20 @@ ENTRY(vmx_single_step_trap) VMX_REFLECT(36) END(vmx_single_step_trap) + .global vmx_virtualization_fault_back .org vmx_ia64_ivt+0x6100 ///////////////////////////////////////////////////////////////////////////////////////// // 0x6100 Entry 37 (size 16 bundles) Virtualization Fault ENTRY(vmx_virtualization_fault) // VMX_DBG_FAULT(37) mov r31=pr + ;; + cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24 + cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24 + (p6) br.dptk.many asm_mov_from_ar + (p7) br.dptk.many asm_mov_from_rr + ;; +vmx_virtualization_fault_back: mov r19=37 adds r16 = IA64_VCPU_CAUSE_OFFSET,r21 adds r17 = IA64_VCPU_OPCODE_OFFSET,r21 --- xen/arch/ia64/vmx/vmx_phy_mode.c Tue Oct 10 21:05:50 2006 +0100 +++ 
xen/arch/ia64/vmx/vmx_phy_mode.c Wed Oct 11 16:10:40 2006 -0400 @@ -126,10 +126,16 @@ vmx_init_all_rr(VCPU *vcpu) vmx_init_all_rr(VCPU *vcpu) { VMX(vcpu, vrr[VRN0]) = 0x38; + // enable vhpt in guest physical mode + vcpu->arch.metaphysical_rr0 |= 1; + vcpu->arch.metaphysical_saved_rr0 = vrrtomrr(vcpu, 0x38); VMX(vcpu, vrr[VRN1]) = 0x38; VMX(vcpu, vrr[VRN2]) = 0x38; VMX(vcpu, vrr[VRN3]) = 0x38; VMX(vcpu, vrr[VRN4]) = 0x38; + // enable vhpt in guest physical mode + vcpu->arch.metaphysical_rr4 |= 1; + vcpu->arch.metaphysical_saved_rr4 = vrrtomrr(vcpu, 0x38); VMX(vcpu, vrr[VRN5]) = 0x38; VMX(vcpu, vrr[VRN6]) = 0x38; VMX(vcpu, vrr[VRN7]) = 0x738; @@ -141,10 +147,8 @@ vmx_load_all_rr(VCPU *vcpu) vmx_load_all_rr(VCPU *vcpu) { unsigned long psr; - ia64_rr phy_rr; local_irq_save(psr); - /* WARNING: not allow co-exist of both virtual mode and physical * mode in same region @@ -154,24 +158,16 @@ vmx_load_all_rr(VCPU *vcpu) panic_domain(vcpu_regs(vcpu), "Unexpected domain switch in phy emul\n"); } - phy_rr.rrval = vcpu->arch.metaphysical_rr0; - //phy_rr.ps = PAGE_SHIFT; - phy_rr.ve = 1; - - ia64_set_rr((VRN0 << VRN_SHIFT), phy_rr.rrval); - ia64_dv_serialize_data(); - phy_rr.rrval = vcpu->arch.metaphysical_rr4; - //phy_rr.ps = PAGE_SHIFT; - phy_rr.ve = 1; - - ia64_set_rr((VRN4 << VRN_SHIFT), phy_rr.rrval); + ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0); + ia64_dv_serialize_data(); + ia64_set_rr((VRN4 << VRN_SHIFT), vcpu->arch.metaphysical_rr4); ia64_dv_serialize_data(); } else { ia64_set_rr((VRN0 << VRN_SHIFT), - vrrtomrr(vcpu, VMX(vcpu, vrr[VRN0]))); + vcpu->arch.metaphysical_saved_rr0); ia64_dv_serialize_data(); ia64_set_rr((VRN4 << VRN_SHIFT), - vrrtomrr(vcpu, VMX(vcpu, vrr[VRN4]))); + vcpu->arch.metaphysical_saved_rr4); ia64_dv_serialize_data(); } @@ -209,21 +205,11 @@ switch_to_physical_rid(VCPU *vcpu) switch_to_physical_rid(VCPU *vcpu) { UINT64 psr; - ia64_rr phy_rr, mrr; - /* Save original virtual mode rr[0] and rr[4] */ psr=ia64_clear_ic(); - phy_rr.rrval = vcpu->domain->arch.metaphysical_rr0; - mrr.rrval = ia64_get_rr(VRN0 << VRN_SHIFT); - phy_rr.ps = mrr.ps; - phy_rr.ve = 1; - ia64_set_rr(VRN0<<VRN_SHIFT, phy_rr.rrval); - ia64_srlz_d(); - phy_rr.rrval = vcpu->domain->arch.metaphysical_rr4; - mrr.rrval = ia64_get_rr(VRN4 << VRN_SHIFT); - phy_rr.ps = mrr.ps; - phy_rr.ve = 1; - ia64_set_rr(VRN4<<VRN_SHIFT, phy_rr.rrval); + ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_rr0); + ia64_srlz_d(); + ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_rr4); ia64_srlz_d(); ia64_set_psr(psr); @@ -236,15 +222,10 @@ switch_to_virtual_rid(VCPU *vcpu) switch_to_virtual_rid(VCPU *vcpu) { UINT64 psr; - ia64_rr mrr; - psr=ia64_clear_ic(); - - vcpu_get_rr(vcpu,VRN0<<VRN_SHIFT,&mrr.rrval); - ia64_set_rr(VRN0<<VRN_SHIFT, vrrtomrr(vcpu, mrr.rrval)); - ia64_srlz_d(); - vcpu_get_rr(vcpu,VRN4<<VRN_SHIFT,&mrr.rrval); - ia64_set_rr(VRN4<<VRN_SHIFT, vrrtomrr(vcpu, mrr.rrval)); + ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_saved_rr0); + ia64_srlz_d(); + ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_saved_rr4); ia64_srlz_d(); ia64_set_psr(psr); ia64_srlz_i(); --- xen/arch/ia64/vmx/vmx_process.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/vmx/vmx_process.c Wed Oct 11 16:10:40 2006 -0400 @@ -81,6 +81,7 @@ void vmx_reflect_interruption(UINT64 ifa void vmx_reflect_interruption(UINT64 ifa,UINT64 isr,UINT64 iim, UINT64 vector,REGS *regs) { + UINT64 status; VCPU *vcpu = current; UINT64 vpsr = VCPU(vcpu, vpsr); vector=vec2off[vector]; @@ -89,13 +90,23 @@ void 
vmx_reflect_interruption(UINT64 ifa } else{ // handle fpswa emulation // fp fault - if(vector == IA64_FP_FAULT_VECTOR && !handle_fpu_swa(1, regs, isr)){ - vmx_vcpu_increment_iip(vcpu); - return; + if (vector == IA64_FP_FAULT_VECTOR) { + status = handle_fpu_swa(1, regs, isr); + if (!status) { + vmx_vcpu_increment_iip(vcpu); + return; + } else if (IA64_RETRY == status) + return; } //fp trap - else if(vector == IA64_FP_TRAP_VECTOR && !handle_fpu_swa(0, regs, isr)){ - return; + else if (vector == IA64_FP_TRAP_VECTOR) { + status = handle_fpu_swa(0, regs, isr); + if (!status) + return; + else if (IA64_RETRY == status) { + vmx_vcpu_decrement_iip(vcpu); + return; + } } } VCPU(vcpu,isr)=isr; @@ -187,7 +198,7 @@ void leave_hypervisor_tail(struct pt_reg { struct domain *d = current->domain; struct vcpu *v = current; - int callback_irq; + // FIXME: Will this work properly if doing an RFI??? if (!is_idle_domain(d) ) { // always comes from guest // struct pt_regs *user_regs = vcpu_regs(current); @@ -215,11 +226,14 @@ void leave_hypervisor_tail(struct pt_reg // v->arch.irq_new_pending = 1; // } - callback_irq = d->arch.hvm_domain.params[HVM_PARAM_CALLBACK_IRQ]; - if (callback_irq != 0 && local_events_need_delivery()) { - /*inject para-device call back irq*/ - v->vcpu_info->evtchn_upcall_mask = 1; - vmx_vcpu_pend_interrupt(v, callback_irq); + if (v->vcpu_id == 0) { + int callback_irq = + d->arch.hvm_domain.params[HVM_PARAM_CALLBACK_IRQ]; + if (callback_irq != 0 && local_events_need_delivery()) { + /*inject para-device call back irq*/ + v->vcpu_info->evtchn_upcall_mask = 1; + vmx_vcpu_pend_interrupt(v, callback_irq); + } } if ( v->arch.irq_new_pending ) { @@ -252,18 +266,20 @@ vmx_hpw_miss(u64 vadr , u64 vec, REGS* r vmx_hpw_miss(u64 vadr , u64 vec, REGS* regs) { IA64_PSR vpsr; - int type=ISIDE_TLB; + int type; u64 vhpt_adr, gppa, pteval, rr, itir; ISR misr; -// REGS *regs; thash_data_t *data; VCPU *v = current; -#ifdef VTLB_DEBUG - check_vtlb_sanity(vtlb); - dump_vtlb(vtlb); -#endif vpsr.val = VCPU(v, vpsr); misr.val=VMX(v,cr_isr); + + if (vec == 1) + type = ISIDE_TLB; + else if (vec == 2) + type = DSIDE_TLB; + else + panic_domain(regs, "wrong vec:%lx\n", vec); if(is_physical_mode(v)&&(!(vadr<<1>>62))){ if(vec==2){ @@ -275,11 +291,6 @@ vmx_hpw_miss(u64 vadr , u64 vec, REGS* r physical_tlb_miss(v, vadr); return IA64_FAULT; } - if(vec == 1) type = ISIDE_TLB; - else if(vec == 2) type = DSIDE_TLB; - else panic_domain(regs,"wrong vec:%lx\n",vec); - -// prepare_if_physical_mode(v); if((data=vtlb_lookup(v, vadr,type))!=0){ if (v->domain != dom0 && type == DSIDE_TLB) { @@ -298,46 +309,44 @@ vmx_hpw_miss(u64 vadr , u64 vec, REGS* r thash_vhpt_insert(v,data->page_flags, data->itir ,vadr); }else if(type == DSIDE_TLB){ + if (misr.sp) return vmx_handle_lds(regs); + if(!vhpt_enabled(v, vadr, misr.rs?RSE_REF:DATA_REF)){ if(vpsr.ic){ vcpu_set_isr(v, misr.val); alt_dtlb(v, vadr); return IA64_FAULT; } else{ - if(misr.sp){ - //TODO lds emulation - //panic("Don't support speculation load"); - return vmx_handle_lds(regs); - }else{ - nested_dtlb(v); - return IA64_FAULT; - } + nested_dtlb(v); + return IA64_FAULT; } } else{ vmx_vcpu_thash(v, vadr, &vhpt_adr); if(!guest_vhpt_lookup(vhpt_adr, &pteval)){ - if ((pteval & _PAGE_P) && - ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST)) { + if (!(pteval & _PAGE_P)) { + if (vpsr.ic) { + vcpu_set_isr(v, misr.val); + data_page_not_present(v, vadr); + return IA64_FAULT; + } else { + nested_dtlb(v); + return IA64_FAULT; + } + } + else if ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST) { vcpu_get_rr(v, 
vadr, &rr); itir = rr&(RR_RID_MASK | RR_PS_MASK); thash_purge_and_insert(v, pteval, itir, vadr, DSIDE_TLB); return IA64_NO_FAULT; - } - if(vpsr.ic){ + } else if (vpsr.ic) { vcpu_set_isr(v, misr.val); dtlb_fault(v, vadr); return IA64_FAULT; }else{ - if(misr.sp){ - //TODO lds emulation - //panic("Don't support speculation load"); - return vmx_handle_lds(regs); - }else{ - nested_dtlb(v); - return IA64_FAULT; - } + nested_dtlb(v); + return IA64_FAULT; } }else{ if(vpsr.ic){ @@ -345,22 +354,16 @@ vmx_hpw_miss(u64 vadr , u64 vec, REGS* r dvhpt_fault(v, vadr); return IA64_FAULT; }else{ - if(misr.sp){ - //TODO lds emulation - //panic("Don't support speculation load"); - return vmx_handle_lds(regs); - }else{ - nested_dtlb(v); - return IA64_FAULT; - } + nested_dtlb(v); + return IA64_FAULT; } } } }else if(type == ISIDE_TLB){ + + if (!vpsr.ic) + misr.ni = 1; if(!vhpt_enabled(v, vadr, misr.rs?RSE_REF:DATA_REF)){ - if(!vpsr.ic){ - misr.ni=1; - } vcpu_set_isr(v, misr.val); alt_itlb(v, vadr); return IA64_FAULT; @@ -372,17 +375,12 @@ vmx_hpw_miss(u64 vadr , u64 vec, REGS* r itir = rr&(RR_RID_MASK | RR_PS_MASK); thash_purge_and_insert(v, pteval, itir, vadr, ISIDE_TLB); return IA64_NO_FAULT; - } - if(!vpsr.ic){ - misr.ni=1; - } - vcpu_set_isr(v, misr.val); - itlb_fault(v, vadr); - return IA64_FAULT; + } else { + vcpu_set_isr(v, misr.val); + inst_page_not_present(v, vadr); + return IA64_FAULT; + } }else{ - if(!vpsr.ic){ - misr.ni=1; - } vcpu_set_isr(v, misr.val); ivhpt_fault(v, vadr); return IA64_FAULT; --- xen/arch/ia64/vmx/vmx_vcpu.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/vmx/vmx_vcpu.c Wed Oct 11 16:10:40 2006 -0400 @@ -172,6 +172,21 @@ IA64FAULT vmx_vcpu_increment_iip(VCPU *v } +IA64FAULT vmx_vcpu_decrement_iip(VCPU *vcpu) +{ + REGS *regs = vcpu_regs(vcpu); + IA64_PSR *ipsr = (IA64_PSR *)®s->cr_ipsr; + + if (ipsr->ri == 0) { + ipsr->ri = 2; + regs->cr_iip -= 16; + } else { + ipsr->ri--; + } + return (IA64_NO_FAULT); +} + + IA64FAULT vmx_vcpu_cover(VCPU *vcpu) { REGS *regs = vcpu_regs(vcpu); @@ -197,19 +212,32 @@ IA64FAULT vmx_vcpu_set_rr(VCPU *vcpu, UI { ia64_rr oldrr,newrr; extern void * pal_vaddr; + u64 rrval; vcpu_get_rr(vcpu, reg, &oldrr.rrval); newrr.rrval=val; if (newrr.rid >= (1 << vcpu->domain->arch.rid_bits)) panic_domain (NULL, "use of invalid rid %x\n", newrr.rid); - VMX(vcpu,vrr[reg>>61]) = val; - switch((u64)(reg>>61)) { + VMX(vcpu,vrr[reg>>VRN_SHIFT]) = val; + switch((u64)(reg>>VRN_SHIFT)) { case VRN7: vmx_switch_rr7(vrrtomrr(vcpu,val),vcpu->domain->shared_info, (void *)vcpu->arch.privregs, (void *)vcpu->arch.vhpt.hash, pal_vaddr ); break; + case VRN4: + rrval = vrrtomrr(vcpu,val); + vcpu->arch.metaphysical_saved_rr4 = rrval; + if (!is_physical_mode(vcpu)) + ia64_set_rr(reg,rrval); + break; + case VRN0: + rrval = vrrtomrr(vcpu,val); + vcpu->arch.metaphysical_saved_rr0 = rrval; + if (!is_physical_mode(vcpu)) + ia64_set_rr(reg,rrval); + break; default: ia64_set_rr(reg,vrrtomrr(vcpu,val)); break; --- xen/arch/ia64/xen/Makefile Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/Makefile Wed Oct 11 16:10:40 2006 -0400 @@ -25,5 +25,7 @@ obj-y += xentime.o obj-y += xentime.o obj-y += flushd.o obj-y += privop_stat.o +obj-y += xenpatch.o +obj-y += xencomm.o obj-$(crash_debug) += gdbstub.o --- xen/arch/ia64/xen/dom0_ops.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/dom0_ops.c Wed Oct 11 16:10:40 2006 -0400 @@ -256,6 +256,7 @@ do_dom0vp_op(unsigned long cmd, } else { ret = (ret & _PFN_MASK) >> PAGE_SHIFT;//XXX pte_pfn() } + perfc_incrc(dom0vp_phystomach); break; case 
IA64_DOM0VP_machtophys: if (!mfn_valid(arg0)) { @@ -263,6 +264,7 @@ do_dom0vp_op(unsigned long cmd, break; } ret = get_gpfn_from_mfn(arg0); + perfc_incrc(dom0vp_machtophys); break; case IA64_DOM0VP_zap_physmap: ret = dom0vp_zap_physmap(d, arg0, (unsigned int)arg1); @@ -270,6 +272,9 @@ do_dom0vp_op(unsigned long cmd, case IA64_DOM0VP_add_physmap: ret = dom0vp_add_physmap(d, arg0, arg1, (unsigned int)arg2, (domid_t)arg3); + break; + case IA64_DOM0VP_expose_p2m: + ret = dom0vp_expose_p2m(d, arg0, arg1, arg2, arg3); break; default: ret = -1; --- xen/arch/ia64/xen/domain.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/domain.c Wed Oct 11 16:10:40 2006 -0400 @@ -46,6 +46,7 @@ #include <asm/regionreg.h> #include <asm/dom_fw.h> #include <asm/shadow.h> +#include <xen/guest_access.h> unsigned long dom0_size = 512*1024*1024; unsigned long dom0_align = 64*1024*1024; @@ -58,13 +59,8 @@ extern unsigned long running_on_sim; extern char dom0_command_line[]; -/* FIXME: where these declarations should be there ? */ -extern void serial_input_init(void); +/* forward declaration */ static void init_switch_stack(struct vcpu *v); -extern void vmx_do_launch(struct vcpu *); - -/* this belongs in include/asm, but there doesn't seem to be a suitable place */ -extern struct vcpu *ia64_switch_to (struct vcpu *next_task); /* Address of vpsr.i (in fact evtchn_upcall_mask) of current vcpu. This is a Xen virtual address. */ @@ -72,6 +68,16 @@ DEFINE_PER_CPU(int *, current_psr_ic_add DEFINE_PER_CPU(int *, current_psr_ic_addr); #include <xen/sched-if.h> + +static void +ia64_disable_vhpt_walker(void) +{ + // disable VHPT. ia64_new_rr7() might cause VHPT + // fault without this because it flushes dtr[IA64_TR_VHPT] + // (VHPT_SIZE_LOG2 << 2) is just for avoid + // Reserved Register/Field fault. + ia64_set_pta(VHPT_SIZE_LOG2 << 2); +} static void flush_vtlb_for_context_switch(struct vcpu* vcpu) { @@ -96,10 +102,13 @@ static void flush_vtlb_for_context_switc if (VMX_DOMAIN(vcpu)) { // currently vTLB for vt-i domian is per vcpu. // so any flushing isn't needed. + } else if (HAS_PERVCPU_VHPT(vcpu->domain)) { + // nothing to do } else { - vhpt_flush(); + local_vhpt_flush(); } local_flush_tlb_all(); + perfc_incrc(flush_vtlb_for_context_switch); } } @@ -114,9 +123,9 @@ void schedule_tail(struct vcpu *prev) current->processor); } else { ia64_set_iva(&ia64_ivt); - ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | - VHPT_ENABLED); + ia64_disable_vhpt_walker(); load_region_regs(current); + ia64_set_pta(vcpu_pta(current)); vcpu_load_kernel_regs(current); __ia64_per_cpu_var(current_psr_i_addr) = ¤t->domain-> shared_info->vcpu_info[current->vcpu_id].evtchn_upcall_mask; @@ -130,7 +139,6 @@ void context_switch(struct vcpu *prev, s void context_switch(struct vcpu *prev, struct vcpu *next) { uint64_t spsr; - uint64_t pta; local_irq_save(spsr); @@ -167,9 +175,9 @@ void context_switch(struct vcpu *prev, s nd = current->domain; if (!is_idle_domain(nd)) { - ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | - VHPT_ENABLED); + ia64_disable_vhpt_walker(); load_region_regs(current); + ia64_set_pta(vcpu_pta(current)); vcpu_load_kernel_regs(current); vcpu_set_next_timer(current); if (vcpu_timer_expired(current)) @@ -183,14 +191,13 @@ void context_switch(struct vcpu *prev, s * walker. Then all accesses happen within idle context will * be handled by TR mapping and identity mapping. 
*/ - pta = ia64_get_pta(); - ia64_set_pta(pta & ~VHPT_ENABLED); + ia64_disable_vhpt_walker(); __ia64_per_cpu_var(current_psr_i_addr) = NULL; __ia64_per_cpu_var(current_psr_ic_addr) = NULL; } } + local_irq_restore(spsr); flush_vtlb_for_context_switch(current); - local_irq_restore(spsr); context_saved(prev); } @@ -273,6 +280,13 @@ struct vcpu *alloc_vcpu_struct(struct do if (!d->arch.is_vti) { int order; int i; + // vti domain has its own vhpt policy. + if (HAS_PERVCPU_VHPT(d)) { + if (pervcpu_vhpt_alloc(v) < 0) { + free_xenheap_pages(v, KERNEL_STACK_SIZE_ORDER); + return NULL; + } + } /* Create privregs page only if not VTi. */ order = get_order_from_shift(XMAPPEDREGS_SHIFT); @@ -315,6 +329,8 @@ struct vcpu *alloc_vcpu_struct(struct do void relinquish_vcpu_resources(struct vcpu *v) { + if (HAS_PERVCPU_VHPT(v->domain)) + pervcpu_vhpt_free(v); if (v->arch.privregs != NULL) { free_xenheap_pages(v->arch.privregs, get_order_from_shift(XMAPPEDREGS_SHIFT)); @@ -350,6 +366,11 @@ static void init_switch_stack(struct vcp memset(v->arch._thread.fph,0,sizeof(struct ia64_fpreg)*96); } +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT +static int opt_pervcpu_vhpt = 1; +integer_param("pervcpu_vhpt", opt_pervcpu_vhpt); +#endif + int arch_domain_create(struct domain *d) { int i; @@ -364,6 +385,11 @@ int arch_domain_create(struct domain *d) if (is_idle_domain(d)) return 0; +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT + d->arch.has_pervcpu_vhpt = opt_pervcpu_vhpt; + DPRINTK("%s:%d domain %d pervcpu_vhpt %d\n", + __func__, __LINE__, d->domain_id, d->arch.has_pervcpu_vhpt); +#endif d->shared_info = alloc_xenheap_pages(get_order_from_shift(XSI_SHIFT)); if (d->shared_info == NULL) goto fail_nomem; @@ -1101,9 +1127,6 @@ int construct_dom0(struct domain *d, physdev_init_dom0(d); - // FIXME: Hack for keyboard input - //serial_input_init(); - return 0; } --- xen/arch/ia64/xen/faults.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/faults.c Wed Oct 11 16:10:40 2006 -0400 @@ -228,10 +228,10 @@ void ia64_do_page_fault (unsigned long a // indicate a bad xen pointer printk("*** xen_handle_domain_access: exception table" " lookup failed, iip=0x%lx, addr=0x%lx, spinning...\n", - iip, address); + iip, address); panic_domain(regs,"*** xen_handle_domain_access: exception table" - " lookup failed, iip=0x%lx, addr=0x%lx, spinning...\n", - iip, address); + " lookup failed, iip=0x%lx, addr=0x%lx, spinning...\n", + iip, address); } return; } --- xen/arch/ia64/xen/fw_emul.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/fw_emul.c Wed Oct 11 16:10:40 2006 -0400 @@ -16,7 +16,6 @@ * */ #include <xen/config.h> -#include <xen/console.h> #include <asm/system.h> #include <asm/pgalloc.h> @@ -29,6 +28,7 @@ #include <asm/vcpu.h> #include <asm/dom_fw.h> #include <asm/uaccess.h> +#include <xen/console.h> extern unsigned long running_on_sim; --- xen/arch/ia64/xen/hypercall.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/hypercall.c Wed Oct 11 16:10:40 2006 -0400 @@ -32,7 +32,6 @@ #include <xen/event.h> #include <xen/perfc.h> -static long do_physdev_op_compat(XEN_GUEST_HANDLE(physdev_op_t) uop); static long do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg); static long do_callback_op(int cmd, XEN_GUEST_HANDLE(void) arg); @@ -54,10 +53,10 @@ const hypercall_t ia64_hypercall_table[N (hypercall_t)do_multicall, (hypercall_t)do_ni_hypercall, /* do_update_va_mapping */ (hypercall_t)do_ni_hypercall, /* do_set_timer_op */ /* 15 */ - (hypercall_t)do_event_channel_op_compat, + (hypercall_t)do_ni_hypercall, (hypercall_t)do_xen_version, 
(hypercall_t)do_console_io, - (hypercall_t)do_physdev_op_compat, + (hypercall_t)do_ni_hypercall, (hypercall_t)do_grant_table_op, /* 20 */ (hypercall_t)do_ni_hypercall, /* do_vm_assist */ (hypercall_t)do_ni_hypercall, /* do_update_va_mapping_othe */ @@ -108,19 +107,6 @@ xen_hypercall (struct pt_regs *regs) xen_hypercall (struct pt_regs *regs) { uint32_t cmd = (uint32_t)regs->r2; - struct vcpu *v = current; - - if (cmd == __HYPERVISOR_grant_table_op) { - XEN_GUEST_HANDLE(void) uop; - - v->arch.hypercall_param.va = regs->r15; - v->arch.hypercall_param.pa1 = regs->r17; - v->arch.hypercall_param.pa2 = regs->r18; - set_xen_guest_handle(uop, (void *)regs->r15); - regs->r8 = do_grant_table_op(regs->r14, uop, regs->r16); - v->arch.hypercall_param.va = 0; - return IA64_NO_FAULT; - } if (cmd < NR_hypercalls) { perfc_incra(hypercalls, cmd); @@ -133,7 +119,21 @@ xen_hypercall (struct pt_regs *regs) regs->r19); } else regs->r8 = -ENOSYS; - + + return IA64_NO_FAULT; +} + +static IA64FAULT +xen_fast_hypercall (struct pt_regs *regs) +{ + uint32_t cmd = (uint32_t)regs->r2; + switch (cmd) { + case __HYPERVISOR_ia64_fast_eoi: + regs->r8 = pirq_guest_eoi(current->domain, regs->r14); + break; + default: + regs->r8 = -ENOSYS; + } return IA64_NO_FAULT; } @@ -201,8 +201,8 @@ fw_hypercall_fpswa (struct vcpu *v) return PSCBX(v, fpswa_ret); } -static IA64FAULT -fw_hypercall (struct pt_regs *regs) +IA64FAULT +ia64_hypercall(struct pt_regs *regs) { struct vcpu *v = current; struct sal_ret_values x; @@ -213,7 +213,13 @@ fw_hypercall (struct pt_regs *regs) perfc_incra(fw_hypercall, index >> 8); switch (index) { - case FW_HYPERCALL_PAL_CALL: + case FW_HYPERCALL_XEN: + return xen_hypercall(regs); + + case FW_HYPERCALL_XEN_FAST: + return xen_fast_hypercall(regs); + + case FW_HYPERCALL_PAL_CALL: //printf("*** PAL hypercall: index=%d\n",regs->r28); //FIXME: This should call a C routine #if 0 @@ -264,7 +270,7 @@ fw_hypercall (struct pt_regs *regs) regs->r10 = y.v1; regs->r11 = y.v2; } break; - case FW_HYPERCALL_SAL_CALL: + case FW_HYPERCALL_SAL_CALL: x = sal_emulator(vcpu_get_gr(v,32),vcpu_get_gr(v,33), vcpu_get_gr(v,34),vcpu_get_gr(v,35), vcpu_get_gr(v,36),vcpu_get_gr(v,37), @@ -272,44 +278,33 @@ fw_hypercall (struct pt_regs *regs) regs->r8 = x.r8; regs->r9 = x.r9; regs->r10 = x.r10; regs->r11 = x.r11; break; - case FW_HYPERCALL_SAL_RETURN: + case FW_HYPERCALL_SAL_RETURN: if ( !test_and_set_bit(_VCPUF_down, &v->vcpu_flags) ) vcpu_sleep_nosync(v); break; - case FW_HYPERCALL_EFI_CALL: + case FW_HYPERCALL_EFI_CALL: efi_ret_value = efi_emulator (regs, &fault); if (fault != IA64_NO_FAULT) return fault; regs->r8 = efi_ret_value; break; - case FW_HYPERCALL_IPI: + case FW_HYPERCALL_IPI: fw_hypercall_ipi (regs); break; - case FW_HYPERCALL_SET_SHARED_INFO_VA: + case FW_HYPERCALL_SET_SHARED_INFO_VA: regs->r8 = domain_set_shared_info_va (regs->r28); break; - case FW_HYPERCALL_FPSWA: + case FW_HYPERCALL_FPSWA: fpswa_ret = fw_hypercall_fpswa (v); regs->r8 = fpswa_ret.status; regs->r9 = fpswa_ret.err0; regs->r10 = fpswa_ret.err1; regs->r11 = fpswa_ret.err2; break; - default: + default: printf("unknown ia64 fw hypercall %lx\n", regs->r2); regs->r8 = do_ni_hypercall(); } return IA64_NO_FAULT; -} - -IA64FAULT -ia64_hypercall (struct pt_regs *regs) -{ - unsigned long index = regs->r2; - - if (index >= FW_HYPERCALL_FIRST_ARCH) - return fw_hypercall (regs); - else - return xen_hypercall (regs); } unsigned long hypercall_create_continuation( @@ -465,28 +460,6 @@ static long do_physdev_op(int cmd, XEN_G return ret; } -/* Legacy hypercall (as of 
0x00030202). */ -static long do_physdev_op_compat(XEN_GUEST_HANDLE(physdev_op_t) uop) -{ - struct physdev_op op; - - if ( unlikely(copy_from_guest(&op, uop, 1) != 0) ) - return -EFAULT; - - return do_physdev_op(op.cmd, guest_handle_from_ptr(&uop.p->u, void)); -} - -/* Legacy hypercall (as of 0x00030202). */ -long do_event_channel_op_compat(XEN_GUEST_HANDLE(evtchn_op_t) uop) -{ - struct evtchn_op op; - - if ( unlikely(copy_from_guest(&op, uop, 1) != 0) ) - return -EFAULT; - - return do_event_channel_op(op.cmd, guest_handle_from_ptr(&uop.p->u, void)); -} - static long register_guest_callback(struct callback_register *reg) { long ret = 0; --- xen/arch/ia64/xen/mm.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/mm.c Wed Oct 11 16:10:40 2006 -0400 @@ -396,6 +396,12 @@ gmfn_to_mfn_foreign(struct domain *d, un { unsigned long pte; + // This function may be called from __gnttab_copy() + // during destruction of VT-i domain with PV-on-HVM driver. + if (unlikely(d->arch.mm.pgd == NULL)) { + if (VMX_DOMAIN(d->vcpu[0])) + return INVALID_MFN; + } pte = lookup_domain_mpa(d,gpfn << PAGE_SHIFT, NULL); if (!pte) { panic("gmfn_to_mfn_foreign: bad gpfn. spinning...\n"); @@ -702,6 +708,22 @@ void *domain_mpa_to_imva(struct domain * } #endif +unsigned long +xencomm_paddr_to_maddr(unsigned long paddr) +{ + struct vcpu *v = current; + struct domain *d = v->domain; + u64 pa; + + pa = ____lookup_domain_mpa(d, paddr); + if (pa == INVALID_MFN) { + printf("%s: called with bad memory address: 0x%lx - iip=%lx\n", + __func__, paddr, vcpu_regs(v)->cr_iip); + return 0; + } + return __va_ul((pa & _PFN_MASK) | (paddr & ~PAGE_MASK)); +} + /* Allocate a new page for domain and map it to the specified metaphysical address. */ static struct page_info * @@ -784,7 +806,7 @@ flags_to_prot (unsigned long flags) // flags: currently only ASSIGN_readonly, ASSIGN_nocache // This is called by assign_domain_mmio_page(). // So accessing to pte is racy. -void +int __assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr, unsigned long flags) @@ -800,8 +822,25 @@ __assign_domain_page(struct domain *d, old_pte = __pte(0); new_pte = pfn_pte(physaddr >> PAGE_SHIFT, __pgprot(prot)); ret_pte = ptep_cmpxchg_rel(&d->arch.mm, mpaddr, pte, old_pte, new_pte); - if (pte_val(ret_pte) == pte_val(old_pte)) + if (pte_val(ret_pte) == pte_val(old_pte)) { smp_mb(); + return 0; + } + + // dom0 tries to map real machine's I/O region, but failed. + // It is very likely that dom0 doesn't boot correctly because + // it can't access I/O. So complain here. + if ((flags & ASSIGN_nocache) && + (pte_pfn(ret_pte) != (physaddr >> PAGE_SHIFT) || + !(pte_val(ret_pte) & _PAGE_MA_UC))) + printk("%s:%d WARNING can't assign page domain 0x%p id %d\n" + "\talready assigned pte_val 0x%016lx\n" + "\tmpaddr 0x%016lx physaddr 0x%016lx flags 0x%lx\n", + __func__, __LINE__, + d, d->domain_id, pte_val(ret_pte), + mpaddr, physaddr, flags); + + return -EAGAIN; } /* get_page() and map a physical address to the specified metaphysical addr */ @@ -818,7 +857,7 @@ assign_domain_page(struct domain *d, set_gpfn_from_mfn(physaddr >> PAGE_SHIFT, mpaddr >> PAGE_SHIFT); // because __assign_domain_page() uses set_pte_rel() which has // release semantics, smp_mb() isn't needed. 
- __assign_domain_page(d, mpaddr, physaddr, ASSIGN_writable); + (void)__assign_domain_page(d, mpaddr, physaddr, ASSIGN_writable); } int @@ -841,8 +880,8 @@ ioports_permit_access(struct domain *d, lp_offset = PAGE_ALIGN(IO_SPACE_SPARSE_ENCODING(lp)); for (off = fp_offset; off <= lp_offset; off += PAGE_SIZE) - __assign_domain_page(d, IO_PORTS_PADDR + off, - __pa(ia64_iobase) + off, ASSIGN_nocache); + (void)__assign_domain_page(d, IO_PORTS_PADDR + off, + __pa(ia64_iobase) + off, ASSIGN_nocache); return 0; } @@ -911,7 +950,7 @@ assign_domain_same_page(struct domain *d //XXX optimization unsigned long end = PAGE_ALIGN(mpaddr + size); for (mpaddr &= PAGE_MASK; mpaddr < end; mpaddr += PAGE_SIZE) { - __assign_domain_page(d, mpaddr, mpaddr, flags); + (void)__assign_domain_page(d, mpaddr, mpaddr, flags); } } @@ -1035,6 +1074,7 @@ assign_domain_page_replace(struct domain put_page(old_page); } } + perfc_incrc(assign_domain_page_replace); } // caller must get_page(new_page) before @@ -1095,6 +1135,7 @@ assign_domain_page_cmpxchg_rel(struct do domain_page_flush(d, mpaddr, old_mfn, new_mfn); put_page(old_page); + perfc_incrc(assign_domain_pge_cmpxchg_rel); return 0; } @@ -1167,6 +1208,7 @@ zap_domain_page_one(struct domain *d, un try_to_clear_PGC_allocate(d, page); } put_page(page); + perfc_incrc(zap_dcomain_page_one); } unsigned long @@ -1179,6 +1221,7 @@ dom0vp_zap_physmap(struct domain *d, uns } zap_domain_page_one(d, gpfn << PAGE_SHIFT, INVALID_MFN); + perfc_incrc(dom0vp_zap_physmap); return 0; } @@ -1224,10 +1267,131 @@ dom0vp_add_physmap(struct domain* d, uns get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY); assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, flags); //don't update p2m table because this page belongs to rd, not d. + perfc_incrc(dom0vp_add_physmap); out1: put_domain(rd); return error; } + +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M +static struct page_info* p2m_pte_zero_page = NULL; + +void +expose_p2m_init(void) +{ + pte_t* pte; + + pte = pte_alloc_one_kernel(NULL, 0); + BUG_ON(pte == NULL); + smp_mb();// make contents of the page visible. + p2m_pte_zero_page = virt_to_page(pte); +} + +static int +expose_p2m_page(struct domain* d, unsigned long mpaddr, struct page_info* page) +{ + // we can't get_page(page) here. + // pte page is allocated form xen heap.(see pte_alloc_one_kernel().) + // so that the page has NULL page owner and it's reference count + // is useless. + // see also relinquish_pte()'s page_get_owner() == NULL check. + BUG_ON(page_get_owner(page) != NULL); + + return __assign_domain_page(d, mpaddr, page_to_maddr(page), + ASSIGN_readonly); +} + +// It is possible to optimize loop, But this isn't performance critical. 
+unsigned long +dom0vp_expose_p2m(struct domain* d, + unsigned long conv_start_gpfn, + unsigned long assign_start_gpfn, + unsigned long expose_size, unsigned long granule_pfn) +{ + unsigned long expose_num_pfn = expose_size >> PAGE_SHIFT; + unsigned long i; + volatile pte_t* conv_pte; + volatile pte_t* assign_pte; + + if ((expose_size % PAGE_SIZE) != 0 || + (granule_pfn % PTRS_PER_PTE) != 0 || + (expose_num_pfn % PTRS_PER_PTE) != 0 || + (conv_start_gpfn % granule_pfn) != 0 || + (assign_start_gpfn % granule_pfn) != 0 || + (expose_num_pfn % granule_pfn) != 0) { + DPRINTK("%s conv_start_gpfn 0x%016lx assign_start_gpfn 0x%016lx " + "expose_size 0x%016lx granulte_pfn 0x%016lx\n", __func__, + conv_start_gpfn, assign_start_gpfn, expose_size, granule_pfn); + return -EINVAL; + } + + if (granule_pfn != PTRS_PER_PTE) { + DPRINTK("%s granule_pfn 0x%016lx PTRS_PER_PTE 0x%016lx\n", + __func__, granule_pfn, PTRS_PER_PTE); + return -ENOSYS; + } + + // allocate pgd, pmd. + i = conv_start_gpfn; + while (i < expose_num_pfn) { + conv_pte = lookup_noalloc_domain_pte(d, (conv_start_gpfn + i) << + PAGE_SHIFT); + if (conv_pte == NULL) { + i++; + continue; + } + + assign_pte = lookup_alloc_domain_pte(d, (assign_start_gpfn << + PAGE_SHIFT) + i * sizeof(pte_t)); + if (assign_pte == NULL) { + DPRINTK("%s failed to allocate pte page\n", __func__); + return -ENOMEM; + } + + // skip to next pte page + i += PTRS_PER_PTE; + i &= ~(PTRS_PER_PTE - 1); + } + + // expose pte page + i = 0; + while (i < expose_num_pfn) { + conv_pte = lookup_noalloc_domain_pte(d, (conv_start_gpfn + i) << + PAGE_SHIFT); + if (conv_pte == NULL) { + i++; + continue; + } + + if (expose_p2m_page(d, (assign_start_gpfn << PAGE_SHIFT) + + i * sizeof(pte_t), virt_to_page(conv_pte)) < 0) { + DPRINTK("%s failed to assign page\n", __func__); + return -EAGAIN; + } + + // skip to next pte page + i += PTRS_PER_PTE; + i &= ~(PTRS_PER_PTE - 1); + } + + // expose p2m_pte_zero_page + for (i = 0; i < expose_num_pfn / PTRS_PER_PTE + 1; i++) { + assign_pte = lookup_noalloc_domain_pte(d, (assign_start_gpfn + i) << + PAGE_SHIFT); + BUG_ON(assign_pte == NULL); + if (pte_present(*assign_pte)) { + continue; + } + if (expose_p2m_page(d, (assign_start_gpfn + i) << PAGE_SHIFT, + p2m_pte_zero_page) < 0) { + DPRINTK("%s failed to assign zero-pte page\n", __func__); + return -EAGAIN; + } + } + + return 0; +} +#endif // grant table host mapping // mpaddr: host_addr: pseudo physical address @@ -1255,6 +1419,7 @@ create_grant_host_mapping(unsigned long get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY); assign_domain_page_replace(d, gpaddr, mfn, (flags & GNTMAP_readonly)? ASSIGN_readonly: ASSIGN_writable); + perfc_incrc(create_grant_host_mapping); return GNTST_okay; } @@ -1314,6 +1479,7 @@ destroy_grant_host_mapping(unsigned long BUG_ON(page_get_owner(page) == d);//try_to_clear_PGC_allocate(d, page) is not needed. 
put_page(page); + perfc_incrc(destroy_grant_host_mapping); return GNTST_okay; } @@ -1374,6 +1540,7 @@ steal_page(struct domain *d, struct page free_domheap_page(new); return -1; } + perfc_incrc(steal_page_refcount); } spin_lock(&d->page_alloc_lock); @@ -1443,6 +1610,7 @@ steal_page(struct domain *d, struct page list_del(&page->list); spin_unlock(&d->page_alloc_lock); + perfc_incrc(steal_page); return 0; } @@ -1460,6 +1628,8 @@ guest_physmap_add_page(struct domain *d, assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn, ASSIGN_writable); //BUG_ON(mfn != ((lookup_domain_mpa(d, gpfn << PAGE_SHIFT) & _PFN_MASK) >> PAGE_SHIFT)); + + perfc_incrc(guest_physmap_add_page); } void @@ -1468,6 +1638,7 @@ guest_physmap_remove_page(struct domain { BUG_ON(mfn == 0);//XXX zap_domain_page_one(d, gpfn << PAGE_SHIFT, mfn); + perfc_incrc(guest_physmap_remove_page); } //XXX sledgehammer. @@ -1480,6 +1651,7 @@ domain_page_flush(struct domain* d, unsi shadow_mark_page_dirty(d, mpaddr >> PAGE_SHIFT); domain_flush_vtlb_all(); + perfc_incrc(domain_page_flush); } int --- xen/arch/ia64/xen/regionreg.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/regionreg.c Wed Oct 11 16:10:40 2006 -0400 @@ -260,7 +260,7 @@ int set_one_rr(unsigned long rr, unsigne } else if (rreg == 7) { ia64_new_rr7(vmMangleRID(newrrv.rrval),v->domain->shared_info, v->arch.privregs, v->domain->arch.shared_info_va, - __get_cpu_var(vhpt_paddr)); + vcpu_vhpt_maddr(v)); } else { set_rr(rr,newrrv.rrval); } --- xen/arch/ia64/xen/vcpu.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/vcpu.c Wed Oct 11 16:10:40 2006 -0400 @@ -1314,12 +1314,21 @@ static inline void static inline void check_xen_space_overlap (const char *func, u64 base, u64 page_size) { + /* Overlaps can occur only in region 7. + (This is an optimization to bypass all the checks). */ + if (REGION_NUMBER(base) != 7) + return; + /* Mask LSBs of base. */ base &= ~(page_size - 1); /* FIXME: ideally an MCA should be generated... */ if (range_overlap (HYPERVISOR_VIRT_START, HYPERVISOR_VIRT_END, - base, base + page_size)) + base, base + page_size) + || range_overlap(current->domain->arch.shared_info_va, + current->domain->arch.shared_info_va + + XSI_SIZE + XMAPPEDREGS_SIZE, + base, base + page_size)) panic_domain (NULL, "%s on Xen virtual space (%lx)\n", func, base); } @@ -2217,28 +2226,3 @@ IA64FAULT vcpu_ptr_i(VCPU *vcpu,UINT64 v return IA64_NO_FAULT; } - -int ia64_map_hypercall_param(void) -{ - struct vcpu *v = current; - struct domain *d = current->domain; - u64 vaddr = v->arch.hypercall_param.va & PAGE_MASK; - volatile pte_t* pte; - - if (v->arch.hypercall_param.va == 0) - return FALSE; - pte = lookup_noalloc_domain_pte(d, v->arch.hypercall_param.pa1); - if (!pte || !pte_present(*pte)) - return FALSE; - vcpu_itc_no_srlz(v, 2, vaddr, pte_val(*pte), -1UL, PAGE_SHIFT); - if (v->arch.hypercall_param.pa2) { - vaddr += PAGE_SIZE; - pte = lookup_noalloc_domain_pte(d, v->arch.hypercall_param.pa2); - if (pte && pte_present(*pte)) { - vcpu_itc_no_srlz(v, 2, vaddr, pte_val(*pte), - -1UL, PAGE_SHIFT); - } - } - ia64_srlz_d(); - return TRUE; -} --- xen/arch/ia64/xen/vhpt.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/vhpt.c Wed Oct 11 16:10:40 2006 -0400 @@ -3,6 +3,10 @@ * * Copyright (C) 2004 Hewlett-Packard Co * Dan Magenheimer <dan.magenheimer@xxxxxx> + * + * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. 
+ * per vcpu vhpt support */ #include <linux/config.h> #include <linux/kernel.h> @@ -24,18 +28,32 @@ DEFINE_PER_CPU (unsigned long, vhpt_padd DEFINE_PER_CPU (unsigned long, vhpt_paddr); DEFINE_PER_CPU (unsigned long, vhpt_pend); -void vhpt_flush(void) -{ - struct vhpt_lf_entry *v = __va(__ia64_per_cpu_var(vhpt_paddr)); +static void + __vhpt_flush(unsigned long vhpt_maddr) +{ + struct vhpt_lf_entry *v = (struct vhpt_lf_entry*)__va(vhpt_maddr); int i; for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) v->ti_tag = INVALID_TI_TAG; } -static void vhpt_erase(void) -{ - struct vhpt_lf_entry *v = (struct vhpt_lf_entry *)VHPT_ADDR; +void +local_vhpt_flush(void) +{ + __vhpt_flush(__ia64_per_cpu_var(vhpt_paddr)); +} + +static void +vcpu_vhpt_flush(struct vcpu* v) +{ + __vhpt_flush(vcpu_vhpt_maddr(v)); +} + +static void +vhpt_erase(unsigned long vhpt_maddr) +{ + struct vhpt_lf_entry *v = (struct vhpt_lf_entry*)__va(vhpt_maddr); int i; for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++) { @@ -45,17 +63,6 @@ static void vhpt_erase(void) v->ti_tag = INVALID_TI_TAG; } // initialize cache too??? -} - - -static void vhpt_map(unsigned long pte) -{ - unsigned long psr; - - psr = ia64_clear_ic(); - ia64_itr(0x2, IA64_TR_VHPT, VHPT_ADDR, pte, VHPT_SIZE_LOG2); - ia64_set_psr(psr); - ia64_srlz_i(); } void vhpt_insert (unsigned long vadr, unsigned long pte, unsigned long logps) @@ -102,7 +109,7 @@ void vhpt_multiple_insert(unsigned long void vhpt_init(void) { - unsigned long paddr, pte; + unsigned long paddr; struct page_info *page; #if !VHPT_ENABLED return; @@ -122,14 +129,51 @@ void vhpt_init(void) __get_cpu_var(vhpt_pend) = paddr + (1 << VHPT_SIZE_LOG2) - 1; printf("vhpt_init: vhpt paddr=0x%lx, end=0x%lx\n", paddr, __get_cpu_var(vhpt_pend)); - pte = pte_val(pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL)); - vhpt_map(pte); - ia64_set_pta(VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | - VHPT_ENABLED); - vhpt_erase(); -} - - + vhpt_erase(paddr); + // we don't enable VHPT here. + // context_switch() or schedule_tail() does it. +} + +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT +int +pervcpu_vhpt_alloc(struct vcpu *v) +{ + unsigned long vhpt_size_log2 = VHPT_SIZE_LOG2; + + v->arch.vhpt_entries = + (1UL << vhpt_size_log2) / sizeof(struct vhpt_lf_entry); + v->arch.vhpt_page = + alloc_domheap_pages(NULL, vhpt_size_log2 - PAGE_SHIFT, 0); + if (!v->arch.vhpt_page) + return -ENOMEM; + + v->arch.vhpt_maddr = page_to_maddr(v->arch.vhpt_page); + if (v->arch.vhpt_maddr & ((1 << VHPT_SIZE_LOG2) - 1)) + panic("pervcpu_vhpt_init: bad VHPT alignment!\n"); + + v->arch.pta.val = 0; // to zero reserved bits + v->arch.pta.ve = 1; // enable vhpt + v->arch.pta.size = VHPT_SIZE_LOG2; + v->arch.pta.vf = 1; // long format + //v->arch.pta.base = __va(v->arch.vhpt_maddr) >> 15; + v->arch.pta.base = VHPT_ADDR >> 15; + + vhpt_erase(v->arch.vhpt_maddr); + smp_mb(); // per vcpu vhpt may be used by another physical cpu. + return 0; +} + +void +pervcpu_vhpt_free(struct vcpu *v) +{ + free_domheap_pages(v->arch.vhpt_page, VHPT_SIZE_LOG2 - PAGE_SHIFT); +} +#endif + +// SMP: we can't assume v == current, vcpu might move to another physical cpu. +// So memory barrier is necessary. +// if we can guranttee that vcpu can run on only this physical cpu +// (e.g. vcpu == current), smp_mb() is unnecessary. void vcpu_flush_vtlb_all(struct vcpu *v) { if (VMX_DOMAIN(v)) { @@ -144,9 +188,14 @@ void vcpu_flush_vtlb_all(struct vcpu *v) /* First VCPU tlb. */ vcpu_purge_tr_entry(&PSCBX(v,dtlb)); vcpu_purge_tr_entry(&PSCBX(v,itlb)); + smp_mb(); /* Then VHPT. 
*/ - vhpt_flush(); + if (HAS_PERVCPU_VHPT(v->domain)) + vcpu_vhpt_flush(v); + else + local_vhpt_flush(); + smp_mb(); /* Then mTLB. */ local_flush_tlb_all(); @@ -155,6 +204,8 @@ void vcpu_flush_vtlb_all(struct vcpu *v) /* We could clear bit in d->domain_dirty_cpumask only if domain d in not running on this processor. There is currently no easy way to check this. */ + + perfc_incrc(vcpu_flush_vtlb_all); } static void __vcpu_flush_vtlb_all(void *vcpu) @@ -174,32 +225,59 @@ void domain_flush_vtlb_all (void) if (v->processor == cpu) vcpu_flush_vtlb_all(v); else + // SMP: it is racy to reference v->processor. + // vcpu scheduler may move this vcpu to another + // physicall processor, and change the value + // using plain store. + // We may be seeing the old value of it. + // In such case, flush_vtlb_for_context_switch() + // takes care of mTLB flush. smp_call_function_single(v->processor, __vcpu_flush_vtlb_all, v, 1, 1); } -} - -static void cpu_flush_vhpt_range (int cpu, u64 vadr, u64 addr_range) -{ - void *vhpt_base = __va(per_cpu(vhpt_paddr, cpu)); + perfc_incrc(domain_flush_vtlb_all); +} + +// Callers may need to call smp_mb() before/after calling this. +// Be carefull. +static void +__flush_vhpt_range(unsigned long vhpt_maddr, u64 vadr, u64 addr_range) +{ + void *vhpt_base = __va(vhpt_maddr); while ((long)addr_range > 0) { /* Get the VHPT entry. */ unsigned int off = ia64_thash(vadr) - VHPT_ADDR; - volatile struct vhpt_lf_entry *v; - v = vhpt_base + off; + struct vhpt_lf_entry *v = vhpt_base + off; v->ti_tag = INVALID_TI_TAG; addr_range -= PAGE_SIZE; vadr += PAGE_SIZE; } } +static void +cpu_flush_vhpt_range(int cpu, u64 vadr, u64 addr_range) +{ + __flush_vhpt_range(per_cpu(vhpt_paddr, cpu), vadr, addr_range); +} + +static void +vcpu_flush_vhpt_range(struct vcpu* v, u64 vadr, u64 addr_range) +{ + __flush_vhpt_range(vcpu_vhpt_maddr(v), vadr, addr_range); +} + void vcpu_flush_tlb_vhpt_range (u64 vadr, u64 log_range) { - cpu_flush_vhpt_range (current->processor, vadr, 1UL << log_range); + if (HAS_PERVCPU_VHPT(current->domain)) + vcpu_flush_vhpt_range(current, vadr, 1UL << log_range); + else + cpu_flush_vhpt_range(current->processor, + vadr, 1UL << log_range); ia64_ptcl(vadr, log_range << 2); ia64_srlz_i(); + perfc_incrc(vcpu_flush_tlb_vhpt_range); } void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range) @@ -229,19 +307,30 @@ void domain_flush_vtlb_range (struct dom if (!test_bit(_VCPUF_initialised, &v->vcpu_flags)) continue; - /* Invalidate VHPT entries. */ - cpu_flush_vhpt_range (v->processor, vadr, addr_range); + if (HAS_PERVCPU_VHPT(d)) { + vcpu_flush_vhpt_range(v, vadr, addr_range); + } else { + // SMP: it is racy to reference v->processor. + // vcpu scheduler may move this vcpu to another + // physicall processor, and change the value + // using plain store. + // We may be seeing the old value of it. + // In such case, flush_vtlb_for_context_switch() + /* Invalidate VHPT entries. */ + cpu_flush_vhpt_range(v->processor, vadr, addr_range); + } } // ptc.ga has release semantics. /* ptc.ga */ ia64_global_tlb_purge(vadr,vadr+addr_range,PAGE_SHIFT); + perfc_incrc(domain_flush_vtlb_range); } static void flush_tlb_vhpt_all (struct domain *d) { /* First VHPT. */ - vhpt_flush (); + local_vhpt_flush (); /* Then mTLB. */ local_flush_tlb_all (); @@ -250,7 +339,10 @@ void domain_flush_tlb_vhpt(struct domain void domain_flush_tlb_vhpt(struct domain *d) { /* Very heavy... 
*/ - on_each_cpu ((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1); + if (HAS_PERVCPU_VHPT(d) /* || VMX_DOMAIN(v) */) + on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1); + else + on_each_cpu((void (*)(void *))flush_tlb_vhpt_all, d, 1, 1); cpus_clear (d->domain_dirty_cpumask); } --- xen/arch/ia64/xen/xen.lds.S Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/xen.lds.S Wed Oct 11 16:10:40 2006 -0400 @@ -172,6 +172,9 @@ SECTIONS . = ALIGN(PAGE_SIZE); /* make sure the gate page doesn't expose * kernel data */ + + .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) + { *(.data.read_mostly) } .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { *(.data.cacheline_aligned) } --- xen/arch/ia64/xen/xenmem.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/xenmem.c Wed Oct 11 16:10:40 2006 -0400 @@ -17,10 +17,19 @@ #include <linux/efi.h> #include <asm/pgalloc.h> -extern pgd_t frametable_pg_dir[]; - -#define frametable_pgd_offset(addr) \ - (frametable_pg_dir + (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))) +extern unsigned long frametable_pg_dir[]; + +#define FRAMETABLE_PGD_OFFSET(ADDR) \ + (frametable_pg_dir + (((ADDR) >> PGDIR_SHIFT) & \ + ((1UL << (PAGE_SHIFT - 3)) - 1))) + +#define FRAMETABLE_PMD_OFFSET(PGD, ADDR) \ + __va((unsigned long *)(PGD) + (((ADDR) >> PMD_SHIFT) & \ + ((1UL << (PAGE_SHIFT - 3)) - 1))) + +#define FRAMETABLE_PTE_OFFSET(PMD, ADDR) \ + (pte_t *)__va((unsigned long *)(PMD) + (((ADDR) >> PAGE_SHIFT) & \ + ((1UL << (PAGE_SHIFT - 3)) - 1))) static unsigned long table_size; static int opt_contig_mem = 0; @@ -29,13 +38,13 @@ boolean_param("contig_mem", opt_contig_m #define opt_contig_mem 1 #endif -struct page_info *frame_table; +struct page_info *frame_table __read_mostly; unsigned long max_page; /* * Set up the page tables. 
*/ -volatile unsigned long *mpt_table; +volatile unsigned long *mpt_table __read_mostly; void paging_init (void) @@ -72,7 +81,7 @@ paging_init (void) #ifdef CONFIG_VIRTUAL_FRAME_TABLE -static inline void * +static unsigned long alloc_dir_page(void) { unsigned long mfn = alloc_boot_pages(1, 1); @@ -82,7 +91,7 @@ alloc_dir_page(void) ++table_size; dir = mfn << PAGE_SHIFT; memset(__va(dir), 0, PAGE_SIZE); - return (void *)dir; + return dir; } static inline unsigned long @@ -100,15 +109,33 @@ alloc_table_page(unsigned long fill) return mfn; } +static void +create_page_table(unsigned long start_page, unsigned long end_page, + unsigned long fill) +{ + unsigned long address; + unsigned long *dir; + pte_t *pteptr; + + for (address = start_page; address < end_page; address += PAGE_SIZE) { + dir = FRAMETABLE_PGD_OFFSET(address); + if (!*dir) + *dir = alloc_dir_page(); + dir = FRAMETABLE_PMD_OFFSET(*dir, address); + if (!*dir) + *dir = alloc_dir_page(); + pteptr = FRAMETABLE_PTE_OFFSET(*dir, address); + if (pte_none(*pteptr)) + set_pte(pteptr, pfn_pte(alloc_table_page(fill), + PAGE_KERNEL)); + } +} + static int create_frametable_page_table (u64 start, u64 end, void *arg) { - unsigned long address, start_page, end_page; struct page_info *map_start, *map_end; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; + unsigned long start_page, end_page; map_start = frame_table + (__pa(start) >> PAGE_SHIFT); map_end = frame_table + (__pa(end) >> PAGE_SHIFT); @@ -116,23 +143,7 @@ create_frametable_page_table (u64 start, start_page = (unsigned long) map_start & PAGE_MASK; end_page = PAGE_ALIGN((unsigned long) map_end); - for (address = start_page; address < end_page; address += PAGE_SIZE) { - pgd = frametable_pgd_offset(address); - if (pgd_none(*pgd)) - pgd_populate(NULL, pgd, alloc_dir_page()); - pud = pud_offset(pgd, address); - - if (pud_none(*pud)) - pud_populate(NULL, pud, alloc_dir_page()); - pmd = pmd_offset(pud, address); - - if (pmd_none(*pmd)) - pmd_populate_kernel(NULL, pmd, alloc_dir_page()); - pte = pte_offset_kernel(pmd, address); - - if (pte_none(*pte)) - set_pte(pte, pfn_pte(alloc_table_page(0), PAGE_KERNEL)); - } + create_page_table(start_page, end_page, 0L); return 0; } @@ -140,11 +151,7 @@ create_mpttable_page_table (u64 start, u create_mpttable_page_table (u64 start, u64 end, void *arg) { unsigned long map_start, map_end; - unsigned long address, start_page, end_page; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; + unsigned long start_page, end_page; map_start = (unsigned long)(mpt_table + (__pa(start) >> PAGE_SHIFT)); map_end = (unsigned long)(mpt_table + (__pa(end) >> PAGE_SHIFT)); @@ -152,23 +159,7 @@ create_mpttable_page_table (u64 start, u start_page = map_start & PAGE_MASK; end_page = PAGE_ALIGN(map_end); - for (address = start_page; address < end_page; address += PAGE_SIZE) { - pgd = frametable_pgd_offset(address); - if (pgd_none(*pgd)) - pgd_populate(NULL, pgd, alloc_dir_page()); - pud = pud_offset(pgd, address); - - if (pud_none(*pud)) - pud_populate(NULL, pud, alloc_dir_page()); - pmd = pmd_offset(pud, address); - - if (pmd_none(*pmd)) - pmd_populate_kernel(NULL, pmd, alloc_dir_page()); - pte = pte_offset_kernel(pmd, address); - - if (pte_none(*pte)) - set_pte(pte, pfn_pte(alloc_table_page(INVALID_M2P_ENTRY), PAGE_KERNEL)); - } + create_page_table(start_page, end_page, INVALID_M2P_ENTRY); return 0; } --- xen/arch/ia64/xen/xensetup.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/xensetup.c Wed Oct 11 16:10:40 2006 -0400 @@ -48,6 +48,7 @@ extern void 
mem_init(void); extern void mem_init(void); extern void init_IRQ(void); extern void trap_init(void); +extern void xen_patch_kernel(void); /* opt_nosmp: If true, secondary processors are ignored. */ static int opt_nosmp = 0; @@ -81,6 +82,7 @@ unsigned long xenheap_size = XENHEAP_DEF unsigned long xenheap_size = XENHEAP_DEFAULT_SIZE; extern long running_on_sim; unsigned long xen_pstart; +void *xen_heap_start __read_mostly; static int xen_count_pages(u64 start, u64 end, void *arg) @@ -184,8 +186,8 @@ efi_print(void) for (i = 0, p = efi_map_start; p < efi_map_end; ++i, p += efi_desc_size) { md = p; - printk("mem%02u: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n", - i, md->type, md->attribute, md->phys_addr, + printk("mem%02u: type=%2u, attr=0x%016lx, range=[0x%016lx-0x%016lx) " + "(%luMB)\n", i, md->type, md->attribute, md->phys_addr, md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), md->num_pages >> (20 - EFI_PAGE_SHIFT)); } @@ -242,7 +244,6 @@ void start_kernel(void) void start_kernel(void) { char *cmdline; - void *heap_start; unsigned long nr_pages; unsigned long dom0_memory_start, dom0_memory_size; unsigned long dom0_initrd_start, dom0_initrd_size; @@ -292,6 +293,8 @@ void start_kernel(void) xenheap_phys_end = xen_pstart + xenheap_size; printk("xen image pstart: 0x%lx, xenheap pend: 0x%lx\n", xen_pstart, xenheap_phys_end); + + xen_patch_kernel(); kern_md = md = efi_get_md(xen_pstart); md_end = __pa(ia64_imva(&_end)); @@ -389,10 +392,10 @@ void start_kernel(void) printf("find_memory: efi_memmap_walk returns max_page=%lx\n",max_page); efi_print(); - heap_start = memguard_init(ia64_imva(&_end)); - printf("Before heap_start: %p\n", heap_start); - heap_start = __va(init_boot_allocator(__pa(heap_start))); - printf("After heap_start: %p\n", heap_start); + xen_heap_start = memguard_init(ia64_imva(&_end)); + printf("Before xen_heap_start: %p\n", xen_heap_start); + xen_heap_start = __va(init_boot_allocator(__pa(xen_heap_start))); + printf("After xen_heap_start: %p\n", xen_heap_start); efi_memmap_walk(filter_rsvd_memory, init_boot_pages); efi_memmap_walk(xen_count_pages, &nr_pages); @@ -410,10 +413,10 @@ void start_kernel(void) end_boot_allocator(); - init_xenheap_pages(__pa(heap_start), xenheap_phys_end); + init_xenheap_pages(__pa(xen_heap_start), xenheap_phys_end); printk("Xen heap: %luMB (%lukB)\n", - (xenheap_phys_end-__pa(heap_start)) >> 20, - (xenheap_phys_end-__pa(heap_start)) >> 10); + (xenheap_phys_end-__pa(xen_heap_start)) >> 20, + (xenheap_phys_end-__pa(xen_heap_start)) >> 10); late_setup_arch(&cmdline); @@ -495,6 +498,8 @@ printk("num_online_cpus=%d, max_cpus=%d\ /* Hide the HCDP table from dom0 */ efi.hcdp = NULL; } + + expose_p2m_init(); /* Create initial domain 0. */ dom0 = domain_create(0); --- xen/arch/ia64/xen/xentime.c Tue Oct 10 21:05:50 2006 +0100 +++ xen/arch/ia64/xen/xentime.c Wed Oct 11 16:10:40 2006 -0400 @@ -39,7 +39,7 @@ seqlock_t xtime_lock __cacheline_aligned #define TIME_KEEPER_ID 0 unsigned long domain0_ready = 0; static s_time_t stime_irq = 0x0; /* System time at last 'time update' */ -unsigned long itc_scale, ns_scale; +unsigned long itc_scale __read_mostly, ns_scale __read_mostly; unsigned long itc_at_irq; /* We don't expect an absolute cycle value here, since then no way --- xen/include/asm-ia64/dom_fw.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/dom_fw.h Wed Oct 11 16:10:40 2006 -0400 @@ -38,6 +38,13 @@ The high part is the class (xen/pal/sal/efi). 
*/ #define FW_HYPERCALL_NUM_MASK_HIGH ~0xffUL #define FW_HYPERCALL_NUM_MASK_LOW 0xffUL + +/* Xen hypercalls are 0-63. */ +#define FW_HYPERCALL_XEN 0x0000UL + +/* Define some faster and lighter hypercalls. + See definitions in arch-ia64.h */ +#define FW_HYPERCALL_XEN_FAST 0x0200UL /* * PAL can be called in physical or virtual mode simply by --- xen/include/asm-ia64/domain.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/domain.h Wed Oct 11 16:10:40 2006 -0400 @@ -87,6 +87,9 @@ struct arch_domain { unsigned long flags; struct { unsigned int is_vti : 1; +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT + unsigned int has_pervcpu_vhpt : 1; +#endif }; }; @@ -142,11 +145,12 @@ struct arch_domain { (sizeof(vcpu_info_t) * (v)->vcpu_id + \ offsetof(vcpu_info_t, evtchn_upcall_mask)) -struct hypercall_param { - unsigned long va; - unsigned long pa1; - unsigned long pa2; -}; +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT +#define HAS_PERVCPU_VHPT(d) ((d)->arch.has_pervcpu_vhpt) +#else +#define HAS_PERVCPU_VHPT(d) (0) +#endif + struct arch_vcpu { /* Save the state of vcpu. @@ -192,14 +196,19 @@ struct arch_vcpu { char irq_new_condition; // vpsr.i/vtpr change, check for pending VHPI char hypercall_continuation; - struct hypercall_param hypercall_param; // used to remap a hypercall param - //for phycial emulation unsigned long old_rsc; int mode_flags; fpswa_ret_t fpswa_ret; /* save return values of FPSWA emulation */ struct timer hlt_timer; struct arch_vmx_struct arch_vmx; /* Virtual Machine Extensions */ + +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT + PTA pta; + unsigned long vhpt_maddr; + struct page_info* vhpt_page; + unsigned long vhpt_entries; +#endif #define INVALID_PROCESSOR INT_MAX int last_processor; --- xen/include/asm-ia64/guest_access.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/guest_access.h Wed Oct 11 16:10:40 2006 -0400 @@ -1,91 +1,107 @@ -/****************************************************************************** - * guest_access.h - * - * Copyright (c) 2006, K A Fraser +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) IBM Corp. 2006 + * + * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> + * Tristan Gingold <tristan.gingold@xxxxxxxx> */ -#ifndef __ASM_IA64_GUEST_ACCESS_H__ -#define __ASM_IA64_GUEST_ACCESS_H__ +#ifndef __ASM_GUEST_ACCESS_H__ +#define __ASM_GUEST_ACCESS_H__ -#include <asm/uaccess.h> +extern unsigned long xencomm_copy_to_guest(void *to, const void *from, + unsigned int len, unsigned int skip); +extern unsigned long xencomm_copy_from_guest(void *to, const void *from, + unsigned int len, unsigned int skip); +extern void *xencomm_add_offset(void *handle, unsigned int bytes); +extern int xencomm_handle_is_null(void *ptr); + /* Is the guest handle a NULL reference? 
*/ -#define guest_handle_is_null(hnd) ((hnd).p == NULL) +#define guest_handle_is_null(hnd) \ + ((hnd).p == NULL || xencomm_handle_is_null((hnd).p)) /* Offset the given guest handle into the array it refers to. */ -#define guest_handle_add_offset(hnd, nr) ((hnd).p += (nr)) +#define guest_handle_add_offset(hnd, nr) ({ \ + const typeof((hnd).p) _ptr = (hnd).p; \ + (hnd).p = xencomm_add_offset(_ptr, nr * sizeof(*_ptr)); \ +}) /* Cast a guest handle to the specified type of handle. */ -#define guest_handle_cast(hnd, type) ({ \ - type *_x = (hnd).p; \ - (XEN_GUEST_HANDLE(type)) { _x }; \ +#define guest_handle_cast(hnd, type) ({ \ + type *_x = (hnd).p; \ + XEN_GUEST_HANDLE(type) _y; \ + set_xen_guest_handle(_y, _x); \ + _y; \ }) -#define guest_handle_from_ptr(ptr, type) ((XEN_GUEST_HANDLE(type)) { (type *)ptr }) + +/* Since we run in real mode, we can safely access all addresses. That also + * means our __routines are identical to our "normal" routines. */ +#define guest_handle_okay(hnd, nr) 1 /* - * Copy an array of objects to guest context via a guest handle, - * specifying an offset into the guest array. + * Copy an array of objects to guest context via a guest handle. + * Optionally specify an offset into the guest array. */ -#define copy_to_guest_offset(hnd, off, ptr, nr) ({ \ - const typeof(ptr) _x = (hnd).p; \ - const typeof(ptr) _y = (ptr); \ - copy_to_user(_x+(off), _y, sizeof(*_x)*(nr)); \ +#define copy_to_guest_offset(hnd, idx, ptr, nr) \ + __copy_to_guest_offset(hnd, idx, ptr, nr) + +/* Copy sub-field of a structure to guest context via a guest handle. */ +#define copy_field_to_guest(hnd, ptr, field) \ + __copy_field_to_guest(hnd, ptr, field) + +/* + * Copy an array of objects from guest context via a guest handle. + * Optionally specify an offset into the guest array. + */ +#define copy_from_guest_offset(ptr, hnd, idx, nr) \ + __copy_from_guest_offset(ptr, hnd, idx, nr) + +/* Copy sub-field of a structure from guest context via a guest handle. */ +#define copy_field_from_guest(ptr, hnd, field) \ + __copy_field_from_guest(ptr, hnd, field) + +#define __copy_to_guest_offset(hnd, idx, ptr, nr) ({ \ + const typeof(ptr) _d = (hnd).p; \ + const typeof(ptr) _s = (ptr); \ + xencomm_copy_to_guest(_d, _s, sizeof(*_s)*(nr), sizeof(*_s)*(idx)); \ }) -/* - * Copy an array of objects from guest context via a guest handle, - * specifying an offset into the guest array. - */ -#define copy_from_guest_offset(ptr, hnd, off, nr) ({ \ - const typeof(ptr) _x = (hnd).p; \ - const typeof(ptr) _y = (ptr); \ - copy_from_user(_y, _x+(off), sizeof(*_x)*(nr)); \ +#define __copy_field_to_guest(hnd, ptr, field) ({ \ + const int _off = offsetof(typeof(*ptr), field); \ + const typeof(ptr) _d = (hnd).p; \ + const typeof(&(ptr)->field) _s = &(ptr)->field; \ + xencomm_copy_to_guest(_d, _s, sizeof(*_s), _off); \ }) -/* Copy sub-field of a structure to guest context via a guest handle. */ -#define copy_field_to_guest(hnd, ptr, field) ({ \ - const typeof(&(ptr)->field) _x = &(hnd).p->field; \ - const typeof(&(ptr)->field) _y = &(ptr)->field; \ - copy_to_user(_x, _y, sizeof(*_x)); \ +#define __copy_from_guest_offset(ptr, hnd, idx, nr) ({ \ + const typeof(ptr) _s = (hnd).p; \ + const typeof(ptr) _d = (ptr); \ + xencomm_copy_from_guest(_d, _s, sizeof(*_s)*(nr), sizeof(*_s)*(idx)); \ }) -/* Copy sub-field of a structure from guest context via a guest handle. 
*/ -#define copy_field_from_guest(ptr, hnd, field) ({ \ - const typeof(&(ptr)->field) _x = &(hnd).p->field; \ - const typeof(&(ptr)->field) _y = &(ptr)->field; \ - copy_from_user(_y, _x, sizeof(*_x)); \ +#define __copy_field_from_guest(ptr, hnd, field) ({ \ + const int _off = offsetof(typeof(*ptr), field); \ + const typeof(ptr) _s = (hnd).p; \ + const typeof(&(ptr)->field) _d = &(ptr)->field; \ + xencomm_copy_from_guest(_d, _s, sizeof(*_d), _off); \ }) -/* - * Pre-validate a guest handle. - * Allows use of faster __copy_* functions. - */ -#define guest_handle_okay(hnd, nr) \ - array_access_ok((hnd).p, (nr), sizeof(*(hnd).p)) +/* Internal use only: returns 0 in case of bad address. */ +extern unsigned long xencomm_paddr_to_maddr(unsigned long paddr); -#define __copy_to_guest_offset(hnd, off, ptr, nr) ({ \ - const typeof(ptr) _x = (hnd).p; \ - const typeof(ptr) _y = (ptr); \ - __copy_to_user(_x+(off), _y, sizeof(*_x)*(nr)); \ -}) - -#define __copy_from_guest_offset(ptr, hnd, off, nr) ({ \ - const typeof(ptr) _x = (hnd).p; \ - const typeof(ptr) _y = (ptr); \ - __copy_from_user(_y, _x+(off), sizeof(*_x)*(nr)); \ -}) - -#define __copy_field_to_guest(hnd, ptr, field) ({ \ - const typeof(&(ptr)->field) _x = &(hnd).p->field; \ - const typeof(&(ptr)->field) _y = &(ptr)->field; \ - __copy_to_user(_x, _y, sizeof(*_x)); \ -}) - -#define __copy_field_from_guest(ptr, hnd, field) ({ \ - const typeof(&(ptr)->field) _x = &(hnd).p->field; \ - const typeof(&(ptr)->field) _y = &(ptr)->field; \ - __copy_from_user(_y, _x, sizeof(*_x)); \ -}) - -#endif /* __ASM_IA64_GUEST_ACCESS_H__ */ +#endif /* __ASM_GUEST_ACCESS_H__ */ --- xen/include/asm-ia64/ia64_int.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/ia64_int.h Wed Oct 11 16:10:40 2006 -0400 @@ -36,7 +36,9 @@ #define IA64_NO_FAULT 0x0000 #define IA64_FAULT 0x0001 #define IA64_RFI_IN_PROGRESS 0x0002 -#define IA64_RETRY 0x0003 +// To avoid conflicting with return value of handle_fpu_swa() +// set IA64_RETRY to -0x000f +#define IA64_RETRY (-0x000f) #define IA64_FORCED_IFA 0x0004 #define IA64_USE_TLB 0x0005 #define IA64_ILLOP_FAULT (IA64_GENEX_VECTOR | 0x00) --- xen/include/asm-ia64/linux-xen/asm/cache.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/linux-xen/asm/cache.h Wed Oct 11 16:10:40 2006 -0400 @@ -32,6 +32,6 @@ #endif #endif -#define __read_mostly +#define __read_mostly __attribute__((__section__(".data.read_mostly"))) #endif /* _ASM_IA64_CACHE_H */ --- xen/include/asm-ia64/linux-xen/asm/pgtable.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/linux-xen/asm/pgtable.h Wed Oct 11 16:10:40 2006 -0400 @@ -68,6 +68,20 @@ #ifdef XEN #define _PAGE_VIRT_D (__IA64_UL(1) << 53) /* Virtual dirty bit */ #define _PAGE_PROTNONE 0 + +/* domVTI */ +#define GPFN_MEM (0UL << 60) /* Guest pfn is normal mem */ +#define GPFN_FRAME_BUFFER (1UL << 60) /* VGA framebuffer */ +#define GPFN_LOW_MMIO (2UL << 60) /* Low MMIO range */ +#define GPFN_PIB (3UL << 60) /* PIB base */ +#define GPFN_IOSAPIC (4UL << 60) /* IOSAPIC base */ +#define GPFN_LEGACY_IO (5UL << 60) /* Legacy I/O base */ +#define GPFN_GFW (6UL << 60) /* Guest Firmware */ +#define GPFN_HIGH_MMIO (7UL << 60) /* High MMIO range */ + +#define GPFN_IO_MASK (7UL << 60) /* Guest pfn is I/O type */ +#define GPFN_INV_MASK (1UL << 63) /* Guest pfn is invalid */ + #else #define _PAGE_PROTNONE (__IA64_UL(1) << 63) #endif --- xen/include/asm-ia64/linux-xen/asm/processor.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/linux-xen/asm/processor.h Wed Oct 11 16:10:40 2006 -0400 @@ -89,6 +89,7 @@ 
#ifdef XEN #include <asm/xenprocessor.h> +#include <xen/bitops.h> #else /* like above but expressed as bitfields for more efficient access: */ struct ia64_psr { @@ -571,6 +572,23 @@ ia64_eoi (void) #define cpu_relax() ia64_hint(ia64_hint_pause) +static inline int +ia64_get_irr(unsigned int vector) +{ + unsigned int reg = vector / 64; + unsigned int bit = vector % 64; + u64 irr; + + switch (reg) { + case 0: irr = ia64_getreg(_IA64_REG_CR_IRR0); break; + case 1: irr = ia64_getreg(_IA64_REG_CR_IRR1); break; + case 2: irr = ia64_getreg(_IA64_REG_CR_IRR2); break; + case 3: irr = ia64_getreg(_IA64_REG_CR_IRR3); break; + } + + return test_bit(bit, &irr); +} + static inline void ia64_set_lrr0 (unsigned long val) { --- xen/include/asm-ia64/linux-xen/asm/system.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/linux-xen/asm/system.h Wed Oct 11 16:10:40 2006 -0400 @@ -189,6 +189,7 @@ do { \ #ifdef XEN #define local_irq_is_enabled() (!irqs_disabled()) +extern struct vcpu *ia64_switch_to(struct vcpu *next_task); #else #ifdef __KERNEL__ --- xen/include/asm-ia64/linux/asm/sal.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/linux/asm/sal.h Wed Oct 11 16:10:40 2006 -0400 @@ -657,15 +657,7 @@ ia64_sal_freq_base (unsigned long which, return isrv.status; } -/* Flush all the processor and platform level instruction and/or data caches */ -static inline s64 -ia64_sal_cache_flush (u64 cache_type) -{ - struct ia64_sal_retval isrv; - SAL_CALL(isrv, SAL_CACHE_FLUSH, cache_type, 0, 0, 0, 0, 0, 0); - return isrv.status; -} - +extern s64 ia64_sal_cache_flush (u64 cache_type); /* Initialize all the processor and platform level instruction and data caches */ static inline s64 --- xen/include/asm-ia64/mm.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/mm.h Wed Oct 11 16:10:40 2006 -0400 @@ -117,10 +117,14 @@ struct page_info #define IS_XEN_HEAP_FRAME(_pfn) ((page_to_maddr(_pfn) < xenheap_phys_end) \ && (page_to_maddr(_pfn) >= xen_pstart)) -static inline struct domain *unpickle_domptr(u32 _d) -{ return (_d == 0) ? NULL : __va(_d); } +extern void *xen_heap_start; +#define __pickle(a) ((unsigned long)a - (unsigned long)xen_heap_start) +#define __unpickle(a) (void *)(a + xen_heap_start) + +static inline struct domain *unpickle_domptr(u64 _d) +{ return (_d == 0) ? NULL : __unpickle(_d); } static inline u32 pickle_domptr(struct domain *_d) -{ return (_d == NULL) ? 0 : (u32)__pa(_d); } +{ return (_d == NULL) ? 
0 : (u32)__pickle(_d); } #define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain)) #define page_set_owner(_p, _d) ((_p)->u.inuse._domain = pickle_domptr(_d)) @@ -420,7 +424,7 @@ extern void relinquish_mm(struct domain* extern void relinquish_mm(struct domain* d); extern struct page_info * assign_new_domain_page(struct domain *d, unsigned long mpaddr); extern void assign_new_domain0_page(struct domain *d, unsigned long mpaddr); -extern void __assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr, unsigned long flags); +extern int __assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr, unsigned long flags); extern void assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr); extern void assign_domain_io_page(struct domain *d, unsigned long mpaddr, unsigned long flags); struct p2m_entry; @@ -435,6 +439,13 @@ extern unsigned long do_dom0vp_op(unsign extern unsigned long do_dom0vp_op(unsigned long cmd, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3); extern unsigned long dom0vp_zap_physmap(struct domain *d, unsigned long gpfn, unsigned int extent_order); extern unsigned long dom0vp_add_physmap(struct domain* d, unsigned long gpfn, unsigned long mfn, unsigned long flags, domid_t domid); +#ifdef CONFIG_XEN_IA64_EXPOSE_P2M +extern void expose_p2m_init(void); +extern unsigned long dom0vp_expose_p2m(struct domain* d, unsigned long conv_start_gpfn, unsigned long assign_start_gpfn, unsigned long expose_size, unsigned long granule_pfn); +#else +#define expose_p2m_init() do { } while (0) +#define dom0vp_expose_p2m(d, conv_start_gpfn, assign_start_gpfn, expose_size, granule_pfn) (-ENOSYS) +#endif extern volatile unsigned long *mpt_table; extern unsigned long gmfn_to_mfn_foreign(struct domain *d, unsigned long gpfn); --- xen/include/asm-ia64/perfc_defn.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/perfc_defn.h Wed Oct 11 16:10:40 2006 -0400 @@ -107,3 +107,30 @@ PERFPRIVOPADDR(get_ifa) PERFPRIVOPADDR(get_ifa) PERFPRIVOPADDR(thash) #endif + +// vhpt.c +PERFCOUNTER_CPU(vcpu_flush_vtlb_all, "vcpu_flush_vtlb_all") +PERFCOUNTER_CPU(domain_flush_vtlb_all, "domain_flush_vtlb_all") +PERFCOUNTER_CPU(vcpu_flush_tlb_vhpt_range, "vcpu_flush_tlb_vhpt_range") +PERFCOUNTER_CPU(domain_flush_vtlb_range, "domain_flush_vtlb_range") + +// domain.c +PERFCOUNTER_CPU(flush_vtlb_for_context_switch, "flush_vtlb_for_context_switch") + +// mm.c +PERFCOUNTER_CPU(assign_domain_page_replace, "assign_domain_page_replace") +PERFCOUNTER_CPU(assign_domain_pge_cmpxchg_rel, "assign_domain_pge_cmpxchg_rel") +PERFCOUNTER_CPU(zap_dcomain_page_one, "zap_dcomain_page_one") +PERFCOUNTER_CPU(dom0vp_zap_physmap, "dom0vp_zap_physmap") +PERFCOUNTER_CPU(dom0vp_add_physmap, "dom0vp_add_physmap") +PERFCOUNTER_CPU(create_grant_host_mapping, "create_grant_host_mapping") +PERFCOUNTER_CPU(destroy_grant_host_mapping, "destroy_grant_host_mapping") +PERFCOUNTER_CPU(steal_page_refcount, "steal_page_refcount") +PERFCOUNTER_CPU(steal_page, "steal_page") +PERFCOUNTER_CPU(guest_physmap_add_page, "guest_physmap_add_page") +PERFCOUNTER_CPU(guest_physmap_remove_page, "guest_physmap_remove_page") +PERFCOUNTER_CPU(domain_page_flush, "domain_page_flush") + +// dom0vp +PERFCOUNTER_CPU(dom0vp_phystomach, "dom0vp_phystomach") +PERFCOUNTER_CPU(dom0vp_machtophys, "dom0vp_machtophys") --- xen/include/asm-ia64/uaccess.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/uaccess.h Wed Oct 11 16:10:40 2006 -0400 @@ -211,30 +211,16 @@ extern 
unsigned long __must_check __copy extern unsigned long __must_check __copy_user (void __user *to, const void __user *from, unsigned long count); -extern int ia64_map_hypercall_param(void); - static inline unsigned long __copy_to_user (void __user *to, const void *from, unsigned long count) { - unsigned long len; - len = __copy_user(to, (void __user *)from, count); - if (len == 0) - return 0; - if (ia64_map_hypercall_param()) - len = __copy_user(to, (void __user *)from, count); /* retry */ - return len; + return __copy_user(to, (void __user *)from, count); } static inline unsigned long __copy_from_user (void *to, const void __user *from, unsigned long count) { - unsigned long len; - len = __copy_user((void __user *)to, from, count); - if (len == 0) - return 0; - if (ia64_map_hypercall_param()) - len = __copy_user((void __user *) to, from, count); /* retry */ - return len; + return __copy_user((void __user *)to, from, count); } #define __copy_to_user_inatomic __copy_to_user --- xen/include/asm-ia64/vhpt.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/vhpt.h Wed Oct 11 16:10:40 2006 -0400 @@ -37,11 +37,46 @@ extern void vhpt_multiple_insert(unsigne unsigned long logps); extern void vhpt_insert (unsigned long vadr, unsigned long pte, unsigned long logps); -void vhpt_flush(void); +void local_vhpt_flush(void); /* Currently the VHPT is allocated per CPU. */ DECLARE_PER_CPU (unsigned long, vhpt_paddr); DECLARE_PER_CPU (unsigned long, vhpt_pend); +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT +#if !VHPT_ENABLED +#error "VHPT_ENABLED must be set for CONFIG_XEN_IA64_PERVCPU_VHPT" +#endif +#endif + +#include <xen/sched.h> +int pervcpu_vhpt_alloc(struct vcpu *v); +void pervcpu_vhpt_free(struct vcpu *v); +static inline unsigned long +vcpu_vhpt_maddr(struct vcpu* v) +{ +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT + if (HAS_PERVCPU_VHPT(v->domain)) + return v->arch.vhpt_maddr; +#endif + +#if 0 + // referencecing v->processor is racy. 
+ return per_cpu(vhpt_paddr, v->processor); +#endif + BUG_ON(v != current); + return __get_cpu_var(vhpt_paddr); +} + +static inline unsigned long +vcpu_pta(struct vcpu* v) +{ +#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT + if (HAS_PERVCPU_VHPT(v->domain)) + return v->arch.pta.val; +#endif + return VHPT_ADDR | (1 << 8) | (VHPT_SIZE_LOG2 << 2) | VHPT_ENABLED; +} + #endif /* !__ASSEMBLY */ #endif --- xen/include/asm-ia64/vmx.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/vmx.h Wed Oct 11 16:10:40 2006 -0400 @@ -35,6 +35,7 @@ extern void vmx_save_state(struct vcpu * extern void vmx_save_state(struct vcpu *v); extern void vmx_load_state(struct vcpu *v); extern void vmx_setup_platform(struct domain *d); +extern void vmx_do_launch(struct vcpu *v); extern void vmx_io_assist(struct vcpu *v); extern int ia64_hypercall (struct pt_regs *regs); extern void vmx_save_state(struct vcpu *v); --- xen/include/asm-ia64/vmx_vcpu.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/vmx_vcpu.h Wed Oct 11 16:10:40 2006 -0400 @@ -114,6 +114,7 @@ extern void memwrite_p(VCPU *vcpu, u64 * extern void memwrite_p(VCPU *vcpu, u64 *src, u64 *dest, size_t s); extern void vcpu_load_kernel_regs(VCPU *vcpu); extern IA64FAULT vmx_vcpu_increment_iip(VCPU *vcpu); +extern IA64FAULT vmx_vcpu_decrement_iip(VCPU *vcpu); extern void vmx_switch_rr7(unsigned long ,shared_info_t*,void *,void *,void *); extern void dtlb_fault (VCPU *vcpu, u64 vadr); @@ -121,7 +122,8 @@ extern void alt_dtlb (VCPU *vcpu, u64 va extern void alt_dtlb (VCPU *vcpu, u64 vadr); extern void dvhpt_fault (VCPU *vcpu, u64 vadr); extern void dnat_page_consumption (VCPU *vcpu, uint64_t vadr); -extern void page_not_present(VCPU *vcpu, u64 vadr); +extern void data_page_not_present(VCPU *vcpu, u64 vadr); +extern void inst_page_not_present(VCPU *vcpu, u64 vadr); extern void data_access_rights(VCPU *vcpu, u64 vadr); /************************************************************************** --- xen/include/asm-ia64/xenkregs.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/asm-ia64/xenkregs.h Wed Oct 11 16:10:40 2006 -0400 @@ -7,8 +7,7 @@ #define IA64_TR_SHARED_INFO 3 /* dtr3: page shared with domain */ #define IA64_TR_VHPT 4 /* dtr4: vhpt */ #define IA64_TR_MAPPED_REGS 5 /* dtr5: vcpu mapped regs */ -#define IA64_TR_PERVP_VHPT 6 -#define IA64_DTR_GUEST_KERNEL 7 +#define IA64_DTR_GUEST_KERNEL 6 #define IA64_ITR_GUEST_KERNEL 2 /* Processor status register bits: */ #define IA64_PSR_VM_BIT 46 --- xen/include/public/arch-ia64.h Tue Oct 10 21:05:50 2006 +0100 +++ xen/include/public/arch-ia64.h Wed Oct 11 16:10:40 2006 -0400 @@ -47,18 +47,6 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); #ifndef __ASSEMBLY__ typedef unsigned long xen_ulong_t; - -#define GPFN_MEM (0UL << 56) /* Guest pfn is normal mem */ -#define GPFN_FRAME_BUFFER (1UL << 56) /* VGA framebuffer */ -#define GPFN_LOW_MMIO (2UL << 56) /* Low MMIO range */ -#define GPFN_PIB (3UL << 56) /* PIB base */ -#define GPFN_IOSAPIC (4UL << 56) /* IOSAPIC base */ -#define GPFN_LEGACY_IO (5UL << 56) /* Legacy I/O base */ -#define GPFN_GFW (6UL << 56) /* Guest Firmware */ -#define GPFN_HIGH_MMIO (7UL << 56) /* High MMIO range */ - -#define GPFN_IO_MASK (7UL << 56) /* Guest pfn is I/O type */ -#define GPFN_INV_MASK (31UL << 59) /* Guest pfn is invalid */ #define INVALID_MFN (~0UL) @@ -336,33 +324,33 @@ typedef struct vcpu_guest_context vcpu_g typedef struct vcpu_guest_context vcpu_guest_context_t; DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); -// dom0 vp op +/* dom0 vp op */ #define __HYPERVISOR_ia64_dom0vp_op __HYPERVISOR_arch_0 
-#define IA64_DOM0VP_ioremap 0 // map io space in machine - // address to dom0 physical - // address space. - // currently physical - // assignedg address equals to - // machine address -#define IA64_DOM0VP_phystomach 1 // convert a pseudo physical - // page frame number - // to the corresponding - // machine page frame number. - // if no page is assigned, - // INVALID_MFN or GPFN_INV_MASK - // is returned depending on - // domain's non-vti/vti mode. -#define IA64_DOM0VP_machtophys 3 // convert a machine page - // frame number - // to the corresponding - // pseudo physical page frame - // number of the caller domain -#define IA64_DOM0VP_zap_physmap 17 // unmap and free pages - // contained in the specified - // pseudo physical region -#define IA64_DOM0VP_add_physmap 18 // assigne machine page frane - // to dom0's pseudo physical - // address space. +/* Map io space in machine address to dom0 physical address space. + Currently physical assigned address equals to machine address. */ +#define IA64_DOM0VP_ioremap 0 + +/* Convert a pseudo physical page frame number to the corresponding + machine page frame number. If no page is assigned, INVALID_MFN or + GPFN_INV_MASK is returned depending on domain's non-vti/vti mode. */ +#define IA64_DOM0VP_phystomach 1 + +/* Convert a machine page frame number to the corresponding pseudo physical + page frame number of the caller domain. */ +#define IA64_DOM0VP_machtophys 3 + +/* Reserved for future use. */ +#define IA64_DOM0VP_iounmap 4 + +/* Unmap and free pages contained in the specified pseudo physical region. */ +#define IA64_DOM0VP_zap_physmap 5 + +/* Assign machine page frame to dom0's pseudo physical address space. */ +#define IA64_DOM0VP_add_physmap 6 + +/* expose the p2m table into domain */ +#define IA64_DOM0VP_expose_p2m 7 + // flags for page assignement to pseudo physical address space #define _ASSIGN_readonly 0 #define ASSIGN_readonly (1UL << _ASSIGN_readonly) @@ -395,15 +383,12 @@ struct xen_ia64_boot_param { #endif /* !__ASSEMBLY__ */ -/* Address of shared_info in domain virtual space. - This is the default address, for compatibility only. */ -#define XSI_BASE 0xf100000000000000 - /* Size of the shared_info area (this is not related to page size). */ #define XSI_SHIFT 14 #define XSI_SIZE (1 << XSI_SHIFT) /* Log size of mapped_regs area (64 KB - only 4KB is used). */ #define XMAPPEDREGS_SHIFT 12 +#define XMAPPEDREGS_SIZE (1 << XMAPPEDREGS_SHIFT) /* Offset of XASI (Xen arch shared info) wrt XSI_BASE. */ #define XMAPPEDREGS_OFS XSI_SIZE @@ -435,6 +420,17 @@ struct xen_ia64_boot_param { #define HYPERPRIVOP_GET_PSR 0x19 #define HYPERPRIVOP_MAX 0x19 +/* Fast and light hypercalls. */ +#define __HYPERVISOR_ia64_fast_eoi 0x0200 + +/* Xencomm macros. */ +#define XENCOMM_INLINE_MASK 0xf800000000000000UL +#define XENCOMM_INLINE_FLAG 0x8000000000000000UL + +#define XENCOMM_IS_INLINE(addr) \ + (((unsigned long)(addr) & XENCOMM_INLINE_MASK) == XENCOMM_INLINE_FLAG) +#define XENCOMM_INLINE_ADDR(addr) \ + ((unsigned long)(addr) & ~XENCOMM_INLINE_MASK) #endif /* __HYPERVISOR_IF_IA64_H__ */ /* --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ xen/arch/ia64/tools/p2m_expose/Makefile Wed Oct 11 16:10:40 2006 -0400 @@ -0,0 +1,28 @@ +ifneq ($(KERNELRELEASE),) +obj-m += expose_p2m.o +else +PWD := $(shell pwd) +TOPDIR ?= $(abspath $(PWD)/../../../../..) 
+KDIR ?= $(TOPDIR)/linux-$(shell awk '/^LINUX_VER\>/{print $$3}' $(TOPDIR)/buildconfigs/mk.linux-2.6-xen)-xen +#CROSS_COMPILE ?= ia64-unknown-linux- +#ARCH ?= ia64 + +ifneq ($(O),) +OPT_O := O=$(realpath $(O)) +endif + +ifneq ($(V),) +OPT_V := V=$(V) +endif + +ifneq ($(ARCH),) +OPT_ARCH := ARCH=$(ARCH) +endif + +ifneq ($(CROSS_COMPILE),) +OPT_CORSS_COMPILE := CROSS_COMPILE=$(CROSS_COMPILE) +endif + +default: + $(MAKE) -C $(KDIR) $(OPT_O) $(OPT_V) $(OPT_CORSS_COMPILE) $(OPT_ARCH) M=$(PWD) +endif --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ xen/arch/ia64/tools/p2m_expose/README.p2m_expose Wed Oct 11 16:10:40 2006 -0400 @@ -0,0 +1,12 @@ +This directory contains Linux kernel module for p2m exposure test/benchmark. + +1. build kernel module + - At fist build, linux-xen as usual + - then type just 'make' in this directory, then you'll have expose_p2m.ko. + See Makefile for details. + +2. test, benchmark. + - type 'insmod expose_p2m.ko' on the system. + Then the result is printed out to your console. + insmod fails with EINVAL so that you don't have to execute rmmod. + --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ xen/arch/ia64/tools/p2m_expose/expose_p2m.c Wed Oct 11 16:10:40 2006 -0400 @@ -0,0 +1,185 @@ +/****************************************************************************** + * arch/ia64/xen/expose_p2m.c + * + * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp> + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/hypercall.h> +#include <asm/hypervisor.h> + +#define printd(fmt, ...) printk("%s:%d " fmt, __func__, __LINE__, \ + ##__VA_ARGS__) + +// copied from arch/ia64/mm/tlb.c. it isn't exported. 
+void +local_flush_tlb_all (void) +{ + unsigned long i, j, flags, count0, count1, stride0, stride1, addr; + + addr = local_cpu_data->ptce_base; + count0 = local_cpu_data->ptce_count[0]; + count1 = local_cpu_data->ptce_count[1]; + stride0 = local_cpu_data->ptce_stride[0]; + stride1 = local_cpu_data->ptce_stride[1]; + + local_irq_save(flags); + for (i = 0; i < count0; ++i) { + for (j = 0; j < count1; ++j) { + ia64_ptce(addr); + addr += stride1; + } + addr += stride0; + } + local_irq_restore(flags); + ia64_srlz_i(); /* srlz.i implies srlz.d */ +} + +static void +do_p2m(unsigned long (*conv)(unsigned long), + const char* msg, const char* prefix, + unsigned long start_gpfn, unsigned end_gpfn, unsigned long stride) +{ + struct timeval before_tv; + struct timeval after_tv; + unsigned long gpfn; + unsigned long mfn; + unsigned long count; + nsec_t nsec; + + count = 0; + do_gettimeofday(&before_tv); + for (gpfn = start_gpfn; gpfn < end_gpfn; gpfn += stride) { + mfn = (*conv)(gpfn); + count++; + } + do_gettimeofday(&after_tv); + nsec = timeval_to_ns(&after_tv) - timeval_to_ns(&before_tv); + printk("%s stride %4ld %s: %9ld / %6ld = %5ld nsec\n", + msg, stride, prefix, + nsec, count, nsec/count); +} + + +static void +do_with_hypercall(const char* msg, + unsigned long start_gpfn, unsigned long end_gpfn, + unsigned long stride) +{ + do_p2m(&HYPERVISOR_phystomach, msg, "hypercall", + start_gpfn, end_gpfn, stride); +} + +static void +do_with_table(const char* msg, + unsigned long start_gpfn, unsigned long end_gpfn, + unsigned long stride) +{ + do_p2m(&p2m_phystomach, msg, "p2m table", + start_gpfn, end_gpfn, stride); +} + +static int __init +expose_p2m_init(void) +{ + unsigned long gpfn; + unsigned long mfn; + unsigned long p2m_mfn; + + int error_count = 0; + + const int strides[] = { + PTRS_PER_PTE, PTRS_PER_PTE/2, PTRS_PER_PTE/3, PTRS_PER_PTE/4, + L1_CACHE_BYTES/sizeof(pte_t), 1 + }; + int i; + + +#if 0 + printd("about to call p2m_expose_init()\n"); + if (p2m_expose_init() < 0) { + printd("p2m_expose_init() failed\n"); + return -EINVAL; + } + printd("p2m_expose_init() success\n"); +#else + if (!p2m_initialized) { + printd("p2m exposure isn't initialized\n"); + return -EINVAL; + } +#endif + + printd("p2m expose test begins\n"); + for (gpfn = p2m_min_low_pfn; gpfn < p2m_max_low_pfn; gpfn++) { + mfn = HYPERVISOR_phystomach(gpfn); + p2m_mfn = p2m_phystomach(gpfn); + if (mfn != p2m_mfn) { + printd("gpfn 0x%016lx " + "mfn 0x%016lx p2m_mfn 0x%016lx\n", + gpfn, mfn, p2m_mfn); + printd("mpaddr 0x%016lx " + "maddr 0x%016lx p2m_maddr 0x%016lx\n", + gpfn << PAGE_SHIFT, + mfn << PAGE_SHIFT, p2m_mfn << PAGE_SHIFT); + + error_count++; + if (error_count > 16) { + printk("too many errors\n"); + return -EINVAL; + } + } + } + printd("p2m expose test done!\n"); + + printk("type " + "stride " + "type : " + " nsec / count = " + "nsec per conv\n"); + for (i = 0; i < sizeof(strides)/sizeof(strides[0]); i++) { + int stride = strides[i]; + local_flush_tlb_all(); + do_with_hypercall("cold tlb", + p2m_min_low_pfn, p2m_max_low_pfn, stride); + do_with_hypercall("warm tlb", + p2m_min_low_pfn, p2m_max_low_pfn, stride); + + local_flush_tlb_all(); + do_with_table("cold tlb", + p2m_min_low_pfn, p2m_max_low_pfn, stride); + do_with_table("warm tlb", + p2m_min_low_pfn, p2m_max_low_pfn, stride); + } + + return -EINVAL; +} + +static void __exit +expose_p2m_cleanup(void) +{ +} + +module_init(expose_p2m_init); +module_exit(expose_p2m_cleanup); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Isaku Yamahata <yamahata@xxxxxxxxxxxxx>"); --- /dev/null Thu 
Jan 01 00:00:00 1970 +0000 +++ xen/arch/ia64/vmx/optvfault.S Wed Oct 11 16:10:40 2006 -0400 @@ -0,0 +1,518 @@ +/* + * arch/ia64/vmx/optvfault.S + * optimize virtualization fault handler + * + * Copyright (C) 2006 Intel Co + * Xuefei Xu (Anthony Xu) <anthony.xu@xxxxxxxxx> + */ + +#include <linux/config.h> +#include <asm/asmmacro.h> +#include <asm/kregs.h> +#include <asm/offsets.h> +#include <asm/percpu.h> +#include <asm/processor.h> +#include <asm/vmx_vpd.h> +#include <asm/vmx_pal_vsa.h> +#include <asm/asm-offsets.h> + +#define ACCE_MOV_FROM_AR +#define ACCE_MOV_FROM_RR + +//mov r1=ar3 +GLOBAL_ENTRY(asm_mov_from_ar) +#ifndef ACCE_MOV_FROM_AR + br.many vmx_vitualization_fault_back +#endif + add r18=VCPU_VTM_OFFSET_OFS,r21 + mov r19=ar.itc + extr.u r17=r25,6,7 + ;; + ld8 r18=[r18] + movl r20=asm_mov_to_reg + ;; + adds r30=vmx_resume_to_guest-asm_mov_to_reg,r20 + shladd r17=r17,4,r20 + mov r24=b0 + ;; + add r19=r19,r18 + mov b0=r17 + br.sptk.few b0 + ;; +END(asm_mov_from_ar) + + +// mov r1=rr[r3] +GLOBAL_ENTRY(asm_mov_from_rr) +#ifndef ACCE_MOV_FROM_RR + br.many vmx_vitualization_fault_back +#endif + extr.u r16=r25,20,7 + extr.u r17=r25,6,7 + movl r20=asm_mov_from_reg + ;; + adds r30=asm_mov_from_rr_back_1-asm_mov_from_reg,r20 + shladd r16=r16,4,r20 + mov r24=b0 + ;; + add r27=VCPU_VRR0_OFS,r21 + mov b0=r16 + br.many b0 + ;; +asm_mov_from_rr_back_1: + adds r30=vmx_resume_to_guest-asm_mov_from_reg,r20 + adds r22=asm_mov_to_reg-asm_mov_from_reg,r20 + shr.u r26=r19,61 + ;; + shladd r17=r17,4,r22 + shladd r27=r26,3,r27 + ;; + ld8 r19=[r27] + mov b0=r17 + br.many b0 +END(asm_mov_from_rr) + + +#define MOV_TO_REG0 \ +{; \ + nop.b 0x0; \ + nop.b 0x0; \ + nop.b 0x0; \ + ;; \ +}; + + +#define MOV_TO_REG(n) \ +{; \ + mov r##n##=r19; \ + mov b0=r30; \ + br.sptk.many b0; \ + ;; \ +}; + + +#define MOV_FROM_REG(n) \ +{; \ + mov r19=r##n##; \ + mov b0=r30; \ + br.sptk.many b0; \ + ;; \ +}; + + +#define MOV_TO_BANK0_REG(n) \ +ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##); \ +{; \ + mov r26=r2; \ + mov r2=r19; \ + bsw.1; \ + ;; \ +}; \ +{; \ + mov r##n##=r2; \ + nop.b 0x0; \ + bsw.0; \ + ;; \ +}; \ +{; \ + mov r2=r26; \ + mov b0=r30; \ + br.sptk.many b0; \ + ;; \ +}; \ +END(asm_mov_to_bank0_reg##n##) + + +#define MOV_FROM_BANK0_REG(n) \ +ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##); \ +{; \ + mov r26=r2; \ + nop.b 0x0; \ + bsw.1; \ + ;; \ +}; \ +{; \ + mov r2=r##n##; \ + nop.b 0x0; \ + bsw.0; \ + ;; \ +}; \ +{; \ + mov r19=r2; \ + mov r2=r26; \ + mov b0=r30; \ +}; \ +{; \ + nop.b 0x0; \ + nop.b 0x0; \ + br.sptk.many b0; \ + ;; \ +}; \ +END(asm_mov_from_bank0_reg##n##) + + +#define JMP_TO_MOV_TO_BANK0_REG(n) \ +{; \ + nop.b 0x0; \ + nop.b 0x0; \ + br.sptk.many asm_mov_to_bank0_reg##n##; \ + ;; \ +} + + +#define JMP_TO_MOV_FROM_BANK0_REG(n) \ +{; \ + nop.b 0x0; \ + nop.b 0x0; \ + br.sptk.many asm_mov_from_bank0_reg##n##; \ + ;; \ +} + + +MOV_FROM_BANK0_REG(16) +MOV_FROM_BANK0_REG(17) +MOV_FROM_BANK0_REG(18) +MOV_FROM_BANK0_REG(19) +MOV_FROM_BANK0_REG(20) +MOV_FROM_BANK0_REG(21) +MOV_FROM_BANK0_REG(22) +MOV_FROM_BANK0_REG(23) +MOV_FROM_BANK0_REG(24) +MOV_FROM_BANK0_REG(25) +MOV_FROM_BANK0_REG(26) +MOV_FROM_BANK0_REG(27) +MOV_FROM_BANK0_REG(28) +MOV_FROM_BANK0_REG(29) +MOV_FROM_BANK0_REG(30) +MOV_FROM_BANK0_REG(31) + + +// mov from reg table +ENTRY(asm_mov_from_reg) + MOV_FROM_REG(0) + MOV_FROM_REG(1) + MOV_FROM_REG(2) + MOV_FROM_REG(3) + MOV_FROM_REG(4) + MOV_FROM_REG(5) + MOV_FROM_REG(6) + MOV_FROM_REG(7) + MOV_FROM_REG(8) + MOV_FROM_REG(9) + MOV_FROM_REG(10) + MOV_FROM_REG(11) + MOV_FROM_REG(12) + MOV_FROM_REG(13) 
+ MOV_FROM_REG(14) + MOV_FROM_REG(15) + JMP_TO_MOV_FROM_BANK0_REG(16) + JMP_TO_MOV_FROM_BANK0_REG(17) + JMP_TO_MOV_FROM_BANK0_REG(18) + JMP_TO_MOV_FROM_BANK0_REG(19) + JMP_TO_MOV_FROM_BANK0_REG(20) + JMP_TO_MOV_FROM_BANK0_REG(21) + JMP_TO_MOV_FROM_BANK0_REG(22) + JMP_TO_MOV_FROM_BANK0_REG(23) + JMP_TO_MOV_FROM_BANK0_REG(24) + JMP_TO_MOV_FROM_BANK0_REG(25) + JMP_TO_MOV_FROM_BANK0_REG(26) + JMP_TO_MOV_FROM_BANK0_REG(27) + JMP_TO_MOV_FROM_BANK0_REG(28) + JMP_TO_MOV_FROM_BANK0_REG(29) + JMP_TO_MOV_FROM_BANK0_REG(30) + JMP_TO_MOV_FROM_BANK0_REG(31) + MOV_FROM_REG(32) + MOV_FROM_REG(33) + MOV_FROM_REG(34) + MOV_FROM_REG(35) + MOV_FROM_REG(36) + MOV_FROM_REG(37) + MOV_FROM_REG(38) + MOV_FROM_REG(39) + MOV_FROM_REG(40) + MOV_FROM_REG(41) + MOV_FROM_REG(42) + MOV_FROM_REG(43) + MOV_FROM_REG(44) + MOV_FROM_REG(45) + MOV_FROM_REG(46) + MOV_FROM_REG(47) + MOV_FROM_REG(48) + MOV_FROM_REG(49) + MOV_FROM_REG(50) + MOV_FROM_REG(51) + MOV_FROM_REG(52) + MOV_FROM_REG(53) + MOV_FROM_REG(54) + MOV_FROM_REG(55) + MOV_FROM_REG(56) + MOV_FROM_REG(57) + MOV_FROM_REG(58) + MOV_FROM_REG(59) + MOV_FROM_REG(60) + MOV_FROM_REG(61) + MOV_FROM_REG(62) + MOV_FROM_REG(63) + MOV_FROM_REG(64) + MOV_FROM_REG(65) + MOV_FROM_REG(66) + MOV_FROM_REG(67) + MOV_FROM_REG(68) + MOV_FROM_REG(69) + MOV_FROM_REG(70) + MOV_FROM_REG(71) + MOV_FROM_REG(72) + MOV_FROM_REG(73) + MOV_FROM_REG(74) + MOV_FROM_REG(75) + MOV_FROM_REG(76) + MOV_FROM_REG(77) + MOV_FROM_REG(78) + MOV_FROM_REG(79) + MOV_FROM_REG(80) + MOV_FROM_REG(81) + MOV_FROM_REG(82) + MOV_FROM_REG(83) + MOV_FROM_REG(84) + MOV_FROM_REG(85) + MOV_FROM_REG(86) + MOV_FROM_REG(87) + MOV_FROM_REG(88) + MOV_FROM_REG(89) + MOV_FROM_REG(90) + MOV_FROM_REG(91) + MOV_FROM_REG(92) + MOV_FROM_REG(93) + MOV_FROM_REG(94) + MOV_FROM_REG(95) + MOV_FROM_REG(96) + MOV_FROM_REG(97) + MOV_FROM_REG(98) + MOV_FROM_REG(99) + MOV_FROM_REG(100) + MOV_FROM_REG(101) + MOV_FROM_REG(102) + MOV_FROM_REG(103) + MOV_FROM_REG(104) + MOV_FROM_REG(105) + MOV_FROM_REG(106) + MOV_FROM_REG(107) + MOV_FROM_REG(108) + MOV_FROM_REG(109) + MOV_FROM_REG(110) + MOV_FROM_REG(111) + MOV_FROM_REG(112) + MOV_FROM_REG(113) + MOV_FROM_REG(114) + MOV_FROM_REG(115) + MOV_FROM_REG(116) + MOV_FROM_REG(117) + MOV_FROM_REG(118) + MOV_FROM_REG(119) + MOV_FROM_REG(120) + MOV_FROM_REG(121) + MOV_FROM_REG(122) + MOV_FROM_REG(123) + MOV_FROM_REG(124) + MOV_FROM_REG(125) + MOV_FROM_REG(126) + MOV_FROM_REG(127) +END(asm_mov_from_reg) + + +/* must be in bank 0 + * parameter: + * r31: pr + * r24: b0 + */ +ENTRY(vmx_resume_to_guest) + mov r16=cr.ipsr + movl r20=__vsa_base + ;; + ld8 r20=[r20] + adds r19=IA64_VPD_BASE_OFFSET,r21 + ;; + ld8 r25=[r19] + extr.u r17=r16,IA64_PSR_RI_BIT,2 + tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1 + ;; + (p6) mov r18=cr.iip + (p6) mov r17=r0 + ;; + (p6) add r18=0x10,r18 + (p7) add r17=1,r17 + ;; + (p6) mov cr.iip=r18 + dep r16=r17,r16,IA64_PSR_RI_BIT,2 + ;; + mov cr.ipsr=r16 + mov r17=cr.isr + adds r19= VPD_VPSR_START_OFFSET,r25 + ld8 r26=[r25] + add r29=PAL_VPS_RESUME_NORMAL,r20 + add r28=PAL_VPS_RESUME_HANDLER,r20 + ;; + ld8 r19=[r19] + mov b0=r29 + cmp.ne p6,p7 = r0,r0 + ;; + tbit.nz.or.andcm p6,p7 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic + tbit.nz.or.andcm p6,p7 = r17,IA64_ISR_IR_BIT //p1=cr.isr.ir + ;; + (p6) mov b0=r29 + (p7) mov b0=r28 + mov pr=r31,-2 + br.sptk.many b0 // call pal service + ;; +END(vmx_resume_to_guest) + + +MOV_TO_BANK0_REG(16) +MOV_TO_BANK0_REG(17) +MOV_TO_BANK0_REG(18) +MOV_TO_BANK0_REG(19) +MOV_TO_BANK0_REG(20) +MOV_TO_BANK0_REG(21) +MOV_TO_BANK0_REG(22) +MOV_TO_BANK0_REG(23) +MOV_TO_BANK0_REG(24) 
+MOV_TO_BANK0_REG(25) +MOV_TO_BANK0_REG(26) +MOV_TO_BANK0_REG(27) +MOV_TO_BANK0_REG(28) +MOV_TO_BANK0_REG(29) +MOV_TO_BANK0_REG(30) +MOV_TO_BANK0_REG(31) + + +// mov to reg table +ENTRY(asm_mov_to_reg) + MOV_TO_REG0 + MOV_TO_REG(1) + MOV_TO_REG(2) + MOV_TO_REG(3) + MOV_TO_REG(4) + MOV_TO_REG(5) + MOV_TO_REG(6) + MOV_TO_REG(7) + MOV_TO_REG(8) + MOV_TO_REG(9) + MOV_TO_REG(10) + MOV_TO_REG(11) + MOV_TO_REG(12) + MOV_TO_REG(13) + MOV_TO_REG(14) + MOV_TO_REG(15) + JMP_TO_MOV_TO_BANK0_REG(16) + JMP_TO_MOV_TO_BANK0_REG(17) + JMP_TO_MOV_TO_BANK0_REG(18) + JMP_TO_MOV_TO_BANK0_REG(19) + JMP_TO_MOV_TO_BANK0_REG(20) + JMP_TO_MOV_TO_BANK0_REG(21) + JMP_TO_MOV_TO_BANK0_REG(22) + JMP_TO_MOV_TO_BANK0_REG(23) + JMP_TO_MOV_TO_BANK0_REG(24) + JMP_TO_MOV_TO_BANK0_REG(25) + JMP_TO_MOV_TO_BANK0_REG(26) + JMP_TO_MOV_TO_BANK0_REG(27) + JMP_TO_MOV_TO_BANK0_REG(28) + JMP_TO_MOV_TO_BANK0_REG(29) + JMP_TO_MOV_TO_BANK0_REG(30) + JMP_TO_MOV_TO_BANK0_REG(31) + MOV_TO_REG(32) + MOV_TO_REG(33) + MOV_TO_REG(34) + MOV_TO_REG(35) + MOV_TO_REG(36) + MOV_TO_REG(37) + MOV_TO_REG(38) + MOV_TO_REG(39) + MOV_TO_REG(40) + MOV_TO_REG(41) + MOV_TO_REG(42) + MOV_TO_REG(43) + MOV_TO_REG(44) + MOV_TO_REG(45) + MOV_TO_REG(46) + MOV_TO_REG(47) + MOV_TO_REG(48) + MOV_TO_REG(49) + MOV_TO_REG(50) + MOV_TO_REG(51) + MOV_TO_REG(52) + MOV_TO_REG(53) + MOV_TO_REG(54) + MOV_TO_REG(55) + MOV_TO_REG(56) + MOV_TO_REG(57) + MOV_TO_REG(58) + MOV_TO_REG(59) + MOV_TO_REG(60) + MOV_TO_REG(61) + MOV_TO_REG(62) + MOV_TO_REG(63) + MOV_TO_REG(64) + MOV_TO_REG(65) + MOV_TO_REG(66) + MOV_TO_REG(67) + MOV_TO_REG(68) + MOV_TO_REG(69) + MOV_TO_REG(70) + MOV_TO_REG(71) + MOV_TO_REG(72) + MOV_TO_REG(73) + MOV_TO_REG(74) + MOV_TO_REG(75) + MOV_TO_REG(76) + MOV_TO_REG(77) + MOV_TO_REG(78) + MOV_TO_REG(79) + MOV_TO_REG(80) + MOV_TO_REG(81) + MOV_TO_REG(82) + MOV_TO_REG(83) + MOV_TO_REG(84) + MOV_TO_REG(85) + MOV_TO_REG(86) + MOV_TO_REG(87) + MOV_TO_REG(88) + MOV_TO_REG(89) + MOV_TO_REG(90) + MOV_TO_REG(91) + MOV_TO_REG(92) + MOV_TO_REG(93) + MOV_TO_REG(94) + MOV_TO_REG(95) + MOV_TO_REG(96) + MOV_TO_REG(97) + MOV_TO_REG(98) + MOV_TO_REG(99) + MOV_TO_REG(100) + MOV_TO_REG(101) + MOV_TO_REG(102) + MOV_TO_REG(103) + MOV_TO_REG(104) + MOV_TO_REG(105) + MOV_TO_REG(106) + MOV_TO_REG(107) + MOV_TO_REG(108) + MOV_TO_REG(109) + MOV_TO_REG(110) + MOV_TO_REG(111) + MOV_TO_REG(112) + MOV_TO_REG(113) + MOV_TO_REG(114) + MOV_TO_REG(115) + MOV_TO_REG(116) + MOV_TO_REG(117) + MOV_TO_REG(118) + MOV_TO_REG(119) + MOV_TO_REG(120) + MOV_TO_REG(121) + MOV_TO_REG(122) + MOV_TO_REG(123) + MOV_TO_REG(124) + MOV_TO_REG(125) + MOV_TO_REG(126) + MOV_TO_REG(127) +END(asm_mov_to_reg) --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ xen/arch/ia64/xen/xencomm.c Wed Oct 11 16:10:40 2006 -0400 @@ -0,0 +1,380 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) IBM Corp. 
2006 + * + * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx> + * Tristan Gingold <tristan.gingold@xxxxxxxx> + */ + +#include <xen/config.h> +#include <xen/mm.h> +#include <xen/sched.h> +#include <asm/current.h> +#include <asm/guest_access.h> +#include <public/xen.h> +#include <public/xencomm.h> +#include <xen/errno.h> + +#undef DEBUG +#ifdef DEBUG +static int xencomm_debug = 1; /* extremely verbose */ +#else +#define xencomm_debug 0 +#endif + +static int +xencomm_copy_chunk_from( + unsigned long to, + unsigned long paddr, + unsigned int len) +{ + unsigned long maddr; + struct page_info *page; + + while (1) { + maddr = xencomm_paddr_to_maddr(paddr); + if (xencomm_debug > 1) + printk("%lx[%d] -> %lx\n", maddr, len, to); + if (maddr == 0) + return -EFAULT; + + page = virt_to_page(maddr); + if (get_page(page, current->domain) == 0) { + if (page_get_owner(page) != current->domain) { + /* This page might be a page granted by another domain */ + panic_domain(NULL, "copy_from_guest from foreign domain\n"); + } + /* Try again. */ + continue; + } + memcpy((void *)to, (void *)maddr, len); + put_page(page); + return 0; + } +} + +/** + * xencomm_copy_from_guest: Copy a block of data from domain space. + * @to: Machine address. + * @from: Physical address to a xencomm buffer descriptor. + * @n: Number of bytes to copy. + * @skip: Number of bytes from the start to skip. + * + * Copy data from domain to hypervisor. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + */ +unsigned long +xencomm_copy_from_guest( + void *to, + const void *from, + unsigned int n, + unsigned int skip) +{ + struct xencomm_desc *desc; + unsigned long desc_addr; + unsigned int from_pos = 0; + unsigned int to_pos = 0; + unsigned int i = 0; + + if (xencomm_debug) + printf("xencomm_copy_from_guest: from=%lx+%u n=%u\n", + (unsigned long)from, skip, n); + + if (XENCOMM_IS_INLINE(from)) { + unsigned long src_paddr = XENCOMM_INLINE_ADDR(from); + + src_paddr += skip; + + while (n > 0) { + unsigned int chunksz; + unsigned int bytes; + int res; + + chunksz = PAGE_SIZE - (src_paddr % PAGE_SIZE); + + bytes = min(chunksz, n); + + res = xencomm_copy_chunk_from((unsigned long)to, src_paddr, bytes); + if (res != 0) + return -EFAULT; + src_paddr += bytes; + to += bytes; + n -= bytes; + } + + /* Always successful. 
*/ + return 0; + } + + /* first we need to access the descriptor */ + desc_addr = xencomm_paddr_to_maddr((unsigned long)from); + if (desc_addr == 0) + return -EFAULT; + + desc = (struct xencomm_desc *)desc_addr; + if (desc->magic != XENCOMM_MAGIC) { + printk("%s: error: %p magic was 0x%x\n", + __func__, desc, desc->magic); + return -EFAULT; + } + + /* iterate through the descriptor, copying up to a page at a time */ + while ((to_pos < n) && (i < desc->nr_addrs)) { + unsigned long src_paddr = desc->address[i]; + unsigned int pgoffset; + unsigned int chunksz; + unsigned int chunk_skip; + + if (src_paddr == XENCOMM_INVALID) { + i++; + continue; + } + + pgoffset = src_paddr % PAGE_SIZE; + chunksz = PAGE_SIZE - pgoffset; + + chunk_skip = min(chunksz, skip); + from_pos += chunk_skip; + chunksz -= chunk_skip; + skip -= chunk_skip; + + if (skip == 0) { + unsigned int bytes = min(chunksz, n - to_pos); + int res; + + if (xencomm_debug > 1) + printf ("src_paddr=%lx i=%d, skip=%d\n", + src_paddr, i, chunk_skip); + + res = xencomm_copy_chunk_from((unsigned long)to + to_pos, + src_paddr + chunk_skip, bytes); + if (res != 0) + return -EFAULT; + + from_pos += bytes; + to_pos += bytes; + } + + i++; + } + + return n - to_pos; +} + +static int +xencomm_copy_chunk_to( + unsigned long paddr, + unsigned long from, + unsigned int len) +{ + unsigned long maddr; + struct page_info *page; + + while (1) { + maddr = xencomm_paddr_to_maddr(paddr); + if (xencomm_debug > 1) + printk("%lx[%d] -> %lx\n", from, len, maddr); + if (maddr == 0) + return -EFAULT; + + page = virt_to_page(maddr); + if (get_page(page, current->domain) == 0) { + if (page_get_owner(page) != current->domain) { + /* This page might be a page granted by another domain */ + panic_domain(NULL, "copy_to_guest to foreign domain\n"); + } + /* Try again. */ + continue; + } + memcpy((void *)maddr, (void *)from, len); + put_page(page); + return 0; + } +} + +/** + * xencomm_copy_to_guest: Copy a block of data to domain space. + * @to: Physical address to xencomm buffer descriptor. + * @from: Machine address. + * @n: Number of bytes to copy. + * @skip: Number of bytes from the start to skip. + * + * Copy data from hypervisor to domain. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + */ +unsigned long +xencomm_copy_to_guest( + void *to, + const void *from, + unsigned int n, + unsigned int skip) +{ + struct xencomm_desc *desc; + unsigned long desc_addr; + unsigned int from_pos = 0; + unsigned int to_pos = 0; + unsigned int i = 0; + + if (xencomm_debug) + printf ("xencomm_copy_to_guest: to=%lx+%u n=%u\n", + (unsigned long)to, skip, n); + + if (XENCOMM_IS_INLINE(to)) { + unsigned long dest_paddr = XENCOMM_INLINE_ADDR(to); + + dest_paddr += skip; + + while (n > 0) { + unsigned int chunksz; + unsigned int bytes; + int res; + + chunksz = PAGE_SIZE - (dest_paddr % PAGE_SIZE); + + bytes = min(chunksz, n); + + res = xencomm_copy_chunk_to(dest_paddr, (unsigned long)from, bytes); + if (res != 0) + return res; + + dest_paddr += bytes; + from += bytes; + n -= bytes; + } + + /* Always successful. 
*/ + return 0; + } + + /* first we need to access the descriptor */ + desc_addr = xencomm_paddr_to_maddr((unsigned long)to); + if (desc_addr == 0) + return -EFAULT; + + desc = (struct xencomm_desc *)desc_addr; + if (desc->magic != XENCOMM_MAGIC) { + printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic); + return -EFAULT; + } + + /* iterate through the descriptor, copying up to a page at a time */ + while ((from_pos < n) && (i < desc->nr_addrs)) { + unsigned long dest_paddr = desc->address[i]; + unsigned int pgoffset; + unsigned int chunksz; + unsigned int chunk_skip; + + if (dest_paddr == XENCOMM_INVALID) { + i++; + continue; + } + + pgoffset = dest_paddr % PAGE_SIZE; + chunksz = PAGE_SIZE - pgoffset; + + chunk_skip = min(chunksz, skip); + to_pos += chunk_skip; + chunksz -= chunk_skip; + skip -= chunk_skip; + dest_paddr += chunk_skip; + + if (skip == 0) { + unsigned int bytes = min(chunksz, n - from_pos); + int res; + + res = xencomm_copy_chunk_to(dest_paddr, + (unsigned long)from + from_pos, bytes); + if (res != 0) + return res; + + from_pos += bytes; + to_pos += bytes; + } + + i++; + } + return n - from_pos; +} + +/* Offset page addresses in 'handle' to skip 'bytes' bytes. Set completely + * exhausted pages to XENCOMM_INVALID. */ +void * +xencomm_add_offset( + void *handle, + unsigned int bytes) +{ + struct xencomm_desc *desc; + unsigned long desc_addr; + int i = 0; + + if (XENCOMM_IS_INLINE(handle)) + return (void *)((unsigned long)handle + bytes); + + /* first we need to access the descriptor */ + desc_addr = xencomm_paddr_to_maddr((unsigned long)handle); + if (desc_addr == 0) + return NULL; + + desc = (struct xencomm_desc *)desc_addr; + if (desc->magic != XENCOMM_MAGIC) { + printk("%s error: %p magic was 0x%x\n", __func__, desc, desc->magic); + return NULL; + } + + /* iterate through the descriptor incrementing addresses */ + while ((bytes > 0) && (i < desc->nr_addrs)) { + unsigned long dest_paddr = desc->address[i]; + unsigned int pgoffset; + unsigned int chunksz; + unsigned int chunk_skip; + + pgoffset = dest_paddr % PAGE_SIZE; + chunksz = PAGE_SIZE - pgoffset; + + chunk_skip = min(chunksz, bytes); + if (chunk_skip == chunksz) { + /* exhausted this page */ + desc->address[i] = XENCOMM_INVALID; + } else { + desc->address[i] += chunk_skip; + } + bytes -= chunk_skip; + } + return handle; +} + +int +xencomm_handle_is_null( + void *ptr) +{ + if (XENCOMM_IS_INLINE(ptr)) + return XENCOMM_INLINE_ADDR(ptr) == 0; + else { + struct xencomm_desc *desc; + unsigned long desc_addr; + + desc_addr = xencomm_paddr_to_maddr((unsigned long)ptr); + if (desc_addr == 0) + return 1; + + desc = (struct xencomm_desc *)desc_addr; + return (desc->address[0] == XENCOMM_INVALID); + } +} --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ xen/arch/ia64/xen/xenpatch.c Wed Oct 11 16:10:40 2006 -0400 @@ -0,0 +1,122 @@ +/****************************************************************************** + * xenpatch.c + * Copyright (c) 2006 Silicon Graphics Inc. + * Jes Sorensen <jes@xxxxxxx> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Parts of this based on code from arch/ia64/kernel/patch.c + */ + +#include <xen/config.h> +#include <xen/lib.h> +#include <asm/xensystem.h> +#include <asm/intrinsics.h> + +/* + * This was adapted from code written by Tony Luck: + * + * The 64-bit value in a "movl reg=value" is scattered between the two words of the bundle + * like this: + * + * 6 6 5 4 3 2 1 + * 3210987654321098765432109876543210987654321098765432109876543210 + * ABBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCDEEEEEFFFFFFFFFGGGGGGG + * + * CCCCCCCCCCCCCCCCCCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + * xxxxAFFFFFFFFFEEEEEDxGGGGGGGxxxxxxxxxxxxxBBBBBBBBBBBBBBBBBBBBBBB + */ +static u64 +get_imm64 (u64 insn_addr) +{ + u64 *p = (u64 *) (insn_addr & -16); /* mask out slot number */ + + return ( (p[1] & 0x0800000000000000UL) << 4) | /*A*/ + ((p[1] & 0x00000000007fffffUL) << 40) | /*B*/ + ((p[0] & 0xffffc00000000000UL) >> 24) | /*C*/ + ((p[1] & 0x0000100000000000UL) >> 23) | /*D*/ + ((p[1] & 0x0003e00000000000UL) >> 29) | /*E*/ + ((p[1] & 0x07fc000000000000UL) >> 43) | /*F*/ + ((p[1] & 0x000007f000000000UL) >> 36); /*G*/ +} + +/* Patch instruction with "val" where "mask" has 1 bits. */ +void +ia64_patch (u64 insn_addr, u64 mask, u64 val) +{ + u64 m0, m1, v0, v1, b0, b1, *b = (u64 *) (insn_addr & -16); +#define insn_mask ((1UL << 41) - 1) + unsigned long shift; + + b0 = b[0]; b1 = b[1]; + /* 5 bits of template, then 3 x 41-bit instructions */ + shift = 5 + 41 * (insn_addr % 16); + if (shift >= 64) { + m1 = mask << (shift - 64); + v1 = val << (shift - 64); + } else { + m0 = mask << shift; m1 = mask >> (64 - shift); + v0 = val << shift; v1 = val >> (64 - shift); + b[0] = (b0 & ~m0) | (v0 & m0); + } + b[1] = (b1 & ~m1) | (v1 & m1); +} + +void +ia64_patch_imm64 (u64 insn_addr, u64 val) +{ + /* The assembler may generate offset pointing to either slot 1 + or slot 2 for a long (2-slot) instruction, occupying slots 1 + and 2. */ + insn_addr &= -16UL; + ia64_patch(insn_addr + 2, 0x01fffefe000UL, + (((val & 0x8000000000000000UL) >> 27) | /* bit 63 -> 36 */ + ((val & 0x0000000000200000UL) << 0) | /* bit 21 -> 21 */ + ((val & 0x00000000001f0000UL) << 6) | /* bit 16 -> 22 */ + ((val & 0x000000000000ff80UL) << 20) | /* bit 7 -> 27 */ + ((val & 0x000000000000007fUL) << 13) /* bit 0 -> 13 */)); + ia64_patch(insn_addr + 1, 0x1ffffffffffUL, val >> 22); +} + +extern char frametable_miss; +extern unsigned long xen_pstart; + +/* + * Add more patch points in seperate functions as appropriate + */ + +static void xen_patch_frametable_miss(u64 offset) +{ + u64 addr, val; + + addr = (u64)&frametable_miss; + val = get_imm64(addr) + offset; + ia64_patch_imm64(addr, val); +} + + +void xen_patch_kernel(void) +{ + unsigned long patch_offset; + + patch_offset = xen_pstart - (KERNEL_START - PAGE_OFFSET); + + printk("Xen patching physical address access by offset: " + "0x%lx\n", patch_offset); + + xen_patch_frametable_miss(patch_offset); + + ia64_sync_i(); + ia64_srlz_i(); +}
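
In case it helps while reviewing, here is a small, self-contained model of the descriptor walk done by xencomm_copy_from_guest() in the new xen/arch/ia64/xen/xencomm.c above: the guest hands the hypervisor a list of physical chunk addresses, the loop burns off 'skip' bytes, then copies at most a page per entry and returns how many bytes it could not copy. This is an illustration only, not part of the patch; the struct layout, page size, and names below are simplified assumptions rather than the definitions from public/xencomm.h, and the real code additionally does the paddr-to-maddr translation and page refcounting shown in the diff.

/*
 * Illustration only -- a user-space model of the xencomm descriptor walk.
 * Simplified assumptions: a tiny "page" size so the chunking is visible,
 * and a fixed-size address[] array instead of the real flexible layout.
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define SKETCH_PAGE_SIZE 16          /* tiny "page" so the walk is visible */
#define SKETCH_INVALID   (~0UL)      /* stands in for XENCOMM_INVALID */

struct sketch_desc {
    uint32_t nr_addrs;               /* number of entries in address[] */
    unsigned long address[8];        /* "physical" chunk addresses */
};

/* Model of the per-chunk copy; the real code translates paddr to maddr and
 * takes a page reference before the memcpy. */
static void copy_chunk(char *to, const char *mem, unsigned long paddr,
                       unsigned int len)
{
    memcpy(to, mem + paddr, len);
}

/* Same shape as the loop in xencomm_copy_from_guest():
 * returns the number of bytes NOT copied (zero on success). */
static unsigned int copy_from_desc(char *to, const char *mem,
                                   const struct sketch_desc *desc,
                                   unsigned int n, unsigned int skip)
{
    unsigned int to_pos = 0, i = 0;

    while (to_pos < n && i < desc->nr_addrs) {
        unsigned long src = desc->address[i];
        unsigned int chunksz, chunk_skip;

        if (src == SKETCH_INVALID) { i++; continue; }

        /* each entry covers at most the rest of its page */
        chunksz = SKETCH_PAGE_SIZE - (src % SKETCH_PAGE_SIZE);
        chunk_skip = chunksz < skip ? chunksz : skip;
        chunksz -= chunk_skip;
        skip -= chunk_skip;

        if (skip == 0) {
            unsigned int bytes = chunksz < n - to_pos ? chunksz : n - to_pos;
            copy_chunk(to + to_pos, mem, src + chunk_skip, bytes);
            to_pos += bytes;
        }
        i++;
    }
    return n - to_pos;
}

int main(void)
{
    char mem[64];                              /* fake "guest physical memory" */
    char out[32] = { 0 };
    struct sketch_desc d = { 2, { 4, 32 } };   /* two chunks: bytes 4..15 and 32..47 */
    int i;

    for (i = 0; i < 64; i++)
        mem[i] = 'a' + (i % 26);

    /* copy 20 bytes, skipping the first 3 of the described region */
    printf("left=%u out=%.20s\n", copy_from_desc(out, mem, &d, 20, 3), out);
    return 0;
}

The point of the layout is the same one the patch relies on: because each address[] entry never crosses a page boundary, the hypervisor can validate and reference one page at a time instead of trusting a guest virtual pointer.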