From 3b4724b0e60cdfdc2679ee7135f3a234c74c2b83 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 17 Mar 2008 16:37:10 -0700 Subject: xen: use phys_addr_t when referring to physical addresses Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/xen/page.h | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'include/xen') diff --git a/include/xen/page.h b/include/xen/page.h index 031ef22a971e..1742f60828f3 100644 --- a/include/xen/page.h +++ b/include/xen/page.h @@ -8,27 +8,15 @@ #include -#ifdef CONFIG_X86_PAE /* Xen machine address */ typedef struct xmaddr { - unsigned long long maddr; + phys_addr_t maddr; } xmaddr_t; /* Xen pseudo-physical address */ typedef struct xpaddr { - unsigned long long paddr; + phys_addr_t paddr; } xpaddr_t; -#else -/* Xen machine address */ -typedef struct xmaddr { - unsigned long maddr; -} xmaddr_t; - -/* Xen pseudo-physical address */ -typedef struct xpaddr { - unsigned long paddr; -} xpaddr_t; -#endif #define XMADDR(x) ((xmaddr_t) { .maddr = (x) }) #define XPADDR(x) ((xpaddr_t) { .paddr = (x) }) -- cgit v1.2.3 From 9666e9d44b83755c53615fb89c0787b6846786a1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 17 Mar 2008 16:37:11 -0700 Subject: xen: unify pte operations on machine frames Xen's pte operations on mfns can be unified like the kernel's pfn operations. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/xen/page.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'include/xen') diff --git a/include/xen/page.h b/include/xen/page.h index 1742f60828f3..01799305f02a 100644 --- a/include/xen/page.h +++ b/include/xen/page.h @@ -125,37 +125,37 @@ static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn) #define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v)))) #define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) -#ifdef CONFIG_X86_PAE -#define pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) | \ - (((_pte).pte_high & 0xfff) << (32-PAGE_SHIFT))) +static inline unsigned long pte_mfn(pte_t pte) +{ + return (pte.pte & ~_PAGE_NX) >> PAGE_SHIFT; +} static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot) { pte_t pte; - pte.pte_high = (page_nr >> (32 - PAGE_SHIFT)) | - (pgprot_val(pgprot) >> 32); - pte.pte_high &= (__supported_pte_mask >> 32); - pte.pte_low = ((page_nr << PAGE_SHIFT) | pgprot_val(pgprot)); - pte.pte_low &= __supported_pte_mask; + pte.pte = ((phys_addr_t)page_nr << PAGE_SHIFT) | + (pgprot_val(pgprot) & __supported_pte_mask); return pte; } -static inline unsigned long long pte_val_ma(pte_t x) +static inline pteval_t pte_val_ma(pte_t pte) +{ + return pte.pte; +} + +static inline pte_t __pte_ma(pteval_t x) { - return x.pte; + return (pte_t) { .pte = x }; } + +#ifdef CONFIG_X86_PAE #define pmd_val_ma(v) ((v).pmd) #define pud_val_ma(v) ((v).pgd.pgd) -#define __pte_ma(x) ((pte_t) { .pte = (x) }) #define __pmd_ma(x) ((pmd_t) { (x) } ) #else /* !X86_PAE */ -#define pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT) -#define mfn_pte(pfn, prot) __pte_ma(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) -#define pte_val_ma(x) ((x).pte) #define pmd_val_ma(v) ((v).pud.pgd.pgd) -#define __pte_ma(x) ((pte_t) { (x) } ) #endif /* CONFIG_X86_PAE */ #define pgd_val_ma(x) ((x).pgd) -- cgit v1.2.3 From aa380c82b83252754a8c11bfc92359bd87cbf710 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 17 Mar 2008 16:37:15 -0700 Subject: xen: add 
support for callbackops hypercall Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/xen/hypercall.h | 6 +++ include/asm-x86/xen/interface.h | 4 ++ include/xen/interface/callback.h | 102 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 112 insertions(+) create mode 100644 include/xen/interface/callback.h (limited to 'include/xen') diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h index bc0ee7d961ca..c2ccd997ed35 100644 --- a/include/asm-x86/xen/hypercall.h +++ b/include/asm-x86/xen/hypercall.h @@ -163,6 +163,12 @@ HYPERVISOR_set_callbacks(unsigned long event_selector, failsafe_selector, failsafe_address); } +static inline int +HYPERVISOR_callback_op(int cmd, void *arg) +{ + return _hypercall2(int, callback_op, cmd, arg); +} + static inline int HYPERVISOR_fpu_taskswitch(int set) { diff --git a/include/asm-x86/xen/interface.h b/include/asm-x86/xen/interface.h index 165c3968e138..588a0716cd78 100644 --- a/include/asm-x86/xen/interface.h +++ b/include/asm-x86/xen/interface.h @@ -171,6 +171,10 @@ struct arch_vcpu_info { unsigned long pad[5]; /* sizeof(struct vcpu_info) == 64 */ }; +struct xen_callback { + unsigned long cs; + unsigned long eip; +}; #endif /* !__ASSEMBLY__ */ /* diff --git a/include/xen/interface/callback.h b/include/xen/interface/callback.h new file mode 100644 index 000000000000..4aadcba31af9 --- /dev/null +++ b/include/xen/interface/callback.h @@ -0,0 +1,102 @@ +/****************************************************************************** + * callback.h + * + * Register guest OS callbacks with Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2006, Ian Campbell + */ + +#ifndef __XEN_PUBLIC_CALLBACK_H__ +#define __XEN_PUBLIC_CALLBACK_H__ + +#include "xen.h" + +/* + * Prototype for this hypercall is: + * long callback_op(int cmd, void *extra_args) + * @cmd == CALLBACKOP_??? (callback operation). + * @extra_args == Operation-specific extra arguments (NULL if none). + */ + +/* ia64, x86: Callback for event delivery. */ +#define CALLBACKTYPE_event 0 + +/* x86: Failsafe callback when guest state cannot be restored by Xen. */ +#define CALLBACKTYPE_failsafe 1 + +/* x86/64 hypervisor: Syscall by 64-bit guest app ('64-on-64-on-64'). */ +#define CALLBACKTYPE_syscall 2 + +/* + * x86/32 hypervisor: Only available on x86/32 when supervisor_mode_kernel + * feature is enabled. Do not use this callback type in new code. 
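 *
 * [Editor's note: an illustrative sketch, not part of the original patch.
 *  With the pieces this commit adds, a guest would register its event
 *  upcall roughly like so (xen_hypervisor_callback is the Xen entry stub
 *  declared in the kernel's xen-ops.h; the surrounding code is assumed):
 *
 *	struct callback_register event = {
 *		.type = CALLBACKTYPE_event,
 *		.address = { .cs = __KERNEL_CS,
 *			     .eip = (unsigned long)xen_hypervisor_callback },
 *	};
 *
 *	if (HYPERVISOR_callback_op(CALLBACKOP_register, &event) != 0)
 *		BUG();
 *
 *  CALLBACKOP_register and struct callback_register are defined further
 *  down in this header.]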
+ */ +#define CALLBACKTYPE_sysenter_deprecated 3 + +/* x86: Callback for NMI delivery. */ +#define CALLBACKTYPE_nmi 4 + +/* + * x86: sysenter is only available as follows: + * - 32-bit hypervisor: with the supervisor_mode_kernel feature enabled + * - 64-bit hypervisor: 32-bit guest applications on Intel CPUs + * ('32-on-32-on-64', '32-on-64-on-64') + * [nb. also 64-bit guest applications on Intel CPUs + * ('64-on-64-on-64'), but syscall is preferred] + */ +#define CALLBACKTYPE_sysenter 5 + +/* + * x86/64 hypervisor: Syscall by 32-bit guest app on AMD CPUs + * ('32-on-32-on-64', '32-on-64-on-64') + */ +#define CALLBACKTYPE_syscall32 7 + +/* + * Disable event deliver during callback? This flag is ignored for event and + * NMI callbacks: event delivery is unconditionally disabled. + */ +#define _CALLBACKF_mask_events 0 +#define CALLBACKF_mask_events (1U << _CALLBACKF_mask_events) + +/* + * Register a callback. + */ +#define CALLBACKOP_register 0 +struct callback_register { + uint16_t type; + uint16_t flags; + struct xen_callback address; +}; + +/* + * Unregister a callback. + * + * Not all callbacks can be unregistered. -EINVAL will be returned if + * you attempt to unregister such a callback. + */ +#define CALLBACKOP_unregister 1 +struct callback_unregister { + uint16_t type; + uint16_t _unused; +}; + +#endif /* __XEN_PUBLIC_CALLBACK_H__ */ -- cgit v1.2.3 From 9a9db275b02e91fba837750ccfc82411ada834b8 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Wed, 2 Apr 2008 10:53:50 -0700 Subject: xen: definisions which ia64 needs Add xen hypercall numbers defined for arch specific use. ia64/xen domU uses __HYPERVISOR_arch_1 to manipulate paravirtualized IOSAPIC. Although all those constants aren't used yet by IA64 at this moment, add all arch specific hypercall numbers. Signed-off-by: Isaku Yamahata Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/xen/interface/xen.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/xen') diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index 518a5bf79ed3..87ad143be406 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -58,6 +58,16 @@ #define __HYPERVISOR_physdev_op 33 #define __HYPERVISOR_hvm_op 34 +/* Architecture-specific hypercall definitions. */ +#define __HYPERVISOR_arch_0 48 +#define __HYPERVISOR_arch_1 49 +#define __HYPERVISOR_arch_2 50 +#define __HYPERVISOR_arch_3 51 +#define __HYPERVISOR_arch_4 52 +#define __HYPERVISOR_arch_5 53 +#define __HYPERVISOR_arch_6 54 +#define __HYPERVISOR_arch_7 55 + /* * VIRTUAL INTERRUPTS * -- cgit v1.2.3 From 2eb6d5eb48fd6aedf5787b30e5c41693e8c91fa3 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Wed, 2 Apr 2008 10:53:51 -0700 Subject: xen: definitions which ia64/xen needs Add xen VIRQ numbers defined for arch specific use. ia64/xen domU uses VIRQ_ARCH_0 for virtual itc timer. Although all those constants aren't used yet by ia64 at this moment, add all arch specific VIRQ numbers. Signed-off-by: Isaku Yamahata Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/xen/interface/xen.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/xen') diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index 87ad143be406..9b018da48cf3 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -78,8 +78,18 @@ #define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. 
From 87e27cf6288c6bf089ed34a72213d9ad16e82d84 Mon Sep 17 00:00:00 2001
From: Isaku Yamahata
Date: Wed, 2 Apr 2008 10:53:52 -0700
Subject: xen: add missing definitions for xen grant table which ia64/xen needs

Add xen guest handle related definitions for the grant table, which
ia64/xen needs. Pointer arguments for ia64/xen hypercalls are passed as
pseudo-physical addresses (guest physical addresses), so a guest kernel
virtual address must be converted into a pseudo-physical address right
before issuing a hypercall. The xen guest handle represents such
arguments. Define the necessary handles and helper functions.

Signed-off-by: Isaku Yamahata
Signed-off-by: Jeremy Fitzhardinge
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
---
 drivers/xen/grant-table.c | 2 +-
 include/asm-x86/xen/interface.h | 24 ++++++++++++++++++++++
 include/xen/interface/grant_table.h | 11 ++++++++---
 3 files changed, 33 insertions(+), 4 deletions(-)

(limited to 'include/xen')

diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index d85dc6d41c2a..d4a89b3d694f 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -470,7 +470,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
 setup.dom = DOMID_SELF;
 setup.nr_frames = nr_gframes;
- setup.frame_list = frames;
+ set_xen_guest_handle(setup.frame_list, frames);
 
 rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
 if (rc == -ENOSYS) {
diff --git a/include/asm-x86/xen/interface.h b/include/asm-x86/xen/interface.h
index 588a0716cd78..6227000a1e84 100644
--- a/include/asm-x86/xen/interface.h
+++ b/include/asm-x86/xen/interface.h
@@ -22,6 +22,30 @@
 #define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name)
 #define GUEST_HANDLE(name) __guest_handle_ ## name
 
+#ifdef __XEN__
+#if defined(__i386__)
+#define set_xen_guest_handle(hnd, val) \
+ do { \
+ if (sizeof(hnd) == 8) \
+ *(uint64_t *)&(hnd) = 0; \
+ (hnd).p = val; \
+ } while (0)
+#elif defined(__x86_64__)
+#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0)
+#endif
+#else
+#if defined(__i386__)
+#define set_xen_guest_handle(hnd, val) \
+ do { \
+ if (sizeof(hnd) == 8) \
+ *(uint64_t *)&(hnd) = 0; \
+ (hnd) = val; \
+ } while (0)
+#elif defined(__x86_64__)
+#define set_xen_guest_handle(hnd, val) do { (hnd) = val; } while (0)
+#endif
+#endif
+
 #ifndef __ASSEMBLY__
 /* Guest handles for primitive C types. */
 __DEFINE_GUEST_HANDLE(uchar, unsigned char);
diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h
index 219049802cf2..39da93c21de0 100644
--- a/include/xen/interface/grant_table.h
+++ b/include/xen/interface/grant_table.h
@@ -185,6 +185,7 @@ struct gnttab_map_grant_ref {
 grant_handle_t handle;
 uint64_t dev_bus_addr;
 };
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_map_grant_ref);
 
 /*
 * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings
@@ -206,6 +207,7 @@ struct gnttab_unmap_grant_ref {
 /* OUT parameters. */
 int16_t status; /* GNTST_* */
 };
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_grant_ref);
 
 /*
 * GNTTABOP_setup_table: Set up a grant table for comprising at least
@@ -223,8 +225,9 @@ struct gnttab_setup_table {
 uint32_t nr_frames;
 /* OUT parameters. */
 int16_t status; /* GNTST_* */
- ulong *frame_list;
+ GUEST_HANDLE(ulong) frame_list;
 };
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_setup_table);
 
 /*
 * GNTTABOP_dump_table: Dump the contents of the grant table to the
@@ -237,6 +240,7 @@ struct gnttab_dump_table {
 /* OUT parameters. */
 int16_t status; /* GNTST_* */
 };
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table);
 
 /*
 * GNTTABOP_transfer_grant_ref: Transfer to a foreign domain. The
@@ -255,7 +259,7 @@ struct gnttab_transfer {
 /* OUT parameters. */
 int16_t status;
 };
-
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_transfer);
 
 /*
 * GNTTABOP_copy: Hypervisor based copy
@@ -296,6 +300,7 @@ struct gnttab_copy {
 /* OUT parameters. */
 int16_t status;
 };
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_copy);
 
 /*
 * GNTTABOP_query_size: Query the current and maximum sizes of the shared
@@ -313,7 +318,7 @@ struct gnttab_query_size {
 uint32_t max_nr_frames;
 int16_t status; /* GNTST_* */
 };
-
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size);
 
 /*
 * Bitfield values for update_pin_status.flags.
-- cgit v1.2.3
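[Editor's note: a sketch of the new guest-handle plumbing in use. It
restates what the grant-table.c hunk above does, with hypothetical local
variables; on 32-bit x86 set_xen_guest_handle() first zeroes the handle
slot when it is 8 bytes wide, so the hypervisor never sees garbage in the
upper half:

	unsigned long frames[16];	/* hypothetical frame-list buffer */
	struct gnttab_setup_table setup;
	int rc;

	setup.dom = DOMID_SELF;
	setup.nr_frames = 16;
	set_xen_guest_handle(setup.frame_list, frames);

	rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
	if (rc == 0 && setup.status == 0)
		/* frames[] now holds the mfns backing the grant table */;
]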
From 2724426924a471dc9fd8989dae56ab4d79519e34 Mon Sep 17 00:00:00 2001
From: Isaku Yamahata
Date: Wed, 2 Apr 2008 10:53:53 -0700
Subject: xen: add missing definitions in include/xen/interface/vcpu.h which ia64/xen needs

Add xen guest handle related definitions for xen vcpu structures, which
ia64/xen needs. Pointer arguments for ia64/xen hypercalls are passed as
pseudo-physical addresses (guest physical addresses), so a guest kernel
virtual address must be converted into a pseudo-physical address. The
xen guest handle represents such arguments.

Signed-off-by: Isaku Yamahata
Signed-off-by: Jeremy Fitzhardinge
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
---
 include/xen/interface/vcpu.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/xen')

diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h
index b05d8a6d9143..87e6f8a48661 100644
--- a/include/xen/interface/vcpu.h
+++ b/include/xen/interface/vcpu.h
@@ -85,6 +85,7 @@ struct vcpu_runstate_info {
 */
 uint64_t time[4];
 };
+DEFINE_GUEST_HANDLE_STRUCT(vcpu_runstate_info);
 
 /* VCPU is currently running on a physical CPU. */
 #define RUNSTATE_running 0
@@ -119,6 +120,7 @@
 #define VCPUOP_register_runstate_memory_area 5
 struct vcpu_register_runstate_memory_area {
 union {
+ GUEST_HANDLE(vcpu_runstate_info) h;
 struct vcpu_runstate_info *v;
 uint64_t p;
 } addr;
@@ -134,6 +136,7 @@ struct vcpu_register_runstate_memory_area {
 struct vcpu_set_periodic_timer {
 uint64_t period_ns;
 };
+DEFINE_GUEST_HANDLE_STRUCT(vcpu_set_periodic_timer);
 
 /*
 * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot
@@ -145,6 +148,7 @@ struct vcpu_set_singleshot_timer {
 uint64_t timeout_abs_ns;
 uint32_t flags; /* VCPU_SSHOTTMR_??? */
 };
+DEFINE_GUEST_HANDLE_STRUCT(vcpu_set_singleshot_timer);
 
 /* Flags to VCPUOP_set_singleshot_timer. */
 /* Require the timeout to be in the future (return -ETIME if it's passed). */
@@ -164,5 +168,6 @@ struct vcpu_register_vcpu_info {
 uint32_t offset; /* offset within page */
 uint32_t rsvd; /* unused */
 };
+DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info);
 
 #endif /* __XEN_PUBLIC_VCPU_H__ */
-- cgit v1.2.3
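[Editor's note: a hedged sketch of the new guest-handle union member in
use, registering a per-vcpu runstate area. HYPERVISOR_vcpu_op() is the
standard vcpu hypercall wrapper (not part of this series) and the per-cpu
variable is hypothetical; this mirrors what arch timekeeping code is
expected to do:

	static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate);

	struct vcpu_register_runstate_memory_area area;

	set_xen_guest_handle(area.addr.h, &per_cpu(xen_runstate, cpu));
	if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
			       cpu, &area))
		BUG();
]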
From e04d0d0767a9c272d3c7300fb7a5221c5e3a71eb Mon Sep 17 00:00:00 2001
From: Isaku Yamahata
Date: Wed, 2 Apr 2008 10:53:55 -0700
Subject: xen: move events.c to drivers/xen for IA64/Xen support

Move arch/x86/xen/events.c under drivers/xen so that the code can be
shared by x86 and ia64, with minor adjustments so it still compiles;
ia64/xen also uses events.c.

Signed-off-by: Yaozu (Eddie) Dong
Signed-off-by: Isaku Yamahata
Signed-off-by: Jeremy Fitzhardinge
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
---
 arch/x86/xen/Makefile | 2 +-
 arch/x86/xen/events.c | 658 ------------------------------------------------
 arch/x86/xen/xen-ops.h | 2 +-
 drivers/xen/Makefile | 2 +-
 drivers/xen/events.c | 657 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/xen/xen-ops.h | 8 +
 6 files changed, 668 insertions(+), 661 deletions(-)
 delete mode 100644 arch/x86/xen/events.c
 create mode 100644 drivers/xen/events.c
 create mode 100644 include/xen/xen-ops.h

(limited to 'include/xen')

diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index c5e9aa489900..95c59260dccb 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,4 @@
 obj-y := enlighten.o setup.o multicalls.o mmu.o \
- events.o time.o manage.o xen-asm.o
+ time.o manage.o xen-asm.o
 
 obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/x86/xen/events.c b/arch/x86/xen/events.c
deleted file mode 100644
index 85bac298b3cb..000000000000
--- a/arch/x86/xen/events.c
+++ /dev/null
@@ -1,658 +0,0 @@
-/*
- * Xen event channels
- *
- * Xen models interrupts with abstract event channels. Because each
- * domain gets 1024 event channels, but NR_IRQ is not that large, we
- * must dynamically map irqs<->event channels. The event channels
- * interface with the rest of the kernel by defining a xen interrupt
- * chip. When an event is recieved, it is mapped to an irq and sent
- * through the normal interrupt processing path.
- *
- * There are four kinds of events which can be mapped to an event
- * channel:
- *
- * 1. Inter-domain notifications. This includes all the virtual
- * device events, since they're driven by front-ends in another domain
- * (typically dom0).
- * 2. VIRQs, typically used for timers. These are per-cpu events.
- * 3. IPIs.
- * 4. Hardware interrupts. Not supported at present.
- *
- * Jeremy Fitzhardinge , XenSource Inc, 2007
- */
-
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-
-#include
-#include
-#include
-
-#include "xen-ops.h"
-
-/*
- * This lock protects updates to the following mapping and reference-count
- * arrays. The lock does not need to be acquired to read the mapping tables.
- */
-static DEFINE_SPINLOCK(irq_mapping_update_lock);
-
-/* IRQ <-> VIRQ mapping. */
-static DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
-
-/* IRQ <-> IPI mapping */
-static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = -1};
-
-/* Packed IRQ information: binding type, sub-type index, and event channel. */
-struct packed_irq
-{
- unsigned short evtchn;
- unsigned char index;
- unsigned char type;
-};
-
-static struct packed_irq irq_info[NR_IRQS];
-
-/* Binding types. */
-enum {
- IRQT_UNBOUND,
- IRQT_PIRQ,
- IRQT_VIRQ,
- IRQT_IPI,
- IRQT_EVTCHN
-};
-
-/* Convenient shorthand for packed representation of an unbound IRQ. 
*/ -#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0) - -static int evtchn_to_irq[NR_EVENT_CHANNELS] = { - [0 ... NR_EVENT_CHANNELS-1] = -1 -}; -static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG]; -static u8 cpu_evtchn[NR_EVENT_CHANNELS]; - -/* Reference counts for bindings to IRQs. */ -static int irq_bindcount[NR_IRQS]; - -/* Xen will never allocate port zero for any purpose. */ -#define VALID_EVTCHN(chn) ((chn) != 0) - -/* - * Force a proper event-channel callback from Xen after clearing the - * callback mask. We do this in a very simple manner, by making a call - * down into Xen. The pending flag will be checked by Xen on return. - */ -void force_evtchn_callback(void) -{ - (void)HYPERVISOR_xen_version(0, NULL); -} -EXPORT_SYMBOL_GPL(force_evtchn_callback); - -static struct irq_chip xen_dynamic_chip; - -/* Constructor for packed IRQ information. */ -static inline struct packed_irq mk_irq_info(u32 type, u32 index, u32 evtchn) -{ - return (struct packed_irq) { evtchn, index, type }; -} - -/* - * Accessors for packed IRQ information. - */ -static inline unsigned int evtchn_from_irq(int irq) -{ - return irq_info[irq].evtchn; -} - -static inline unsigned int index_from_irq(int irq) -{ - return irq_info[irq].index; -} - -static inline unsigned int type_from_irq(int irq) -{ - return irq_info[irq].type; -} - -static inline unsigned long active_evtchns(unsigned int cpu, - struct shared_info *sh, - unsigned int idx) -{ - return (sh->evtchn_pending[idx] & - cpu_evtchn_mask[cpu][idx] & - ~sh->evtchn_mask[idx]); -} - -static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) -{ - int irq = evtchn_to_irq[chn]; - - BUG_ON(irq == -1); -#ifdef CONFIG_SMP - irq_desc[irq].affinity = cpumask_of_cpu(cpu); -#endif - - __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]); - __set_bit(chn, cpu_evtchn_mask[cpu]); - - cpu_evtchn[chn] = cpu; -} - -static void init_evtchn_cpu_bindings(void) -{ -#ifdef CONFIG_SMP - int i; - /* By default all event channels notify CPU#0. */ - for (i = 0; i < NR_IRQS; i++) - irq_desc[i].affinity = cpumask_of_cpu(0); -#endif - - memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); - memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); -} - -static inline unsigned int cpu_from_evtchn(unsigned int evtchn) -{ - return cpu_evtchn[evtchn]; -} - -static inline void clear_evtchn(int port) -{ - struct shared_info *s = HYPERVISOR_shared_info; - sync_clear_bit(port, &s->evtchn_pending[0]); -} - -static inline void set_evtchn(int port) -{ - struct shared_info *s = HYPERVISOR_shared_info; - sync_set_bit(port, &s->evtchn_pending[0]); -} - - -/** - * notify_remote_via_irq - send event to remote end of event channel via irq - * @irq: irq of event channel to send event to - * - * Unlike notify_remote_via_evtchn(), this is safe to use across - * save/restore. Notifications on a broken connection are silently - * dropped. - */ -void notify_remote_via_irq(int irq) -{ - int evtchn = evtchn_from_irq(irq); - - if (VALID_EVTCHN(evtchn)) - notify_remote_via_evtchn(evtchn); -} -EXPORT_SYMBOL_GPL(notify_remote_via_irq); - -static void mask_evtchn(int port) -{ - struct shared_info *s = HYPERVISOR_shared_info; - sync_set_bit(port, &s->evtchn_mask[0]); -} - -static void unmask_evtchn(int port) -{ - struct shared_info *s = HYPERVISOR_shared_info; - unsigned int cpu = get_cpu(); - - BUG_ON(!irqs_disabled()); - - /* Slow path (hypercall) if this is a non-local port. 
*/ - if (unlikely(cpu != cpu_from_evtchn(port))) { - struct evtchn_unmask unmask = { .port = port }; - (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); - } else { - struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); - - sync_clear_bit(port, &s->evtchn_mask[0]); - - /* - * The following is basically the equivalent of - * 'hw_resend_irq'. Just like a real IO-APIC we 'lose - * the interrupt edge' if the channel is masked. - */ - if (sync_test_bit(port, &s->evtchn_pending[0]) && - !sync_test_and_set_bit(port / BITS_PER_LONG, - &vcpu_info->evtchn_pending_sel)) - vcpu_info->evtchn_upcall_pending = 1; - } - - put_cpu(); -} - -static int find_unbound_irq(void) -{ - int irq; - - /* Only allocate from dynirq range */ - for (irq = 0; irq < NR_IRQS; irq++) - if (irq_bindcount[irq] == 0) - break; - - if (irq == NR_IRQS) - panic("No available IRQ to bind to: increase NR_IRQS!\n"); - - return irq; -} - -int bind_evtchn_to_irq(unsigned int evtchn) -{ - int irq; - - spin_lock(&irq_mapping_update_lock); - - irq = evtchn_to_irq[evtchn]; - - if (irq == -1) { - irq = find_unbound_irq(); - - dynamic_irq_init(irq); - set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, - handle_level_irq, "event"); - - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn); - } - - irq_bindcount[irq]++; - - spin_unlock(&irq_mapping_update_lock); - - return irq; -} -EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); - -static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) -{ - struct evtchn_bind_ipi bind_ipi; - int evtchn, irq; - - spin_lock(&irq_mapping_update_lock); - - irq = per_cpu(ipi_to_irq, cpu)[ipi]; - if (irq == -1) { - irq = find_unbound_irq(); - if (irq < 0) - goto out; - - dynamic_irq_init(irq); - set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, - handle_level_irq, "ipi"); - - bind_ipi.vcpu = cpu; - if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, - &bind_ipi) != 0) - BUG(); - evtchn = bind_ipi.port; - - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); - - per_cpu(ipi_to_irq, cpu)[ipi] = irq; - - bind_evtchn_to_cpu(evtchn, cpu); - } - - irq_bindcount[irq]++; - - out: - spin_unlock(&irq_mapping_update_lock); - return irq; -} - - -static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) -{ - struct evtchn_bind_virq bind_virq; - int evtchn, irq; - - spin_lock(&irq_mapping_update_lock); - - irq = per_cpu(virq_to_irq, cpu)[virq]; - - if (irq == -1) { - bind_virq.virq = virq; - bind_virq.vcpu = cpu; - if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, - &bind_virq) != 0) - BUG(); - evtchn = bind_virq.port; - - irq = find_unbound_irq(); - - dynamic_irq_init(irq); - set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, - handle_level_irq, "virq"); - - evtchn_to_irq[evtchn] = irq; - irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); - - per_cpu(virq_to_irq, cpu)[virq] = irq; - - bind_evtchn_to_cpu(evtchn, cpu); - } - - irq_bindcount[irq]++; - - spin_unlock(&irq_mapping_update_lock); - - return irq; -} - -static void unbind_from_irq(unsigned int irq) -{ - struct evtchn_close close; - int evtchn = evtchn_from_irq(irq); - - spin_lock(&irq_mapping_update_lock); - - if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) { - close.port = evtchn; - if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) - BUG(); - - switch (type_from_irq(irq)) { - case IRQT_VIRQ: - per_cpu(virq_to_irq, cpu_from_evtchn(evtchn)) - [index_from_irq(irq)] = -1; - break; - default: - break; - } - - /* Closed ports are implicitly re-bound to VCPU0. 
*/ - bind_evtchn_to_cpu(evtchn, 0); - - evtchn_to_irq[evtchn] = -1; - irq_info[irq] = IRQ_UNBOUND; - - dynamic_irq_init(irq); - } - - spin_unlock(&irq_mapping_update_lock); -} - -int bind_evtchn_to_irqhandler(unsigned int evtchn, - irq_handler_t handler, - unsigned long irqflags, - const char *devname, void *dev_id) -{ - unsigned int irq; - int retval; - - irq = bind_evtchn_to_irq(evtchn); - retval = request_irq(irq, handler, irqflags, devname, dev_id); - if (retval != 0) { - unbind_from_irq(irq); - return retval; - } - - return irq; -} -EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); - -int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, - irq_handler_t handler, - unsigned long irqflags, const char *devname, void *dev_id) -{ - unsigned int irq; - int retval; - - irq = bind_virq_to_irq(virq, cpu); - retval = request_irq(irq, handler, irqflags, devname, dev_id); - if (retval != 0) { - unbind_from_irq(irq); - return retval; - } - - return irq; -} -EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler); - -int bind_ipi_to_irqhandler(enum ipi_vector ipi, - unsigned int cpu, - irq_handler_t handler, - unsigned long irqflags, - const char *devname, - void *dev_id) -{ - int irq, retval; - - irq = bind_ipi_to_irq(ipi, cpu); - if (irq < 0) - return irq; - - retval = request_irq(irq, handler, irqflags, devname, dev_id); - if (retval != 0) { - unbind_from_irq(irq); - return retval; - } - - return irq; -} - -void unbind_from_irqhandler(unsigned int irq, void *dev_id) -{ - free_irq(irq, dev_id); - unbind_from_irq(irq); -} -EXPORT_SYMBOL_GPL(unbind_from_irqhandler); - -void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) -{ - int irq = per_cpu(ipi_to_irq, cpu)[vector]; - BUG_ON(irq < 0); - notify_remote_via_irq(irq); -} - -irqreturn_t xen_debug_interrupt(int irq, void *dev_id) -{ - struct shared_info *sh = HYPERVISOR_shared_info; - int cpu = smp_processor_id(); - int i; - unsigned long flags; - static DEFINE_SPINLOCK(debug_lock); - - spin_lock_irqsave(&debug_lock, flags); - - printk("vcpu %d\n ", cpu); - - for_each_online_cpu(i) { - struct vcpu_info *v = per_cpu(xen_vcpu, i); - printk("%d: masked=%d pending=%d event_sel %08lx\n ", i, - (get_irq_regs() && i == cpu) ? !(get_irq_regs()->flags & X86_EFLAGS_IF) : v->evtchn_upcall_mask, - v->evtchn_upcall_pending, - v->evtchn_pending_sel); - } - printk("pending:\n "); - for(i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--) - printk("%08lx%s", sh->evtchn_pending[i], - i % 8 == 0 ? "\n " : " "); - printk("\nmasks:\n "); - for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) - printk("%08lx%s", sh->evtchn_mask[i], - i % 8 == 0 ? "\n " : " "); - - printk("\nunmasked:\n "); - for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) - printk("%08lx%s", sh->evtchn_pending[i] & ~sh->evtchn_mask[i], - i % 8 == 0 ? "\n " : " "); - - printk("\npending list:\n"); - for(i = 0; i < NR_EVENT_CHANNELS; i++) { - if (sync_test_bit(i, sh->evtchn_pending)) { - printk(" %d: event %d -> irq %d\n", - cpu_evtchn[i], i, - evtchn_to_irq[i]); - } - } - - spin_unlock_irqrestore(&debug_lock, flags); - - return IRQ_HANDLED; -} - - -/* - * Search the CPUs pending events bitmasks. For each one found, map - * the event number to an irq, and feed it into do_IRQ() for - * handling. - * - * Xen uses a two-level bitmap to speed searching. The first level is - * a bitset of words which contain pending event bits. The second - * level is a bitset of pending events themselves. 
- */ -void xen_evtchn_do_upcall(struct pt_regs *regs) -{ - int cpu = get_cpu(); - struct shared_info *s = HYPERVISOR_shared_info; - struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); - static DEFINE_PER_CPU(unsigned, nesting_count); - unsigned count; - - do { - unsigned long pending_words; - - vcpu_info->evtchn_upcall_pending = 0; - - if (__get_cpu_var(nesting_count)++) - goto out; - - /* NB. No need for a barrier here -- XCHG is a barrier on x86. */ - pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0); - while (pending_words != 0) { - unsigned long pending_bits; - int word_idx = __ffs(pending_words); - pending_words &= ~(1UL << word_idx); - - while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) { - int bit_idx = __ffs(pending_bits); - int port = (word_idx * BITS_PER_LONG) + bit_idx; - int irq = evtchn_to_irq[port]; - - if (irq != -1) { - regs->orig_ax = ~irq; - do_IRQ(regs); - } - } - } - - BUG_ON(!irqs_disabled()); - - count = __get_cpu_var(nesting_count); - __get_cpu_var(nesting_count) = 0; - } while(count != 1); - -out: - put_cpu(); -} - -/* Rebind an evtchn so that it gets delivered to a specific cpu */ -static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) -{ - struct evtchn_bind_vcpu bind_vcpu; - int evtchn = evtchn_from_irq(irq); - - if (!VALID_EVTCHN(evtchn)) - return; - - /* Send future instances of this interrupt to other vcpu. */ - bind_vcpu.port = evtchn; - bind_vcpu.vcpu = tcpu; - - /* - * If this fails, it usually just indicates that we're dealing with a - * virq or IPI channel, which don't actually need to be rebound. Ignore - * it, but don't do the xenlinux-level rebind in that case. - */ - if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) - bind_evtchn_to_cpu(evtchn, tcpu); -} - - -static void set_affinity_irq(unsigned irq, cpumask_t dest) -{ - unsigned tcpu = first_cpu(dest); - rebind_irq_to_cpu(irq, tcpu); -} - -static void enable_dynirq(unsigned int irq) -{ - int evtchn = evtchn_from_irq(irq); - - if (VALID_EVTCHN(evtchn)) - unmask_evtchn(evtchn); -} - -static void disable_dynirq(unsigned int irq) -{ - int evtchn = evtchn_from_irq(irq); - - if (VALID_EVTCHN(evtchn)) - mask_evtchn(evtchn); -} - -static void ack_dynirq(unsigned int irq) -{ - int evtchn = evtchn_from_irq(irq); - - move_native_irq(irq); - - if (VALID_EVTCHN(evtchn)) - clear_evtchn(evtchn); -} - -static int retrigger_dynirq(unsigned int irq) -{ - int evtchn = evtchn_from_irq(irq); - struct shared_info *sh = HYPERVISOR_shared_info; - int ret = 0; - - if (VALID_EVTCHN(evtchn)) { - int masked; - - masked = sync_test_and_set_bit(evtchn, sh->evtchn_mask); - sync_set_bit(evtchn, sh->evtchn_pending); - if (!masked) - unmask_evtchn(evtchn); - ret = 1; - } - - return ret; -} - -static struct irq_chip xen_dynamic_chip __read_mostly = { - .name = "xen-dyn", - .mask = disable_dynirq, - .unmask = enable_dynirq, - .ack = ack_dynirq, - .set_affinity = set_affinity_irq, - .retrigger = retrigger_dynirq, -}; - -void __init xen_init_IRQ(void) -{ - int i; - - init_evtchn_cpu_bindings(); - - /* No event channels are 'live' right now. */ - for (i = 0; i < NR_EVENT_CHANNELS; i++) - mask_evtchn(i); - - /* Dynamic IRQ space is currently unbound. Zero the refcnts. 
*/ - for (i = 0; i < NR_IRQS; i++) - irq_bindcount[i] = 0; - - irq_ctx_init(smp_processor_id()); -} diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 22395d20dd6e..f1063ae08037 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -3,6 +3,7 @@ #include #include +#include /* These are code, but not functions. Defined in entry.S */ extern const char xen_hypervisor_callback[]; @@ -10,7 +11,6 @@ extern const char xen_failsafe_callback[]; void xen_copy_trap_info(struct trap_info *traps); -DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); DECLARE_PER_CPU(unsigned long, xen_cr3); DECLARE_PER_CPU(unsigned long, xen_current_cr3); diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 609fdda90a34..823ce788b14b 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1,2 +1,2 @@ -obj-y += grant-table.o features.o +obj-y += grant-table.o features.o events.o obj-y += xenbus/ diff --git a/drivers/xen/events.c b/drivers/xen/events.c new file mode 100644 index 000000000000..c50d499b1e6f --- /dev/null +++ b/drivers/xen/events.c @@ -0,0 +1,657 @@ +/* + * Xen event channels + * + * Xen models interrupts with abstract event channels. Because each + * domain gets 1024 event channels, but NR_IRQ is not that large, we + * must dynamically map irqs<->event channels. The event channels + * interface with the rest of the kernel by defining a xen interrupt + * chip. When an event is recieved, it is mapped to an irq and sent + * through the normal interrupt processing path. + * + * There are four kinds of events which can be mapped to an event + * channel: + * + * 1. Inter-domain notifications. This includes all the virtual + * device events, since they're driven by front-ends in another domain + * (typically dom0). + * 2. VIRQs, typically used for timers. These are per-cpu events. + * 3. IPIs. + * 4. Hardware interrupts. Not supported at present. + * + * Jeremy Fitzhardinge , XenSource Inc, 2007 + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * This lock protects updates to the following mapping and reference-count + * arrays. The lock does not need to be acquired to read the mapping tables. + */ +static DEFINE_SPINLOCK(irq_mapping_update_lock); + +/* IRQ <-> VIRQ mapping. */ +static DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1}; + +/* IRQ <-> IPI mapping */ +static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = -1}; + +/* Packed IRQ information: binding type, sub-type index, and event channel. */ +struct packed_irq +{ + unsigned short evtchn; + unsigned char index; + unsigned char type; +}; + +static struct packed_irq irq_info[NR_IRQS]; + +/* Binding types. */ +enum { + IRQT_UNBOUND, + IRQT_PIRQ, + IRQT_VIRQ, + IRQT_IPI, + IRQT_EVTCHN +}; + +/* Convenient shorthand for packed representation of an unbound IRQ. */ +#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0) + +static int evtchn_to_irq[NR_EVENT_CHANNELS] = { + [0 ... NR_EVENT_CHANNELS-1] = -1 +}; +static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG]; +static u8 cpu_evtchn[NR_EVENT_CHANNELS]; + +/* Reference counts for bindings to IRQs. */ +static int irq_bindcount[NR_IRQS]; + +/* Xen will never allocate port zero for any purpose. */ +#define VALID_EVTCHN(chn) ((chn) != 0) + +/* + * Force a proper event-channel callback from Xen after clearing the + * callback mask. 
We do this in a very simple manner, by making a call + * down into Xen. The pending flag will be checked by Xen on return. + */ +void force_evtchn_callback(void) +{ + (void)HYPERVISOR_xen_version(0, NULL); +} +EXPORT_SYMBOL_GPL(force_evtchn_callback); + +static struct irq_chip xen_dynamic_chip; + +/* Constructor for packed IRQ information. */ +static inline struct packed_irq mk_irq_info(u32 type, u32 index, u32 evtchn) +{ + return (struct packed_irq) { evtchn, index, type }; +} + +/* + * Accessors for packed IRQ information. + */ +static inline unsigned int evtchn_from_irq(int irq) +{ + return irq_info[irq].evtchn; +} + +static inline unsigned int index_from_irq(int irq) +{ + return irq_info[irq].index; +} + +static inline unsigned int type_from_irq(int irq) +{ + return irq_info[irq].type; +} + +static inline unsigned long active_evtchns(unsigned int cpu, + struct shared_info *sh, + unsigned int idx) +{ + return (sh->evtchn_pending[idx] & + cpu_evtchn_mask[cpu][idx] & + ~sh->evtchn_mask[idx]); +} + +static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) +{ + int irq = evtchn_to_irq[chn]; + + BUG_ON(irq == -1); +#ifdef CONFIG_SMP + irq_desc[irq].affinity = cpumask_of_cpu(cpu); +#endif + + __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]); + __set_bit(chn, cpu_evtchn_mask[cpu]); + + cpu_evtchn[chn] = cpu; +} + +static void init_evtchn_cpu_bindings(void) +{ +#ifdef CONFIG_SMP + int i; + /* By default all event channels notify CPU#0. */ + for (i = 0; i < NR_IRQS; i++) + irq_desc[i].affinity = cpumask_of_cpu(0); +#endif + + memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); + memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); +} + +static inline unsigned int cpu_from_evtchn(unsigned int evtchn) +{ + return cpu_evtchn[evtchn]; +} + +static inline void clear_evtchn(int port) +{ + struct shared_info *s = HYPERVISOR_shared_info; + sync_clear_bit(port, &s->evtchn_pending[0]); +} + +static inline void set_evtchn(int port) +{ + struct shared_info *s = HYPERVISOR_shared_info; + sync_set_bit(port, &s->evtchn_pending[0]); +} + + +/** + * notify_remote_via_irq - send event to remote end of event channel via irq + * @irq: irq of event channel to send event to + * + * Unlike notify_remote_via_evtchn(), this is safe to use across + * save/restore. Notifications on a broken connection are silently + * dropped. + */ +void notify_remote_via_irq(int irq) +{ + int evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) + notify_remote_via_evtchn(evtchn); +} +EXPORT_SYMBOL_GPL(notify_remote_via_irq); + +static void mask_evtchn(int port) +{ + struct shared_info *s = HYPERVISOR_shared_info; + sync_set_bit(port, &s->evtchn_mask[0]); +} + +static void unmask_evtchn(int port) +{ + struct shared_info *s = HYPERVISOR_shared_info; + unsigned int cpu = get_cpu(); + + BUG_ON(!irqs_disabled()); + + /* Slow path (hypercall) if this is a non-local port. */ + if (unlikely(cpu != cpu_from_evtchn(port))) { + struct evtchn_unmask unmask = { .port = port }; + (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); + } else { + struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); + + sync_clear_bit(port, &s->evtchn_mask[0]); + + /* + * The following is basically the equivalent of + * 'hw_resend_irq'. Just like a real IO-APIC we 'lose + * the interrupt edge' if the channel is masked. 
+ */ + if (sync_test_bit(port, &s->evtchn_pending[0]) && + !sync_test_and_set_bit(port / BITS_PER_LONG, + &vcpu_info->evtchn_pending_sel)) + vcpu_info->evtchn_upcall_pending = 1; + } + + put_cpu(); +} + +static int find_unbound_irq(void) +{ + int irq; + + /* Only allocate from dynirq range */ + for (irq = 0; irq < NR_IRQS; irq++) + if (irq_bindcount[irq] == 0) + break; + + if (irq == NR_IRQS) + panic("No available IRQ to bind to: increase NR_IRQS!\n"); + + return irq; +} + +int bind_evtchn_to_irq(unsigned int evtchn) +{ + int irq; + + spin_lock(&irq_mapping_update_lock); + + irq = evtchn_to_irq[evtchn]; + + if (irq == -1) { + irq = find_unbound_irq(); + + dynamic_irq_init(irq); + set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, + handle_level_irq, "event"); + + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn); + } + + irq_bindcount[irq]++; + + spin_unlock(&irq_mapping_update_lock); + + return irq; +} +EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); + +static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) +{ + struct evtchn_bind_ipi bind_ipi; + int evtchn, irq; + + spin_lock(&irq_mapping_update_lock); + + irq = per_cpu(ipi_to_irq, cpu)[ipi]; + if (irq == -1) { + irq = find_unbound_irq(); + if (irq < 0) + goto out; + + dynamic_irq_init(irq); + set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, + handle_level_irq, "ipi"); + + bind_ipi.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, + &bind_ipi) != 0) + BUG(); + evtchn = bind_ipi.port; + + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); + + per_cpu(ipi_to_irq, cpu)[ipi] = irq; + + bind_evtchn_to_cpu(evtchn, cpu); + } + + irq_bindcount[irq]++; + + out: + spin_unlock(&irq_mapping_update_lock); + return irq; +} + + +static int bind_virq_to_irq(unsigned int virq, unsigned int cpu) +{ + struct evtchn_bind_virq bind_virq; + int evtchn, irq; + + spin_lock(&irq_mapping_update_lock); + + irq = per_cpu(virq_to_irq, cpu)[virq]; + + if (irq == -1) { + bind_virq.virq = virq; + bind_virq.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq) != 0) + BUG(); + evtchn = bind_virq.port; + + irq = find_unbound_irq(); + + dynamic_irq_init(irq); + set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, + handle_level_irq, "virq"); + + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); + + per_cpu(virq_to_irq, cpu)[virq] = irq; + + bind_evtchn_to_cpu(evtchn, cpu); + } + + irq_bindcount[irq]++; + + spin_unlock(&irq_mapping_update_lock); + + return irq; +} + +static void unbind_from_irq(unsigned int irq) +{ + struct evtchn_close close; + int evtchn = evtchn_from_irq(irq); + + spin_lock(&irq_mapping_update_lock); + + if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) { + close.port = evtchn; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) + BUG(); + + switch (type_from_irq(irq)) { + case IRQT_VIRQ: + per_cpu(virq_to_irq, cpu_from_evtchn(evtchn)) + [index_from_irq(irq)] = -1; + break; + default: + break; + } + + /* Closed ports are implicitly re-bound to VCPU0. 
*/ + bind_evtchn_to_cpu(evtchn, 0); + + evtchn_to_irq[evtchn] = -1; + irq_info[irq] = IRQ_UNBOUND; + + dynamic_irq_init(irq); + } + + spin_unlock(&irq_mapping_update_lock); +} + +int bind_evtchn_to_irqhandler(unsigned int evtchn, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, void *dev_id) +{ + unsigned int irq; + int retval; + + irq = bind_evtchn_to_irq(evtchn); + retval = request_irq(irq, handler, irqflags, devname, dev_id); + if (retval != 0) { + unbind_from_irq(irq); + return retval; + } + + return irq; +} +EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); + +int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, + irq_handler_t handler, + unsigned long irqflags, const char *devname, void *dev_id) +{ + unsigned int irq; + int retval; + + irq = bind_virq_to_irq(virq, cpu); + retval = request_irq(irq, handler, irqflags, devname, dev_id); + if (retval != 0) { + unbind_from_irq(irq); + return retval; + } + + return irq; +} +EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler); + +int bind_ipi_to_irqhandler(enum ipi_vector ipi, + unsigned int cpu, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id) +{ + int irq, retval; + + irq = bind_ipi_to_irq(ipi, cpu); + if (irq < 0) + return irq; + + retval = request_irq(irq, handler, irqflags, devname, dev_id); + if (retval != 0) { + unbind_from_irq(irq); + return retval; + } + + return irq; +} + +void unbind_from_irqhandler(unsigned int irq, void *dev_id) +{ + free_irq(irq, dev_id); + unbind_from_irq(irq); +} +EXPORT_SYMBOL_GPL(unbind_from_irqhandler); + +void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) +{ + int irq = per_cpu(ipi_to_irq, cpu)[vector]; + BUG_ON(irq < 0); + notify_remote_via_irq(irq); +} + +irqreturn_t xen_debug_interrupt(int irq, void *dev_id) +{ + struct shared_info *sh = HYPERVISOR_shared_info; + int cpu = smp_processor_id(); + int i; + unsigned long flags; + static DEFINE_SPINLOCK(debug_lock); + + spin_lock_irqsave(&debug_lock, flags); + + printk("vcpu %d\n ", cpu); + + for_each_online_cpu(i) { + struct vcpu_info *v = per_cpu(xen_vcpu, i); + printk("%d: masked=%d pending=%d event_sel %08lx\n ", i, + (get_irq_regs() && i == cpu) ? !(get_irq_regs()->flags & X86_EFLAGS_IF) : v->evtchn_upcall_mask, + v->evtchn_upcall_pending, + v->evtchn_pending_sel); + } + printk("pending:\n "); + for(i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--) + printk("%08lx%s", sh->evtchn_pending[i], + i % 8 == 0 ? "\n " : " "); + printk("\nmasks:\n "); + for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) + printk("%08lx%s", sh->evtchn_mask[i], + i % 8 == 0 ? "\n " : " "); + + printk("\nunmasked:\n "); + for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) + printk("%08lx%s", sh->evtchn_pending[i] & ~sh->evtchn_mask[i], + i % 8 == 0 ? "\n " : " "); + + printk("\npending list:\n"); + for(i = 0; i < NR_EVENT_CHANNELS; i++) { + if (sync_test_bit(i, sh->evtchn_pending)) { + printk(" %d: event %d -> irq %d\n", + cpu_evtchn[i], i, + evtchn_to_irq[i]); + } + } + + spin_unlock_irqrestore(&debug_lock, flags); + + return IRQ_HANDLED; +} + + +/* + * Search the CPUs pending events bitmasks. For each one found, map + * the event number to an irq, and feed it into do_IRQ() for + * handling. + * + * Xen uses a two-level bitmap to speed searching. The first level is + * a bitset of words which contain pending event bits. The second + * level is a bitset of pending events themselves. 
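 *
 * [Editor's note: a worked example, not in the original comment. With
 *  32-bit words, a pending event on port 70 sets bit 70/32 = 2 in
 *  evtchn_pending_sel and bit 70%32 = 6 in evtchn_pending[2]; the loop
 *  below recovers the port as word_idx * BITS_PER_LONG + bit_idx.]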
+ */ +void xen_evtchn_do_upcall(struct pt_regs *regs) +{ + int cpu = get_cpu(); + struct shared_info *s = HYPERVISOR_shared_info; + struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); + static DEFINE_PER_CPU(unsigned, nesting_count); + unsigned count; + + do { + unsigned long pending_words; + + vcpu_info->evtchn_upcall_pending = 0; + + if (__get_cpu_var(nesting_count)++) + goto out; + + /* NB. No need for a barrier here -- XCHG is a barrier on x86. */ + pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0); + while (pending_words != 0) { + unsigned long pending_bits; + int word_idx = __ffs(pending_words); + pending_words &= ~(1UL << word_idx); + + while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) { + int bit_idx = __ffs(pending_bits); + int port = (word_idx * BITS_PER_LONG) + bit_idx; + int irq = evtchn_to_irq[port]; + + if (irq != -1) { + regs->orig_ax = ~irq; + do_IRQ(regs); + } + } + } + + BUG_ON(!irqs_disabled()); + + count = __get_cpu_var(nesting_count); + __get_cpu_var(nesting_count) = 0; + } while(count != 1); + +out: + put_cpu(); +} + +/* Rebind an evtchn so that it gets delivered to a specific cpu */ +static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) +{ + struct evtchn_bind_vcpu bind_vcpu; + int evtchn = evtchn_from_irq(irq); + + if (!VALID_EVTCHN(evtchn)) + return; + + /* Send future instances of this interrupt to other vcpu. */ + bind_vcpu.port = evtchn; + bind_vcpu.vcpu = tcpu; + + /* + * If this fails, it usually just indicates that we're dealing with a + * virq or IPI channel, which don't actually need to be rebound. Ignore + * it, but don't do the xenlinux-level rebind in that case. + */ + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) + bind_evtchn_to_cpu(evtchn, tcpu); +} + + +static void set_affinity_irq(unsigned irq, cpumask_t dest) +{ + unsigned tcpu = first_cpu(dest); + rebind_irq_to_cpu(irq, tcpu); +} + +static void enable_dynirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) + unmask_evtchn(evtchn); +} + +static void disable_dynirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) + mask_evtchn(evtchn); +} + +static void ack_dynirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + + move_native_irq(irq); + + if (VALID_EVTCHN(evtchn)) + clear_evtchn(evtchn); +} + +static int retrigger_dynirq(unsigned int irq) +{ + int evtchn = evtchn_from_irq(irq); + struct shared_info *sh = HYPERVISOR_shared_info; + int ret = 0; + + if (VALID_EVTCHN(evtchn)) { + int masked; + + masked = sync_test_and_set_bit(evtchn, sh->evtchn_mask); + sync_set_bit(evtchn, sh->evtchn_pending); + if (!masked) + unmask_evtchn(evtchn); + ret = 1; + } + + return ret; +} + +static struct irq_chip xen_dynamic_chip __read_mostly = { + .name = "xen-dyn", + .mask = disable_dynirq, + .unmask = enable_dynirq, + .ack = ack_dynirq, + .set_affinity = set_affinity_irq, + .retrigger = retrigger_dynirq, +}; + +void __init xen_init_IRQ(void) +{ + int i; + + init_evtchn_cpu_bindings(); + + /* No event channels are 'live' right now. */ + for (i = 0; i < NR_EVENT_CHANNELS; i++) + mask_evtchn(i); + + /* Dynamic IRQ space is currently unbound. Zero the refcnts. 
*/
+ for (i = 0; i < NR_IRQS; i++)
+ irq_bindcount[i] = 0;
+
+ irq_ctx_init(smp_processor_id());
+}
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
new file mode 100644
index 000000000000..10ddfe0142d0
--- /dev/null
+++ b/include/xen/xen-ops.h
@@ -0,0 +1,8 @@
+#ifndef INCLUDE_XEN_OPS_H
+#define INCLUDE_XEN_OPS_H
+
+#include
+
+DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
+
+#endif /* INCLUDE_XEN_OPS_H */
-- cgit v1.2.3

From e849c3e9e0b786619c451d89ef0c47ac9a28fbc1 Mon Sep 17 00:00:00 2001
From: Isaku Yamahata
Date: Wed, 2 Apr 2008 10:53:56 -0700
Subject: Xen: make events.c portable for ia64/xen support

Remove the x86 dependency in drivers/xen/events.c for ia64/xen support
by introducing include/asm/xen/events.h.

Introduce xen_irqs_disabled() to hide regs->flags.
Introduce xen_do_IRQ() to hide regs->orig_ax.
Make the enum ipi_vector definition arch specific; ia64/xen needs four
vectors. Add one rmb() because on ia64 xchg() isn't a barrier.

Signed-off-by: Isaku Yamahata
Signed-off-by: Jeremy Fitzhardinge
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
---
 drivers/xen/events.c | 13 +++++++------
 include/asm-x86/xen/events.h | 22 ++++++++++++++++++++++
 include/xen/events.h | 8 +-------
 3 files changed, 30 insertions(+), 13 deletions(-)
 create mode 100644 include/asm-x86/xen/events.h

(limited to 'include/xen')

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index c50d499b1e6f..2396b4492f70 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -469,7 +469,7 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
 for_each_online_cpu(i) {
 struct vcpu_info *v = per_cpu(xen_vcpu, i);
 printk("%d: masked=%d pending=%d event_sel %08lx\n ", i,
- (get_irq_regs() && i == cpu) ? !(get_irq_regs()->flags & X86_EFLAGS_IF) : v->evtchn_upcall_mask,
+ (get_irq_regs() && i == cpu) ? xen_irqs_disabled(get_irq_regs()) : v->evtchn_upcall_mask,
 v->evtchn_upcall_pending,
 v->evtchn_pending_sel);
 }
@@ -527,7 +527,10 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
 if (__get_cpu_var(nesting_count)++)
 goto out;
 
- /* NB. No need for a barrier here -- XCHG is a barrier on x86. */
+#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
+ /* Clear master flag /before/ clearing selector flag. 
*/
+ rmb();
+#endif
 pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
 while (pending_words != 0) {
 unsigned long pending_bits;
@@ -539,10 +542,8 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
 int port = (word_idx * BITS_PER_LONG) + bit_idx;
 int irq = evtchn_to_irq[port];
 
- if (irq != -1) {
- regs->orig_ax = ~irq;
- do_IRQ(regs);
- }
+ if (irq != -1)
+ xen_do_IRQ(irq, regs);
 }
 }
diff --git a/include/asm-x86/xen/events.h b/include/asm-x86/xen/events.h
new file mode 100644
index 000000000000..596312a7bfc9
--- /dev/null
+++ b/include/asm-x86/xen/events.h
@@ -0,0 +1,22 @@
+#ifndef __XEN_EVENTS_H
+#define __XEN_EVENTS_H
+
+enum ipi_vector {
+ XEN_RESCHEDULE_VECTOR,
+ XEN_CALL_FUNCTION_VECTOR,
+
+ XEN_NR_IPIS,
+};
+
+static inline int xen_irqs_disabled(struct pt_regs *regs)
+{
+ return raw_irqs_disabled_flags(regs->flags);
+}
+
+static inline void xen_do_IRQ(int irq, struct pt_regs *regs)
+{
+ regs->orig_ax = ~irq;
+ do_IRQ(regs);
+}
+
+#endif /* __XEN_EVENTS_H */
diff --git a/include/xen/events.h b/include/xen/events.h
index 2bde54d29be5..d99a3e0df880 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -5,13 +5,7 @@
 #include
 #include
-
-enum ipi_vector {
- XEN_RESCHEDULE_VECTOR,
- XEN_CALL_FUNCTION_VECTOR,
-
- XEN_NR_IPIS,
-};
+#include
 
 int bind_evtchn_to_irq(unsigned int evtchn);
 int bind_evtchn_to_irqhandler(unsigned int evtchn,
-- cgit v1.2.3

From 642e0c882cd5369429c833d97e4804c8be473e8a Mon Sep 17 00:00:00 2001
From: Isaku Yamahata
Date: Wed, 2 Apr 2008 10:53:57 -0700
Subject: xen: add resend_irq_on_evtchn() definition into events.c

Define resend_irq_on_evtchn(), which ia64/xen uses. Although it isn't
used by the current x86/xen code, it's arch generic, so put it into the
common code.

Signed-off-by: Isaku Yamahata
Signed-off-by: Jeremy Fitzhardinge
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
---
 drivers/xen/events.c | 16 ++++++++++++++++
 include/xen/events.h | 1 +
 2 files changed, 17 insertions(+)

(limited to 'include/xen')

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 2396b4492f70..4f0f22b020ea 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -586,6 +586,22 @@ static void set_affinity_irq(unsigned irq, cpumask_t dest)
 rebind_irq_to_cpu(irq, tcpu);
 }
 
+int resend_irq_on_evtchn(unsigned int irq)
+{
+ int masked, evtchn = evtchn_from_irq(irq);
+ struct shared_info *s = HYPERVISOR_shared_info;
+
+ if (!VALID_EVTCHN(evtchn))
+ return 1;
+
+ masked = sync_test_and_set_bit(evtchn, s->evtchn_mask);
+ sync_set_bit(evtchn, s->evtchn_pending);
+ if (!masked)
+ unmask_evtchn(evtchn);
+
+ return 1;
+}
+
 static void enable_dynirq(unsigned int irq)
 {
 int evtchn = evtchn_from_irq(irq);
diff --git a/include/xen/events.h b/include/xen/events.h
index d99a3e0df880..acd8e062c85f 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -31,6 +31,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
 void unbind_from_irqhandler(unsigned int irq, void *dev_id);
 
 void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector);
+int resend_irq_on_evtchn(unsigned int irq);
 
 static inline void notify_remote_via_evtchn(int port)
 {
-- cgit v1.2.3

From 20e71f2edb5991de8f2a70902b4aa5982f67c69c Mon Sep 17 00:00:00 2001
From: Isaku Yamahata
Date: Wed, 2 Apr 2008 10:53:58 -0700
Subject: xen: make include/xen/page.h portable, moving those definitions under asm dir

The definitions in include/asm/xen/page.h are arch specific.
ia64/xen wants to define its own version. 
So move them to arch specific directory and keep include/xen/page.h in order not to break compilation. Signed-off-by: Isaku Yamahata Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/xen/page.h | 168 ++++++++++++++++++++++++++++++++++++++++++++ include/xen/page.h | 169 +-------------------------------------------- 2 files changed, 169 insertions(+), 168 deletions(-) create mode 100644 include/asm-x86/xen/page.h (limited to 'include/xen') diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h new file mode 100644 index 000000000000..01799305f02a --- /dev/null +++ b/include/asm-x86/xen/page.h @@ -0,0 +1,168 @@ +#ifndef __XEN_PAGE_H +#define __XEN_PAGE_H + +#include + +#include +#include + +#include + +/* Xen machine address */ +typedef struct xmaddr { + phys_addr_t maddr; +} xmaddr_t; + +/* Xen pseudo-physical address */ +typedef struct xpaddr { + phys_addr_t paddr; +} xpaddr_t; + +#define XMADDR(x) ((xmaddr_t) { .maddr = (x) }) +#define XPADDR(x) ((xpaddr_t) { .paddr = (x) }) + +/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ +#define INVALID_P2M_ENTRY (~0UL) +#define FOREIGN_FRAME_BIT (1UL<<31) +#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) + +extern unsigned long *phys_to_machine_mapping; + +static inline unsigned long pfn_to_mfn(unsigned long pfn) +{ + if (xen_feature(XENFEAT_auto_translated_physmap)) + return pfn; + + return phys_to_machine_mapping[(unsigned int)(pfn)] & + ~FOREIGN_FRAME_BIT; +} + +static inline int phys_to_machine_mapping_valid(unsigned long pfn) +{ + if (xen_feature(XENFEAT_auto_translated_physmap)) + return 1; + + return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY); +} + +static inline unsigned long mfn_to_pfn(unsigned long mfn) +{ + unsigned long pfn; + + if (xen_feature(XENFEAT_auto_translated_physmap)) + return mfn; + +#if 0 + if (unlikely((mfn >> machine_to_phys_order) != 0)) + return max_mapnr; +#endif + + pfn = 0; + /* + * The array access can fail (e.g., device space beyond end of RAM). + * In such cases it doesn't matter what we return (we return garbage), + * but we must handle the fault without crashing! + */ + __get_user(pfn, &machine_to_phys_mapping[mfn]); + + return pfn; +} + +static inline xmaddr_t phys_to_machine(xpaddr_t phys) +{ + unsigned offset = phys.paddr & ~PAGE_MASK; + return XMADDR(PFN_PHYS((u64)pfn_to_mfn(PFN_DOWN(phys.paddr))) | offset); +} + +static inline xpaddr_t machine_to_phys(xmaddr_t machine) +{ + unsigned offset = machine.maddr & ~PAGE_MASK; + return XPADDR(PFN_PHYS((u64)mfn_to_pfn(PFN_DOWN(machine.maddr))) | offset); +} + +/* + * We detect special mappings in one of two ways: + * 1. If the MFN is an I/O page then Xen will set the m2p entry + * to be outside our maximum possible pseudophys range. + * 2. If the MFN belongs to a different domain then we will certainly + * not have MFN in our p2m table. Conversely, if the page is ours, + * then we'll have p2m(m2p(MFN))==MFN. + * If we detect a special mapping then it doesn't have a 'struct page'. + * We force !pfn_valid() by returning an out-of-range pointer. + * + * NB. These checks require that, for any MFN that is not in our reservation, + * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if + * we are foreign-mapping the MFN, and the other domain as m2p(MFN) == PFN. + * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety. + * + * NB2. When deliberately mapping foreign pages into the p2m table, you *must* + * use FOREIGN_FRAME(). 
This will cause pte_pfn() to choke on it, as we + * require. In all the cases we care about, the FOREIGN_FRAME bit is + * masked (e.g., pfn_to_mfn()) so behaviour there is correct. + */ +static inline unsigned long mfn_to_local_pfn(unsigned long mfn) +{ + extern unsigned long max_mapnr; + unsigned long pfn = mfn_to_pfn(mfn); + if ((pfn < max_mapnr) + && !xen_feature(XENFEAT_auto_translated_physmap) + && (phys_to_machine_mapping[pfn] != mfn)) + return max_mapnr; /* force !pfn_valid() */ + return pfn; +} + +static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn) +{ + if (xen_feature(XENFEAT_auto_translated_physmap)) { + BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); + return; + } + phys_to_machine_mapping[pfn] = mfn; +} + +/* VIRT <-> MACHINE conversion */ +#define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) +#define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v)))) +#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) + +static inline unsigned long pte_mfn(pte_t pte) +{ + return (pte.pte & ~_PAGE_NX) >> PAGE_SHIFT; +} + +static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot) +{ + pte_t pte; + + pte.pte = ((phys_addr_t)page_nr << PAGE_SHIFT) | + (pgprot_val(pgprot) & __supported_pte_mask); + + return pte; +} + +static inline pteval_t pte_val_ma(pte_t pte) +{ + return pte.pte; +} + +static inline pte_t __pte_ma(pteval_t x) +{ + return (pte_t) { .pte = x }; +} + +#ifdef CONFIG_X86_PAE +#define pmd_val_ma(v) ((v).pmd) +#define pud_val_ma(v) ((v).pgd.pgd) +#define __pmd_ma(x) ((pmd_t) { (x) } ) +#else /* !X86_PAE */ +#define pmd_val_ma(v) ((v).pud.pgd.pgd) +#endif /* CONFIG_X86_PAE */ + +#define pgd_val_ma(x) ((x).pgd) + + +xmaddr_t arbitrary_virt_to_machine(unsigned long address); +void make_lowmem_page_readonly(void *vaddr); +void make_lowmem_page_readwrite(void *vaddr); + +#endif /* __XEN_PAGE_H */ diff --git a/include/xen/page.h b/include/xen/page.h index 01799305f02a..eaf85fab1263 100644 --- a/include/xen/page.h +++ b/include/xen/page.h @@ -1,168 +1 @@ -#ifndef __XEN_PAGE_H -#define __XEN_PAGE_H - -#include - -#include -#include - -#include - -/* Xen machine address */ -typedef struct xmaddr { - phys_addr_t maddr; -} xmaddr_t; - -/* Xen pseudo-physical address */ -typedef struct xpaddr { - phys_addr_t paddr; -} xpaddr_t; - -#define XMADDR(x) ((xmaddr_t) { .maddr = (x) }) -#define XPADDR(x) ((xpaddr_t) { .paddr = (x) }) - -/**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ -#define INVALID_P2M_ENTRY (~0UL) -#define FOREIGN_FRAME_BIT (1UL<<31) -#define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) - -extern unsigned long *phys_to_machine_mapping; - -static inline unsigned long pfn_to_mfn(unsigned long pfn) -{ - if (xen_feature(XENFEAT_auto_translated_physmap)) - return pfn; - - return phys_to_machine_mapping[(unsigned int)(pfn)] & - ~FOREIGN_FRAME_BIT; -} - -static inline int phys_to_machine_mapping_valid(unsigned long pfn) -{ - if (xen_feature(XENFEAT_auto_translated_physmap)) - return 1; - - return (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY); -} - -static inline unsigned long mfn_to_pfn(unsigned long mfn) -{ - unsigned long pfn; - - if (xen_feature(XENFEAT_auto_translated_physmap)) - return mfn; - -#if 0 - if (unlikely((mfn >> machine_to_phys_order) != 0)) - return max_mapnr; -#endif - - pfn = 0; - /* - * The array access can fail (e.g., device space beyond end of RAM). - * In such cases it doesn't matter what we return (we return garbage), - * but we must handle the fault without crashing! 
- */
-	__get_user(pfn, &machine_to_phys_mapping[mfn]);
-
-	return pfn;
-}
-
-static inline xmaddr_t phys_to_machine(xpaddr_t phys)
-{
-	unsigned offset = phys.paddr & ~PAGE_MASK;
-	return XMADDR(PFN_PHYS((u64)pfn_to_mfn(PFN_DOWN(phys.paddr))) | offset);
-}
-
-static inline xpaddr_t machine_to_phys(xmaddr_t machine)
-{
-	unsigned offset = machine.maddr & ~PAGE_MASK;
-	return XPADDR(PFN_PHYS((u64)mfn_to_pfn(PFN_DOWN(machine.maddr))) | offset);
-}
-
-/*
- * We detect special mappings in one of two ways:
- *  1. If the MFN is an I/O page then Xen will set the m2p entry
- *     to be outside our maximum possible pseudophys range.
- *  2. If the MFN belongs to a different domain then we will certainly
- *     not have MFN in our p2m table. Conversely, if the page is ours,
- *     then we'll have p2m(m2p(MFN))==MFN.
- * If we detect a special mapping then it doesn't have a 'struct page'.
- * We force !pfn_valid() by returning an out-of-range pointer.
- *
- * NB. These checks require that, for any MFN that is not in our reservation,
- * there is no PFN such that p2m(PFN) == MFN. Otherwise we can get confused if
- * we are foreign-mapping the MFN, and the other domain as m2p(MFN) == PFN.
- * Yikes! Various places must poke in INVALID_P2M_ENTRY for safety.
- *
- * NB2. When deliberately mapping foreign pages into the p2m table, you *must*
- *      use FOREIGN_FRAME(). This will cause pte_pfn() to choke on it, as we
- *      require. In all the cases we care about, the FOREIGN_FRAME bit is
- *      masked (e.g., pfn_to_mfn()) so behaviour there is correct.
- */
-static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
-{
-	extern unsigned long max_mapnr;
-	unsigned long pfn = mfn_to_pfn(mfn);
-	if ((pfn < max_mapnr)
-	    && !xen_feature(XENFEAT_auto_translated_physmap)
-	    && (phys_to_machine_mapping[pfn] != mfn))
-		return max_mapnr; /* force !pfn_valid() */
-	return pfn;
-}
-
-static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
-{
-	if (xen_feature(XENFEAT_auto_translated_physmap)) {
-		BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
-		return;
-	}
-	phys_to_machine_mapping[pfn] = mfn;
-}
-
-/* VIRT <-> MACHINE conversion */
-#define virt_to_machine(v)	(phys_to_machine(XPADDR(__pa(v))))
-#define virt_to_mfn(v)		(pfn_to_mfn(PFN_DOWN(__pa(v))))
-#define mfn_to_virt(m)		(__va(mfn_to_pfn(m) << PAGE_SHIFT))
-
-static inline unsigned long pte_mfn(pte_t pte)
-{
-	return (pte.pte & ~_PAGE_NX) >> PAGE_SHIFT;
-}
-
-static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot)
-{
-	pte_t pte;
-
-	pte.pte = ((phys_addr_t)page_nr << PAGE_SHIFT) |
-		  (pgprot_val(pgprot) & __supported_pte_mask);
-
-	return pte;
-}
-
-static inline pteval_t pte_val_ma(pte_t pte)
-{
-	return pte.pte;
-}
-
-static inline pte_t __pte_ma(pteval_t x)
-{
-	return (pte_t) { .pte = x };
-}
-
-#ifdef CONFIG_X86_PAE
-#define pmd_val_ma(v) ((v).pmd)
-#define pud_val_ma(v) ((v).pgd.pgd)
-#define __pmd_ma(x)	((pmd_t) { (x) } )
-#else  /* !X86_PAE */
-#define pmd_val_ma(v)	((v).pud.pgd.pgd)
-#endif	/* CONFIG_X86_PAE */
-
-#define pgd_val_ma(x)	((x).pgd)
-
-
-xmaddr_t arbitrary_virt_to_machine(unsigned long address);
-void make_lowmem_page_readonly(void *vaddr);
-void make_lowmem_page_readwrite(void *vaddr);
-
-#endif /* __XEN_PAGE_H */
+#include
-- cgit v1.2.3
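The conversion helpers this patch moves are easiest to see in use. A
minimal sketch (not part of any patch here; the helper name is invented,
and it only works for direct-mapped kernel addresses where __pa() is
valid):

	/* Translate a direct-mapped kernel virtual address to a Xen machine
	 * address: virt -> pseudo-physical frame -> machine frame via p2m. */
	static unsigned long example_virt_to_maddr(void *vaddr)
	{
		unsigned long pfn = PFN_DOWN(__pa(vaddr));	/* pseudo-physical frame */
		unsigned long mfn = pfn_to_mfn(pfn);		/* p2m lookup */

		return (mfn << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK);
	}

This is what virt_to_machine() composes out of XPADDR(), phys_to_machine()
and XMADDR(); on auto-translated guests pfn_to_mfn() simply returns the
pfn, so the same code works there too.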
From 5f0ababbf49f12330effab932a18055a50f4c0a1 Mon Sep 17 00:00:00 2001
From: Isaku Yamahata
Date: Wed, 2 Apr 2008 10:53:59 -0700
Subject: xen: replace callers of alloc_vm_area()/free_vm_area() with xen_ prefixed one

Don't use alloc_vm_area()/free_vm_area() directly; instead, define
xen_alloc_vm_area()/xen_free_vm_area() and use them.

alloc_vm_area()/free_vm_area() are used to allocate/free an area for
grant table mappings. The Xen/x86 grant table is based on virtual
addresses, so alloc_vm_area()/free_vm_area() are suitable there. The
Xen/ia64 (and Xen/powerpc) grant table, on the other hand, is based on
pseudo-physical addresses (guest physical addresses), so allocation
should be done differently.

The original xenified Linux/IA64 tree has its own
allocate_vm_area()/free_vm_area() definitions which, contrary to their
names, don't allocate a vm area. Vanilla Linux already has definitions
by those names, so IA64-private definitions of
allocate_vm_area()/free_vm_area() can't be kept. Instead, introduce
xen_alloc_vm_area()/xen_free_vm_area() and use them.

Signed-off-by: Isaku Yamahata
Signed-off-by: Jeremy Fitzhardinge
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
---
 drivers/xen/grant-table.c          | 2 +-
 drivers/xen/xenbus/xenbus_client.c | 6 +++---
 include/asm-x86/xen/grant_table.h  | 7 +++++++
 include/xen/grant_table.h          | 1 +
 4 files changed, 12 insertions(+), 4 deletions(-)
 create mode 100644 include/asm-x86/xen/grant_table.h
(limited to 'include/xen')

diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index d4a89b3d694f..3e02808c4055 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -482,7 +482,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
 	if (shared == NULL) {
 		struct vm_struct *area;
-		area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames());
+		area = xen_alloc_vm_area(PAGE_SIZE * max_nr_grant_frames());
 		BUG_ON(area == NULL);
 		shared = area->addr;
 	}
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 9fd2f70ab46d..0f86b0ff7879 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -399,7 +399,7 @@ int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
 	*vaddr = NULL;

-	area = alloc_vm_area(PAGE_SIZE);
+	area = xen_alloc_vm_area(PAGE_SIZE);
 	if (!area)
 		return -ENOMEM;

@@ -409,7 +409,7 @@ int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
 		BUG();

 	if (op.status != GNTST_okay) {
-		free_vm_area(area);
+		xen_free_vm_area(area);
 		xenbus_dev_fatal(dev, op.status,
 				 "mapping in shared page %d from domain %d",
 				 gnt_ref, dev->otherend_id);
@@ -508,7 +508,7 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
 		BUG();

 	if (op.status == GNTST_okay)
-		free_vm_area(area);
+		xen_free_vm_area(area);
 	else
 		xenbus_dev_error(dev, op.status,
 				 "unmapping page at handle %d error %d",
diff --git a/include/asm-x86/xen/grant_table.h b/include/asm-x86/xen/grant_table.h
new file mode 100644
index 000000000000..2444d4593a3b
--- /dev/null
+++ b/include/asm-x86/xen/grant_table.h
@@ -0,0 +1,7 @@
+#ifndef __XEN_GRANT_TABLE_H
+#define __XEN_GRANT_TABLE_H
+
+#define xen_alloc_vm_area(size)	alloc_vm_area(size)
+#define xen_free_vm_area(area)	free_vm_area(area)
+
+#endif /* __XEN_GRANT_TABLE_H */
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index 761c83498e03..d2822920d43e 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -39,6 +39,7 @@

 #include
 #include
+#include

 /* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */
 #define NR_GRANT_FRAMES 4
-- cgit v1.2.3
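The wrappers are thin on x86 but give other architectures a hook. Typical
use, as in gnttab_map() above (a condensed sketch, with error handling
reduced to BUG_ON() as in the driver, and shared being the driver's
grant-entry pointer):

	struct vm_struct *area;

	area = xen_alloc_vm_area(PAGE_SIZE * max_nr_grant_frames());
	BUG_ON(area == NULL);
	shared = area->addr;	/* VA range the grant frames get mapped into */
	/* ... map grant frames, use the table ... */
	xen_free_vm_area(area);	/* teardown */

On x86 these expand to plain alloc_vm_area()/free_vm_area(); an
architecture whose grant table lives at pseudo-physical addresses can
substitute its own allocator behind the same names.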
From 8d3d2106c19f4e69f208f59fe484ca113fbb48b3 Mon Sep 17 00:00:00 2001
From: Isaku Yamahata
Date: Wed, 2 Apr 2008 10:54:00 -0700
Subject: xen: make grant table arch portable

Split out the x86-specific part from grant-table.c and allow
ia64/xen-specific initialization. The ia64/xen grant table is based on
pseudo-physical addresses (guest physical addresses), unlike x86/xen.
On ia64, init_mm doesn't map the identity (straight-mapped) area, so
ia64/xen-specific grant table initialization is necessary.

Signed-off-by: Isaku Yamahata
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
---
 arch/x86/xen/Makefile      |  2 +-
 arch/x86/xen/grant-table.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/xen/grant-table.c  | 35 ++----------------
 include/xen/grant_table.h  |  6 +++
 4 files changed, 101 insertions(+), 33 deletions(-)
 create mode 100644 arch/x86/xen/grant-table.c
(limited to 'include/xen')

diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 95c59260dccb..3d8df981d5fd 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,4 @@
 obj-y		:= enlighten.o setup.o multicalls.o mmu.o \
-			time.o manage.o xen-asm.o
+			time.o manage.o xen-asm.o grant-table.o

 obj-$(CONFIG_SMP)	+= smp.o
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
new file mode 100644
index 000000000000..49ba9b5224d1
--- /dev/null
+++ b/arch/x86/xen/grant-table.c
@@ -0,0 +1,91 @@
+/******************************************************************************
+ * grant_table.c
+ * x86 specific part
+ *
+ * Granting foreign access to our memory reservation.
+ *
+ * Copyright (c) 2005-2006, Christopher Clark
+ * Copyright (c) 2004-2005, K A Fraser
+ * Copyright (c) 2008 Isaku Yamahata
+ *                    VA Linux Systems Japan. Split out x86 specific part.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */ + +#include +#include +#include + +#include +#include +#include + +#include + +static int map_pte_fn(pte_t *pte, struct page *pmd_page, + unsigned long addr, void *data) +{ + unsigned long **frames = (unsigned long **)data; + + set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); + (*frames)++; + return 0; +} + +static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, + unsigned long addr, void *data) +{ + + set_pte_at(&init_mm, addr, pte, __pte(0)); + return 0; +} + +int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, + unsigned long max_nr_gframes, + struct grant_entry **__shared) +{ + int rc; + struct grant_entry *shared = *__shared; + + if (shared == NULL) { + struct vm_struct *area = + xen_alloc_vm_area(PAGE_SIZE * max_nr_gframes); + BUG_ON(area == NULL); + shared = area->addr; + *__shared = shared; + } + + rc = apply_to_page_range(&init_mm, (unsigned long)shared, + PAGE_SIZE * nr_gframes, + map_pte_fn, &frames); + return rc; +} + +void arch_gnttab_unmap_shared(struct grant_entry *shared, + unsigned long nr_gframes) +{ + apply_to_page_range(&init_mm, (unsigned long)shared, + PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); +} diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 3e02808c4055..52b6b41b909d 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -439,24 +439,6 @@ static inline unsigned int max_nr_grant_frames(void) return xen_max; } -static int map_pte_fn(pte_t *pte, struct page *pmd_page, - unsigned long addr, void *data) -{ - unsigned long **frames = (unsigned long **)data; - - set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); - (*frames)++; - return 0; -} - -static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, - unsigned long addr, void *data) -{ - - set_pte_at(&init_mm, addr, pte, __pte(0)); - return 0; -} - static int gnttab_map(unsigned int start_idx, unsigned int end_idx) { struct gnttab_setup_table setup; @@ -480,17 +462,9 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) BUG_ON(rc || setup.status); - if (shared == NULL) { - struct vm_struct *area; - area = xen_alloc_vm_area(PAGE_SIZE * max_nr_grant_frames()); - BUG_ON(area == NULL); - shared = area->addr; - } - rc = apply_to_page_range(&init_mm, (unsigned long)shared, - PAGE_SIZE * nr_gframes, - map_pte_fn, &frames); + rc = arch_gnttab_map_shared(frames, nr_gframes, max_nr_grant_frames(), + &shared); BUG_ON(rc); - frames -= nr_gframes; /* adjust after map_pte_fn() */ kfree(frames); @@ -506,10 +480,7 @@ static int gnttab_resume(void) static int gnttab_suspend(void) { - apply_to_page_range(&init_mm, (unsigned long)shared, - PAGE_SIZE * nr_grant_frames, - unmap_pte_fn, NULL); - + arch_gnttab_unmap_shared(shared, nr_grant_frames); return 0; } diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index d2822920d43e..466204846121 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -103,6 +103,12 @@ void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, unsigned long pfn); +int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, + unsigned long max_nr_gframes, + struct grant_entry **__shared); +void arch_gnttab_unmap_shared(struct grant_entry *shared, + unsigned long nr_gframes); + #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) #endif /* __ASM_GNTTAB_H__ */ -- cgit v1.2.3 From b15993fcc1bf15f717fb4414b32e4a11534dfdc4 Mon Sep 17 00:00:00 2001 From: 
Isaku Yamahata
Date: Wed, 2 Apr 2008 10:54:01 -0700
Subject: xen: import arch generic part of xencomm

On xen/ia64 and xen/powerpc, hypercall arguments are passed by
pseudo-physical address (guest physical address), so it's necessary to
convert virtual addresses into pseudo-physical addresses. The framework
that does this is called xencomm. Import the arch-generic part of
xencomm.

Signed-off-by: Isaku Yamahata
Signed-off-by: Jeremy Fitzhardinge
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
---
 drivers/xen/Makefile            |   1 +
 drivers/xen/xencomm.c           | 232 ++++++++++++++++++++++++++++++++++++++++
 include/xen/interface/xencomm.h |  41 +++++++
 include/xen/xencomm.h           |  77 +++++++++++++
 4 files changed, 351 insertions(+)
 create mode 100644 drivers/xen/xencomm.c
 create mode 100644 include/xen/interface/xencomm.h
 create mode 100644 include/xen/xencomm.h
(limited to 'include/xen')

diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 823ce788b14b..43f014cfb458 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,2 +1,3 @@
 obj-y	+= grant-table.o features.o events.o
 obj-y	+= xenbus/
+obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
diff --git a/drivers/xen/xencomm.c b/drivers/xen/xencomm.c
new file mode 100644
index 000000000000..797cb4e31f07
--- /dev/null
+++ b/drivers/xen/xencomm.c
@@ -0,0 +1,232 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Copyright (C) IBM Corp.
2006 + * + * Authors: Hollis Blanchard + */ + +#include +#include +#include +#include +#include +#ifdef __ia64__ +#include /* for is_kern_addr() */ +#endif + +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include +#endif + +static int xencomm_init(struct xencomm_desc *desc, + void *buffer, unsigned long bytes) +{ + unsigned long recorded = 0; + int i = 0; + + while ((recorded < bytes) && (i < desc->nr_addrs)) { + unsigned long vaddr = (unsigned long)buffer + recorded; + unsigned long paddr; + int offset; + int chunksz; + + offset = vaddr % PAGE_SIZE; /* handle partial pages */ + chunksz = min(PAGE_SIZE - offset, bytes - recorded); + + paddr = xencomm_vtop(vaddr); + if (paddr == ~0UL) { + printk(KERN_DEBUG "%s: couldn't translate vaddr %lx\n", + __func__, vaddr); + return -EINVAL; + } + + desc->address[i++] = paddr; + recorded += chunksz; + } + + if (recorded < bytes) { + printk(KERN_DEBUG + "%s: could only translate %ld of %ld bytes\n", + __func__, recorded, bytes); + return -ENOSPC; + } + + /* mark remaining addresses invalid (just for safety) */ + while (i < desc->nr_addrs) + desc->address[i++] = XENCOMM_INVALID; + + desc->magic = XENCOMM_MAGIC; + + return 0; +} + +static struct xencomm_desc *xencomm_alloc(gfp_t gfp_mask, + void *buffer, unsigned long bytes) +{ + struct xencomm_desc *desc; + unsigned long buffer_ulong = (unsigned long)buffer; + unsigned long start = buffer_ulong & PAGE_MASK; + unsigned long end = (buffer_ulong + bytes) | ~PAGE_MASK; + unsigned long nr_addrs = (end - start + 1) >> PAGE_SHIFT; + unsigned long size = sizeof(*desc) + + sizeof(desc->address[0]) * nr_addrs; + + /* + * slab allocator returns at least sizeof(void*) aligned pointer. + * When sizeof(*desc) > sizeof(void*), struct xencomm_desc might + * cross page boundary. + */ + if (sizeof(*desc) > sizeof(void *)) { + unsigned long order = get_order(size); + desc = (struct xencomm_desc *)__get_free_pages(gfp_mask, + order); + if (desc == NULL) + return NULL; + + desc->nr_addrs = + ((PAGE_SIZE << order) - sizeof(struct xencomm_desc)) / + sizeof(*desc->address); + } else { + desc = kmalloc(size, gfp_mask); + if (desc == NULL) + return NULL; + + desc->nr_addrs = nr_addrs; + } + return desc; +} + +void xencomm_free(struct xencomm_handle *desc) +{ + if (desc && !((ulong)desc & XENCOMM_INLINE_FLAG)) { + struct xencomm_desc *desc__ = (struct xencomm_desc *)desc; + if (sizeof(*desc__) > sizeof(void *)) { + unsigned long size = sizeof(*desc__) + + sizeof(desc__->address[0]) * desc__->nr_addrs; + unsigned long order = get_order(size); + free_pages((unsigned long)__va(desc), order); + } else + kfree(__va(desc)); + } +} + +static int xencomm_create(void *buffer, unsigned long bytes, + struct xencomm_desc **ret, gfp_t gfp_mask) +{ + struct xencomm_desc *desc; + int rc; + + pr_debug("%s: %p[%ld]\n", __func__, buffer, bytes); + + if (bytes == 0) { + /* don't create a descriptor; Xen recognizes NULL. 
*/ + BUG_ON(buffer != NULL); + *ret = NULL; + return 0; + } + + BUG_ON(buffer == NULL); /* 'bytes' is non-zero */ + + desc = xencomm_alloc(gfp_mask, buffer, bytes); + if (!desc) { + printk(KERN_DEBUG "%s failure\n", "xencomm_alloc"); + return -ENOMEM; + } + + rc = xencomm_init(desc, buffer, bytes); + if (rc) { + printk(KERN_DEBUG "%s failure: %d\n", "xencomm_init", rc); + xencomm_free((struct xencomm_handle *)__pa(desc)); + return rc; + } + + *ret = desc; + return 0; +} + +/* check if memory address is within VMALLOC region */ +static int is_phys_contiguous(unsigned long addr) +{ + if (!is_kernel_addr(addr)) + return 0; + + return (addr < VMALLOC_START) || (addr >= VMALLOC_END); +} + +static struct xencomm_handle *xencomm_create_inline(void *ptr) +{ + unsigned long paddr; + + BUG_ON(!is_phys_contiguous((unsigned long)ptr)); + + paddr = (unsigned long)xencomm_pa(ptr); + BUG_ON(paddr & XENCOMM_INLINE_FLAG); + return (struct xencomm_handle *)(paddr | XENCOMM_INLINE_FLAG); +} + +/* "mini" routine, for stack-based communications: */ +static int xencomm_create_mini(void *buffer, + unsigned long bytes, struct xencomm_mini *xc_desc, + struct xencomm_desc **ret) +{ + int rc = 0; + struct xencomm_desc *desc; + BUG_ON(((unsigned long)xc_desc) % sizeof(*xc_desc) != 0); + + desc = (void *)xc_desc; + + desc->nr_addrs = XENCOMM_MINI_ADDRS; + + rc = xencomm_init(desc, buffer, bytes); + if (!rc) + *ret = desc; + + return rc; +} + +struct xencomm_handle *xencomm_map(void *ptr, unsigned long bytes) +{ + int rc; + struct xencomm_desc *desc; + + if (is_phys_contiguous((unsigned long)ptr)) + return xencomm_create_inline(ptr); + + rc = xencomm_create(ptr, bytes, &desc, GFP_KERNEL); + + if (rc || desc == NULL) + return NULL; + + return xencomm_pa(desc); +} + +struct xencomm_handle *__xencomm_map_no_alloc(void *ptr, unsigned long bytes, + struct xencomm_mini *xc_desc) +{ + int rc; + struct xencomm_desc *desc = NULL; + + if (is_phys_contiguous((unsigned long)ptr)) + return xencomm_create_inline(ptr); + + rc = xencomm_create_mini(ptr, bytes, xc_desc, + &desc); + + if (rc) + return NULL; + + return xencomm_pa(desc); +} diff --git a/include/xen/interface/xencomm.h b/include/xen/interface/xencomm.h new file mode 100644 index 000000000000..ac45e0712afa --- /dev/null +++ b/include/xen/interface/xencomm.h @@ -0,0 +1,41 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) IBM Corp. 
2006
+ */

+#ifndef _XEN_XENCOMM_H_
+#define _XEN_XENCOMM_H_

+/* A xencomm descriptor is a scatter/gather list containing physical
+ * addresses corresponding to a virtually contiguous memory area. The
+ * hypervisor translates these physical addresses to machine addresses to copy
+ * to and from the virtually contiguous area.
+ */

+#define XENCOMM_MAGIC 0x58434F4D /* 'XCOM' */
+#define XENCOMM_INVALID (~0UL)

+struct xencomm_desc {
+	uint32_t magic;
+	uint32_t nr_addrs; /* the number of entries in address[] */
+	uint64_t address[0];
+};

+#endif /* _XEN_XENCOMM_H_ */
diff --git a/include/xen/xencomm.h b/include/xen/xencomm.h
new file mode 100644
index 000000000000..e43b039be112
--- /dev/null
+++ b/include/xen/xencomm.h
@@ -0,0 +1,77 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Copyright (C) IBM Corp. 2006
+ *
+ * Authors: Hollis Blanchard
+ *          Jerone Young
+ */

+#ifndef _LINUX_XENCOMM_H_
+#define _LINUX_XENCOMM_H_

+#include

+#define XENCOMM_MINI_ADDRS 3
+struct xencomm_mini {
+	struct xencomm_desc _desc;
+	uint64_t address[XENCOMM_MINI_ADDRS];
+};

+/* To avoid additional virt to phys conversion, an opaque structure is
+   presented.
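+   (The handle value is really a physical address: either the address of
+   a xencomm_desc, or, for physically contiguous buffers, the buffer's
+   own address with XENCOMM_INLINE_FLAG set; see xencomm_map() above.)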
*/ +struct xencomm_handle; + +extern void xencomm_free(struct xencomm_handle *desc); +extern struct xencomm_handle *xencomm_map(void *ptr, unsigned long bytes); +extern struct xencomm_handle *__xencomm_map_no_alloc(void *ptr, + unsigned long bytes, struct xencomm_mini *xc_area); + +#if 0 +#define XENCOMM_MINI_ALIGNED(xc_desc, n) \ + struct xencomm_mini xc_desc ## _base[(n)] \ + __attribute__((__aligned__(sizeof(struct xencomm_mini)))); \ + struct xencomm_mini *xc_desc = &xc_desc ## _base[0]; +#else +/* + * gcc bug workaround: + * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16660 + * gcc doesn't handle properly stack variable with + * __attribute__((__align__(sizeof(struct xencomm_mini)))) + */ +#define XENCOMM_MINI_ALIGNED(xc_desc, n) \ + unsigned char xc_desc ## _base[((n) + 1 ) * \ + sizeof(struct xencomm_mini)]; \ + struct xencomm_mini *xc_desc = (struct xencomm_mini *) \ + ((unsigned long)xc_desc ## _base + \ + (sizeof(struct xencomm_mini) - \ + ((unsigned long)xc_desc ## _base) % \ + sizeof(struct xencomm_mini))); +#endif +#define xencomm_map_no_alloc(ptr, bytes) \ + ({ XENCOMM_MINI_ALIGNED(xc_desc, 1); \ + __xencomm_map_no_alloc(ptr, bytes, xc_desc); }) + +/* provided by architecture code: */ +extern unsigned long xencomm_vtop(unsigned long vaddr); + +static inline void *xencomm_pa(void *ptr) +{ + return (void *)xencomm_vtop((unsigned long)ptr); +} + +#define xen_guest_handle(hnd) ((hnd).p) + +#endif /* _LINUX_XENCOMM_H_ */ -- cgit v1.2.3 From 3e334239d89d4a71610be5a3e8432464d421d9ec Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Wed, 2 Apr 2008 10:54:02 -0700 Subject: xen: Make xen-blkfront write its protocol ABI to xenstore Frontends are expected to write their protocol ABI to xenstore. Since the protocol ABI defaults to the backend's native ABI, things work fine without that as long as the frontend's native ABI is identical to the backend's native ABI. This is not the case for xen-blkfront running 32-on-64, because its ABI differs between 32 and 64 bit, and thus needs this fix. 
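To see why the ABIs differ, consider a simplified sketch of a ring
request (illustrative only, not the real header): small fields followed
by a 64-bit id, whose offset depends on the ABI's u64 alignment:

	struct blkif_request_sketch {
		uint8_t  operation;
		uint8_t  nr_segments;
		uint16_t handle;
		uint64_t id;	/* offset 4 under the x86_32 ABI (4-byte
				   u64 alignment), offset 8 under x86_64 */
	};

A 32-bit frontend and a 64-bit backend therefore disagree about the ring
layout unless the frontend states which protocol it speaks.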
Based on http://xenbits.xensource.com/xen-unstable.hg?rev/c545932a18f3 and http://xenbits.xensource.com/xen-unstable.hg?rev/ffe52263b430 by Gerd Hoffmann Signed-off-by: Markus Armbruster Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- drivers/block/xen-blkfront.c | 7 +++++++ include/xen/interface/io/protocols.h | 21 +++++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 include/xen/interface/io/protocols.h (limited to 'include/xen') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 9c6f3f99208d..2e7c81e3f36a 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -47,6 +47,7 @@ #include #include +#include #include @@ -614,6 +615,12 @@ again: message = "writing event-channel"; goto abort_transaction; } + err = xenbus_printf(xbt, dev->nodename, "protocol", "%s", + XEN_IO_PROTO_ABI_NATIVE); + if (err) { + message = "writing protocol"; + goto abort_transaction; + } err = xenbus_transaction_end(xbt, 0); if (err) { diff --git a/include/xen/interface/io/protocols.h b/include/xen/interface/io/protocols.h new file mode 100644 index 000000000000..01fc8ae5f0b0 --- /dev/null +++ b/include/xen/interface/io/protocols.h @@ -0,0 +1,21 @@ +#ifndef __XEN_PROTOCOLS_H__ +#define __XEN_PROTOCOLS_H__ + +#define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi" +#define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi" +#define XEN_IO_PROTO_ABI_IA64 "ia64-abi" +#define XEN_IO_PROTO_ABI_POWERPC64 "powerpc64-abi" + +#if defined(__i386__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32 +#elif defined(__x86_64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64 +#elif defined(__ia64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_IA64 +#elif defined(__powerpc64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_POWERPC64 +#else +# error arch fixup needed here +#endif + +#endif -- cgit v1.2.3 From 1d78d7055629e3f6300d6b8d7028259ee2bffc0e Mon Sep 17 00:00:00 2001 From: Christian Limpach Date: Wed, 2 Apr 2008 10:54:04 -0700 Subject: xen blkfront: Delay wait for block devices until after the disk is added When the xen block frontend driver is built as a module the module load is only synchronous up to the point where the frontend and the backend become connected rather than when the disk is added. This means that there can be a race on boot between loading the module and loading the dm-* modules and doing the scan for LVM physical volumes (all in the initrd). In the failure case the disk is not present until after the scan for physical volumes is complete. Taken from: http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/11483a00c017 Signed-off-by: Christian Limpach Signed-off-by: Mark McLoughlin Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- drivers/block/xen-blkfront.c | 11 +++++++++++ drivers/xen/xenbus/xenbus_probe.c | 5 ++++- include/xen/xenbus.h | 1 + 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/xen') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 4497ff84f64a..dfe61afe676a 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -88,6 +88,7 @@ struct blkfront_info struct blk_shadow shadow[BLK_RING_SIZE]; unsigned long shadow_free; int feature_barrier; + int is_ready; /** * The number of people holding this device open. 
We won't allow a @@ -839,6 +840,8 @@ static void blkfront_connect(struct blkfront_info *info) spin_unlock_irq(&blkif_io_lock); add_disk(info->gd); + + info->is_ready = 1; } /** @@ -931,6 +934,13 @@ static int blkfront_remove(struct xenbus_device *dev) return 0; } +static int blkfront_is_ready(struct xenbus_device *dev) +{ + struct blkfront_info *info = dev->dev.driver_data; + + return info->is_ready; +} + static int blkif_open(struct inode *inode, struct file *filep) { struct blkfront_info *info = inode->i_bdev->bd_disk->private_data; @@ -977,6 +987,7 @@ static struct xenbus_driver blkfront = { .remove = blkfront_remove, .resume = blkfront_resume, .otherend_changed = backend_changed, + .is_ready = blkfront_is_ready, }; static int __init xlblk_init(void) diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 4750de316ad3..88fc5ec665f5 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -846,6 +846,7 @@ static int is_disconnected_device(struct device *dev, void *data) { struct xenbus_device *xendev = to_xenbus_device(dev); struct device_driver *drv = data; + struct xenbus_driver *xendrv; /* * A device with no driver will never connect. We care only about @@ -858,7 +859,9 @@ static int is_disconnected_device(struct device *dev, void *data) if (drv && (dev->driver != drv)) return 0; - return (xendev->state != XenbusStateConnected); + xendrv = to_xenbus_driver(dev->driver); + return (xendev->state != XenbusStateConnected || + (xendrv->is_ready && !xendrv->is_ready(xendev))); } static int exists_disconnected_device(struct device_driver *drv) diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 6f7c290651ae..6369d89c25d5 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -97,6 +97,7 @@ struct xenbus_driver { int (*uevent)(struct xenbus_device *, char **, int, char *, int); struct device_driver driver; int (*read_otherend_details)(struct xenbus_device *dev); + int (*is_ready)(struct xenbus_device *dev); }; static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv) -- cgit v1.2.3 From 4ee36dc08e5c4d16d078f59acd6d9d536f9718dd Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Wed, 2 Apr 2008 10:54:07 -0700 Subject: xen pvfb: Para-virtual framebuffer, keyboard and pointer driver This is a pair of Xen para-virtual frontend device drivers: drivers/video/xen-fbfront.c provides a framebuffer, and drivers/input/xen-kbdfront provides keyboard and mouse. The backends run in dom0 user space. The two drivers are not in two separate patches, because the intermediate step (one driver, not the other) is somewhat problematic: the backend in dom0 needs both drivers, and will refuse to complete device initialization unless they're both present. 
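For reference, both drivers consume the shared page's in-event ring the
same way (condensed from input_handler() in xen-kbdfront below;
handle_event() stands in for the per-event decoding):

	__u32 cons, prod;

	prod = page->in_prod;
	rmb();			/* ensure we see ring contents up to prod */
	for (cons = page->in_cons; cons != prod; cons++)
		handle_event(&XENKBD_IN_RING_REF(page, cons));
	mb();			/* ensure we got ring contents */
	page->in_cons = cons;
	notify_remote_via_irq(info->irq);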
Signed-off-by: Markus Armbruster Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- drivers/input/Kconfig | 9 + drivers/input/Makefile | 2 + drivers/input/xen-kbdfront.c | 340 ++++++++++++++++++++++++ drivers/video/Kconfig | 14 + drivers/video/Makefile | 1 + drivers/video/xen-fbfront.c | 550 +++++++++++++++++++++++++++++++++++++++ include/xen/interface/io/fbif.h | 124 +++++++++ include/xen/interface/io/kbdif.h | 114 ++++++++ 8 files changed, 1154 insertions(+) create mode 100644 drivers/input/xen-kbdfront.c create mode 100644 drivers/video/xen-fbfront.c create mode 100644 include/xen/interface/io/fbif.h create mode 100644 include/xen/interface/io/kbdif.h (limited to 'include/xen') diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig index 9dea14db724c..5f9d860925a1 100644 --- a/drivers/input/Kconfig +++ b/drivers/input/Kconfig @@ -149,6 +149,15 @@ config INPUT_APMPOWER To compile this driver as a module, choose M here: the module will be called apm-power. +config XEN_KBDDEV_FRONTEND + tristate "Xen virtual keyboard and mouse support" + depends on XEN_FBDEV_FRONTEND + default y + help + This driver implements the front-end of the Xen virtual + keyboard and mouse device driver. It communicates with a back-end + in another domain. + comment "Input Device Drivers" source "drivers/input/keyboard/Kconfig" diff --git a/drivers/input/Makefile b/drivers/input/Makefile index 2ae87b19caa8..98c4f9a77876 100644 --- a/drivers/input/Makefile +++ b/drivers/input/Makefile @@ -23,3 +23,5 @@ obj-$(CONFIG_INPUT_TOUCHSCREEN) += touchscreen/ obj-$(CONFIG_INPUT_MISC) += misc/ obj-$(CONFIG_INPUT_APMPOWER) += apm-power.o + +obj-$(CONFIG_XEN_KBDDEV_FRONTEND) += xen-kbdfront.o diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c new file mode 100644 index 000000000000..0f47f4697cdf --- /dev/null +++ b/drivers/input/xen-kbdfront.c @@ -0,0 +1,340 @@ +/* + * Xen para-virtual input device + * + * Copyright (C) 2005 Anthony Liguori + * Copyright (C) 2006-2008 Red Hat, Inc., Markus Armbruster + * + * Based on linux/drivers/input/mouse/sermouse.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive for + * more details. + */ + +/* + * TODO: + * + * Switch to grant tables together with xen-fbfront.c. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct xenkbd_info { + struct input_dev *kbd; + struct input_dev *ptr; + struct xenkbd_page *page; + int irq; + struct xenbus_device *xbdev; + char phys[32]; +}; + +static int xenkbd_remove(struct xenbus_device *); +static int xenkbd_connect_backend(struct xenbus_device *, struct xenkbd_info *); +static void xenkbd_disconnect_backend(struct xenkbd_info *); + +/* + * Note: if you need to send out events, see xenfb_do_update() for how + * to do that. 
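+ *
+ * (In short: check xenfb_queue_full(), write the event at out_prod,
+ * wmb() so the backend sees the contents, advance out_prod, then
+ * notify_remote_via_irq().)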
+ */ + +static irqreturn_t input_handler(int rq, void *dev_id) +{ + struct xenkbd_info *info = dev_id; + struct xenkbd_page *page = info->page; + __u32 cons, prod; + + prod = page->in_prod; + if (prod == page->in_cons) + return IRQ_HANDLED; + rmb(); /* ensure we see ring contents up to prod */ + for (cons = page->in_cons; cons != prod; cons++) { + union xenkbd_in_event *event; + struct input_dev *dev; + event = &XENKBD_IN_RING_REF(page, cons); + + dev = info->ptr; + switch (event->type) { + case XENKBD_TYPE_MOTION: + input_report_rel(dev, REL_X, event->motion.rel_x); + input_report_rel(dev, REL_Y, event->motion.rel_y); + break; + case XENKBD_TYPE_KEY: + dev = NULL; + if (test_bit(event->key.keycode, info->kbd->keybit)) + dev = info->kbd; + if (test_bit(event->key.keycode, info->ptr->keybit)) + dev = info->ptr; + if (dev) + input_report_key(dev, event->key.keycode, + event->key.pressed); + else + printk(KERN_WARNING + "xenkbd: unhandled keycode 0x%x\n", + event->key.keycode); + break; + case XENKBD_TYPE_POS: + input_report_abs(dev, ABS_X, event->pos.abs_x); + input_report_abs(dev, ABS_Y, event->pos.abs_y); + break; + } + if (dev) + input_sync(dev); + } + mb(); /* ensure we got ring contents */ + page->in_cons = cons; + notify_remote_via_irq(info->irq); + + return IRQ_HANDLED; +} + +static int __devinit xenkbd_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int ret, i; + struct xenkbd_info *info; + struct input_dev *kbd, *ptr; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); + return -ENOMEM; + } + dev->dev.driver_data = info; + info->xbdev = dev; + info->irq = -1; + snprintf(info->phys, sizeof(info->phys), "xenbus/%s", dev->nodename); + + info->page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + if (!info->page) + goto error_nomem; + + /* keyboard */ + kbd = input_allocate_device(); + if (!kbd) + goto error_nomem; + kbd->name = "Xen Virtual Keyboard"; + kbd->phys = info->phys; + kbd->id.bustype = BUS_PCI; + kbd->id.vendor = 0x5853; + kbd->id.product = 0xffff; + kbd->evbit[0] = BIT(EV_KEY); + for (i = KEY_ESC; i < KEY_UNKNOWN; i++) + set_bit(i, kbd->keybit); + for (i = KEY_OK; i < KEY_MAX; i++) + set_bit(i, kbd->keybit); + + ret = input_register_device(kbd); + if (ret) { + input_free_device(kbd); + xenbus_dev_fatal(dev, ret, "input_register_device(kbd)"); + goto error; + } + info->kbd = kbd; + + /* pointing device */ + ptr = input_allocate_device(); + if (!ptr) + goto error_nomem; + ptr->name = "Xen Virtual Pointer"; + ptr->phys = info->phys; + ptr->id.bustype = BUS_PCI; + ptr->id.vendor = 0x5853; + ptr->id.product = 0xfffe; + ptr->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS); + for (i = BTN_LEFT; i <= BTN_TASK; i++) + set_bit(i, ptr->keybit); + ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y); + input_set_abs_params(ptr, ABS_X, 0, XENFB_WIDTH, 0, 0); + input_set_abs_params(ptr, ABS_Y, 0, XENFB_HEIGHT, 0, 0); + + ret = input_register_device(ptr); + if (ret) { + input_free_device(ptr); + xenbus_dev_fatal(dev, ret, "input_register_device(ptr)"); + goto error; + } + info->ptr = ptr; + + ret = xenkbd_connect_backend(dev, info); + if (ret < 0) + goto error; + + return 0; + + error_nomem: + ret = -ENOMEM; + xenbus_dev_fatal(dev, ret, "allocating device memory"); + error: + xenkbd_remove(dev); + return ret; +} + +static int xenkbd_resume(struct xenbus_device *dev) +{ + struct xenkbd_info *info = dev->dev.driver_data; + + xenkbd_disconnect_backend(info); + memset(info->page, 0, PAGE_SIZE); + 
return xenkbd_connect_backend(dev, info); +} + +static int xenkbd_remove(struct xenbus_device *dev) +{ + struct xenkbd_info *info = dev->dev.driver_data; + + xenkbd_disconnect_backend(info); + if (info->kbd) + input_unregister_device(info->kbd); + if (info->ptr) + input_unregister_device(info->ptr); + free_page((unsigned long)info->page); + kfree(info); + return 0; +} + +static int xenkbd_connect_backend(struct xenbus_device *dev, + struct xenkbd_info *info) +{ + int ret, evtchn; + struct xenbus_transaction xbt; + + ret = xenbus_alloc_evtchn(dev, &evtchn); + if (ret) + return ret; + ret = bind_evtchn_to_irqhandler(evtchn, input_handler, + 0, dev->devicetype, info); + if (ret < 0) { + xenbus_free_evtchn(dev, evtchn); + xenbus_dev_fatal(dev, ret, "bind_evtchn_to_irqhandler"); + return ret; + } + info->irq = ret; + + again: + ret = xenbus_transaction_start(&xbt); + if (ret) { + xenbus_dev_fatal(dev, ret, "starting transaction"); + return ret; + } + ret = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu", + virt_to_mfn(info->page)); + if (ret) + goto error_xenbus; + ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", + evtchn); + if (ret) + goto error_xenbus; + ret = xenbus_transaction_end(xbt, 0); + if (ret) { + if (ret == -EAGAIN) + goto again; + xenbus_dev_fatal(dev, ret, "completing transaction"); + return ret; + } + + xenbus_switch_state(dev, XenbusStateInitialised); + return 0; + + error_xenbus: + xenbus_transaction_end(xbt, 1); + xenbus_dev_fatal(dev, ret, "writing xenstore"); + return ret; +} + +static void xenkbd_disconnect_backend(struct xenkbd_info *info) +{ + if (info->irq >= 0) + unbind_from_irqhandler(info->irq, info); + info->irq = -1; +} + +static void xenkbd_backend_changed(struct xenbus_device *dev, + enum xenbus_state backend_state) +{ + struct xenkbd_info *info = dev->dev.driver_data; + int ret, val; + + switch (backend_state) { + case XenbusStateInitialising: + case XenbusStateInitialised: + case XenbusStateUnknown: + case XenbusStateClosed: + break; + + case XenbusStateInitWait: +InitWait: + ret = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-abs-pointer", "%d", &val); + if (ret < 0) + val = 0; + if (val) { + ret = xenbus_printf(XBT_NIL, info->xbdev->nodename, + "request-abs-pointer", "1"); + if (ret) + printk(KERN_WARNING + "xenkbd: can't request abs-pointer"); + } + xenbus_switch_state(dev, XenbusStateConnected); + break; + + case XenbusStateConnected: + /* + * Work around xenbus race condition: If backend goes + * through InitWait to Connected fast enough, we can + * get Connected twice here. + */ + if (dev->state != XenbusStateConnected) + goto InitWait; /* no InitWait seen yet, fudge it */ + break; + + case XenbusStateClosing: + xenbus_frontend_closed(dev); + break; + } +} + +static struct xenbus_device_id xenkbd_ids[] = { + { "vkbd" }, + { "" } +}; + +static struct xenbus_driver xenkbd = { + .name = "vkbd", + .owner = THIS_MODULE, + .ids = xenkbd_ids, + .probe = xenkbd_probe, + .remove = xenkbd_remove, + .resume = xenkbd_resume, + .otherend_changed = xenkbd_backend_changed, +}; + +static int __init xenkbd_init(void) +{ + if (!is_running_on_xen()) + return -ENODEV; + + /* Nothing to do if running in dom0. 
*/ + if (is_initial_xendomain()) + return -ENODEV; + + return xenbus_register_frontend(&xenkbd); +} + +static void __exit xenkbd_cleanup(void) +{ + xenbus_unregister_driver(&xenkbd); +} + +module_init(xenkbd_init); +module_exit(xenkbd_cleanup); + +MODULE_LICENSE("GPL"); diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index 1bd5fb30237d..e3dc8f8d0c3e 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -1930,6 +1930,20 @@ config FB_VIRTUAL If unsure, say N. +config XEN_FBDEV_FRONTEND + tristate "Xen virtual frame buffer support" + depends on FB && XEN + select FB_SYS_FILLRECT + select FB_SYS_COPYAREA + select FB_SYS_IMAGEBLIT + select FB_SYS_FOPS + select FB_DEFERRED_IO + default y + help + This driver implements the front-end of the Xen virtual + frame buffer driver. It communicates with a back-end + in another domain. + source "drivers/video/omap/Kconfig" source "drivers/video/backlight/Kconfig" diff --git a/drivers/video/Makefile b/drivers/video/Makefile index 11c0e5e05f21..f172b9b73314 100644 --- a/drivers/video/Makefile +++ b/drivers/video/Makefile @@ -114,6 +114,7 @@ obj-$(CONFIG_FB_PS3) += ps3fb.o obj-$(CONFIG_FB_SM501) += sm501fb.o obj-$(CONFIG_FB_XILINX) += xilinxfb.o obj-$(CONFIG_FB_OMAP) += omap/ +obj-$(CONFIG_XEN_FBDEV_FRONTEND) += xen-fbfront.o # Platform or fallback drivers go here obj-$(CONFIG_FB_UVESA) += uvesafb.o diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c new file mode 100644 index 000000000000..619a6f8d65a2 --- /dev/null +++ b/drivers/video/xen-fbfront.c @@ -0,0 +1,550 @@ +/* + * Xen para-virtual frame buffer device + * + * Copyright (C) 2005-2006 Anthony Liguori + * Copyright (C) 2006-2008 Red Hat, Inc., Markus Armbruster + * + * Based on linux/drivers/video/q40fb.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive for + * more details. + */ + +/* + * TODO: + * + * Switch to grant tables when they become capable of dealing with the + * frame buffer. 
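+ *
+ * (For now the backend is handed raw MFNs: a page directory built in
+ * xenfb_init_shared_page() below and advertised to the backend via the
+ * "page-ref" xenstore key.)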
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct xenfb_info { + unsigned char *fb; + struct fb_info *fb_info; + int x1, y1, x2, y2; /* dirty rectangle, + protected by dirty_lock */ + spinlock_t dirty_lock; + int nr_pages; + int irq; + struct xenfb_page *page; + unsigned long *mfns; + int update_wanted; /* XENFB_TYPE_UPDATE wanted */ + + struct xenbus_device *xbdev; +}; + +static u32 xenfb_mem_len = XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8; + +static int xenfb_remove(struct xenbus_device *); +static void xenfb_init_shared_page(struct xenfb_info *); +static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *); +static void xenfb_disconnect_backend(struct xenfb_info *); + +static void xenfb_do_update(struct xenfb_info *info, + int x, int y, int w, int h) +{ + union xenfb_out_event event; + u32 prod; + + event.type = XENFB_TYPE_UPDATE; + event.update.x = x; + event.update.y = y; + event.update.width = w; + event.update.height = h; + + prod = info->page->out_prod; + /* caller ensures !xenfb_queue_full() */ + mb(); /* ensure ring space available */ + XENFB_OUT_RING_REF(info->page, prod) = event; + wmb(); /* ensure ring contents visible */ + info->page->out_prod = prod + 1; + + notify_remote_via_irq(info->irq); +} + +static int xenfb_queue_full(struct xenfb_info *info) +{ + u32 cons, prod; + + prod = info->page->out_prod; + cons = info->page->out_cons; + return prod - cons == XENFB_OUT_RING_LEN; +} + +static void xenfb_refresh(struct xenfb_info *info, + int x1, int y1, int w, int h) +{ + unsigned long flags; + int y2 = y1 + h - 1; + int x2 = x1 + w - 1; + + if (!info->update_wanted) + return; + + spin_lock_irqsave(&info->dirty_lock, flags); + + /* Combine with dirty rectangle: */ + if (info->y1 < y1) + y1 = info->y1; + if (info->y2 > y2) + y2 = info->y2; + if (info->x1 < x1) + x1 = info->x1; + if (info->x2 > x2) + x2 = info->x2; + + if (xenfb_queue_full(info)) { + /* Can't send right now, stash it in the dirty rectangle */ + info->x1 = x1; + info->x2 = x2; + info->y1 = y1; + info->y2 = y2; + spin_unlock_irqrestore(&info->dirty_lock, flags); + return; + } + + /* Clear dirty rectangle: */ + info->x1 = info->y1 = INT_MAX; + info->x2 = info->y2 = 0; + + spin_unlock_irqrestore(&info->dirty_lock, flags); + + if (x1 <= x2 && y1 <= y2) + xenfb_do_update(info, x1, y1, x2 - x1 + 1, y2 - y1 + 1); +} + +static void xenfb_deferred_io(struct fb_info *fb_info, + struct list_head *pagelist) +{ + struct xenfb_info *info = fb_info->par; + struct page *page; + unsigned long beg, end; + int y1, y2, miny, maxy; + + miny = INT_MAX; + maxy = 0; + list_for_each_entry(page, pagelist, lru) { + beg = page->index << PAGE_SHIFT; + end = beg + PAGE_SIZE - 1; + y1 = beg / fb_info->fix.line_length; + y2 = end / fb_info->fix.line_length; + if (y2 >= fb_info->var.yres) + y2 = fb_info->var.yres - 1; + if (miny > y1) + miny = y1; + if (maxy < y2) + maxy = y2; + } + xenfb_refresh(info, 0, miny, fb_info->var.xres, maxy - miny + 1); +} + +static struct fb_deferred_io xenfb_defio = { + .delay = HZ / 20, + .deferred_io = xenfb_deferred_io, +}; + +static int xenfb_setcolreg(unsigned regno, unsigned red, unsigned green, + unsigned blue, unsigned transp, + struct fb_info *info) +{ + u32 v; + + if (regno > info->cmap.len) + return 1; + +#define CNVT_TOHW(val, width) ((((val)<<(width))+0x7FFF-(val))>>16) + red = CNVT_TOHW(red, info->var.red.length); + green = CNVT_TOHW(green, info->var.green.length); + blue = CNVT_TOHW(blue, 
info->var.blue.length); + transp = CNVT_TOHW(transp, info->var.transp.length); +#undef CNVT_TOHW + + v = (red << info->var.red.offset) | + (green << info->var.green.offset) | + (blue << info->var.blue.offset); + + switch (info->var.bits_per_pixel) { + case 16: + case 24: + case 32: + ((u32 *)info->pseudo_palette)[regno] = v; + break; + } + + return 0; +} + +static void xenfb_fillrect(struct fb_info *p, const struct fb_fillrect *rect) +{ + struct xenfb_info *info = p->par; + + sys_fillrect(p, rect); + xenfb_refresh(info, rect->dx, rect->dy, rect->width, rect->height); +} + +static void xenfb_imageblit(struct fb_info *p, const struct fb_image *image) +{ + struct xenfb_info *info = p->par; + + sys_imageblit(p, image); + xenfb_refresh(info, image->dx, image->dy, image->width, image->height); +} + +static void xenfb_copyarea(struct fb_info *p, const struct fb_copyarea *area) +{ + struct xenfb_info *info = p->par; + + sys_copyarea(p, area); + xenfb_refresh(info, area->dx, area->dy, area->width, area->height); +} + +static ssize_t xenfb_write(struct fb_info *p, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct xenfb_info *info = p->par; + ssize_t res; + + res = fb_sys_write(p, buf, count, ppos); + xenfb_refresh(info, 0, 0, info->page->width, info->page->height); + return res; +} + +static struct fb_ops xenfb_fb_ops = { + .owner = THIS_MODULE, + .fb_read = fb_sys_read, + .fb_write = xenfb_write, + .fb_setcolreg = xenfb_setcolreg, + .fb_fillrect = xenfb_fillrect, + .fb_copyarea = xenfb_copyarea, + .fb_imageblit = xenfb_imageblit, +}; + +static irqreturn_t xenfb_event_handler(int rq, void *dev_id) +{ + /* + * No in events recognized, simply ignore them all. + * If you need to recognize some, see xen-kbdfront's + * input_handler() for how to do that. 
+ */ + struct xenfb_info *info = dev_id; + struct xenfb_page *page = info->page; + + if (page->in_cons != page->in_prod) { + info->page->in_cons = info->page->in_prod; + notify_remote_via_irq(info->irq); + } + + /* Flush dirty rectangle: */ + xenfb_refresh(info, INT_MAX, INT_MAX, -INT_MAX, -INT_MAX); + + return IRQ_HANDLED; +} + +static int __devinit xenfb_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + struct xenfb_info *info; + struct fb_info *fb_info; + int ret; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (info == NULL) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); + return -ENOMEM; + } + dev->dev.driver_data = info; + info->xbdev = dev; + info->irq = -1; + info->x1 = info->y1 = INT_MAX; + spin_lock_init(&info->dirty_lock); + + info->fb = vmalloc(xenfb_mem_len); + if (info->fb == NULL) + goto error_nomem; + memset(info->fb, 0, xenfb_mem_len); + + info->nr_pages = (xenfb_mem_len + PAGE_SIZE - 1) >> PAGE_SHIFT; + + info->mfns = vmalloc(sizeof(unsigned long) * info->nr_pages); + if (!info->mfns) + goto error_nomem; + + /* set up shared page */ + info->page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + if (!info->page) + goto error_nomem; + + xenfb_init_shared_page(info); + + /* abusing framebuffer_alloc() to allocate pseudo_palette */ + fb_info = framebuffer_alloc(sizeof(u32) * 256, NULL); + if (fb_info == NULL) + goto error_nomem; + + /* complete the abuse: */ + fb_info->pseudo_palette = fb_info->par; + fb_info->par = info; + + fb_info->screen_base = info->fb; + + fb_info->fbops = &xenfb_fb_ops; + fb_info->var.xres_virtual = fb_info->var.xres = info->page->width; + fb_info->var.yres_virtual = fb_info->var.yres = info->page->height; + fb_info->var.bits_per_pixel = info->page->depth; + + fb_info->var.red = (struct fb_bitfield){16, 8, 0}; + fb_info->var.green = (struct fb_bitfield){8, 8, 0}; + fb_info->var.blue = (struct fb_bitfield){0, 8, 0}; + + fb_info->var.activate = FB_ACTIVATE_NOW; + fb_info->var.height = -1; + fb_info->var.width = -1; + fb_info->var.vmode = FB_VMODE_NONINTERLACED; + + fb_info->fix.visual = FB_VISUAL_TRUECOLOR; + fb_info->fix.line_length = info->page->line_length; + fb_info->fix.smem_start = 0; + fb_info->fix.smem_len = xenfb_mem_len; + strcpy(fb_info->fix.id, "xen"); + fb_info->fix.type = FB_TYPE_PACKED_PIXELS; + fb_info->fix.accel = FB_ACCEL_NONE; + + fb_info->flags = FBINFO_FLAG_DEFAULT; + + ret = fb_alloc_cmap(&fb_info->cmap, 256, 0); + if (ret < 0) { + framebuffer_release(fb_info); + xenbus_dev_fatal(dev, ret, "fb_alloc_cmap"); + goto error; + } + + fb_info->fbdefio = &xenfb_defio; + fb_deferred_io_init(fb_info); + + ret = register_framebuffer(fb_info); + if (ret) { + fb_deferred_io_cleanup(fb_info); + fb_dealloc_cmap(&fb_info->cmap); + framebuffer_release(fb_info); + xenbus_dev_fatal(dev, ret, "register_framebuffer"); + goto error; + } + info->fb_info = fb_info; + + ret = xenfb_connect_backend(dev, info); + if (ret < 0) + goto error; + + return 0; + + error_nomem: + ret = -ENOMEM; + xenbus_dev_fatal(dev, ret, "allocating device memory"); + error: + xenfb_remove(dev); + return ret; +} + +static int xenfb_resume(struct xenbus_device *dev) +{ + struct xenfb_info *info = dev->dev.driver_data; + + xenfb_disconnect_backend(info); + xenfb_init_shared_page(info); + return xenfb_connect_backend(dev, info); +} + +static int xenfb_remove(struct xenbus_device *dev) +{ + struct xenfb_info *info = dev->dev.driver_data; + + xenfb_disconnect_backend(info); + if (info->fb_info) { + 
fb_deferred_io_cleanup(info->fb_info); + unregister_framebuffer(info->fb_info); + fb_dealloc_cmap(&info->fb_info->cmap); + framebuffer_release(info->fb_info); + } + free_page((unsigned long)info->page); + vfree(info->mfns); + vfree(info->fb); + kfree(info); + + return 0; +} + +static unsigned long vmalloc_to_mfn(void *address) +{ + return pfn_to_mfn(vmalloc_to_pfn(address)); +} + +static void xenfb_init_shared_page(struct xenfb_info *info) +{ + int i; + + for (i = 0; i < info->nr_pages; i++) + info->mfns[i] = vmalloc_to_mfn(info->fb + i * PAGE_SIZE); + + info->page->pd[0] = vmalloc_to_mfn(info->mfns); + info->page->pd[1] = 0; + info->page->width = XENFB_WIDTH; + info->page->height = XENFB_HEIGHT; + info->page->depth = XENFB_DEPTH; + info->page->line_length = (info->page->depth / 8) * info->page->width; + info->page->mem_length = xenfb_mem_len; + info->page->in_cons = info->page->in_prod = 0; + info->page->out_cons = info->page->out_prod = 0; +} + +static int xenfb_connect_backend(struct xenbus_device *dev, + struct xenfb_info *info) +{ + int ret, evtchn; + struct xenbus_transaction xbt; + + ret = xenbus_alloc_evtchn(dev, &evtchn); + if (ret) + return ret; + ret = bind_evtchn_to_irqhandler(evtchn, xenfb_event_handler, + 0, dev->devicetype, info); + if (ret < 0) { + xenbus_free_evtchn(dev, evtchn); + xenbus_dev_fatal(dev, ret, "bind_evtchn_to_irqhandler"); + return ret; + } + info->irq = ret; + + again: + ret = xenbus_transaction_start(&xbt); + if (ret) { + xenbus_dev_fatal(dev, ret, "starting transaction"); + return ret; + } + ret = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu", + virt_to_mfn(info->page)); + if (ret) + goto error_xenbus; + ret = xenbus_printf(xbt, dev->nodename, "event-channel", "%u", + evtchn); + if (ret) + goto error_xenbus; + ret = xenbus_printf(xbt, dev->nodename, "protocol", "%s", + XEN_IO_PROTO_ABI_NATIVE); + if (ret) + goto error_xenbus; + ret = xenbus_printf(xbt, dev->nodename, "feature-update", "1"); + if (ret) + goto error_xenbus; + ret = xenbus_transaction_end(xbt, 0); + if (ret) { + if (ret == -EAGAIN) + goto again; + xenbus_dev_fatal(dev, ret, "completing transaction"); + return ret; + } + + xenbus_switch_state(dev, XenbusStateInitialised); + return 0; + + error_xenbus: + xenbus_transaction_end(xbt, 1); + xenbus_dev_fatal(dev, ret, "writing xenstore"); + return ret; +} + +static void xenfb_disconnect_backend(struct xenfb_info *info) +{ + if (info->irq >= 0) + unbind_from_irqhandler(info->irq, info); + info->irq = -1; +} + +static void xenfb_backend_changed(struct xenbus_device *dev, + enum xenbus_state backend_state) +{ + struct xenfb_info *info = dev->dev.driver_data; + int val; + + switch (backend_state) { + case XenbusStateInitialising: + case XenbusStateInitialised: + case XenbusStateUnknown: + case XenbusStateClosed: + break; + + case XenbusStateInitWait: +InitWait: + xenbus_switch_state(dev, XenbusStateConnected); + break; + + case XenbusStateConnected: + /* + * Work around xenbus race condition: If backend goes + * through InitWait to Connected fast enough, we can + * get Connected twice here. 
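+		 * (The dev->state test just below filters out the duplicate.)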
+ */ + if (dev->state != XenbusStateConnected) + goto InitWait; /* no InitWait seen yet, fudge it */ + + if (xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "request-update", "%d", &val) < 0) + val = 0; + if (val) + info->update_wanted = 1; + break; + + case XenbusStateClosing: + xenbus_frontend_closed(dev); + break; + } +} + +static struct xenbus_device_id xenfb_ids[] = { + { "vfb" }, + { "" } +}; + +static struct xenbus_driver xenfb = { + .name = "vfb", + .owner = THIS_MODULE, + .ids = xenfb_ids, + .probe = xenfb_probe, + .remove = xenfb_remove, + .resume = xenfb_resume, + .otherend_changed = xenfb_backend_changed, +}; + +static int __init xenfb_init(void) +{ + if (!is_running_on_xen()) + return -ENODEV; + + /* Nothing to do if running in dom0. */ + if (is_initial_xendomain()) + return -ENODEV; + + return xenbus_register_frontend(&xenfb); +} + +static void __exit xenfb_cleanup(void) +{ + xenbus_unregister_driver(&xenfb); +} + +module_init(xenfb_init); +module_exit(xenfb_cleanup); + +MODULE_LICENSE("GPL"); diff --git a/include/xen/interface/io/fbif.h b/include/xen/interface/io/fbif.h new file mode 100644 index 000000000000..5a934dd7796d --- /dev/null +++ b/include/xen/interface/io/fbif.h @@ -0,0 +1,124 @@ +/* + * fbif.h -- Xen virtual frame buffer device + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Anthony Liguori + * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster + */ + +#ifndef __XEN_PUBLIC_IO_FBIF_H__ +#define __XEN_PUBLIC_IO_FBIF_H__ + +/* Out events (frontend -> backend) */ + +/* + * Out events may be sent only when requested by backend, and receipt + * of an unknown out event is an error. + */ + +/* Event type 1 currently not used */ +/* + * Framebuffer update notification event + * Capable frontend sets feature-update in xenstore. + * Backend requests it by setting request-update in xenstore. + */ +#define XENFB_TYPE_UPDATE 2 + +struct xenfb_update { + uint8_t type; /* XENFB_TYPE_UPDATE */ + int32_t x; /* source x */ + int32_t y; /* source y */ + int32_t width; /* rect width */ + int32_t height; /* rect height */ +}; + +#define XENFB_OUT_EVENT_SIZE 40 + +union xenfb_out_event { + uint8_t type; + struct xenfb_update update; + char pad[XENFB_OUT_EVENT_SIZE]; +}; + +/* In events (backend -> frontend) */ + +/* + * Frontends should ignore unknown in events. + * No in events currently defined. 
+ */ + +#define XENFB_IN_EVENT_SIZE 40 + +union xenfb_in_event { + uint8_t type; + char pad[XENFB_IN_EVENT_SIZE]; +}; + +/* shared page */ + +#define XENFB_IN_RING_SIZE 1024 +#define XENFB_IN_RING_LEN (XENFB_IN_RING_SIZE / XENFB_IN_EVENT_SIZE) +#define XENFB_IN_RING_OFFS 1024 +#define XENFB_IN_RING(page) \ + ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS)) +#define XENFB_IN_RING_REF(page, idx) \ + (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN]) + +#define XENFB_OUT_RING_SIZE 2048 +#define XENFB_OUT_RING_LEN (XENFB_OUT_RING_SIZE / XENFB_OUT_EVENT_SIZE) +#define XENFB_OUT_RING_OFFS (XENFB_IN_RING_OFFS + XENFB_IN_RING_SIZE) +#define XENFB_OUT_RING(page) \ + ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS)) +#define XENFB_OUT_RING_REF(page, idx) \ + (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN]) + +struct xenfb_page { + uint32_t in_cons, in_prod; + uint32_t out_cons, out_prod; + + int32_t width; /* width of the framebuffer (in pixels) */ + int32_t height; /* height of the framebuffer (in pixels) */ + uint32_t line_length; /* length of a row of pixels (in bytes) */ + uint32_t mem_length; /* length of the framebuffer (in bytes) */ + uint8_t depth; /* depth of a pixel (in bits) */ + + /* + * Framebuffer page directory + * + * Each directory page holds PAGE_SIZE / sizeof(*pd) + * framebuffer pages, and can thus map up to PAGE_SIZE * + * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and + * sizeof(unsigned long) == 4, that's 4 Megs. Two directory + * pages should be enough for a while. + */ + unsigned long pd[2]; +}; + +/* + * Wart: xenkbd needs to know resolution. Put it here until a better + * solution is found, but don't leak it to the backend. + */ +#ifdef __KERNEL__ +#define XENFB_WIDTH 800 +#define XENFB_HEIGHT 600 +#define XENFB_DEPTH 32 +#endif + +#endif diff --git a/include/xen/interface/io/kbdif.h b/include/xen/interface/io/kbdif.h new file mode 100644 index 000000000000..fb97f4284ffd --- /dev/null +++ b/include/xen/interface/io/kbdif.h @@ -0,0 +1,114 @@ +/* + * kbdif.h -- Xen virtual keyboard/mouse + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Anthony Liguori + * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster + */ + +#ifndef __XEN_PUBLIC_IO_KBDIF_H__ +#define __XEN_PUBLIC_IO_KBDIF_H__ + +/* In events (backend -> frontend) */ + +/* + * Frontends should ignore unknown in events. 
+ */
+
+/* Pointer movement event */
+#define XENKBD_TYPE_MOTION 1
+/* Event type 2 currently not used */
+/* Key event (includes pointer buttons) */
+#define XENKBD_TYPE_KEY 3
+/*
+ * Pointer position event
+ * Capable backend sets feature-abs-pointer in xenstore.
+ * Frontend requests it instead of XENKBD_TYPE_MOTION by setting
+ * request-abs-pointer in xenstore.
+ */
+#define XENKBD_TYPE_POS 4
+
+struct xenkbd_motion {
+ uint8_t type; /* XENKBD_TYPE_MOTION */
+ int32_t rel_x; /* relative X motion */
+ int32_t rel_y; /* relative Y motion */
+};
+
+struct xenkbd_key {
+ uint8_t type; /* XENKBD_TYPE_KEY */
+ uint8_t pressed; /* 1 if pressed; 0 otherwise */
+ uint32_t keycode; /* KEY_* from linux/input.h */
+};
+
+struct xenkbd_position {
+ uint8_t type; /* XENKBD_TYPE_POS */
+ int32_t abs_x; /* absolute X position (in FB pixels) */
+ int32_t abs_y; /* absolute Y position (in FB pixels) */
+};
+
+#define XENKBD_IN_EVENT_SIZE 40
+
+union xenkbd_in_event {
+ uint8_t type;
+ struct xenkbd_motion motion;
+ struct xenkbd_key key;
+ struct xenkbd_position pos;
+ char pad[XENKBD_IN_EVENT_SIZE];
+};
+
+/* Out events (frontend -> backend) */
+
+/*
+ * Out events may be sent only when requested by backend, and receipt
+ * of an unknown out event is an error.
+ * No out events currently defined.
+ */
+
+#define XENKBD_OUT_EVENT_SIZE 40
+
+union xenkbd_out_event {
+ uint8_t type;
+ char pad[XENKBD_OUT_EVENT_SIZE];
+};
+
+/* shared page */
+
+#define XENKBD_IN_RING_SIZE 2048
+#define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE)
+#define XENKBD_IN_RING_OFFS 1024
+#define XENKBD_IN_RING(page) \
+ ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS))
+#define XENKBD_IN_RING_REF(page, idx) \
+ (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN])
+
+#define XENKBD_OUT_RING_SIZE 1024
+#define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE)
+#define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE)
+#define XENKBD_OUT_RING(page) \
+ ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS))
+#define XENKBD_OUT_RING_REF(page, idx) \
+ (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN])
+
+struct xenkbd_page {
+ uint32_t in_cons, in_prod;
+ uint32_t out_cons, out_prod;
+};
+
+#endif
-- cgit v1.2.3

From 1775826ceec51187aa868406585799b7e76ffa7d Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge
Date: Wed, 2 Apr 2008 10:54:13 -0700
Subject: xen: add balloon driver

The balloon driver allows memory to be dynamically added or removed from the
domain, in order to allow host memory to be balanced between multiple domains.

This patch introduces the Xen balloon driver, though it currently only allows
a domain to be shrunk from its initial size (and re-grown back to that size).
A later patch will add the ability to grow a domain beyond its initial size.
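To make the mechanism concrete, the heart of the "balloon out" path can be
sketched as below. This is an illustration only, condensed from
decrease_reservation() in the patch: the frame_list batching, highmem
handling, page scrubbing, locking and error paths are all omitted, and
balloon_out_one_page() is a made-up name.

    static int balloon_out_one_page(void)
    {
        unsigned long mfn;
        struct page *page = alloc_page(GFP_BALLOON);
        struct xen_memory_reservation reservation = {
            .extent_order = 0,      /* each extent is a single page */
            .domid = DOMID_SELF,
            .nr_extents = 1,
        };

        if (page == NULL)
            return -ENOMEM;

        /* Remember the machine frame backing this page... */
        mfn = pfn_to_mfn(page_to_pfn(page));

        /* ...invalidate our pseudo-physical mapping of it and park the
           struct page on the balloon list for later re-population... */
        set_phys_to_machine(page_to_pfn(page), INVALID_P2M_ENTRY);
        balloon_append(page);

        /* ...then hand the machine frame itself back to the hypervisor. */
        reservation.extent_start = (unsigned long)&mfn;
        return HYPERVISOR_memory_op(XENMEM_decrease_reservation,
                                    &reservation) == 1 ? 0 : -EBUSY;
    }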
Signed-off-by: Jeremy Fitzhardinge
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
---
 drivers/Kconfig | 2 +
 drivers/xen/Kconfig | 19 ++
 drivers/xen/Makefile | 1 +
 drivers/xen/balloon.c | 712 +++++++++++++++++++++++++++++++++++++++++
 include/xen/balloon.h | 61 ++++
 include/xen/interface/memory.h | 12 +-
 6 files changed, 799 insertions(+), 8 deletions(-)
 create mode 100644 drivers/xen/Kconfig
 create mode 100644 drivers/xen/balloon.c
 create mode 100644 include/xen/balloon.h
(limited to 'include/xen')

diff --git a/drivers/Kconfig b/drivers/Kconfig
index 3a0e3549739f..80f0ec91e2cf 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -97,4 +97,6 @@ source "drivers/dca/Kconfig"
 source "drivers/auxdisplay/Kconfig"
 source "drivers/uio/Kconfig"
+
+source "drivers/xen/Kconfig"
 endmenu
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
new file mode 100644
index 000000000000..4b75a16de009
--- /dev/null
+++ b/drivers/xen/Kconfig
@@ -0,0 +1,19 @@
+config XEN_BALLOON
+ bool "Xen memory balloon driver"
+ depends on XEN
+ default y
+ help
+ The balloon driver allows the Xen domain to request more memory from
+ the system to expand the domain's memory allocation, or alternatively
+ return unneeded memory to the system.
+
+config XEN_SCRUB_PAGES
+ bool "Scrub pages before returning them to system"
+ depends on XEN_BALLOON
+ default y
+ help
+ Scrub pages before returning them to the system for reuse by
+ other domains. This makes sure that any confidential data
+ is not accidentally visible to other domains. It is more
+ secure, but slightly less efficient.
+ If in doubt, say yes.
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 43f014cfb458..37af04f1ffd9 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,3 +1,4 @@
 obj-y += grant-table.o features.o events.o
 obj-y += xenbus/
 obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
+obj-$(CONFIG_XEN_BALLOON) += balloon.o
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
new file mode 100644
index 000000000000..ab25ba6cbbb9
--- /dev/null
+++ b/drivers/xen/balloon.c
@@ -0,0 +1,712 @@
+/******************************************************************************
+ * balloon.c
+ *
+ * Xen balloon driver - enables returning/claiming memory to/from Xen.
+ *
+ * Copyright (c) 2003, B Dragovic
+ * Copyright (c) 2003-2004, M Williamson, K Fraser
+ * Copyright (c) 2005 Dan M. Smith, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
+
+#define BALLOON_CLASS_NAME "memory"
+
+struct balloon_stats {
+ /* We aim for 'current allocation' == 'target allocation'. */
+ unsigned long current_pages;
+ unsigned long target_pages;
+ /* We may hit the hard limit in Xen. If we do then we remember it. */
+ unsigned long hard_limit;
+ /*
+ * Drivers may alter the memory reservation independently, but they
+ * must inform the balloon driver so we avoid hitting the hard limit.
+ */
+ unsigned long driver_pages;
+ /* Number of pages in high- and low-memory balloons. */
+ unsigned long balloon_low;
+ unsigned long balloon_high;
+};
+
+static DEFINE_MUTEX(balloon_mutex);
+
+static struct sys_device balloon_sysdev;
+
+static int register_balloon(struct sys_device *sysdev);
+
+/*
+ * Protects atomic reservation decrease/increase against concurrent increases.
+ * Also protects non-atomic updates of current_pages and driver_pages, and
+ * balloon lists.
+ */
+static DEFINE_SPINLOCK(balloon_lock);
+
+static struct balloon_stats balloon_stats;
+
+/* We increase/decrease in batches which fit in a page */
+static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
+
+/* VM /proc information for memory */
+extern unsigned long totalram_pages;
+
+#ifdef CONFIG_HIGHMEM
+extern unsigned long totalhigh_pages;
+#define inc_totalhigh_pages() (totalhigh_pages++)
+#define dec_totalhigh_pages() (totalhigh_pages--)
+#else
+#define inc_totalhigh_pages() do {} while (0)
+#define dec_totalhigh_pages() do {} while (0)
+#endif
+
+/* List of ballooned pages, threaded through the mem_map array. */
+static LIST_HEAD(ballooned_pages);
+
+/* Main work function, always executed in process context. */
+static void balloon_process(struct work_struct *work);
+static DECLARE_WORK(balloon_worker, balloon_process);
+static struct timer_list balloon_timer;
+
+/* When ballooning out (allocating memory to return to Xen) we don't really
+ want the kernel to try too hard since that can trigger the oom killer. */
+#define GFP_BALLOON \
+ (GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
+
+static void scrub_page(struct page *page)
+{
+#ifdef CONFIG_XEN_SCRUB_PAGES
+ if (PageHighMem(page)) {
+ void *v = kmap(page);
+ clear_page(v);
+ kunmap(page);
+ } else {
+ void *v = page_address(page);
+ clear_page(v);
+ }
+#endif
+}
+
+/* balloon_append: add the given page to the balloon. */
+static void balloon_append(struct page *page)
+{
+ /* Lowmem is re-populated first, so highmem pages go at list tail. */
+ if (PageHighMem(page)) {
+ list_add_tail(&page->lru, &ballooned_pages);
+ balloon_stats.balloon_high++;
+ dec_totalhigh_pages();
+ } else {
+ list_add(&page->lru, &ballooned_pages);
+ balloon_stats.balloon_low++;
+ }
+}
+
+/* balloon_retrieve: rescue a page from the balloon, if it is not empty.
 */
+static struct page *balloon_retrieve(void)
+{
+ struct page *page;
+
+ if (list_empty(&ballooned_pages))
+ return NULL;
+
+ page = list_entry(ballooned_pages.next, struct page, lru);
+ list_del(&page->lru);
+
+ if (PageHighMem(page)) {
+ balloon_stats.balloon_high--;
+ inc_totalhigh_pages();
+ } else
+ balloon_stats.balloon_low--;
+
+ return page;
+}
+
+static struct page *balloon_first_page(void)
+{
+ if (list_empty(&ballooned_pages))
+ return NULL;
+ return list_entry(ballooned_pages.next, struct page, lru);
+}
+
+static struct page *balloon_next_page(struct page *page)
+{
+ struct list_head *next = page->lru.next;
+ if (next == &ballooned_pages)
+ return NULL;
+ return list_entry(next, struct page, lru);
+}
+
+static void balloon_alarm(unsigned long unused)
+{
+ schedule_work(&balloon_worker);
+}
+
+static unsigned long current_target(void)
+{
+ unsigned long target = min(balloon_stats.target_pages, balloon_stats.hard_limit);
+
+ target = min(target,
+ balloon_stats.current_pages +
+ balloon_stats.balloon_low +
+ balloon_stats.balloon_high);
+
+ return target;
+}
+
+static int increase_reservation(unsigned long nr_pages)
+{
+ unsigned long pfn, i, flags;
+ struct page *page;
+ long rc;
+ struct xen_memory_reservation reservation = {
+ .address_bits = 0,
+ .extent_order = 0,
+ .domid = DOMID_SELF
+ };
+
+ if (nr_pages > ARRAY_SIZE(frame_list))
+ nr_pages = ARRAY_SIZE(frame_list);
+
+ spin_lock_irqsave(&balloon_lock, flags);
+
+ page = balloon_first_page();
+ for (i = 0; i < nr_pages; i++) {
+ BUG_ON(page == NULL);
+ frame_list[i] = page_to_pfn(page);
+ page = balloon_next_page(page);
+ }
+
+ reservation.extent_start = (unsigned long)frame_list;
+ reservation.nr_extents = nr_pages;
+ rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
+ if (rc < nr_pages) {
+ if (rc > 0) {
+ int ret;
+
+ /* We hit the Xen hard limit: reprobe. */
+ reservation.nr_extents = rc;
+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+ &reservation);
+ BUG_ON(ret != rc);
+ }
+ if (rc >= 0)
+ balloon_stats.hard_limit = (balloon_stats.current_pages + rc -
+ balloon_stats.driver_pages);
+ goto out;
+ }
+
+ for (i = 0; i < nr_pages; i++) {
+ page = balloon_retrieve();
+ BUG_ON(page == NULL);
+
+ pfn = page_to_pfn(page);
+ BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
+ phys_to_machine_mapping_valid(pfn));
+
+ set_phys_to_machine(pfn, frame_list[i]);
+
+ /* Link back into the page tables if not highmem. */
+ if (pfn < max_low_pfn) {
+ int ret;
+ ret = HYPERVISOR_update_va_mapping(
+ (unsigned long)__va(pfn << PAGE_SHIFT),
+ mfn_pte(frame_list[i], PAGE_KERNEL),
+ 0);
+ BUG_ON(ret);
+ }
+
+ /* Relinquish the page back to the allocator.
*/ + ClearPageReserved(page); + init_page_count(page); + __free_page(page); + } + + balloon_stats.current_pages += nr_pages; + totalram_pages = balloon_stats.current_pages; + + out: + spin_unlock_irqrestore(&balloon_lock, flags); + + return 0; +} + +static int decrease_reservation(unsigned long nr_pages) +{ + unsigned long pfn, i, flags; + struct page *page; + int need_sleep = 0; + int ret; + struct xen_memory_reservation reservation = { + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + + if (nr_pages > ARRAY_SIZE(frame_list)) + nr_pages = ARRAY_SIZE(frame_list); + + for (i = 0; i < nr_pages; i++) { + if ((page = alloc_page(GFP_BALLOON)) == NULL) { + nr_pages = i; + need_sleep = 1; + break; + } + + pfn = page_to_pfn(page); + frame_list[i] = pfn_to_mfn(pfn); + + scrub_page(page); + } + + /* Ensure that ballooned highmem pages don't have kmaps. */ + kmap_flush_unused(); + flush_tlb_all(); + + spin_lock_irqsave(&balloon_lock, flags); + + /* No more mappings: invalidate P2M and add to balloon. */ + for (i = 0; i < nr_pages; i++) { + pfn = mfn_to_pfn(frame_list[i]); + set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + balloon_append(pfn_to_page(pfn)); + } + + reservation.extent_start = (unsigned long)frame_list; + reservation.nr_extents = nr_pages; + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); + BUG_ON(ret != nr_pages); + + balloon_stats.current_pages -= nr_pages; + totalram_pages = balloon_stats.current_pages; + + spin_unlock_irqrestore(&balloon_lock, flags); + + return need_sleep; +} + +/* + * We avoid multiple worker processes conflicting via the balloon mutex. + * We may of course race updates of the target counts (which are protected + * by the balloon lock), or with changes to the Xen hard limit, but we will + * recover from these in time. + */ +static void balloon_process(struct work_struct *work) +{ + int need_sleep = 0; + long credit; + + mutex_lock(&balloon_mutex); + + do { + credit = current_target() - balloon_stats.current_pages; + if (credit > 0) + need_sleep = (increase_reservation(credit) != 0); + if (credit < 0) + need_sleep = (decrease_reservation(-credit) != 0); + +#ifndef CONFIG_PREEMPT + if (need_resched()) + schedule(); +#endif + } while ((credit != 0) && !need_sleep); + + /* Schedule more work if there is some still to be done. */ + if (current_target() != balloon_stats.current_pages) + mod_timer(&balloon_timer, jiffies + HZ); + + mutex_unlock(&balloon_mutex); +} + +/* Resets the Xen limit, sets new target, and kicks off processing. */ +void balloon_set_new_target(unsigned long target) +{ + /* No need for lock. Not read-modify-write updates. */ + balloon_stats.hard_limit = ~0UL; + balloon_stats.target_pages = target; + schedule_work(&balloon_worker); +} + +static struct xenbus_watch target_watch = +{ + .node = "memory/target" +}; + +/* React to a change in the target key */ +static void watch_target(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + unsigned long long new_target; + int err; + + err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target); + if (err != 1) { + /* This is ok (for domain0 at least) - so just return */ + return; + } + + /* The given memory/target value is in KiB, so it needs converting to + * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. 
+ */ + balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); +} + +static int balloon_init_watcher(struct notifier_block *notifier, + unsigned long event, + void *data) +{ + int err; + + err = register_xenbus_watch(&target_watch); + if (err) + printk(KERN_ERR "Failed to set balloon watcher\n"); + + return NOTIFY_DONE; +} + +static struct notifier_block xenstore_notifier; + +static int __init balloon_init(void) +{ + unsigned long pfn; + struct page *page; + + if (!is_running_on_xen()) + return -ENODEV; + + pr_info("xen_balloon: Initialising balloon driver.\n"); + + balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn); + totalram_pages = balloon_stats.current_pages; + balloon_stats.target_pages = balloon_stats.current_pages; + balloon_stats.balloon_low = 0; + balloon_stats.balloon_high = 0; + balloon_stats.driver_pages = 0UL; + balloon_stats.hard_limit = ~0UL; + + init_timer(&balloon_timer); + balloon_timer.data = 0; + balloon_timer.function = balloon_alarm; + + register_balloon(&balloon_sysdev); + + /* Initialise the balloon with excess memory space. */ + for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { + page = pfn_to_page(pfn); + if (!PageReserved(page)) + balloon_append(page); + } + + target_watch.callback = watch_target; + xenstore_notifier.notifier_call = balloon_init_watcher; + + register_xenstore_notifier(&xenstore_notifier); + + return 0; +} + +subsys_initcall(balloon_init); + +static void balloon_exit(void) +{ + /* XXX - release balloon here */ + return; +} + +module_exit(balloon_exit); + +static void balloon_update_driver_allowance(long delta) +{ + unsigned long flags; + + spin_lock_irqsave(&balloon_lock, flags); + balloon_stats.driver_pages += delta; + spin_unlock_irqrestore(&balloon_lock, flags); +} + +static int dealloc_pte_fn( + pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) +{ + unsigned long mfn = pte_mfn(*pte); + int ret; + struct xen_memory_reservation reservation = { + .nr_extents = 1, + .extent_order = 0, + .domid = DOMID_SELF + }; + reservation.extent_start = (unsigned long)&mfn; + set_pte_at(&init_mm, addr, pte, __pte_ma(0ull)); + set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY); + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); + BUG_ON(ret != 1); + return 0; +} + +static struct page **alloc_empty_pages_and_pagevec(int nr_pages) +{ + unsigned long vaddr, flags; + struct page *page, **pagevec; + int i, ret; + + pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL); + if (pagevec == NULL) + return NULL; + + for (i = 0; i < nr_pages; i++) { + page = pagevec[i] = alloc_page(GFP_KERNEL); + if (page == NULL) + goto err; + + vaddr = (unsigned long)page_address(page); + + scrub_page(page); + + spin_lock_irqsave(&balloon_lock, flags); + + if (xen_feature(XENFEAT_auto_translated_physmap)) { + unsigned long gmfn = page_to_pfn(page); + struct xen_memory_reservation reservation = { + .nr_extents = 1, + .extent_order = 0, + .domid = DOMID_SELF + }; + reservation.extent_start = (unsigned long)&gmfn; + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &reservation); + if (ret == 1) + ret = 0; /* success */ + } else { + ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE, + dealloc_pte_fn, NULL); + } + + if (ret != 0) { + spin_unlock_irqrestore(&balloon_lock, flags); + __free_page(page); + goto err; + } + + totalram_pages = --balloon_stats.current_pages; + + spin_unlock_irqrestore(&balloon_lock, flags); + } + + out: + schedule_work(&balloon_worker); + flush_tlb_all(); + return pagevec; 
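+
+ /*
+ * Partial-failure unwind: pages whose frames have already been
+ * handed back to Xen are threaded onto the balloon list so
+ * balloon_process() can re-populate them later; the caller simply
+ * sees a NULL pagevec.
+ */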
+ + err: + spin_lock_irqsave(&balloon_lock, flags); + while (--i >= 0) + balloon_append(pagevec[i]); + spin_unlock_irqrestore(&balloon_lock, flags); + kfree(pagevec); + pagevec = NULL; + goto out; +} + +static void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages) +{ + unsigned long flags; + int i; + + if (pagevec == NULL) + return; + + spin_lock_irqsave(&balloon_lock, flags); + for (i = 0; i < nr_pages; i++) { + BUG_ON(page_count(pagevec[i]) != 1); + balloon_append(pagevec[i]); + } + spin_unlock_irqrestore(&balloon_lock, flags); + + kfree(pagevec); + + schedule_work(&balloon_worker); +} + +static void balloon_release_driver_page(struct page *page) +{ + unsigned long flags; + + spin_lock_irqsave(&balloon_lock, flags); + balloon_append(page); + balloon_stats.driver_pages--; + spin_unlock_irqrestore(&balloon_lock, flags); + + schedule_work(&balloon_worker); +} + + +#define BALLOON_SHOW(name, format, args...) \ + static ssize_t show_##name(struct sys_device *dev, \ + char *buf) \ + { \ + return sprintf(buf, format, ##args); \ + } \ + static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) + +BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); +BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low)); +BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high)); +BALLOON_SHOW(hard_limit_kb, + (balloon_stats.hard_limit!=~0UL) ? "%lu\n" : "???\n", + (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0); +BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages)); + +static ssize_t show_target_kb(struct sys_device *dev, char *buf) +{ + return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); +} + +static ssize_t store_target_kb(struct sys_device *dev, + const char *buf, + size_t count) +{ + char memstring[64], *endchar; + unsigned long long target_bytes; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (count <= 1) + return -EBADMSG; /* runt */ + if (count > sizeof(memstring)) + return -EFBIG; /* too long */ + strcpy(memstring, buf); + + target_bytes = memparse(memstring, &endchar); + balloon_set_new_target(target_bytes >> PAGE_SHIFT); + + return count; +} + +static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR, + show_target_kb, store_target_kb); + +static struct sysdev_attribute *balloon_attrs[] = { + &attr_target_kb, +}; + +static struct attribute *balloon_info_attrs[] = { + &attr_current_kb.attr, + &attr_low_kb.attr, + &attr_high_kb.attr, + &attr_hard_limit_kb.attr, + &attr_driver_kb.attr, + NULL +}; + +static struct attribute_group balloon_info_group = { + .name = "info", + .attrs = balloon_info_attrs, +}; + +static struct sysdev_class balloon_sysdev_class = { + .name = BALLOON_CLASS_NAME, +}; + +static int register_balloon(struct sys_device *sysdev) +{ + int i, error; + + error = sysdev_class_register(&balloon_sysdev_class); + if (error) + return error; + + sysdev->id = 0; + sysdev->cls = &balloon_sysdev_class; + + error = sysdev_register(sysdev); + if (error) { + sysdev_class_unregister(&balloon_sysdev_class); + return error; + } + + for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) { + error = sysdev_create_file(sysdev, balloon_attrs[i]); + if (error) + goto fail; + } + + error = sysfs_create_group(&sysdev->kobj, &balloon_info_group); + if (error) + goto fail; + + return 0; + + fail: + while (--i >= 0) + sysdev_remove_file(sysdev, balloon_attrs[i]); + sysdev_unregister(sysdev); + sysdev_class_unregister(&balloon_sysdev_class); + return error; +} + +static void unregister_balloon(struct 
sys_device *sysdev) +{ + int i; + + sysfs_remove_group(&sysdev->kobj, &balloon_info_group); + for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) + sysdev_remove_file(sysdev, balloon_attrs[i]); + sysdev_unregister(sysdev); + sysdev_class_unregister(&balloon_sysdev_class); +} + +static void balloon_sysfs_exit(void) +{ + unregister_balloon(&balloon_sysdev); +} + +MODULE_LICENSE("GPL"); diff --git a/include/xen/balloon.h b/include/xen/balloon.h new file mode 100644 index 000000000000..fe43b0f3c86a --- /dev/null +++ b/include/xen/balloon.h @@ -0,0 +1,61 @@ +/****************************************************************************** + * balloon.h + * + * Xen balloon driver - enables returning/claiming memory to/from Xen. + * + * Copyright (c) 2003, B Dragovic + * Copyright (c) 2003-2004, M Williamson, K Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __XEN_BALLOON_H__ +#define __XEN_BALLOON_H__ + +#include + +#if 0 +/* + * Inform the balloon driver that it should allow some slop for device-driver + * memory activities. + */ +void balloon_update_driver_allowance(long delta); + +/* Allocate/free a set of empty pages in low memory (i.e., no RAM mapped). */ +struct page **alloc_empty_pages_and_pagevec(int nr_pages); +void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages); + +void balloon_release_driver_page(struct page *page); + +/* + * Prevent the balloon driver from changing the memory reservation during + * a driver critical region. + */ +extern spinlock_t balloon_lock; +#define balloon_lock(__flags) spin_lock_irqsave(&balloon_lock, __flags) +#define balloon_unlock(__flags) spin_unlock_irqrestore(&balloon_lock, __flags) +#endif + +#endif /* __XEN_BALLOON_H__ */ diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h index af36ead16817..da768469aa92 100644 --- a/include/xen/interface/memory.h +++ b/include/xen/interface/memory.h @@ -29,7 +29,7 @@ struct xen_memory_reservation { * OUT: GMFN bases of extents that were allocated * (NB. 
This command also updates the mach_to_phys translation table) */ - GUEST_HANDLE(ulong) extent_start; + ulong extent_start; /* Number of extents, and size/alignment of each (2^extent_order pages). */ unsigned long nr_extents; @@ -50,7 +50,6 @@ struct xen_memory_reservation { domid_t domid; }; -DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation); /* * Returns the maximum machine frame number of mapped RAM in this system. @@ -86,7 +85,7 @@ struct xen_machphys_mfn_list { * any large discontiguities in the machine address space, 2MB gaps in * the machphys table will be represented by an MFN base of zero. */ - GUEST_HANDLE(ulong) extent_start; + ulong extent_start; /* * Number of extents written to the above array. This will be smaller @@ -94,7 +93,6 @@ struct xen_machphys_mfn_list { */ unsigned int nr_extents; }; -DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list); /* * Sets the GPFN at which a particular page appears in the specified guest's @@ -117,7 +115,6 @@ struct xen_add_to_physmap { /* GPFN where the source mapping page should appear. */ unsigned long gpfn; }; -DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap); /* * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error @@ -132,14 +129,13 @@ struct xen_translate_gpfn_list { unsigned long nr_gpfns; /* List of GPFNs to translate. */ - GUEST_HANDLE(ulong) gpfn_list; + ulong gpfn_list; /* * Output list to contain MFN translations. May be the same as the input * list (in which case each input GPFN is overwritten with the output MFN). */ - GUEST_HANDLE(ulong) mfn_list; + ulong mfn_list; }; -DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list); #endif /* __XEN_PUBLIC_MEMORY_H__ */ -- cgit v1.2.3
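A closing note on the memory.h hunks above: with extent_start demoted from a
GUEST_HANDLE(ulong) to a plain ulong, a caller passes the kernel virtual
address of its frame array as an integer, exactly as balloon.c does. A
minimal sketch, where frames[] and N are placeholder names:

    unsigned long frames[N];        /* frame numbers for the operation */
    struct xen_memory_reservation r = {
        .extent_order = 0,          /* each extent is a single page */
        .domid = DOMID_SELF,
        .nr_extents = N,
    };
    long rc;

    r.extent_start = (unsigned long)frames; /* plain VA, no handle wrapper */
    rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &r);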