25 files changed, 1392 insertions, 426 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 1c484084ed4f..6d1ac180f6ee 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -18,8 +18,8 @@ config ATMEL_PWM
 	depends on AVR32 || ARCH_AT91SAM9263 || ARCH_AT91SAM9RL || ARCH_AT91CAP9
 	help
 	  This option enables device driver support for the PWM channels
-	  on certain Atmel prcoessors.  Pulse Width Modulation is used for
-	  purposes including software controlled power-efficent backlights
+	  on certain Atmel processors.  Pulse Width Modulation is used for
+	  purposes including software controlled power-efficient backlights
 	  on LCD displays, motor control, and waveform generation.
 
 config ATMEL_TCLIB
@@ -142,7 +142,7 @@ config ATMEL_SSC
 	tristate "Device driver for Atmel SSC peripheral"
 	depends on AVR32 || ARCH_AT91
 	---help---
-	  This option enables device driver support for Atmel Syncronized
+	  This option enables device driver support for Atmel Synchronized
 	  Serial Communication peripheral (SSC).
 
 	  The SSC peripheral supports a wide variety of serial frame based
@@ -165,7 +165,7 @@ config SGI_XP
 	depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_UV) && SMP
 	select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
 	select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
-	select SGI_GRU if (IA64_GENERIC || IA64_SGI_UV || X86_64) && SMP
+	select SGI_GRU if X86_64 && SMP
 	---help---
 	  An SGI machine can be divided into multiple Single System
 	  Images which act independently of each other and have
@@ -189,7 +189,7 @@ config HP_ILO
 
 config SGI_GRU
 	tristate "SGI GRU driver"
-	depends on (X86_UV || IA64_SGI_UV || IA64_GENERIC) && SMP
+	depends on X86_UV && SMP
 	default n
 	select MMU_NOTIFIER
 	---help---
@@ -223,6 +223,16 @@ config DELL_LAPTOP
 	This driver adds support for rfkill and backlight control to Dell
 	laptops.
 
+config ISL29003
+	tristate "Intersil ISL29003 ambient light sensor"
+	depends on I2C && SYSFS
+	help
+	  If you say yes here you get support for the Intersil ISL29003
+	  ambient light sensor.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called isl29003.
+
 source "drivers/misc/c2port/Kconfig"
 source "drivers/misc/eeprom/Kconfig"
 
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index bc1199830554..7871f05dcb9b 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -18,5 +18,6 @@ obj-$(CONFIG_KGDB_TESTS)	+= kgdbts.o
 obj-$(CONFIG_SGI_XP)		+= sgi-xp/
 obj-$(CONFIG_SGI_GRU)		+= sgi-gru/
 obj-$(CONFIG_HP_ILO)		+= hpilo.o
+obj-$(CONFIG_ISL29003)		+= isl29003.o
 obj-$(CONFIG_C2PORT)		+= c2port/
 obj-y				+= eeprom/
diff --git a/drivers/misc/eeprom/Kconfig b/drivers/misc/eeprom/Kconfig
index c76df8cda5ef..89fec052f3b4 100644
--- a/drivers/misc/eeprom/Kconfig
+++ b/drivers/misc/eeprom/Kconfig
@@ -2,7 +2,7 @@ menu "EEPROM support"
 
 config EEPROM_AT24
 	tristate "I2C EEPROMs from most vendors"
-	depends on I2C && SYSFS && EXPERIMENTAL
+	depends on I2C && SYSFS
 	help
 	  Enable this driver to get read/write support to most I2C EEPROMs,
 	  after you configure the driver to know about each EEPROM on
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index d4775528abc6..d184dfab9631 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -53,6 +53,7 @@
 
 struct at24_data {
 	struct at24_platform_data chip;
+	struct memory_accessor macc;
 	bool use_smbus;
 
 	/*
@@ -225,14 +226,11 @@ static ssize_t at24_eeprom_read(struct at24_data *at24, char *buf,
 		return status;
 }
 
-static ssize_t at24_bin_read(struct kobject *kobj, struct bin_attribute *attr,
+static ssize_t at24_read(struct at24_data *at24,
 		char *buf, loff_t off, size_t count)
 {
-	struct at24_data *at24;
 	ssize_t retval = 0;
 
-	at24 = dev_get_drvdata(container_of(kobj, struct device, kobj));
-
 	if (unlikely(!count))
 		return count;
 
@@ -262,12 +260,14 @@ static ssize_t at24_bin_read(struct kobject *kobj, struct bin_attribute *attr,
 	return retval;
 }
 
+static ssize_t at24_bin_read(struct kobject *kobj, struct bin_attribute *attr,
+		char *buf, loff_t off, size_t count)
+{
+	struct at24_data *at24;
 
-/*
- * REVISIT: export at24_bin{read,write}() to let other kernel code use
- * eeprom data. For example, it might hold a board's Ethernet address, or
- * board-specific calibration data generated on the manufacturing floor.
- */
+	at24 = dev_get_drvdata(container_of(kobj, struct device, kobj));
+	return at24_read(at24, buf, off, count);
+}
 
 
 /*
@@ -347,14 +347,11 @@ static ssize_t at24_eeprom_write(struct at24_data *at24, char *buf,
 	return -ETIMEDOUT;
 }
 
-static ssize_t at24_bin_write(struct kobject *kobj, struct bin_attribute *attr,
+static ssize_t at24_write(struct at24_data *at24,
 		char *buf, loff_t off, size_t count)
 {
-	struct at24_data *at24;
 	ssize_t retval = 0;
 
-	at24 = dev_get_drvdata(container_of(kobj, struct device, kobj));
-
 	if (unlikely(!count))
 		return count;
 
@@ -384,6 +381,39 @@ static ssize_t at24_bin_write(struct kobject *kobj, struct bin_attribute *attr,
 	return retval;
 }
 
+/* sysfs bin_attribute write hook: forwards to at24_write() for this device. */
+static ssize_t at24_bin_write(struct kobject *kobj, struct bin_attribute *attr,
+		char *buf, loff_t off, size_t count)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+
+	return at24_write(dev_get_drvdata(dev), buf, off, count);
+}
+
+/*-------------------------------------------------------------------------*/
+
+/*
+ * This lets other kernel code access the eeprom data. For example, it
+ * might hold a board's Ethernet address, or board-specific calibration
+ * data generated on the manufacturing floor.
+ */
+
+/* memory_accessor read hook: map macc to its at24_data, call at24_read(). */
+static ssize_t at24_macc_read(struct memory_accessor *macc, char *buf,
+			 off_t offset, size_t count)
+{
+	return at24_read(container_of(macc, struct at24_data, macc),
+			 buf, offset, count);
+}
+
+/* memory_accessor write hook: map macc to its at24_data, call at24_write(). */
+static ssize_t at24_macc_write(struct memory_accessor *macc, char *buf,
+			  off_t offset, size_t count)
+{
+	return at24_write(container_of(macc, struct at24_data, macc),
+			  buf, offset, count);
+}
+
 /*-------------------------------------------------------------------------*/
 
 static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
@@ -413,6 +443,9 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		 * is recommended anyhow.
 		 */
 		chip.page_size = 1;
+
+		chip.setup = NULL;
+		chip.context = NULL;
 	}
 
 	if (!is_power_of_2(chip.byte_len))
@@ -463,6 +496,8 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	at24->bin.read = at24_bin_read;
 	at24->bin.size = chip.byte_len;
 
+	at24->macc.read = at24_macc_read;
+
 	writable = !(chip.flags & AT24_FLAG_READONLY);
 	if (writable) {
 		if (!use_smbus || i2c_check_functionality(client->adapter,
@@ -470,6 +505,8 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 
 			unsigned write_max = chip.page_size;
 
+			at24->macc.write = at24_macc_write;
+
 			at24->bin.write = at24_bin_write;
 			at24->bin.attr.mode |= S_IWUSR;
 
@@ -520,6 +557,10 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		at24->write_max,
 		use_smbus ? ", use_smbus" : "");
 
+	/* export data to kernel code */
+	if (chip.setup)
+		chip.setup(&at24->macc, chip.context);
+
 	return 0;
 
 err_clients:
diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
index 290dbe99647a..6bc0dac5c1e8 100644
--- a/drivers/misc/eeprom/at25.c
+++ b/drivers/misc/eeprom/at25.c
@@ -30,6 +30,7 @@
 
 struct at25_data {
 	struct spi_device	*spi;
+	struct memory_accessor	mem;
 	struct mutex		lock;
 	struct spi_eeprom	chip;
 	struct bin_attribute	bin;
@@ -75,6 +76,13 @@ at25_ee_read(
 	struct spi_transfer	t[2];
 	struct spi_message	m;
 
+	if (unlikely(offset >= at25->bin.size))
+		return 0;
+	if ((offset + count) > at25->bin.size)
+		count = at25->bin.size - offset;
+	if (unlikely(!count))
+		return count;
+
 	cp = command;
 	*cp++ = AT25_READ;
 
@@ -127,13 +135,6 @@ at25_bin_read(struct kobject *kobj, struct bin_attribute *bin_attr,
 	dev = container_of(kobj, struct device, kobj);
 	at25 = dev_get_drvdata(dev);
 
-	if (unlikely(off >= at25->bin.size))
-		return 0;
-	if ((off + count) > at25->bin.size)
-		count = at25->bin.size - off;
-	if (unlikely(!count))
-		return count;
-
 	return at25_ee_read(at25, buf, off, count);
 }
 
@@ -146,6 +147,13 @@ at25_ee_write(struct at25_data *at25, char *buf, loff_t off, size_t count)
 	unsigned		buf_size;
 	u8			*bounce;
 
+	if (unlikely(off >= at25->bin.size))
+		return -EFBIG;
+	if ((off + count) > at25->bin.size)
+		count = at25->bin.size - off;
+	if (unlikely(!count))
+		return count;
+
 	/* Temp buffer starts with command and address */
 	buf_size = at25->chip.page_size;
 	if (buf_size > io_limit)
@@ -253,18 +261,31 @@ at25_bin_write(struct kobject *kobj, struct bin_attribute *bin_attr,
 	dev = container_of(kobj, struct device, kobj);
 	at25 = dev_get_drvdata(dev);
 
-	if (unlikely(off >= at25->bin.size))
-		return -EFBIG;
-	if ((off + count) > at25->bin.size)
-		count = at25->bin.size - off;
-	if (unlikely(!count))
-		return count;
-
 	return at25_ee_write(at25, buf, off, count);
 }
 
 /*-------------------------------------------------------------------------*/
 
+/* Let in-kernel code access the eeprom data. */
+
+/* memory_accessor read hook: map mem to its at25_data, call at25_ee_read(). */
+static ssize_t at25_mem_read(struct memory_accessor *mem, char *buf,
+			 off_t offset, size_t count)
+{
+	return at25_ee_read(container_of(mem, struct at25_data, mem),
+			    buf, offset, count);
+}
+
+/* memory_accessor write hook: map mem to at25_data, call at25_ee_write(). */
+static ssize_t at25_mem_write(struct memory_accessor *mem, char *buf,
+			  off_t offset, size_t count)
+{
+	return at25_ee_write(container_of(mem, struct at25_data, mem),
+			     buf, offset, count);
+}
+
+/*-------------------------------------------------------------------------*/
+
 static int at25_probe(struct spi_device *spi)
 {
 	struct at25_data	*at25 = NULL;
@@ -317,6 +338,10 @@ static int at25_probe(struct spi_device *spi)
 	at25->addrlen = addrlen;
 
 	/* Export the EEPROM bytes through sysfs, since that's convenient.
+	 * And maybe to other kernel code; it might hold a board's Ethernet
+	 * address, or board-specific calibration data generated on the
+	 * manufacturing floor.
+	 *
 	 * Default to root-only access to the data; EEPROMs often hold data
 	 * that's sensitive for read and/or write, like ethernet addresses,
 	 * security codes, board-specific manufacturing calibrations, etc.
@@ -324,17 +349,22 @@ static int at25_probe(struct spi_device *spi)
 	at25->bin.attr.name = "eeprom";
 	at25->bin.attr.mode = S_IRUSR;
 	at25->bin.read = at25_bin_read;
+	at25->mem.read = at25_mem_read;
 
 	at25->bin.size = at25->chip.byte_len;
 	if (!(chip->flags & EE_READONLY)) {
 		at25->bin.write = at25_bin_write;
 		at25->bin.attr.mode |= S_IWUSR;
+		at25->mem.write = at25_mem_write;
 	}
 
 	err = sysfs_create_bin_file(&spi->dev.kobj, &at25->bin);
 	if (err)
 		goto fail;
 
+	if (chip->setup)
+		chip->setup(&at25->mem, chip->context);
+
 	dev_info(&spi->dev, "%Zd %s %s eeprom%s, pagesize %u\n",
 		(at25->bin.size < 1024)
 			? at25->bin.size
diff --git a/drivers/misc/hpilo.c b/drivers/misc/hpilo.c
index cf991850f01b..880ccf39e23b 100644
--- a/drivers/misc/hpilo.c
+++ b/drivers/misc/hpilo.c
@@ -209,7 +209,7 @@ static void ilo_ccb_close(struct pci_dev *pdev, struct ccb_data *data)
 	/* give iLO some time to process stop request */
 	for (retries = MAX_WAIT; retries > 0; retries--) {
 		doorbell_set(driver_ccb);
-		udelay(1);
+		udelay(WAIT_TIME);
 		if (!(ioread32(&device_ccb->send_ctrl) & (1 << CTRL_BITPOS_A))
 		    &&
 		    !(ioread32(&device_ccb->recv_ctrl) & (1 << CTRL_BITPOS_A)))
@@ -312,7 +312,7 @@ static int ilo_ccb_open(struct ilo_hwinfo *hw, struct ccb_data *data, int slot)
 	for (i = MAX_WAIT; i > 0; i--) {
 		if (ilo_pkt_dequeue(hw, driver_ccb, SENDQ, &pkt_id, NULL, NULL))
 			break;
-		udelay(1);
+		udelay(WAIT_TIME);
 	}
 
 	if (i) {
@@ -759,7 +759,7 @@ static void __exit ilo_exit(void)
 	class_destroy(ilo_class);
 }
 
-MODULE_VERSION("1.0");
+MODULE_VERSION("1.1");
 MODULE_ALIAS(ILO_NAME);
 MODULE_DESCRIPTION(ILO_NAME);
 MODULE_AUTHOR("David Altobelli <david.altobelli@hp.com>");
diff --git a/drivers/misc/hpilo.h b/drivers/misc/hpilo.h
index b64a20ef07e3..03a14c82aad9 100644
--- a/drivers/misc/hpilo.h
+++ b/drivers/misc/hpilo.h
@@ -19,8 +19,12 @@
 #define MAX_ILO_DEV	1
 /* max number of files */
 #define MAX_OPEN	(MAX_CCB * MAX_ILO_DEV)
+/* total wait time in usec */
+#define MAX_WAIT_TIME	10000
+/* per spin wait time in usec */
+#define WAIT_TIME	10
 /* spin counter for open/close delay */
-#define MAX_WAIT	10000
+#define MAX_WAIT	(MAX_WAIT_TIME / WAIT_TIME)
 
 /*
  * Per device, used to track global memory allocations.
diff --git a/drivers/misc/isl29003.c b/drivers/misc/isl29003.c
new file mode 100644
index 000000000000..2e2a5923d4c2
--- /dev/null
+++ b/drivers/misc/isl29003.c
@@ -0,0 +1,470 @@
+/*
+ *  isl29003.c - Linux kernel module for
+ * 	Intersil ISL29003 ambient light sensor
+ *
+ *  See file:Documentation/misc-devices/isl29003
+ *
+ *  Copyright (c) 2009 Daniel Mack <daniel@caiaq.de>
+ *
+ *  Based on code written by
+ *  	Rodolfo Giometti <giometti@linux.it>
+ *  	Eurotech S.p.A. <info@eurotech.it>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+
+#define ISL29003_DRV_NAME	"isl29003"
+#define DRIVER_VERSION		"1.0"
+
+#define ISL29003_REG_COMMAND		0x00
+#define ISL29003_ADC_ENABLED		(1 << 7)
+#define ISL29003_ADC_PD			(1 << 6)
+#define ISL29003_TIMING_INT		(1 << 5)
+#define ISL29003_MODE_SHIFT		(2)
+#define ISL29003_MODE_MASK		(0x3 << ISL29003_MODE_SHIFT)
+#define ISL29003_RES_SHIFT		(0)
+#define ISL29003_RES_MASK		(0x3 << ISL29003_RES_SHIFT)
+
+#define ISL29003_REG_CONTROL		0x01
+#define ISL29003_INT_FLG		(1 << 5)
+#define ISL29003_RANGE_SHIFT		(2)
+#define ISL29003_RANGE_MASK		(0x3 << ISL29003_RANGE_SHIFT)
+#define ISL29003_INT_PERSISTS_SHIFT	(0)
+#define ISL29003_INT_PERSISTS_MASK	(0xf << ISL29003_INT_PERSISTS_SHIFT)
+
+#define ISL29003_REG_IRQ_THRESH_HI	0x02
+#define ISL29003_REG_IRQ_THRESH_LO	0x03
+#define ISL29003_REG_LSB_SENSOR		0x04
+#define ISL29003_REG_MSB_SENSOR		0x05
+#define ISL29003_REG_LSB_TIMER		0x06
+#define ISL29003_REG_MSB_TIMER		0x07
+
+#define ISL29003_NUM_CACHABLE_REGS	4
+
+struct isl29003_data {
+	struct i2c_client *client;
+	struct mutex lock;	/* held around register writes and ADC reads */
+	u8 reg_cache[ISL29003_NUM_CACHABLE_REGS];	/* shadow of regs 0-3 */
+};
+
+static int gain_range[] = {	/* indexed by the 2-bit range field */
+	1000, 4000, 16000, 64000
+};
+
+/*
+ * register access helpers
+ */
+
+static int __isl29003_read_reg(struct i2c_client *client,
+			       u32 reg, u8 mask, u8 shift)
+{
+	struct isl29003_data *data = i2c_get_clientdata(client);
+	return (data->reg_cache[reg] & mask) >> shift;	/* cache only, no I/O */
+}
+
+/*
+ * Read-modify-write one field of a cached register.  The new byte goes to
+ * the chip first; the cache is updated only if that bus write returns 0.
+ * Returns -EINVAL for an uncached register, else the SMBus call's result.
+ */
+static int __isl29003_write_reg(struct i2c_client *client,
+				u32 reg, u8 mask, u8 shift, u8 val)
+{
+	struct isl29003_data *data = i2c_get_clientdata(client);
+	u8 updated;
+	int err;
+
+	if (reg >= ISL29003_NUM_CACHABLE_REGS)
+		return -EINVAL;
+
+	mutex_lock(&data->lock);
+	updated = (data->reg_cache[reg] & ~mask) | (val << shift);
+	err = i2c_smbus_write_byte_data(client, reg, updated);
+	if (err == 0)
+		data->reg_cache[reg] = updated;
+	mutex_unlock(&data->lock);
+	return err;
+}
+
+/*
+ * internally used functions
+ */
+
+/* range: 2-bit field at bits 3:2 of the control register */
+static int isl29003_get_range(struct i2c_client *client)
+{
+	return __isl29003_read_reg(client, ISL29003_REG_CONTROL,
+		ISL29003_RANGE_MASK, ISL29003_RANGE_SHIFT);
+}
+
+static int isl29003_set_range(struct i2c_client *client, int range)
+{
+	return __isl29003_write_reg(client, ISL29003_REG_CONTROL,
+		ISL29003_RANGE_MASK, ISL29003_RANGE_SHIFT, range);
+}
+
+/* resolution: 2-bit field at bits 1:0 of the command register */
+static int isl29003_get_resolution(struct i2c_client *client)
+{
+	return __isl29003_read_reg(client, ISL29003_REG_COMMAND,
+		ISL29003_RES_MASK, ISL29003_RES_SHIFT);
+}
+
+static int isl29003_set_resolution(struct i2c_client *client, int res)
+{
+	return __isl29003_write_reg(client, ISL29003_REG_COMMAND,
+		ISL29003_RES_MASK, ISL29003_RES_SHIFT, res);
+}
+
+/* mode: 2-bit field at bits 3:2 of the command register */
+static int isl29003_get_mode(struct i2c_client *client)
+{
+	return __isl29003_read_reg(client, ISL29003_REG_COMMAND,
+		ISL29003_MODE_MASK, ISL29003_MODE_SHIFT);
+}
+
+static int isl29003_set_mode(struct i2c_client *client, int mode)
+{
+	return __isl29003_write_reg(client, ISL29003_REG_COMMAND,
+		ISL29003_MODE_MASK, ISL29003_MODE_SHIFT, mode);
+}
+
+/* power_state: nonzero sets ADC_ENABLED and clears ADC_PD; zero the reverse */
+static int isl29003_set_power_state(struct i2c_client *client, int state)
+{
+	u8 bits = state ? ISL29003_ADC_ENABLED : ISL29003_ADC_PD;
+	return __isl29003_write_reg(client, ISL29003_REG_COMMAND,
+				ISL29003_ADC_ENABLED | ISL29003_ADC_PD, 0, bits);
+}
+
+/* Return 1 if the cached command register has ADC_PD clear, else 0. */
+static int isl29003_get_power_state(struct i2c_client *client)
+{
+	struct isl29003_data *data = i2c_get_clientdata(client);
+	return !(data->reg_cache[ISL29003_REG_COMMAND] & ISL29003_ADC_PD);
+}
+
+/* Returns the range-scaled sensor reading, or a negative SMBus error. */
+static int isl29003_get_adc_value(struct i2c_client *client)
+{
+	struct isl29003_data *data = i2c_get_clientdata(client);
+	int lsb, msb, range, bitdepth;
+	unsigned int raw;
+
+	mutex_lock(&data->lock);
+	lsb = i2c_smbus_read_byte_data(client, ISL29003_REG_LSB_SENSOR);
+	if (lsb < 0) {
+		mutex_unlock(&data->lock);
+		return lsb;
+	}
+	msb = i2c_smbus_read_byte_data(client, ISL29003_REG_MSB_SENSOR);
+	mutex_unlock(&data->lock);
+	if (msb < 0)
+		return msb;
+	/* multiply unsigned: 0xffff * 64000 overflows a signed 32-bit int */
+	raw = (msb << 8) | lsb;
+	range = isl29003_get_range(client);
+	bitdepth = (4 - isl29003_get_resolution(client)) * 4;
+	return (raw * gain_range[range]) >> bitdepth;
+}
+
+/*
+ * sysfs layer
+ */
+
+/* range */
+/* sysfs "range" read: print isl29003_get_range() in decimal. */
+static ssize_t isl29003_show_range(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%i\n", isl29003_get_range(to_i2c_client(dev)));
+}
+
+/*
+ * sysfs "range" write: accept a decimal value 0..3 and program it.
+ * Returns count on success, -EINVAL on bad input, or the write error.
+ */
+static ssize_t isl29003_store_range(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	unsigned long range;
+	int err;
+
+	if (strict_strtoul(buf, 10, &range) < 0 || range > 3)
+		return -EINVAL;
+
+	err = isl29003_set_range(to_i2c_client(dev), range);
+	return err < 0 ? err : count;
+}
+
+static DEVICE_ATTR(range, S_IWUSR | S_IRUGO,
+		   isl29003_show_range, isl29003_store_range);
+
+
+/* resolution */
+/* sysfs "resolution" read: print isl29003_get_resolution() in decimal. */
+static ssize_t isl29003_show_resolution(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	int res = isl29003_get_resolution(to_i2c_client(dev));
+	return sprintf(buf, "%d\n", res);
+}
+
+/*
+ * sysfs "resolution" write: accept a decimal value 0..3 and program it.
+ * Returns count on success, -EINVAL on bad input, or the write error.
+ */
+static ssize_t isl29003_store_resolution(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t count)
+{
+	unsigned long res;
+	int err;
+
+	if (strict_strtoul(buf, 10, &res) < 0 || res > 3)
+		return -EINVAL;
+
+	err = isl29003_set_resolution(to_i2c_client(dev), res);
+	return err < 0 ? err : count;
+}
+
+static DEVICE_ATTR(resolution, S_IWUSR | S_IRUGO,
+		   isl29003_show_resolution, isl29003_store_resolution);
+
+/* mode */
+/* sysfs "mode" read: print isl29003_get_mode() in decimal. */
+static ssize_t isl29003_show_mode(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", isl29003_get_mode(to_i2c_client(dev)));
+}
+
+/*
+ * sysfs "mode" write: accept a decimal value 0..2, pass to set_mode().
+ * Returns count on success, -EINVAL on bad input, or the write error.
+ */
+static ssize_t isl29003_store_mode(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	unsigned long mode;
+	int err;
+
+	if (strict_strtoul(buf, 10, &mode) < 0 || mode > 2)
+		return -EINVAL;
+
+	err = isl29003_set_mode(to_i2c_client(dev), mode);
+	return err < 0 ? err : count;
+}
+
+static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO,
+		   isl29003_show_mode, isl29003_store_mode);
+
+
+/* power state */
+/* sysfs "power_state" read: print isl29003_get_power_state() in decimal. */
+static ssize_t isl29003_show_power_state(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	int state = isl29003_get_power_state(to_i2c_client(dev));
+	return sprintf(buf, "%d\n", state);
+}
+
+/* sysfs "power_state" write: 0 or 1; returns count or an error code. */
+static ssize_t isl29003_store_power_state(struct device *dev,
+					  struct device_attribute *attr,
+					  const char *buf, size_t count)
+{
+	unsigned long state;
+	int err;
+
+	if (strict_strtoul(buf, 10, &state) < 0 || state > 1)
+		return -EINVAL;
+
+	err = isl29003_set_power_state(to_i2c_client(dev), state);
+	return err ? err : count;
+}
+
+static DEVICE_ATTR(power_state, S_IWUSR | S_IRUGO,
+		   isl29003_show_power_state, isl29003_store_power_state);
+
+
+/* lux */
+/* sysfs "lux" read; -EBUSY while powered down, bus errors are returned. */
+static ssize_t isl29003_show_lux(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	int lux = -EBUSY;
+
+	if (isl29003_get_power_state(client))
+		lux = isl29003_get_adc_value(client);
+	return lux < 0 ? lux : sprintf(buf, "%d\n", lux);
+}
+
+static DEVICE_ATTR(lux, S_IRUGO, isl29003_show_lux, NULL);
+
+static struct attribute *isl29003_attributes[] = {
+	&dev_attr_range.attr,
+	&dev_attr_resolution.attr,
+	&dev_attr_mode.attr,
+	&dev_attr_power_state.attr,
+	&dev_attr_lux.attr,
+	NULL
+};
+
+static const struct attribute_group isl29003_attr_group = {
+	.attrs = isl29003_attributes,
+};
+
+/* Fill the register cache (-ENODEV if a read fails), then set defaults. */
+static int isl29003_init_client(struct i2c_client *client)
+{
+	struct isl29003_data *data = i2c_get_clientdata(client);
+	int i, err;
+
+	for (i = 0; i < ARRAY_SIZE(data->reg_cache); i++) {
+		int v = i2c_smbus_read_byte_data(client, i);
+		if (v < 0)
+			return -ENODEV;
+		data->reg_cache[i] = v;
+	}
+
+	/* set defaults; stop at the first write that fails */
+	err = isl29003_set_range(client, 0);
+	if (!err)
+		err = isl29003_set_resolution(client, 0);
+	if (!err)
+		err = isl29003_set_mode(client, 0);
+	if (!err)
+		err = isl29003_set_power_state(client, 0);
+	return err;
+}
+
+/*
+ * I2C layer
+ */
+
+/* Bind to one ISL29003: allocate state, prime the cache, add sysfs files. */
+static int __devinit isl29003_probe(struct i2c_client *client,
+				    const struct i2c_device_id *id)
+{
+	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
+	struct isl29003_data *data;
+	int err;
+
+	/* every register access in this driver is an SMBus byte-data transfer */
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
+		return -EIO;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->client = client;
+	i2c_set_clientdata(client, data);
+	mutex_init(&data->lock);
+
+	err = isl29003_init_client(client);
+	if (err)
+		goto exit_kfree;
+
+	err = sysfs_create_group(&client->dev.kobj, &isl29003_attr_group);
+	if (err)
+		goto exit_kfree;
+
+	dev_info(&client->dev, "driver version %s enabled\n", DRIVER_VERSION);
+	return 0;
+
+exit_kfree:
+	kfree(data);
+	return err;
+}
+
+static int __devexit isl29003_remove(struct i2c_client *client)
+{
+	sysfs_remove_group(&client->dev.kobj, &isl29003_attr_group);
+	isl29003_set_power_state(client, 0);
+	kfree(i2c_get_clientdata(client));
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int isl29003_suspend(struct i2c_client *client, pm_message_t mesg)
+{
+	return isl29003_set_power_state(client, 0);
+}
+
+/* Write every cached register back to the chip; -EIO if any write fails. */
+static int isl29003_resume(struct i2c_client *client)
+{
+	struct isl29003_data *data = i2c_get_clientdata(client);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(data->reg_cache); i++)
+		if (i2c_smbus_write_byte_data(client, i,
+					      data->reg_cache[i]) < 0)
+			return -EIO;
+	return 0;
+}
+
+#else
+#define isl29003_suspend	NULL
+#define isl29003_resume		NULL
+#endif /* CONFIG_PM */
+
+static const struct i2c_device_id isl29003_id[] = {
+	{ "isl29003", 0 },
+	{}
+};
+MODULE_DEVICE_TABLE(i2c, isl29003_id);
+
+static struct i2c_driver isl29003_driver = {
+	.driver = {
+		.name	= ISL29003_DRV_NAME,
+		.owner	= THIS_MODULE,
+	},
+	.suspend = isl29003_suspend,
+	.resume	= isl29003_resume,
+	.probe	= isl29003_probe,
+	.remove	= __devexit_p(isl29003_remove),
+	.id_table = isl29003_id,
+};
+
+static int __init isl29003_init(void)
+{
+	return i2c_add_driver(&isl29003_driver);
+}
+
+static void __exit isl29003_exit(void)
+{
+	i2c_del_driver(&isl29003_driver);
+}
+
+MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>");
+MODULE_DESCRIPTION("ISL29003 ambient light sensor driver");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRIVER_VERSION);
+
+module_init(isl29003_init);
+module_exit(isl29003_exit);
+
diff --git a/drivers/misc/sgi-gru/Makefile b/drivers/misc/sgi-gru/Makefile
index 9e9170b3599a..bcd8136d2f98 100644
--- a/drivers/misc/sgi-gru/Makefile
+++ b/drivers/misc/sgi-gru/Makefile
@@ -3,5 +3,5 @@ ifdef CONFIG_SGI_GRU_DEBUG
 endif
 
 obj-$(CONFIG_SGI_GRU) := gru.o
-gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o
+gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o gruhandles.o
 
diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h
index 48762e7b98be..3fde33c1e8f3 100644
--- a/drivers/misc/sgi-gru/gru_instructions.h
+++ b/drivers/misc/sgi-gru/gru_instructions.h
@@ -19,8 +19,11 @@
 #ifndef __GRU_INSTRUCTIONS_H__
 #define __GRU_INSTRUCTIONS_H__
 
-#define gru_flush_cache_hook(p)
-#define gru_emulator_wait_hook(p, w)
+extern int gru_check_status_proc(void *cb);
+extern int gru_wait_proc(void *cb);
+extern void gru_wait_abort_proc(void *cb);
+
+
 
 /*
  * Architecture dependent functions
@@ -29,16 +32,16 @@
 #if defined(CONFIG_IA64)
 #include <linux/compiler.h>
 #include <asm/intrinsics.h>
-#define __flush_cache(p)		ia64_fc(p)
+#define __flush_cache(p)		ia64_fc((unsigned long)p)
 /* Use volatile on IA64 to ensure ordering via st4.rel */
-#define gru_ordered_store_int(p,v)					\
+#define gru_ordered_store_int(p, v)					\
 		do {							\
 			barrier();					\
 			*((volatile int *)(p)) = v; /* force st.rel */	\
 		} while (0)
 #elif defined(CONFIG_X86_64)
 #define __flush_cache(p)		clflush(p)
-#define gru_ordered_store_int(p,v)					\
+#define gru_ordered_store_int(p, v)					\
 		do {							\
 			barrier();					\
 			*(int *)p = v;					\
@@ -558,20 +561,19 @@ extern int gru_get_cb_exception_detail(void *cb,
 
 #define GRU_EXC_STR_SIZE		256
 
-extern int gru_check_status_proc(void *cb);
-extern int gru_wait_proc(void *cb);
-extern void gru_wait_abort_proc(void *cb);
 
 /*
  * Control block definition for checking status
  */
 struct gru_control_block_status {
 	unsigned int	icmd		:1;
-	unsigned int	unused1		:31;
+	unsigned int	ima		:3;
+	unsigned int	reserved0	:4;
+	unsigned int	unused1		:24;
 	unsigned int	unused2		:24;
 	unsigned int	istatus		:2;
 	unsigned int	isubstatus	:4;
-	unsigned int	inused3		:2;
+	unsigned int	unused3		:2;
 };
 
 /* Get CB status */
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
index 3ee698ad8599..ab118558552e 100644
--- a/drivers/misc/sgi-gru/grufault.c
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -32,6 +32,7 @@
 #include <linux/device.h>
 #include <linux/io.h>
 #include <linux/uaccess.h>
+#include <linux/security.h>
 #include <asm/pgtable.h>
 #include "gru.h"
 #include "grutables.h"
@@ -266,6 +267,44 @@ err:
 	return 1;
 }
 
+/*
+ * Look up the physical page backing vaddr and store its gpa (as produced
+ * by uv_soc_phys_ram_to_gpa()) in *gpa and its page shift in *pageshift.
+ * Returns 0 on success, -1 if there is no VMA, the lookup fails or
+ * is_gru_paddr() rejects the address, -2 if the atomic lookup failed
+ * and "atomic" forbids falling back to the non-atomic lookup.
+ */
+static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr,
+		    int write, int atomic, unsigned long *gpa, int *pageshift)
+{
+	struct vm_area_struct *vma = find_vma(gts->ts_mm, vaddr);
+	unsigned long phys;
+	int shift;
+
+	if (!vma)
+		return -1;
+
+	/*
+	 * Try the atomic lookup first: it is faster and usually works even
+	 * when called from non-atomic context.
+	 */
+	rmb();	/* Must check ms_range_active before loading PTEs */
+	if (atomic_pte_lookup(vma, vaddr, write, &phys, &shift)) {
+		if (atomic)
+			return -2;
+		if (non_atomic_pte_lookup(vma, vaddr, write, &phys, &shift))
+			return -1;
+	}
+	if (is_gru_paddr(phys))
+		return -1;
+
+	phys &= ~((1UL << shift) - 1);
+	*gpa = uv_soc_phys_ram_to_gpa(phys);
+	*pageshift = shift;
+	return 0;
+}
+
+
 /*
  * Drop a TLB entry into the GRU. The fault is described by info in an TFH.
  *	Input:
@@ -280,10 +319,8 @@ static int gru_try_dropin(struct gru_thread_state *gts,
 			  struct gru_tlb_fault_handle *tfh,
 			  unsigned long __user *cb)
 {
-	struct mm_struct *mm = gts->ts_mm;
-	struct vm_area_struct *vma;
-	int pageshift, asid, write, ret;
-	unsigned long paddr, gpa, vaddr;
+	int pageshift = 0, asid, write, ret, atomic = !cb;
+	unsigned long gpa = 0, vaddr = 0;
 
 	/*
 	 * NOTE: The GRU contains magic hardware that eliminates races between
@@ -317,28 +354,19 @@ static int gru_try_dropin(struct gru_thread_state *gts,
 	if (atomic_read(&gts->ts_gms->ms_range_active))
 		goto failactive;
 
-	vma = find_vma(mm, vaddr);
-	if (!vma)
+	ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift);
+	if (ret == -1)
 		goto failinval;
+	if (ret == -2)
+		goto failupm;
 
-	/*
-	 * Atomic lookup is faster & usually works even if called in non-atomic
-	 * context.
-	 */
-	rmb();	/* Must/check ms_range_active before loading PTEs */
-	ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &pageshift);
-	if (ret) {
-		if (!cb)
+	if (!(gts->ts_sizeavail & GRU_SIZEAVAIL(pageshift))) {
+		gts->ts_sizeavail |= GRU_SIZEAVAIL(pageshift);
+		if (atomic || !gru_update_cch(gts, 0)) {
+			gts->ts_force_cch_reload = 1;
 			goto failupm;
-		if (non_atomic_pte_lookup(vma, vaddr, write, &paddr,
-					  &pageshift))
-			goto failinval;
+		}
 	}
-	if (is_gru_paddr(paddr))
-		goto failinval;
-
-	paddr = paddr & ~((1UL << pageshift) - 1);
-	gpa = uv_soc_phys_ram_to_gpa(paddr);
 	gru_cb_set_istatus_active(cb);
 	tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write,
 			  GRU_PAGESIZE(pageshift));
@@ -368,6 +396,7 @@ failupm:
 
 failfmm:
 	/* FMM state on UPM call */
+	gru_flush_cache(tfh);
 	STAT(tlb_dropin_fail_fmm);
 	gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state);
 	return 0;
@@ -448,6 +477,7 @@ irqreturn_t gru_intr(int irq, void *dev_id)
 			up_read(&gts->ts_mm->mmap_sem);
 		} else {
 			tfh_user_polling_mode(tfh);
+			STAT(intr_mm_lock_failed);
 		}
 	}
 	return IRQ_HANDLED;
@@ -497,10 +527,8 @@ int gru_handle_user_call_os(unsigned long cb)
 	if (!gts)
 		return -EINVAL;
 
-	if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) {
-		ret = -EINVAL;
+	if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE)
 		goto exit;
-	}
 
 	/*
 	 * If force_unload is set, the UPM TLB fault is phony. The task
@@ -508,6 +536,20 @@ int gru_handle_user_call_os(unsigned long cb)
 	 * unload the context. The task will page fault and assign a new
 	 * context.
 	 */
+	if (gts->ts_tgid_owner == current->tgid && gts->ts_blade >= 0 &&
+				gts->ts_blade != uv_numa_blade_id()) {
+		STAT(call_os_offnode_reference);
+		gts->ts_force_unload = 1;
+	}
+
+	/*
+	 * CCH may contain stale data if ts_force_cch_reload is set.
+	 */
+	if (gts->ts_gru && gts->ts_force_cch_reload) {
+		gru_update_cch(gts, 0);
+		gts->ts_force_cch_reload = 0;
+	}
+
 	ret = -EAGAIN;
 	cbrnum = thread_cbr_number(gts, ucbnum);
 	if (gts->ts_force_unload) {
@@ -541,11 +583,13 @@ int gru_get_exception_detail(unsigned long arg)
 	if (!gts)
 		return -EINVAL;
 
-	if (gts->ts_gru) {
-		ucbnum = get_cb_number((void *)excdet.cb);
+	ucbnum = get_cb_number((void *)excdet.cb);
+	if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) {
+		ret = -EINVAL;
+	} else if (gts->ts_gru) {
 		cbrnum = thread_cbr_number(gts, ucbnum);
 		cbe = get_cbe_by_index(gts->ts_gru, cbrnum);
-		prefetchw(cbe);		/* Harmless on hardware, required for emulator */
+		prefetchw(cbe);/* Harmless on hardware, required for emulator */
 		excdet.opc = cbe->opccpy;
 		excdet.exopc = cbe->exopccpy;
 		excdet.ecause = cbe->ecause;
@@ -567,6 +611,31 @@ int gru_get_exception_detail(unsigned long arg)
 /*
  * User request to unload a context. Content is saved for possible reload.
  */
+static int gru_unload_all_contexts(void)
+{
+	struct gru_thread_state *gts;
+	struct gru_state *gru;
+	int gid, ctxnum;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	foreach_gid(gid) {
+		gru = GID_TO_GRU(gid);
+		spin_lock(&gru->gs_lock);
+		for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) {
+			gts = gru->gs_gts[ctxnum];
+			if (gts && mutex_trylock(&gts->ts_ctxlock)) {
+				spin_unlock(&gru->gs_lock);
+				gru_unload_context(gts, 1);
+				gru_unlock_gts(gts);
+				spin_lock(&gru->gs_lock);
+			}
+		}
+		spin_unlock(&gru->gs_lock);
+	}
+	return 0;
+}
+
 int gru_user_unload_context(unsigned long arg)
 {
 	struct gru_thread_state *gts;
@@ -578,6 +647,9 @@ int gru_user_unload_context(unsigned long arg)
 
 	gru_dbg(grudev, "gseg 0x%lx\n", req.gseg);
 
+	if (!req.gseg)
+		return gru_unload_all_contexts();
+
 	gts = gru_find_lock_gts(req.gseg);
 	if (!gts)
 		return -EINVAL;
@@ -609,7 +681,7 @@ int gru_user_flush_tlb(unsigned long arg)
 	if (!gts)
 		return -EINVAL;
 
-	gru_flush_tlb_range(gts->ts_gms, req.vaddr, req.vaddr + req.len);
+	gru_flush_tlb_range(gts->ts_gms, req.vaddr, req.len);
 	gru_unlock_gts(gts);
 
 	return 0;
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
index c67e4e8bd62c..3e6e42d2f01b 100644
--- a/drivers/misc/sgi-gru/grufile.c
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -45,7 +45,9 @@
 #include <asm/uv/uv_mmrs.h>
 
 struct gru_blade_state *gru_base[GRU_MAX_BLADES] __read_mostly;
-unsigned long gru_start_paddr, gru_end_paddr __read_mostly;
+unsigned long gru_start_paddr __read_mostly;
+unsigned long gru_end_paddr __read_mostly;
+unsigned int gru_max_gids __read_mostly;
 struct gru_stats_s gru_stats;
 
 /* Guaranteed user available resources on each node */
@@ -101,7 +103,7 @@ static int gru_file_mmap(struct file *file, struct vm_area_struct *vma)
 		return -EPERM;
 
 	if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) ||
-	    			vma->vm_end & (GRU_GSEG_PAGESIZE - 1))
+				vma->vm_end & (GRU_GSEG_PAGESIZE - 1))
 		return -EINVAL;
 
 	vma->vm_flags |=
@@ -273,8 +275,11 @@ static void gru_init_chiplet(struct gru_state *gru, unsigned long paddr,
 	gru->gs_blade_id = bid;
 	gru->gs_cbr_map = (GRU_CBR_AU == 64) ? ~0 : (1UL << GRU_CBR_AU) - 1;
 	gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1;
+	gru->gs_asid_limit = MAX_ASID;
 	gru_tgh_flush_init(gru);
-	gru_dbg(grudev, "bid %d, nid %d, gru %x, vaddr %p (0x%lx)\n",
+	if (gru->gs_gid >= gru_max_gids)
+		gru_max_gids = gru->gs_gid + 1;
+	gru_dbg(grudev, "bid %d, nid %d, gid %d, vaddr %p (0x%lx)\n",
 		bid, nid, gru->gs_gid, gru->gs_gru_base_vaddr,
 		gru->gs_gru_base_paddr);
 	gru_kservices_init(gru);
@@ -295,7 +300,7 @@ static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr)
 	for_each_online_node(nid) {
 		bid = uv_node_to_blade_id(nid);
 		pnode = uv_node_to_pnode(nid);
-		if (gru_base[bid])
+		if (bid < 0 || gru_base[bid])
 			continue;
 		page = alloc_pages_node(nid, GFP_KERNEL, order);
 		if (!page)
@@ -308,11 +313,11 @@ static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr)
 		dsrbytes = 0;
 		cbrs = 0;
 		for (gru = gru_base[bid]->bs_grus, chip = 0;
-		     		chip < GRU_CHIPLETS_PER_BLADE;
+				chip < GRU_CHIPLETS_PER_BLADE;
 				chip++, gru++) {
 			paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip);
 			vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip);
-			gru_init_chiplet(gru, paddr, vaddr, bid, nid, chip);
+			gru_init_chiplet(gru, paddr, vaddr, nid, bid, chip);
 			n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE;
 			cbrs = max(cbrs, n);
 			n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES;
@@ -370,26 +375,26 @@ static int __init gru_init(void)
 	void *gru_start_vaddr;
 
 	if (!is_uv_system())
-		return 0;
+		return -ENODEV;
 
 #if defined CONFIG_IA64
 	gru_start_paddr = 0xd000000000UL; /* ZZZZZZZZZZZZZZZZZZZ fixme */
 #else
 	gru_start_paddr = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR) &
 				0x7fffffffffffUL;
-
 #endif
 	gru_start_vaddr = __va(gru_start_paddr);
-	gru_end_paddr = gru_start_paddr + MAX_NUMNODES * GRU_SIZE;
+	gru_end_paddr = gru_start_paddr + GRU_MAX_BLADES * GRU_SIZE;
 	printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n",
 	       gru_start_paddr, gru_end_paddr);
 	irq = get_base_irq();
 	for (chip = 0; chip < GRU_CHIPLETS_PER_BLADE; chip++) {
 		ret = request_irq(irq + chip, gru_intr, 0, id, NULL);
-		/* TODO: fix irq handling on x86. For now ignore failures because
+		/* TODO: fix irq handling on x86. For now ignore failure because
 		 * interrupts are not required & not yet fully supported */
 		if (ret) {
-			printk("!!!WARNING: GRU ignoring request failure!!!\n");
+			printk(KERN_WARNING
+			       "!!!WARNING: GRU ignoring request failure!!!\n");
 			ret = 0;
 		}
 		if (ret) {
@@ -435,7 +440,7 @@ exit1:
 
 static void __exit gru_exit(void)
 {
-	int i, bid;
+	int i, bid, gid;
 	int order = get_order(sizeof(struct gru_state) *
 			      GRU_CHIPLETS_PER_BLADE);
 
@@ -445,6 +450,9 @@ static void __exit gru_exit(void)
 	for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++)
 		free_irq(IRQ_GRU + i, NULL);
 
+	foreach_gid(gid)
+		gru_kservices_exit(GID_TO_GRU(gid));
+
 	for (bid = 0; bid < GRU_MAX_BLADES; bid++)
 		free_pages((unsigned long)gru_base[bid], order);
 
@@ -469,7 +477,11 @@ struct vm_operations_struct gru_vm_ops = {
 	.fault		= gru_fault,
 };
 
+#ifndef MODULE
 fs_initcall(gru_init);
+#else
+module_init(gru_init);
+#endif
 module_exit(gru_exit);
 
 module_param(gru_options, ulong, 0644);
diff --git a/drivers/misc/sgi-gru/gruhandles.c b/drivers/misc/sgi-gru/gruhandles.c
new file mode 100644
index 000000000000..9b7ccb328697
--- /dev/null
+++ b/drivers/misc/sgi-gru/gruhandles.c
@@ -0,0 +1,183 @@
+/*
+ *              GRU KERNEL MCS INSTRUCTIONS
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include "gru.h"
+#include "grulib.h"
+#include "grutables.h"
+
+/* Instruction timeout: 10 seconds, expressed in get_cycles() ticks */
+#ifdef CONFIG_IA64
+#include <asm/processor.h>
+#define GRU_OPERATION_TIMEOUT	(((cycles_t) local_cpu_data->itc_freq)*10)
+#else
+#include <asm/tsc.h>
+#define GRU_OPERATION_TIMEOUT	((cycles_t) tsc_khz*10*1000)
+#endif
+
+/* Extract the status field from a kernel handle */
+#define GET_MSEG_HANDLE_STATUS(h)	(((*(unsigned long *)(h)) >> 16) & 3)
+
+struct mcs_op_statistic mcs_op_statistics[mcsop_last];
+
+static void update_mcs_stats(enum mcs_op op, unsigned long clks)
+{
+	atomic_long_inc(&mcs_op_statistics[op].count);
+	atomic_long_add(clks, &mcs_op_statistics[op].total);
+	if (mcs_op_statistics[op].max < clks)
+		mcs_op_statistics[op].max = clks;
+}
+
+static void start_instruction(void *h)
+{
+	unsigned long *w0 = h;
+
+	wmb();		/* setting CMD bit must be last */
+	*w0 = *w0 | 1;
+	gru_flush_cache(h);
+}
+
+static int wait_instruction_complete(void *h, enum mcs_op opc)
+{
+	int status;
+	cycles_t start_time = get_cycles();
+
+	while (1) {
+		cpu_relax();
+		status = GET_MSEG_HANDLE_STATUS(h);
+		if (status != CCHSTATUS_ACTIVE)
+			break;
+		if (GRU_OPERATION_TIMEOUT < (get_cycles() - start_time))
+			panic("GRU %p is malfunctioning\n", h);
+	}
+	if (gru_options & OPT_STATS)
+		update_mcs_stats(opc, get_cycles() - start_time);
+	return status;
+}
+
+int cch_allocate(struct gru_context_configuration_handle *cch,
+		int asidval, int sizeavail, unsigned long cbrmap,
+		unsigned long dsrmap)
+{
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		cch->asid[i] = (asidval++);
+		cch->sizeavail[i] = sizeavail;
+	}
+	cch->dsr_allocation_map = dsrmap;
+	cch->cbr_allocation_map = cbrmap;
+	cch->opc = CCHOP_ALLOCATE;
+	start_instruction(cch);
+	return wait_instruction_complete(cch, cchop_allocate);
+}
+
+int cch_start(struct gru_context_configuration_handle *cch)
+{
+	cch->opc = CCHOP_START;
+	start_instruction(cch);
+	return wait_instruction_complete(cch, cchop_start);
+}
+
+int cch_interrupt(struct gru_context_configuration_handle *cch)
+{
+	cch->opc = CCHOP_INTERRUPT;
+	start_instruction(cch);
+	return wait_instruction_complete(cch, cchop_interrupt);
+}
+
+int cch_deallocate(struct gru_context_configuration_handle *cch)
+{
+	cch->opc = CCHOP_DEALLOCATE;
+	start_instruction(cch);
+	return wait_instruction_complete(cch, cchop_deallocate);
+}
+
+int cch_interrupt_sync(struct gru_context_configuration_handle
+				     *cch)
+{
+	cch->opc = CCHOP_INTERRUPT_SYNC;
+	start_instruction(cch);
+	return wait_instruction_complete(cch, cchop_interrupt_sync);
+}
+
+int tgh_invalidate(struct gru_tlb_global_handle *tgh,
+				 unsigned long vaddr, unsigned long vaddrmask,
+				 int asid, int pagesize, int global, int n,
+				 unsigned short ctxbitmap)
+{
+	tgh->vaddr = vaddr;
+	tgh->asid = asid;
+	tgh->pagesize = pagesize;
+	tgh->n = n;
+	tgh->global = global;
+	tgh->vaddrmask = vaddrmask;
+	tgh->ctxbitmap = ctxbitmap;
+	tgh->opc = TGHOP_TLBINV;
+	start_instruction(tgh);
+	return wait_instruction_complete(tgh, tghop_invalidate);
+}
+
+void tfh_write_only(struct gru_tlb_fault_handle *tfh,
+				  unsigned long pfn, unsigned long vaddr,
+				  int asid, int dirty, int pagesize)
+{
+	tfh->fillasid = asid;
+	tfh->fillvaddr = vaddr;
+	tfh->pfn = pfn;
+	tfh->dirty = dirty;
+	tfh->pagesize = pagesize;
+	tfh->opc = TFHOP_WRITE_ONLY;
+	start_instruction(tfh);
+}
+
+void tfh_write_restart(struct gru_tlb_fault_handle *tfh,
+				     unsigned long paddr, int gaa,
+				     unsigned long vaddr, int asid, int dirty,
+				     int pagesize)
+{
+	tfh->fillasid = asid;
+	tfh->fillvaddr = vaddr;
+	tfh->pfn = paddr >> GRU_PADDR_SHIFT;
+	tfh->gaa = gaa;
+	tfh->dirty = dirty;
+	tfh->pagesize = pagesize;
+	tfh->opc = TFHOP_WRITE_RESTART;
+	start_instruction(tfh);
+}
+
+void tfh_restart(struct gru_tlb_fault_handle *tfh)
+{
+	tfh->opc = TFHOP_RESTART;
+	start_instruction(tfh);
+}
+
+void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh)
+{
+	tfh->opc = TFHOP_USER_POLLING_MODE;
+	start_instruction(tfh);
+}
+
+void tfh_exception(struct gru_tlb_fault_handle *tfh)
+{
+	tfh->opc = TFHOP_EXCEPTION;
+	start_instruction(tfh);
+}
+
diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h
index b63018d60fe1..1ed74d7508c8 100644
--- a/drivers/misc/sgi-gru/gruhandles.h
+++ b/drivers/misc/sgi-gru/gruhandles.h
@@ -489,170 +489,28 @@ enum gru_cbr_state {
  * 	 64m			26	8
  * 	...
  */
-#define GRU_PAGESIZE(sh)	((((sh) > 20 ? (sh) + 2: (sh)) >> 1) - 6)
+#define GRU_PAGESIZE(sh)	((((sh) > 20 ? (sh) + 2 : (sh)) >> 1) - 6)
 #define GRU_SIZEAVAIL(sh)	(1UL << GRU_PAGESIZE(sh))
 
 /* minimum TLB purge count to ensure a full purge */
 #define GRUMAXINVAL		1024UL
 
-
-/* Extract the status field from a kernel handle */
-#define GET_MSEG_HANDLE_STATUS(h)	(((*(unsigned long *)(h)) >> 16) & 3)
-
-static inline void start_instruction(void *h)
-{
-	unsigned long *w0 = h;
-
-	wmb();		/* setting CMD bit must be last */
-	*w0 = *w0 | 1;
-	gru_flush_cache(h);
-}
-
-static inline int wait_instruction_complete(void *h)
-{
-	int status;
-
-	do {
-		cpu_relax();
-		barrier();
-		status = GET_MSEG_HANDLE_STATUS(h);
-	} while (status == CCHSTATUS_ACTIVE);
-	return status;
-}
-
-#if defined CONFIG_IA64
-static inline void cch_allocate_set_asids(
-		  struct gru_context_configuration_handle *cch, int asidval)
-{
-	int i;
-
-	for (i = 0; i <= RGN_HPAGE; i++) {  /*  assume HPAGE is last region */
-		cch->asid[i] = (asidval++);
-#if 0
-		/* ZZZ hugepages not supported yet */
-		if (i == RGN_HPAGE)
-			cch->sizeavail[i] = GRU_SIZEAVAIL(hpage_shift);
-		else
-#endif
-			cch->sizeavail[i] = GRU_SIZEAVAIL(PAGE_SHIFT);
-	}
-}
-#elif defined CONFIG_X86_64
-static inline void cch_allocate_set_asids(
-		  struct gru_context_configuration_handle *cch, int asidval)
-{
-	int i;
-
-	for (i = 0; i < 8; i++) {
-		cch->asid[i] = asidval++;
-		cch->sizeavail[i] = GRU_SIZEAVAIL(PAGE_SHIFT) |
-			GRU_SIZEAVAIL(21);
-	}
-}
-#endif
-
-static inline int cch_allocate(struct gru_context_configuration_handle *cch,
-			       int asidval, unsigned long cbrmap,
-			       unsigned long dsrmap)
-{
-	cch_allocate_set_asids(cch, asidval);
-	cch->dsr_allocation_map = dsrmap;
-	cch->cbr_allocation_map = cbrmap;
-	cch->opc = CCHOP_ALLOCATE;
-	start_instruction(cch);
-	return wait_instruction_complete(cch);
-}
-
-static inline int cch_start(struct gru_context_configuration_handle *cch)
-{
-	cch->opc = CCHOP_START;
-	start_instruction(cch);
-	return wait_instruction_complete(cch);
-}
-
-static inline int cch_interrupt(struct gru_context_configuration_handle *cch)
-{
-	cch->opc = CCHOP_INTERRUPT;
-	start_instruction(cch);
-	return wait_instruction_complete(cch);
-}
-
-static inline int cch_deallocate(struct gru_context_configuration_handle *cch)
-{
-	cch->opc = CCHOP_DEALLOCATE;
-	start_instruction(cch);
-	return wait_instruction_complete(cch);
-}
-
-static inline int cch_interrupt_sync(struct gru_context_configuration_handle
-				     *cch)
-{
-	cch->opc = CCHOP_INTERRUPT_SYNC;
-	start_instruction(cch);
-	return wait_instruction_complete(cch);
-}
-
-static inline int tgh_invalidate(struct gru_tlb_global_handle *tgh,
-				 unsigned long vaddr, unsigned long vaddrmask,
-				 int asid, int pagesize, int global, int n,
-				 unsigned short ctxbitmap)
-{
-	tgh->vaddr = vaddr;
-	tgh->asid = asid;
-	tgh->pagesize = pagesize;
-	tgh->n = n;
-	tgh->global = global;
-	tgh->vaddrmask = vaddrmask;
-	tgh->ctxbitmap = ctxbitmap;
-	tgh->opc = TGHOP_TLBINV;
-	start_instruction(tgh);
-	return wait_instruction_complete(tgh);
-}
-
-static inline void tfh_write_only(struct gru_tlb_fault_handle *tfh,
-				  unsigned long pfn, unsigned long vaddr,
-				  int asid, int dirty, int pagesize)
-{
-	tfh->fillasid = asid;
-	tfh->fillvaddr = vaddr;
-	tfh->pfn = pfn;
-	tfh->dirty = dirty;
-	tfh->pagesize = pagesize;
-	tfh->opc = TFHOP_WRITE_ONLY;
-	start_instruction(tfh);
-}
-
-static inline void tfh_write_restart(struct gru_tlb_fault_handle *tfh,
-				     unsigned long paddr, int gaa,
-				     unsigned long vaddr, int asid, int dirty,
-				     int pagesize)
-{
-	tfh->fillasid = asid;
-	tfh->fillvaddr = vaddr;
-	tfh->pfn = paddr >> GRU_PADDR_SHIFT;
-	tfh->gaa = gaa;
-	tfh->dirty = dirty;
-	tfh->pagesize = pagesize;
-	tfh->opc = TFHOP_WRITE_RESTART;
-	start_instruction(tfh);
-}
-
-static inline void tfh_restart(struct gru_tlb_fault_handle *tfh)
-{
-	tfh->opc = TFHOP_RESTART;
-	start_instruction(tfh);
-}
-
-static inline void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh)
-{
-	tfh->opc = TFHOP_USER_POLLING_MODE;
-	start_instruction(tfh);
-}
-
-static inline void tfh_exception(struct gru_tlb_fault_handle *tfh)
-{
-	tfh->opc = TFHOP_EXCEPTION;
-	start_instruction(tfh);
-}
+int cch_allocate(struct gru_context_configuration_handle *cch,
+       int asidval, int sizeavail, unsigned long cbrmap, unsigned long dsrmap);
+
+int cch_start(struct gru_context_configuration_handle *cch);
+int cch_interrupt(struct gru_context_configuration_handle *cch);
+int cch_deallocate(struct gru_context_configuration_handle *cch);
+int cch_interrupt_sync(struct gru_context_configuration_handle *cch);
+int tgh_invalidate(struct gru_tlb_global_handle *tgh, unsigned long vaddr,
+	unsigned long vaddrmask, int asid, int pagesize, int global, int n,
+	unsigned short ctxbitmap);
+void tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long pfn,
+	unsigned long vaddr, int asid, int dirty, int pagesize);
+void tfh_write_restart(struct gru_tlb_fault_handle *tfh, unsigned long paddr,
+	int gaa, unsigned long vaddr, int asid, int dirty, int pagesize);
+void tfh_restart(struct gru_tlb_fault_handle *tfh);
+void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh);
+void tfh_exception(struct gru_tlb_fault_handle *tfh);
 
 #endif /* __GRUHANDLES_H__ */
diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c
index 880c55dfb662..d8bd7d84a7cf 100644
--- a/drivers/misc/sgi-gru/grukservices.c
+++ b/drivers/misc/sgi-gru/grukservices.c
@@ -52,8 +52,10 @@
  */
 
 /* Blade percpu resources PERMANENTLY reserved for kernel use */
-#define GRU_NUM_KERNEL_CBR      1
+#define GRU_NUM_KERNEL_CBR	1
 #define GRU_NUM_KERNEL_DSR_BYTES 256
+#define GRU_NUM_KERNEL_DSR_CL	(GRU_NUM_KERNEL_DSR_BYTES /		\
+					GRU_CACHE_LINE_BYTES)
 #define KERNEL_CTXNUM           15
 
 /* GRU instruction attributes for all instructions */
@@ -94,7 +96,6 @@ struct message_header {
 	char	fill;
 };
 
-#define QLINES(mq)	((mq) + offsetof(struct message_queue, qlines))
 #define HSTATUS(mq, h)	((mq) + offsetof(struct message_queue, hstatus[h]))
 
 static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr)
@@ -122,7 +123,7 @@ int gru_get_cb_exception_detail(void *cb,
 	struct gru_control_block_extended *cbe;
 
 	cbe = get_cbe(GRUBASE(cb), get_cb_number(cb));
-	prefetchw(cbe);         /* Harmless on hardware, required for emulator */
+	prefetchw(cbe);	/* Harmless on hardware, required for emulator */
 	excdet->opc = cbe->opccpy;
 	excdet->exopc = cbe->exopccpy;
 	excdet->ecause = cbe->ecause;
@@ -250,7 +251,8 @@ static inline void restore_present2(void *p, int val)
  * Create a message queue.
  * 	qlines - message queue size in cache lines. Includes 2-line header.
  */
-int gru_create_message_queue(void *p, unsigned int bytes)
+int gru_create_message_queue(struct gru_message_queue_desc *mqd,
+		void *p, unsigned int bytes, int nasid, int vector, int apicid)
 {
 	struct message_queue *mq = p;
 	unsigned int qlines;
@@ -265,6 +267,12 @@ int gru_create_message_queue(void *p, unsigned int bytes)
 	mq->hstatus[0] = 0;
 	mq->hstatus[1] = 1;
 	mq->head = gru_mesq_head(2, qlines / 2 + 1);
+	mqd->mq = mq;
+	mqd->mq_gpa = uv_gpa(mq);
+	mqd->qlines = qlines;
+	mqd->interrupt_pnode = UV_NASID_TO_PNODE(nasid);
+	mqd->interrupt_vector = vector;
+	mqd->interrupt_apicid = apicid;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(gru_create_message_queue);
@@ -277,8 +285,8 @@ EXPORT_SYMBOL_GPL(gru_create_message_queue);
  *		-1 - if mesq sent successfully but queue not full
  *		>0 - unexpected error. MQE_xxx returned
  */
-static int send_noop_message(void *cb,
-				unsigned long mq, void *mesg)
+static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd,
+				void *mesg)
 {
 	const struct message_header noop_header = {
 					.present = MQS_NOOP, .lines = 1};
@@ -289,7 +297,7 @@ static int send_noop_message(void *cb,
 	STAT(mesq_noop);
 	save_mhdr = *mhdr;
 	*mhdr = noop_header;
-	gru_mesq(cb, mq, gru_get_tri(mhdr), 1, IMA);
+	gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA);
 	ret = gru_wait(cb);
 
 	if (ret) {
@@ -313,7 +321,7 @@ static int send_noop_message(void *cb,
 			break;
 		case CBSS_PUT_NACKED:
 			STAT(mesq_noop_put_nacked);
-			m = mq + (gru_get_amo_value_head(cb) << 6);
+			m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
 			gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1,
 						IMA);
 			if (gru_wait(cb) == CBS_IDLE)
@@ -333,30 +341,20 @@ static int send_noop_message(void *cb,
 /*
  * Handle a gru_mesq full.
  */
-static int send_message_queue_full(void *cb,
-			   unsigned long mq, void *mesg, int lines)
+static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd,
+				void *mesg, int lines)
 {
 	union gru_mesqhead mqh;
 	unsigned int limit, head;
 	unsigned long avalue;
-	int half, qlines, save;
+	int half, qlines;
 
 	/* Determine if switching to first/second half of q */
 	avalue = gru_get_amo_value(cb);
 	head = gru_get_amo_value_head(cb);
 	limit = gru_get_amo_value_limit(cb);
 
-	/*
-	 * Fetch "qlines" from the queue header. Since the queue may be
-	 * in memory that can't be accessed using socket addresses, use
-	 * the GRU to access the data. Use DSR space from the message.
-	 */
-	save = *(int *)mesg;
-	gru_vload(cb, QLINES(mq), gru_get_tri(mesg), XTYPE_W, 1, 1, IMA);
-	if (gru_wait(cb) != CBS_IDLE)
-		goto cberr;
-	qlines = *(int *)mesg;
-	*(int *)mesg = save;
+	qlines = mqd->qlines;
 	half = (limit != qlines);
 
 	if (half)
@@ -365,7 +363,7 @@ static int send_message_queue_full(void *cb,
 		mqh = gru_mesq_head(2, qlines / 2 + 1);
 
 	/* Try to get lock for switching head pointer */
-	gru_gamir(cb, EOP_IR_CLR, HSTATUS(mq, half), XTYPE_DW, IMA);
+	gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA);
 	if (gru_wait(cb) != CBS_IDLE)
 		goto cberr;
 	if (!gru_get_amo_value(cb)) {
@@ -375,8 +373,8 @@ static int send_message_queue_full(void *cb,
 
 	/* Got the lock. Send optional NOP if queue not full, */
 	if (head != limit) {
-		if (send_noop_message(cb, mq, mesg)) {
-			gru_gamir(cb, EOP_IR_INC, HSTATUS(mq, half),
+		if (send_noop_message(cb, mqd, mesg)) {
+			gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half),
 					XTYPE_DW, IMA);
 			if (gru_wait(cb) != CBS_IDLE)
 				goto cberr;
@@ -387,14 +385,16 @@ static int send_message_queue_full(void *cb,
 	}
 
 	/* Then flip queuehead to other half of queue. */
-	gru_gamer(cb, EOP_ERR_CSWAP, mq, XTYPE_DW, mqh.val, avalue, IMA);
+	gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue,
+							IMA);
 	if (gru_wait(cb) != CBS_IDLE)
 		goto cberr;
 
 	/* If not successfully in swapping queue head, clear the hstatus lock */
 	if (gru_get_amo_value(cb) != avalue) {
 		STAT(mesq_qf_switch_head_failed);
-		gru_gamir(cb, EOP_IR_INC, HSTATUS(mq, half), XTYPE_DW, IMA);
+		gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW,
+							IMA);
 		if (gru_wait(cb) != CBS_IDLE)
 			goto cberr;
 	}
@@ -404,15 +404,25 @@ cberr:
 	return MQE_UNEXPECTED_CB_ERR;
 }
 
+/*
+ * Send a cross-partition interrupt to the SSI that contains the target
+ * message queue. Normally, the interrupt is automatically delivered by hardware
+ * but some error conditions require explicit delivery.
+ */
+static void send_message_queue_interrupt(struct gru_message_queue_desc *mqd)
+{
+	if (mqd->interrupt_vector)
+		uv_hub_send_ipi(mqd->interrupt_pnode, mqd->interrupt_apicid,
+				mqd->interrupt_vector);
+}
+
 
 /*
  * Handle a gru_mesq failure. Some of these failures are software recoverable
  * or retryable.
  */
-static int send_message_failure(void *cb,
-				unsigned long mq,
-				void *mesg,
-				int lines)
+static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd,
+				void *mesg, int lines)
 {
 	int substatus, ret = 0;
 	unsigned long m;
@@ -429,7 +439,7 @@ static int send_message_failure(void *cb,
 		break;
 	case CBSS_QLIMIT_REACHED:
 		STAT(mesq_send_qlimit_reached);
-		ret = send_message_queue_full(cb, mq, mesg, lines);
+		ret = send_message_queue_full(cb, mqd, mesg, lines);
 		break;
 	case CBSS_AMO_NACKED:
 		STAT(mesq_send_amo_nacked);
@@ -437,12 +447,14 @@ static int send_message_failure(void *cb,
 		break;
 	case CBSS_PUT_NACKED:
 		STAT(mesq_send_put_nacked);
-		m =mq + (gru_get_amo_value_head(cb) << 6);
+		m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6);
 		gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA);
-		if (gru_wait(cb) == CBS_IDLE)
+		if (gru_wait(cb) == CBS_IDLE) {
 			ret = MQE_OK;
-		else
+			send_message_queue_interrupt(mqd);
+		} else {
 			ret = MQE_UNEXPECTED_CB_ERR;
+		}
 		break;
 	default:
 		BUG();
@@ -452,12 +464,12 @@ static int send_message_failure(void *cb,
 
 /*
  * Send a message to a message queue
- * 	cb	GRU control block to use to send message
- * 	mq	message queue
+ * 	mqd	message queue descriptor
  * 	mesg	message. ust be vaddr within a GSEG
  * 	bytes	message size (<= 2 CL)
  */
-int gru_send_message_gpa(unsigned long mq, void *mesg, unsigned int bytes)
+int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg,
+				unsigned int bytes)
 {
 	struct message_header *mhdr;
 	void *cb;
@@ -481,10 +493,10 @@ int gru_send_message_gpa(unsigned long mq, void *mesg, unsigned int bytes)
 
 	do {
 		ret = MQE_OK;
-		gru_mesq(cb, mq, gru_get_tri(mhdr), clines, IMA);
+		gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA);
 		istatus = gru_wait(cb);
 		if (istatus != CBS_IDLE)
-			ret = send_message_failure(cb, mq, dsr, clines);
+			ret = send_message_failure(cb, mqd, dsr, clines);
 	} while (ret == MQIE_AGAIN);
 	gru_free_cpu_resources(cb, dsr);
 
@@ -497,9 +509,9 @@ EXPORT_SYMBOL_GPL(gru_send_message_gpa);
 /*
  * Advance the receive pointer for the queue to the next message.
  */
-void gru_free_message(void *rmq, void *mesg)
+void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg)
 {
-	struct message_queue *mq = rmq;
+	struct message_queue *mq = mqd->mq;
 	struct message_header *mhdr = mq->next;
 	void *next, *pnext;
 	int half = -1;
@@ -529,16 +541,16 @@ EXPORT_SYMBOL_GPL(gru_free_message);
  * present. User must call next_message() to move to next message.
  * 	rmq	message queue
  */
-void *gru_get_next_message(void *rmq)
+void *gru_get_next_message(struct gru_message_queue_desc *mqd)
 {
-	struct message_queue *mq = rmq;
+	struct message_queue *mq = mqd->mq;
 	struct message_header *mhdr = mq->next;
 	int present = mhdr->present;
 
 	/* skip NOOP messages */
 	STAT(mesq_receive);
 	while (present == MQS_NOOP) {
-		gru_free_message(rmq, mhdr);
+		gru_free_message(mqd, mhdr);
 		mhdr = mq->next;
 		present = mhdr->present;
 	}
@@ -576,7 +588,7 @@ int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa,
 	if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr))
 		return MQE_BUG_NO_RESOURCES;
 	gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr),
-		  XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_BYTES, IMA);
+		  XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA);
 	ret = gru_wait(cb);
 	gru_free_cpu_resources(cb, dsr);
 	return ret;
@@ -611,7 +623,7 @@ static int quicktest(struct gru_state *gru)
 
 	if (word0 != word1 || word0 != MAGIC) {
 		printk
-		    ("GRU quicktest err: gru %d, found 0x%lx, expected 0x%lx\n",
+		    ("GRU quicktest err: gid %d, found 0x%lx, expected 0x%lx\n",
 		     gru->gs_gid, word1, MAGIC);
 		BUG();		/* ZZZ should not be fatal */
 	}
@@ -660,15 +672,15 @@ int gru_kservices_init(struct gru_state *gru)
 	cch->tlb_int_enable = 0;
 	cch->tfm_done_bit_enable = 0;
 	cch->unmap_enable = 1;
-	err = cch_allocate(cch, 0, cbr_map, dsr_map);
+	err = cch_allocate(cch, 0, 0, cbr_map, dsr_map);
 	if (err) {
 		gru_dbg(grudev,
-			"Unable to allocate kernel CCH: gru %d, err %d\n",
+			"Unable to allocate kernel CCH: gid %d, err %d\n",
 			gru->gs_gid, err);
 		BUG();
 	}
 	if (cch_start(cch)) {
-		gru_dbg(grudev, "Unable to start kernel CCH: gru %d, err %d\n",
+		gru_dbg(grudev, "Unable to start kernel CCH: gid %d, err %d\n",
 			gru->gs_gid, err);
 		BUG();
 	}
@@ -678,3 +690,22 @@ int gru_kservices_init(struct gru_state *gru)
 		quicktest(gru);
 	return 0;
 }
+
+void gru_kservices_exit(struct gru_state *gru)
+{
+	struct gru_context_configuration_handle *cch;
+	struct gru_blade_state *bs;
+
+	bs = gru->gs_blade;
+	if (gru != &bs->bs_grus[1])
+		return;
+
+	cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM);
+	lock_cch_handle(cch);
+	if (cch_interrupt_sync(cch))
+		BUG();
+	if (cch_deallocate(cch))
+		BUG();
+	unlock_cch_handle(cch);
+}
+
diff --git a/drivers/misc/sgi-gru/grukservices.h b/drivers/misc/sgi-gru/grukservices.h
index eb17e0a3ac61..747ed315d56f 100644
--- a/drivers/misc/sgi-gru/grukservices.h
+++ b/drivers/misc/sgi-gru/grukservices.h
@@ -41,6 +41,15 @@
  * 	- gru_create_message_queue() needs interrupt vector info
  */
 
+struct gru_message_queue_desc {
+	void		*mq;			/* message queue vaddress */
+	unsigned long	mq_gpa;			/* global address of mq */
+	int		qlines;			/* queue size in CL */
+	int		interrupt_vector;	/* interrupt vector */
+	int		interrupt_pnode;	/* pnode for interrupt */
+	int		interrupt_apicid;	/* lapicid for interrupt */
+};
+
 /*
  * Initialize a user allocated chunk of memory to be used as
  * a message queue. The caller must ensure that the queue is
@@ -51,14 +60,19 @@
  * to manage the queue.
  *
  *  Input:
- * 	p	pointer to user allocated memory.
+ * 	mqd	pointer to message queue descriptor
+ * 	p	pointer to user allocated mesq memory.
  * 	bytes	size of message queue in bytes
+ * 	vector	interrupt vector (zero if no interrupts)
+ * 	nasid	nasid of blade where interrupt is delivered
+ * 	apicid	apicid of cpu for interrupt
  *
  *  Errors:
  *  	0	OK
  *  	>0	error
  */
-extern int gru_create_message_queue(void *p, unsigned int bytes);
+extern int gru_create_message_queue(struct gru_message_queue_desc *mqd,
+		void *p, unsigned int bytes, int nasid, int vector, int apicid);
 
 /*
  * Send a message to a message queue.
@@ -68,7 +82,7 @@ extern int gru_create_message_queue(void *p, unsigned int bytes);
  *
  *
  *   Input:
- * 	xmq	message queue - must be a UV global physical address
+ * 	mqd	pointer to message queue descriptor
  * 	mesg	pointer to message. Must be 64-bit aligned
  * 	bytes	size of message in bytes
  *
@@ -77,8 +91,8 @@ extern int gru_create_message_queue(void *p, unsigned int bytes);
  *     >0	Send failure - see error codes below
  *
  */
-extern int gru_send_message_gpa(unsigned long mq_gpa, void *mesg,
-						unsigned int bytes);
+extern int gru_send_message_gpa(struct gru_message_queue_desc *mqd,
+			void *mesg, unsigned int bytes);
 
 /* Status values for gru_send_message() */
 #define MQE_OK			0	/* message sent successfully */
@@ -94,10 +108,11 @@ extern int gru_send_message_gpa(unsigned long mq_gpa, void *mesg,
  * API extensions may allow for out-of-order freeing.
  *
  *   Input
- * 	mq	message queue
+ * 	mqd	pointer to message queue descriptor
  * 	mesq	message being freed
  */
-extern void gru_free_message(void *mq, void *mesq);
+extern void gru_free_message(struct gru_message_queue_desc *mqd,
+			     void *mesq);
 
 /*
  * Get next message from message queue. Returns pointer to
@@ -106,13 +121,13 @@ extern void gru_free_message(void *mq, void *mesq);
  * in order to move the queue pointers to next message.
  *
  *   Input
- * 	mq	message queue
+ * 	mqd	pointer to message queue descriptor
  *
  *   Output:
  *	p	pointer to message
  *	NULL	no message available
  */
-extern void *gru_get_next_message(void *mq);
+extern void *gru_get_next_message(struct gru_message_queue_desc *mqd);
 
 
 /*
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
index 3d2fc216bae5..ec3f7a17d221 100644
--- a/drivers/misc/sgi-gru/grumain.c
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -76,10 +76,9 @@ int gru_cpu_fault_map_id(void)
 /* Hit the asid limit. Start over */
 static int gru_wrap_asid(struct gru_state *gru)
 {
-	gru_dbg(grudev, "gru %p\n", gru);
+	gru_dbg(grudev, "gid %d\n", gru->gs_gid);
 	STAT(asid_wrap);
 	gru->gs_asid_gen++;
-	gru_flush_all_tlb(gru);
 	return MIN_ASID;
 }
 
@@ -88,19 +87,21 @@ static int gru_reset_asid_limit(struct gru_state *gru, int asid)
 {
 	int i, gid, inuse_asid, limit;
 
-	gru_dbg(grudev, "gru %p, asid 0x%x\n", gru, asid);
+	gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid);
 	STAT(asid_next);
 	limit = MAX_ASID;
 	if (asid >= limit)
 		asid = gru_wrap_asid(gru);
+	gru_flush_all_tlb(gru);
 	gid = gru->gs_gid;
 again:
 	for (i = 0; i < GRU_NUM_CCH; i++) {
 		if (!gru->gs_gts[i])
 			continue;
 		inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid;
-		gru_dbg(grudev, "gru %p, inuse_asid 0x%x, cxtnum %d, gts %p\n",
-			gru, inuse_asid, i, gru->gs_gts[i]);
+		gru_dbg(grudev, "gid %d, gts %p, gms %p, inuse 0x%x, cxt %d\n",
+			gru->gs_gid, gru->gs_gts[i], gru->gs_gts[i]->ts_gms,
+			inuse_asid, i);
 		if (inuse_asid == asid) {
 			asid += ASID_INC;
 			if (asid >= limit) {
@@ -120,8 +121,8 @@ again:
 	}
 	gru->gs_asid_limit = limit;
 	gru->gs_asid = asid;
-	gru_dbg(grudev, "gru %p, new asid 0x%x, new_limit 0x%x\n", gru, asid,
-		limit);
+	gru_dbg(grudev, "gid %d, new asid 0x%x, new_limit 0x%x\n", gru->gs_gid,
+					asid, limit);
 	return asid;
 }
 
@@ -130,14 +131,12 @@ static int gru_assign_asid(struct gru_state *gru)
 {
 	int asid;
 
-	spin_lock(&gru->gs_asid_lock);
 	gru->gs_asid += ASID_INC;
 	asid = gru->gs_asid;
 	if (asid >= gru->gs_asid_limit)
 		asid = gru_reset_asid_limit(gru, asid);
-	spin_unlock(&gru->gs_asid_lock);
 
-	gru_dbg(grudev, "gru %p, asid 0x%x\n", gru, asid);
+	gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid);
 	return asid;
 }
 
@@ -215,17 +214,20 @@ static int check_gru_resources(struct gru_state *gru, int cbr_au_count,
  * TLB manangment requires tracking all GRU chiplets that have loaded a GSEG
  * context.
  */
-static int gru_load_mm_tracker(struct gru_state *gru, struct gru_mm_struct *gms,
-			       int ctxnum)
+static int gru_load_mm_tracker(struct gru_state *gru,
+					struct gru_thread_state *gts)
 {
+	struct gru_mm_struct *gms = gts->ts_gms;
 	struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid];
-	unsigned short ctxbitmap = (1 << ctxnum);
+	unsigned short ctxbitmap = (1 << gts->ts_ctxnum);
 	int asid;
 
 	spin_lock(&gms->ms_asid_lock);
 	asid = asids->mt_asid;
 
-	if (asid == 0 || asids->mt_asid_gen != gru->gs_asid_gen) {
+	spin_lock(&gru->gs_asid_lock);
+	if (asid == 0 || (asids->mt_ctxbitmap == 0 && asids->mt_asid_gen !=
+			  gru->gs_asid_gen)) {
 		asid = gru_assign_asid(gru);
 		asids->mt_asid = asid;
 		asids->mt_asid_gen = gru->gs_asid_gen;
@@ -233,6 +235,7 @@ static int gru_load_mm_tracker(struct gru_state *gru, struct gru_mm_struct *gms,
 	} else {
 		STAT(asid_reuse);
 	}
+	spin_unlock(&gru->gs_asid_lock);
 
 	BUG_ON(asids->mt_ctxbitmap & ctxbitmap);
 	asids->mt_ctxbitmap |= ctxbitmap;
@@ -241,24 +244,28 @@ static int gru_load_mm_tracker(struct gru_state *gru, struct gru_mm_struct *gms,
 	spin_unlock(&gms->ms_asid_lock);
 
 	gru_dbg(grudev,
-		"gru %x, gms %p, ctxnum 0x%d, asid 0x%x, asidmap 0x%lx\n",
-		gru->gs_gid, gms, ctxnum, asid, gms->ms_asidmap[0]);
+		"gid %d, gts %p, gms %p, ctxnum %d, asid 0x%x, asidmap 0x%lx\n",
+		gru->gs_gid, gts, gms, gts->ts_ctxnum, asid,
+		gms->ms_asidmap[0]);
 	return asid;
 }
 
 static void gru_unload_mm_tracker(struct gru_state *gru,
-				  struct gru_mm_struct *gms, int ctxnum)
+					struct gru_thread_state *gts)
 {
+	struct gru_mm_struct *gms = gts->ts_gms;
 	struct gru_mm_tracker *asids;
 	unsigned short ctxbitmap;
 
 	asids = &gms->ms_asids[gru->gs_gid];
-	ctxbitmap = (1 << ctxnum);
+	ctxbitmap = (1 << gts->ts_ctxnum);
 	spin_lock(&gms->ms_asid_lock);
+	spin_lock(&gru->gs_asid_lock);	/* nests inside ms_asid_lock */
 	BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap);
 	asids->mt_ctxbitmap ^= ctxbitmap;
-	gru_dbg(grudev, "gru %x, gms %p, ctxnum 0x%d, asidmap 0x%lx\n",
-		gru->gs_gid, gms, ctxnum, gms->ms_asidmap[0]);
+	gru_dbg(grudev, "gid %d, gts %p, gms %p, ctxnum %d, asidmap 0x%lx\n",
+		gru->gs_gid, gts, gms, gts->ts_ctxnum, gms->ms_asidmap[0]);
+	spin_unlock(&gru->gs_asid_lock);
 	spin_unlock(&gms->ms_asid_lock);
 }
 
@@ -319,6 +326,7 @@ static struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
 	gts->ts_vma = vma;
 	gts->ts_tlb_int_select = -1;
 	gts->ts_gms = gru_register_mmu_notifier();
+	gts->ts_sizeavail = GRU_SIZEAVAIL(PAGE_SHIFT);
 	if (!gts->ts_gms)
 		goto err;
 
@@ -399,7 +407,7 @@ static void gru_free_gru_context(struct gru_thread_state *gts)
 	struct gru_state *gru;
 
 	gru = gts->ts_gru;
-	gru_dbg(grudev, "gts %p, gru %p\n", gts, gru);
+	gru_dbg(grudev, "gts %p, gid %d\n", gts, gru->gs_gid);
 
 	spin_lock(&gru->gs_lock);
 	gru->gs_gts[gts->ts_ctxnum] = NULL;
@@ -408,6 +416,7 @@ static void gru_free_gru_context(struct gru_thread_state *gts)
 	__clear_bit(gts->ts_ctxnum, &gru->gs_context_map);
 	gts->ts_ctxnum = NULLCTX;
 	gts->ts_gru = NULL;
+	gts->ts_blade = -1;
 	spin_unlock(&gru->gs_lock);
 
 	gts_drop(gts);
@@ -432,8 +441,8 @@ static inline long gru_copy_handle(void *d, void *s)
 	return GRU_HANDLE_BYTES;
 }
 
-static void gru_prefetch_context(void *gseg, void *cb, void *cbe, unsigned long cbrmap,
-				unsigned long length)
+static void gru_prefetch_context(void *gseg, void *cb, void *cbe,
+				unsigned long cbrmap, unsigned long length)
 {
 	int i, scr;
 
@@ -500,12 +509,12 @@ void gru_unload_context(struct gru_thread_state *gts, int savestate)
 	zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
 	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
 
+	gru_dbg(grudev, "gts %p\n", gts);
 	lock_cch_handle(cch);
 	if (cch_interrupt_sync(cch))
 		BUG();
-	gru_dbg(grudev, "gts %p\n", gts);
 
-	gru_unload_mm_tracker(gru, gts->ts_gms, gts->ts_ctxnum);
+	gru_unload_mm_tracker(gru, gts);
 	if (savestate)
 		gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr,
 					ctxnum, gts->ts_cbr_map,
@@ -534,7 +543,7 @@ static void gru_load_context(struct gru_thread_state *gts)
 	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);
 
 	lock_cch_handle(cch);
-	asid = gru_load_mm_tracker(gru, gts->ts_gms, gts->ts_ctxnum);
+	asid = gru_load_mm_tracker(gru, gts);
 	cch->tfm_fault_bit_enable =
 	    (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
 	     || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
@@ -544,7 +553,8 @@ static void gru_load_context(struct gru_thread_state *gts)
 		cch->tlb_int_select = gts->ts_tlb_int_select;
 	}
 	cch->tfm_done_bit_enable = 0;
-	err = cch_allocate(cch, asid, gts->ts_cbr_map, gts->ts_dsr_map);
+	err = cch_allocate(cch, asid, gts->ts_sizeavail, gts->ts_cbr_map,
+				gts->ts_dsr_map);
 	if (err) {
 		gru_dbg(grudev,
 			"err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n",
@@ -565,11 +575,12 @@ static void gru_load_context(struct gru_thread_state *gts)
 /*
  * Update fields in an active CCH:
  * 	- retarget interrupts on local blade
+ * 	- update sizeavail mask
  * 	- force a delayed context unload by clearing the CCH asids. This
  * 	  forces TLB misses for new GRU instructions. The context is unloaded
  * 	  when the next TLB miss occurs.
  */
-static int gru_update_cch(struct gru_thread_state *gts, int int_select)
+int gru_update_cch(struct gru_thread_state *gts, int force_unload)
 {
 	struct gru_context_configuration_handle *cch;
 	struct gru_state *gru = gts->ts_gru;
@@ -583,9 +594,11 @@ static int gru_update_cch(struct gru_thread_state *gts, int int_select)
 			goto exit;
 		if (cch_interrupt(cch))
 			BUG();
-		if (int_select >= 0) {
-			gts->ts_tlb_int_select = int_select;
-			cch->tlb_int_select = int_select;
+		if (!force_unload) {
+			for (i = 0; i < 8; i++)
+				cch->sizeavail[i] = gts->ts_sizeavail;
+			gts->ts_tlb_int_select = gru_cpu_fault_map_id();
+			cch->tlb_int_select = gru_cpu_fault_map_id();
 		} else {
 			for (i = 0; i < 8; i++)
 				cch->asid[i] = 0;
@@ -617,7 +630,7 @@ static int gru_retarget_intr(struct gru_thread_state *gts)
 
 	gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select,
 		gru_cpu_fault_map_id());
-	return gru_update_cch(gts, gru_cpu_fault_map_id());
+	return gru_update_cch(gts, 0);
 }
 
 
@@ -688,7 +701,7 @@ static void gru_steal_context(struct gru_thread_state *gts)
 		STAT(steal_context_failed);
 	}
 	gru_dbg(grudev,
-		"stole gru %x, ctxnum %d from gts %p. Need cb %d, ds %d;"
+		"stole gid %d, ctxnum %d from gts %p. Need cb %d, ds %d;"
 		" avail cb %ld, ds %ld\n",
 		gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map),
 		hweight64(gru->gs_dsr_map));
@@ -727,6 +740,7 @@ again:
 		}
 		reserve_gru_resources(gru, gts);
 		gts->ts_gru = gru;
+		gts->ts_blade = gru->gs_blade_id;
 		gts->ts_ctxnum =
 		    find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);
 		BUG_ON(gts->ts_ctxnum == GRU_NUM_CCH);
@@ -737,7 +751,7 @@ again:
 
 		STAT(assign_context);
 		gru_dbg(grudev,
-			"gseg %p, gts %p, gru %x, ctx %d, cbr %d, dsr %d\n",
+			"gseg %p, gts %p, gid %d, ctx %d, cbr %d, dsr %d\n",
 			gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts,
 			gts->ts_gru->gs_gid, gts->ts_ctxnum,
 			gts->ts_cbr_au_count, gts->ts_dsr_au_count);
@@ -773,8 +787,8 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		return VM_FAULT_SIGBUS;
 
 again:
-	preempt_disable();
 	mutex_lock(&gts->ts_ctxlock);
+	preempt_disable();
 	if (gts->ts_gru) {
 		if (gts->ts_gru->gs_blade_id != uv_numa_blade_id()) {
 			STAT(migrated_nopfn_unload);
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c
index 73b0ca061bb5..ee74821b171c 100644
--- a/drivers/misc/sgi-gru/gruprocfs.c
+++ b/drivers/misc/sgi-gru/gruprocfs.c
@@ -62,7 +62,9 @@ static int statistics_show(struct seq_file *s, void *p)
 	printstat(s, asid_wrap);
 	printstat(s, asid_reuse);
 	printstat(s, intr);
+	printstat(s, intr_mm_lock_failed);
 	printstat(s, call_os);
+	printstat(s, call_os_offnode_reference);
 	printstat(s, call_os_check_for_bug);
 	printstat(s, call_os_wait_queue);
 	printstat(s, user_flush_tlb);
@@ -120,6 +122,30 @@ static ssize_t statistics_write(struct file *file, const char __user *userbuf,
 	return count;
 }
 
+static int mcs_statistics_show(struct seq_file *s, void *p)
+{
+	int op;			/* indexes id[] and mcs_op_statistics[] */
+	unsigned long total, count, max;
+	static char *id[] = {"cch_allocate", "cch_start", "cch_interrupt",
+		"cch_interrupt_sync", "cch_deallocate", "tgh_invalidate"};
+
+	for (op = 0; op < mcsop_last; op++) {
+		count = atomic_long_read(&mcs_op_statistics[op].count);
+		total = atomic_long_read(&mcs_op_statistics[op].total);
+		max = mcs_op_statistics[op].max;
+		seq_printf(s, "%-20s%12lu%12lu%12lu\n", id[op], count,
+			   count ? total / count : 0, max);
+	}
+	return 0;
+}
+
+static ssize_t mcs_statistics_write(struct file *file,
+			const char __user *userbuf, size_t count, loff_t *data)
+{
+	memset(mcs_op_statistics, 0, sizeof(mcs_op_statistics));
+	return count;
+}
+
 static int options_show(struct seq_file *s, void *p)
 {
 	seq_printf(s, "0x%lx\n", gru_options);
@@ -135,6 +161,7 @@ static ssize_t options_write(struct file *file, const char __user *userbuf,
 	if (copy_from_user
 	    (buf, userbuf, count < sizeof(buf) ? count : sizeof(buf)))
 		return -EFAULT;
+	buf[count ? (count < sizeof(buf) ? count : sizeof(buf)) - 1 : 0] = '\0';
 	if (!strict_strtoul(buf, 10, &val))
 		gru_options = val;
 
@@ -199,7 +226,7 @@ static void seq_stop(struct seq_file *file, void *data)
 
 static void *seq_start(struct seq_file *file, loff_t *gid)
 {
-	if (*gid < GRU_MAX_GRUS)
+	if (*gid < gru_max_gids)
 		return gid;
 	return NULL;
 }
@@ -207,7 +234,7 @@ static void *seq_start(struct seq_file *file, loff_t *gid)
 static void *seq_next(struct seq_file *file, void *data, loff_t *gid)
 {
 	(*gid)++;
-	if (*gid < GRU_MAX_GRUS)
+	if (*gid < gru_max_gids)
 		return gid;
 	return NULL;
 }
@@ -231,6 +258,11 @@ static int statistics_open(struct inode *inode, struct file *file)
 	return single_open(file, statistics_show, NULL);
 }
 
+static int mcs_statistics_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mcs_statistics_show, NULL);
+}
+
 static int options_open(struct inode *inode, struct file *file)
 {
 	return single_open(file, options_show, NULL);
@@ -255,6 +287,14 @@ static const struct file_operations statistics_fops = {
 	.release 	= single_release,
 };
 
+static const struct file_operations mcs_statistics_fops = {
+	.open 		= mcs_statistics_open,
+	.read 		= seq_read,
+	.write 		= mcs_statistics_write,
+	.llseek 	= seq_lseek,
+	.release 	= single_release,
+};
+
 static const struct file_operations options_fops = {
 	.open 		= options_open,
 	.read 		= seq_read,
@@ -283,6 +323,7 @@ static struct proc_entry {
 	struct proc_dir_entry *entry;
 } proc_files[] = {
 	{"statistics", 0644, &statistics_fops},
+	{"mcs_statistics", 0644, &mcs_statistics_fops},
 	{"debug_options", 0644, &options_fops},
 	{"cch_status", 0444, &cch_fops},
 	{"gru_status", 0444, &gru_fops},
diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h
index a78f70deeb59..bf1eeb7553ed 100644
--- a/drivers/misc/sgi-gru/grutables.h
+++ b/drivers/misc/sgi-gru/grutables.h
@@ -153,6 +153,7 @@
 extern struct gru_stats_s gru_stats;
 extern struct gru_blade_state *gru_base[];
 extern unsigned long gru_start_paddr, gru_end_paddr;
+extern unsigned int gru_max_gids;
 
 #define GRU_MAX_BLADES		MAX_NUMNODES
 #define GRU_MAX_GRUS		(GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE)
@@ -184,7 +185,9 @@ struct gru_stats_s {
 	atomic_long_t asid_wrap;
 	atomic_long_t asid_reuse;
 	atomic_long_t intr;
+	atomic_long_t intr_mm_lock_failed;
 	atomic_long_t call_os;
+	atomic_long_t call_os_offnode_reference;
 	atomic_long_t call_os_check_for_bug;
 	atomic_long_t call_os_wait_queue;
 	atomic_long_t user_flush_tlb;
@@ -237,6 +240,17 @@ struct gru_stats_s {
 
 };
 
+enum mcs_op {cchop_allocate, cchop_start, cchop_interrupt, cchop_interrupt_sync,
+	cchop_deallocate, tghop_invalidate, mcsop_last};
+
+struct mcs_op_statistic {
+	atomic_long_t	count;
+	atomic_long_t	total;
+	unsigned long	max;
+};
+
+extern struct mcs_op_statistic mcs_op_statistics[mcsop_last];
+
 #define OPT_DPRINT	1
 #define OPT_STATS	2
 #define GRU_QUICKLOOK	4
@@ -278,13 +292,12 @@ struct gru_stats_s {
 /* Generate a GRU asid value from a GRU base asid & a virtual address. */
 #if defined CONFIG_IA64
 #define VADDR_HI_BIT		64
-#define GRUREGION(addr)		((addr) >> (VADDR_HI_BIT - 3) & 3)
 #elif defined CONFIG_X86_64
 #define VADDR_HI_BIT		48
-#define GRUREGION(addr)		(0)		/* ZZZ could do better */
 #else
 #error "Unsupported architecture"
 #endif
+#define GRUREGION(addr)		((addr) >> (VADDR_HI_BIT - 3) & 3)
 #define GRUASID(asid, addr)	((asid) + GRUREGION(addr))
 
 /*------------------------------------------------------------------------------
@@ -297,12 +310,12 @@ struct gru_state;
  * This structure is pointed to from the mmstruct via the notifier pointer.
  * There is one of these per address space.
  */
-struct gru_mm_tracker {
-	unsigned int		mt_asid_gen;	/* ASID wrap count */
-	int			mt_asid;	/* current base ASID for gru */
-	unsigned short		mt_ctxbitmap;	/* bitmap of contexts using
+struct gru_mm_tracker {				/* pack to reduce size */
+	unsigned int		mt_asid_gen:24;	/* ASID wrap count */
+	unsigned int		mt_asid:24;	/* current base ASID for gru */
+	unsigned short		mt_ctxbitmap:16;/* bitmap of contexts using
 						   asid */
-};
+} __attribute__ ((packed));
 
 struct gru_mm_struct {
 	struct mmu_notifier	ms_notifier;
@@ -348,6 +361,7 @@ struct gru_thread_state {
 	long			ts_user_options;/* misc user option flags */
 	pid_t			ts_tgid_owner;	/* task that is using the
 						   context - for migration */
+	unsigned short		ts_sizeavail;	/* Pagesizes in use */
 	int			ts_tsid;	/* thread that owns the
 						   structure */
 	int			ts_tlb_int_select;/* target cpu if interrupts
@@ -359,6 +373,9 @@ struct gru_thread_state {
 						   required for contest */
 	unsigned char		ts_cbr_au_count;/* Number of CBR resources
 						   required for contest */
+	char			ts_blade;	/* If >= 0, migrate context if
+						   ref from different blade */
+	char			ts_force_cch_reload;
 	char			ts_force_unload;/* force context to be unloaded
 						   after migration */
 	char			ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each
@@ -392,12 +409,12 @@ struct gru_state {
 							   gru segments (64) */
 	void			*gs_gru_base_vaddr;	/* Virtual address of
 							   gru segments (64) */
-	unsigned char		gs_gid;			/* unique GRU number */
+	unsigned short		gs_gid;			/* unique GRU number */
+	unsigned short		gs_blade_id;		/* blade of GRU */
 	unsigned char		gs_tgh_local_shift;	/* used to pick TGH for
 							   local flush */
 	unsigned char		gs_tgh_first_remote;	/* starting TGH# for
 							   remote flush */
-	unsigned short		gs_blade_id;		/* blade of GRU */
 	spinlock_t		gs_asid_lock;		/* lock used for
 							   assigning asids */
 	spinlock_t		gs_lock;		/* lock used for
@@ -492,6 +509,10 @@ struct gru_blade_state {
 			(i) < GRU_CHIPLETS_PER_BLADE;			\
 			(i)++, (gru)++)
 
+/* Scan all GRUs */
+#define foreach_gid(gid)						\
+	for ((gid) = 0; (gid) < gru_max_gids; (gid)++)
+
 /* Scan all active GTSs on a gru. Note: must hold ss_lock to use this macro. */
 #define for_each_gts_on_gru(gts, gru, ctxnum)				\
 	for ((ctxnum) = 0; (ctxnum) < GRU_NUM_CCH; (ctxnum)++)		\
@@ -578,9 +599,11 @@ extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct
 extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct
 				*vma, int tsid);
 extern void gru_unload_context(struct gru_thread_state *gts, int savestate);
+extern int gru_update_cch(struct gru_thread_state *gts, int force_unload);
 extern void gts_drop(struct gru_thread_state *gts);
 extern void gru_tgh_flush_init(struct gru_state *gru);
 extern int gru_kservices_init(struct gru_state *gru);
+extern void gru_kservices_exit(struct gru_state *gru);
 extern irqreturn_t gru_intr(int irq, void *dev_id);
 extern int gru_handle_user_call_os(unsigned long address);
 extern int gru_user_flush_tlb(unsigned long arg);
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
index c84496a77691..1d125091f5e7 100644
--- a/drivers/misc/sgi-gru/grutlbpurge.c
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -187,7 +187,7 @@ void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
 	"  FLUSH gruid %d, asid 0x%x, num %ld, cbmap 0x%x\n",
 				gid, asid, num, asids->mt_ctxbitmap);
 			tgh = get_lock_tgh_handle(gru);
-			tgh_invalidate(tgh, start, 0, asid, grupagesize, 0,
+			tgh_invalidate(tgh, start, ~0, asid, grupagesize, 0,
 				       num - 1, asids->mt_ctxbitmap);
 			get_unlock_tgh_handle(tgh);
 		} else {
@@ -210,11 +210,10 @@ void gru_flush_all_tlb(struct gru_state *gru)
 {
 	struct gru_tlb_global_handle *tgh;
 
-	gru_dbg(grudev, "gru %p, gid %d\n", gru, gru->gs_gid);
+	gru_dbg(grudev, "gid %d\n", gru->gs_gid);
 	tgh = get_lock_tgh_handle(gru);
-	tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0);
+	tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0xffff);
 	get_unlock_tgh_handle(tgh);
-	preempt_enable();
 }
 
 /*
diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h
index 275b78896a73..114444cfd496 100644
--- a/drivers/misc/sgi-xp/xpc.h
+++ b/drivers/misc/sgi-xp/xpc.h
@@ -92,7 +92,9 @@ struct xpc_rsvd_page {
 	u8 pad1[3];		/* align to next u64 in 1st 64-byte cacheline */
 	union {
 		unsigned long vars_pa;	/* phys address of struct xpc_vars */
-		unsigned long activate_mq_gpa; /* gru phy addr of activate_mq */
+		unsigned long activate_gru_mq_desc_gpa; /* phys addr of */
+							/* activate mq's */
+							/* gru mq descriptor */
 	} sn;
 	unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */
 	u64 pad2[10];		/* align to last u64 in 2nd 64-byte cacheline */
@@ -189,7 +191,9 @@ struct xpc_gru_mq_uv {
 	int irq;		/* irq raised when message is received in mq */
 	int mmr_blade;		/* blade where watchlist was allocated from */
 	unsigned long mmr_offset; /* offset of irq mmr located on mmr_blade */
+	unsigned long mmr_value; /* value of irq mmr located on mmr_blade */
 	int watchlist_num;	/* number of watchlist allocatd by BIOS */
+	void *gru_mq_desc;	/* opaque structure used by the GRU driver */
 };
 
 /*
@@ -197,6 +201,7 @@ struct xpc_gru_mq_uv {
  * heartbeat, partition active state, and channel state. This is UV only.
  */
 struct xpc_activate_mq_msghdr_uv {
+	unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */
 	short partid;		/* sender's partid */
 	u8 act_state;		/* sender's act_state at time msg sent */
 	u8 type;		/* message's type */
@@ -232,7 +237,7 @@ struct xpc_activate_mq_msg_heartbeat_req_uv {
 struct xpc_activate_mq_msg_activate_req_uv {
 	struct xpc_activate_mq_msghdr_uv hdr;
 	unsigned long rp_gpa;
-	unsigned long activate_mq_gpa;
+	unsigned long activate_gru_mq_desc_gpa;
 };
 
 struct xpc_activate_mq_msg_deactivate_req_uv {
@@ -263,7 +268,7 @@ struct xpc_activate_mq_msg_chctl_openreply_uv {
 	short ch_number;
 	short remote_nentries;	/* ??? Is this needed? What is? */
 	short local_nentries;	/* ??? Is this needed? What is? */
-	unsigned long local_notify_mq_gpa;
+	unsigned long notify_gru_mq_desc_gpa;
 };
 
 /*
@@ -510,8 +515,8 @@ struct xpc_channel_sn2 {
 };
 
 struct xpc_channel_uv {
-	unsigned long remote_notify_mq_gpa;	/* gru phys address of remote */
-						/* partition's notify mq */
+	void *cached_notify_gru_mq_desc; /* remote partition's notify mq's */
+					 /* gru mq descriptor */
 
 	struct xpc_send_msg_slot_uv *send_msg_slots;
 	void *recv_msg_slots;	/* each slot will hold a xpc_notify_mq_msg_uv */
@@ -682,8 +687,12 @@ struct xpc_partition_sn2 {
 };
 
 struct xpc_partition_uv {
-	unsigned long remote_activate_mq_gpa;	/* gru phys address of remote */
-						/* partition's activate mq */
+	unsigned long activate_gru_mq_desc_gpa;	/* phys addr of partition's */
+						/* activate mq's gru mq */
+						/* descriptor */
+	void *cached_activate_gru_mq_desc; /* cached copy of partition's */
+					   /* activate mq's gru mq descriptor */
+	struct mutex cached_activate_gru_mq_desc_mutex;
 	spinlock_t flags_lock;	/* protect updating of flags */
 	unsigned int flags;	/* general flags */
 	u8 remote_act_state;	/* remote partition's act_state */
@@ -694,8 +703,9 @@ struct xpc_partition_uv {
 
 /* struct xpc_partition_uv flags */
 
-#define XPC_P_HEARTBEAT_OFFLINE_UV	0x00000001
-#define XPC_P_ENGAGED_UV		0x00000002
+#define XPC_P_HEARTBEAT_OFFLINE_UV		0x00000001
+#define XPC_P_ENGAGED_UV			0x00000002
+#define XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV	0x00000004
 
 /* struct xpc_partition_uv act_state change requests */
 
@@ -804,6 +814,7 @@ extern void xpc_activate_kthreads(struct xpc_channel *, int);
 extern void xpc_create_kthreads(struct xpc_channel *, int, int);
 extern void xpc_disconnect_wait(int);
 extern int (*xpc_setup_partitions_sn) (void);
+extern void (*xpc_teardown_partitions_sn) (void);
 extern enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *, u64 *,
 							 unsigned long *,
 							 size_t *);
@@ -846,8 +857,8 @@ extern void (*xpc_send_chctl_openrequest) (struct xpc_channel *,
 					   unsigned long *);
 extern void (*xpc_send_chctl_openreply) (struct xpc_channel *, unsigned long *);
 
-extern void (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *,
-					    unsigned long);
+extern enum xp_retval (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *,
+						      unsigned long);
 
 extern enum xp_retval (*xpc_send_payload) (struct xpc_channel *, u32, void *,
 					   u16, u8, xpc_notify_func, void *);
diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c
index 45fd653dbe31..99a2534c38a1 100644
--- a/drivers/misc/sgi-xp/xpc_channel.c
+++ b/drivers/misc/sgi-xp/xpc_channel.c
@@ -183,6 +183,7 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number,
 	    &part->remote_openclose_args[ch_number];
 	struct xpc_channel *ch = &part->channels[ch_number];
 	enum xp_retval reason;
+	enum xp_retval ret;
 
 	spin_lock_irqsave(&ch->lock, irq_flags);
 
@@ -399,8 +400,13 @@ again:
 		DBUG_ON(args->local_nentries == 0);
 		DBUG_ON(args->remote_nentries == 0);
 
+		ret = xpc_save_remote_msgqueue_pa(ch, args->local_msgqueue_pa);
+		if (ret != xpSuccess) {
+			XPC_DISCONNECT_CHANNEL(ch, ret, &irq_flags);
+			spin_unlock_irqrestore(&ch->lock, irq_flags);
+			return;
+		}
 		ch->flags |= XPC_C_ROPENREPLY;
-		xpc_save_remote_msgqueue_pa(ch, args->local_msgqueue_pa);
 
 		if (args->local_nentries < ch->remote_nentries) {
 			dev_dbg(xpc_chan, "XPC_CHCTL_OPENREPLY: new "
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index 6576170de962..1ab9fda87fab 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -171,6 +171,7 @@ static struct notifier_block xpc_die_notifier = {
 };
 
 int (*xpc_setup_partitions_sn) (void);
+void (*xpc_teardown_partitions_sn) (void);
 enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *buf, u64 *cookie,
 						  unsigned long *rp_pa,
 						  size_t *len);
@@ -217,8 +218,8 @@ void (*xpc_send_chctl_openrequest) (struct xpc_channel *ch,
 void (*xpc_send_chctl_openreply) (struct xpc_channel *ch,
 				  unsigned long *irq_flags);
 
-void (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *ch,
-				     unsigned long msgqueue_pa);
+enum xp_retval (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *ch,
+					       unsigned long msgqueue_pa);
 
 enum xp_retval (*xpc_send_payload) (struct xpc_channel *ch, u32 flags,
 				    void *payload, u16 payload_size,
@@ -998,6 +999,7 @@ xpc_setup_partitions(void)
 static void
 xpc_teardown_partitions(void)
 {
+	xpc_teardown_partitions_sn();
 	kfree(xpc_partitions);
 }
 
diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c
index 2e975762c32b..eaaa964942de 100644
--- a/drivers/misc/sgi-xp/xpc_sn2.c
+++ b/drivers/misc/sgi-xp/xpc_sn2.c
@@ -66,6 +66,12 @@ xpc_setup_partitions_sn_sn2(void)
 	return 0;
 }
 
+static void
+xpc_teardown_partitions_sn_sn2(void)
+{
+	/* nothing needs to be done */
+}
+
 /* SH_IPI_ACCESS shub register value on startup */
 static u64 xpc_sh1_IPI_access_sn2;
 static u64 xpc_sh2_IPI_access0_sn2;
@@ -436,11 +442,12 @@ xpc_send_chctl_local_msgrequest_sn2(struct xpc_channel *ch)
 	XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST);
 }
 
-static void
+static enum xp_retval
 xpc_save_remote_msgqueue_pa_sn2(struct xpc_channel *ch,
 				unsigned long msgqueue_pa)
 {
 	ch->sn.sn2.remote_msgqueue_pa = msgqueue_pa;
+	return xpSuccess;
 }
 
 /*
@@ -1737,20 +1744,20 @@ xpc_clear_remote_msgqueue_flags_sn2(struct xpc_channel *ch)
 {
 	struct xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2;
 	struct xpc_msg_sn2 *msg;
-	s64 put;
+	s64 put, remote_nentries = ch->remote_nentries;
 
 	/* flags are zeroed when the buffer is allocated */
-	if (ch_sn2->remote_GP.put < ch->remote_nentries)
+	if (ch_sn2->remote_GP.put < remote_nentries)
 		return;
 
-	put = max(ch_sn2->w_remote_GP.put, ch->remote_nentries);
+	put = max(ch_sn2->w_remote_GP.put, remote_nentries);
 	do {
 		msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue +
-					     (put % ch->remote_nentries) *
+					     (put % remote_nentries) *
 					     ch->entry_size);
 		DBUG_ON(!(msg->flags & XPC_M_SN2_READY));
 		DBUG_ON(!(msg->flags & XPC_M_SN2_DONE));
-		DBUG_ON(msg->number != put - ch->remote_nentries);
+		DBUG_ON(msg->number != put - remote_nentries);
 		msg->flags = 0;
 	} while (++put < ch_sn2->remote_GP.put);
 }
@@ -2315,6 +2322,7 @@ xpc_init_sn2(void)
 	size_t buf_size;
 
 	xpc_setup_partitions_sn = xpc_setup_partitions_sn_sn2;
+	xpc_teardown_partitions_sn = xpc_teardown_partitions_sn_sn2;
 	xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_sn2;
 	xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_sn2;
 	xpc_increment_heartbeat = xpc_increment_heartbeat_sn2;
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index 29c0502a96b2..f7fff4727edb 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -31,6 +31,21 @@
 #include "../sgi-gru/grukservices.h"
 #include "xpc.h"
 
+#if defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
+struct uv_IO_APIC_route_entry {
+	__u64	vector		:  8,
+		delivery_mode	:  3,
+		dest_mode	:  1,
+		delivery_status	:  1,
+		polarity	:  1,
+		__reserved_1	:  1,
+		trigger		:  1,
+		mask		:  1,
+		__reserved_2	: 15,
+		dest		: 32;
+};
+#endif
+
 static atomic64_t xpc_heartbeat_uv;
 static DECLARE_BITMAP(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV);
 
@@ -56,26 +71,52 @@ xpc_setup_partitions_sn_uv(void)
 	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
 		part_uv = &xpc_partitions[partid].sn.uv;
 
+		mutex_init(&part_uv->cached_activate_gru_mq_desc_mutex);
 		spin_lock_init(&part_uv->flags_lock);
 		part_uv->remote_act_state = XPC_P_AS_INACTIVE;
 	}
 	return 0;
 }
 
+static void
+xpc_teardown_partitions_sn_uv(void)
+{
+	short partid;
+	struct xpc_partition_uv *part_uv;
+	unsigned long irq_flags;
+
+	for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) {
+		part_uv = &xpc_partitions[partid].sn.uv;
+
+		if (part_uv->cached_activate_gru_mq_desc != NULL) {
+			mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex);
+			spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+			part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
+			spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+			kfree(part_uv->cached_activate_gru_mq_desc);
+			part_uv->cached_activate_gru_mq_desc = NULL;
+			mutex_unlock(&part_uv->
+				     cached_activate_gru_mq_desc_mutex);
+		}
+	}
+}
+
 static int
 xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name)
 {
+	int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
+
 #if defined CONFIG_X86_64
 	mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset);
 	if (mq->irq < 0) {
 		dev_err(xpc_part, "uv_setup_irq() returned error=%d\n",
-			mq->irq);
+			-mq->irq);
+		return mq->irq;
 	}
 
-#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
-	int mmr_pnode;
-	unsigned long mmr_value;
+	mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset);
 
+#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
 	if (strcmp(irq_name, XPC_ACTIVATE_IRQ_NAME) == 0)
 		mq->irq = SGI_XPC_ACTIVATE;
 	else if (strcmp(irq_name, XPC_NOTIFY_IRQ_NAME) == 0)
@@ -83,10 +124,8 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name)
 	else
 		return -EINVAL;
 
-	mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
-	mmr_value = (unsigned long)cpu_physical_id(cpu) << 32 | mq->irq;
-
-	uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mmr_value);
+	mq->mmr_value = (unsigned long)cpu_physical_id(cpu) << 32 | mq->irq;
+	uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mq->mmr_value);
 #else
 	#error not a supported configuration
 #endif
@@ -127,7 +166,7 @@ xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq)
 		return ret;
 	}
 #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
-	ret = sn_mq_watchlist_alloc(mq->mmr_blade, uv_gpa(mq->address),
+	ret = sn_mq_watchlist_alloc(mq->mmr_blade, (void *)uv_gpa(mq->address),
 				    mq->order, &mq->mmr_offset);
 	if (ret < 0) {
 		dev_err(xpc_part, "sn_mq_watchlist_alloc() failed, ret=%d\n",
@@ -168,12 +207,22 @@ xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name,
 	int pg_order;
 	struct page *page;
 	struct xpc_gru_mq_uv *mq;
+	struct uv_IO_APIC_route_entry *mmr_value;
 
 	mq = kmalloc(sizeof(struct xpc_gru_mq_uv), GFP_KERNEL);
 	if (mq == NULL) {
 		dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() "
 			"a xpc_gru_mq_uv structure\n");
 		ret = -ENOMEM;
+		goto out_0;
+	}
+
+	mq->gru_mq_desc = kzalloc(sizeof(struct gru_message_queue_desc),
+				  GFP_KERNEL);
+	if (mq->gru_mq_desc == NULL) {
+		dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() "
+			"a gru_message_queue_desc structure\n");
+		ret = -ENOMEM;
 		goto out_1;
 	}
 
@@ -194,14 +243,6 @@ xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name,
 	}
 	mq->address = page_address(page);
 
-	ret = gru_create_message_queue(mq->address, mq_size);
-	if (ret != 0) {
-		dev_err(xpc_part, "gru_create_message_queue() returned "
-			"error=%d\n", ret);
-		ret = -EINVAL;
-		goto out_3;
-	}
-
 	/* enable generation of irq when GRU mq operation occurs to this mq */
 	ret = xpc_gru_mq_watchlist_alloc_uv(mq);
 	if (ret != 0)
@@ -214,10 +255,20 @@ xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name,
 	ret = request_irq(mq->irq, irq_handler, 0, irq_name, NULL);
 	if (ret != 0) {
 		dev_err(xpc_part, "request_irq(irq=%d) returned error=%d\n",
-			mq->irq, ret);
+			mq->irq, -ret);
 		goto out_5;
 	}
 
+	mmr_value = (struct uv_IO_APIC_route_entry *)&mq->mmr_value;
+	ret = gru_create_message_queue(mq->gru_mq_desc, mq->address, mq_size,
+				       nid, mmr_value->vector, mmr_value->dest);
+	if (ret != 0) {
+		dev_err(xpc_part, "gru_create_message_queue() returned "
+			"error=%d\n", ret);
+		ret = -EINVAL;
+		goto out_6;
+	}
+
 	/* allow other partitions to access this GRU mq */
 	xp_ret = xp_expand_memprotect(xp_pa(mq->address), mq_size);
 	if (xp_ret != xpSuccess) {
@@ -237,8 +288,10 @@ out_4:
 out_3:
 	free_pages((unsigned long)mq->address, pg_order);
 out_2:
-	kfree(mq);
+	kfree(mq->gru_mq_desc);
 out_1:
+	kfree(mq);
+out_0:
 	return ERR_PTR(ret);
 }
 
@@ -268,13 +321,14 @@ xpc_destroy_gru_mq_uv(struct xpc_gru_mq_uv *mq)
 }
 
 static enum xp_retval
-xpc_send_gru_msg(unsigned long mq_gpa, void *msg, size_t msg_size)
+xpc_send_gru_msg(struct gru_message_queue_desc *gru_mq_desc, void *msg,
+		 size_t msg_size)
 {
 	enum xp_retval xp_ret;
 	int ret;
 
 	while (1) {
-		ret = gru_send_message_gpa(mq_gpa, msg, msg_size);
+		ret = gru_send_message_gpa(gru_mq_desc, msg, msg_size);
 		if (ret == MQE_OK) {
 			xp_ret = xpSuccess;
 			break;
@@ -421,7 +475,15 @@ xpc_handle_activate_mq_msg_uv(struct xpc_partition *part,
 		part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV;
 		part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */
 		part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies;
-		part_uv->remote_activate_mq_gpa = msg->activate_mq_gpa;
+
+		if (msg->activate_gru_mq_desc_gpa !=
+		    part_uv->activate_gru_mq_desc_gpa) {
+			spin_lock(&part_uv->flags_lock); /* irqs already off */
+			part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
+			spin_unlock(&part_uv->flags_lock); /* keep irq_flags */
+			part_uv->activate_gru_mq_desc_gpa =
+			    msg->activate_gru_mq_desc_gpa;
+		}
 		spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags);
 
 		(*wakeup_hb_checker)++;
@@ -498,7 +560,7 @@ xpc_handle_activate_mq_msg_uv(struct xpc_partition *part,
 		args = &part->remote_openclose_args[msg->ch_number];
 		args->remote_nentries = msg->remote_nentries;
 		args->local_nentries = msg->local_nentries;
-		args->local_msgqueue_pa = msg->local_notify_mq_gpa;
+		args->local_msgqueue_pa = msg->notify_gru_mq_desc_gpa;
 
 		spin_lock_irqsave(&part->chctl_lock, irq_flags);
 		part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREPLY;
@@ -558,9 +620,10 @@ xpc_handle_activate_IRQ_uv(int irq, void *dev_id)
 	short partid;
 	struct xpc_partition *part;
 	int wakeup_hb_checker = 0;
+	int part_referenced;
 
 	while (1) {
-		msg_hdr = gru_get_next_message(xpc_activate_mq_uv->address);
+		msg_hdr = gru_get_next_message(xpc_activate_mq_uv->gru_mq_desc);
 		if (msg_hdr == NULL)
 			break;
 
@@ -571,14 +634,15 @@ xpc_handle_activate_IRQ_uv(int irq, void *dev_id)
 				partid);
 		} else {
 			part = &xpc_partitions[partid];
-			if (xpc_part_ref(part)) {
-				xpc_handle_activate_mq_msg_uv(part, msg_hdr,
-							    &wakeup_hb_checker);
+
+			part_referenced = xpc_part_ref(part);
+			xpc_handle_activate_mq_msg_uv(part, msg_hdr,
+						      &wakeup_hb_checker);
+			if (part_referenced)
 				xpc_part_deref(part);
-			}
 		}
 
-		gru_free_message(xpc_activate_mq_uv->address, msg_hdr);
+		gru_free_message(xpc_activate_mq_uv->gru_mq_desc, msg_hdr);
 	}
 
 	if (wakeup_hb_checker)
@@ -588,21 +652,73 @@ xpc_handle_activate_IRQ_uv(int irq, void *dev_id)
 }
 
 static enum xp_retval
+xpc_cache_remote_gru_mq_desc_uv(struct gru_message_queue_desc *gru_mq_desc,
+				unsigned long gru_mq_desc_gpa)
+{
+	enum xp_retval ret;	/* status of the remote copy, returned as-is */
+
+	ret = xp_remote_memcpy(uv_gpa(gru_mq_desc), gru_mq_desc_gpa,
+			       sizeof(struct gru_message_queue_desc));
+	if (ret == xpSuccess)
+		gru_mq_desc->mq = NULL; /* discard the mq value just copied */
+
+	return ret;
+}
+
+static enum xp_retval	/* xpNoMemory, cache status, or send status */
 xpc_send_activate_IRQ_uv(struct xpc_partition *part, void *msg, size_t msg_size,
 			 int msg_type)
 {
 	struct xpc_activate_mq_msghdr_uv *msg_hdr = msg;
+	struct xpc_partition_uv *part_uv = &part->sn.uv;
+	struct gru_message_queue_desc *gru_mq_desc;
+	unsigned long irq_flags;
+	enum xp_retval ret;
 
 	DBUG_ON(msg_size > XPC_ACTIVATE_MSG_SIZE_UV);
 
 	msg_hdr->type = msg_type;
-	msg_hdr->partid = XPC_PARTID(part);
+	msg_hdr->partid = xp_partition_id;
 	msg_hdr->act_state = part->act_state;
 	msg_hdr->rp_ts_jiffies = xpc_rsvd_page->ts_jiffies;
 
+	mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex);
+again:	/* re-entered when a failed send finds the cached flag clear */
+	if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV)) {
+		gru_mq_desc = part_uv->cached_activate_gru_mq_desc;
+		if (gru_mq_desc == NULL) { /* nothing allocated yet */
+			gru_mq_desc = kmalloc(sizeof(struct
+					      gru_message_queue_desc),
+					      GFP_KERNEL);
+			if (gru_mq_desc == NULL) {
+				ret = xpNoMemory;
+				goto done;
+			}
+			part_uv->cached_activate_gru_mq_desc = gru_mq_desc;
+		}
+
+		ret = xpc_cache_remote_gru_mq_desc_uv(gru_mq_desc,
+						      part_uv->
+						      activate_gru_mq_desc_gpa);
+		if (ret != xpSuccess)
+			goto done; /* cached flag stays clear */
+
+		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
+		part_uv->flags |= XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV;
+		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
+	}
+
 	/* ??? Is holding a spin_lock (ch->lock) during this call a bad idea? */
-	return xpc_send_gru_msg(part->sn.uv.remote_activate_mq_gpa, msg,
-				msg_size);
+	ret = xpc_send_gru_msg(part_uv->cached_activate_gru_mq_desc, msg,
+			       msg_size); /* uses the cached copy */
+	if (ret != xpSuccess) {
+		smp_rmb();	/* ensure a fresh copy of part_uv->flags */
+		if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV))
+			goto again; /* flag clear: re-cache and resend */
+	}
+done:	/* every path after mutex_lock() exits through here */
+	mutex_unlock(&part_uv->cached_activate_gru_mq_desc_mutex);
+	return ret;
 }
 
 static void
@@ -620,7 +736,7 @@ static void
 xpc_send_activate_IRQ_ch_uv(struct xpc_channel *ch, unsigned long *irq_flags,
 			 void *msg, size_t msg_size, int msg_type)
 {
-	struct xpc_partition *part = &xpc_partitions[ch->number];
+	struct xpc_partition *part = &xpc_partitions[ch->partid];
 	enum xp_retval ret;
 
 	ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type);
@@ -692,7 +808,8 @@ xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa,
 static int
 xpc_setup_rsvd_page_sn_uv(struct xpc_rsvd_page *rp)
 {
-	rp->sn.activate_mq_gpa = uv_gpa(xpc_activate_mq_uv->address);
+	rp->sn.activate_gru_mq_desc_gpa =
+	    uv_gpa(xpc_activate_mq_uv->gru_mq_desc);
 	return 0;
 }
 
@@ -787,7 +904,8 @@ xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
 
 	part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */
 	part->remote_rp_ts_jiffies = remote_rp->ts_jiffies;
-	part->sn.uv.remote_activate_mq_gpa = remote_rp->sn.activate_mq_gpa;
+	part->sn.uv.activate_gru_mq_desc_gpa =
+	    remote_rp->sn.activate_gru_mq_desc_gpa;
 
 	/*
 	 * ??? Is it a good idea to make this conditional on what is
@@ -795,7 +913,8 @@ xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
 	 */
 	if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) {
 		msg.rp_gpa = uv_gpa(xpc_rsvd_page);
-		msg.activate_mq_gpa = xpc_rsvd_page->sn.activate_mq_gpa;
+		msg.activate_gru_mq_desc_gpa =
+		    xpc_rsvd_page->sn.activate_gru_mq_desc_gpa;
 		xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
 					   XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV);
 	}
@@ -857,7 +976,8 @@ xpc_get_fifo_entry_uv(struct xpc_fifo_head_uv *head)
 		if (head->first == NULL)
 			head->last = NULL;
 	}
-	head->n_entries++;
+	head->n_entries--;
+	BUG_ON(head->n_entries < 0);
 	spin_unlock_irqrestore(&head->lock, irq_flags);
 	first->next = NULL;
 	return first;
@@ -876,8 +996,7 @@ xpc_put_fifo_entry_uv(struct xpc_fifo_head_uv *head,
 	else
 		head->first = last;
 	head->last = last;
-	head->n_entries--;
-	BUG_ON(head->n_entries < 0);
+	head->n_entries++;
 	spin_unlock_irqrestore(&head->lock, irq_flags);
 }
 
@@ -1037,6 +1156,12 @@ xpc_setup_msg_structures_uv(struct xpc_channel *ch)
 
 	DBUG_ON(ch->flags & XPC_C_SETUP);
 
+	ch_uv->cached_notify_gru_mq_desc = kmalloc(sizeof(struct
+						   gru_message_queue_desc),
+						   GFP_KERNEL);
+	if (ch_uv->cached_notify_gru_mq_desc == NULL)
+		return xpNoMemory;
+
 	ret = xpc_allocate_send_msg_slot_uv(ch);
 	if (ret == xpSuccess) {
 
@@ -1060,7 +1185,8 @@ xpc_teardown_msg_structures_uv(struct xpc_channel *ch)
 
 	DBUG_ON(!spin_is_locked(&ch->lock));
 
-	ch_uv->remote_notify_mq_gpa = 0;
+	kfree(ch_uv->cached_notify_gru_mq_desc);
+	ch_uv->cached_notify_gru_mq_desc = NULL;
 
 	if (ch->flags & XPC_C_SETUP) {
 		xpc_init_fifo_uv(&ch_uv->msg_slot_free_list);
@@ -1111,7 +1237,7 @@ xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags)
 	msg.ch_number = ch->number;
 	msg.local_nentries = ch->local_nentries;
 	msg.remote_nentries = ch->remote_nentries;
-	msg.local_notify_mq_gpa = uv_gpa(xpc_notify_mq_uv);
+	msg.notify_gru_mq_desc_gpa = uv_gpa(xpc_notify_mq_uv->gru_mq_desc);
 	xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg),
 				    XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV);
 }
@@ -1128,11 +1254,15 @@ xpc_send_chctl_local_msgrequest_uv(struct xpc_partition *part, int ch_number)
 	xpc_wakeup_channel_mgr(part);
 }
 
-static void
+static enum xp_retval	/* result of caching the remote descriptor */
 xpc_save_remote_msgqueue_pa_uv(struct xpc_channel *ch,
-			       unsigned long msgqueue_pa)
+			       unsigned long gru_mq_desc_gpa)
 {
-	ch->sn.uv.remote_notify_mq_gpa = msgqueue_pa;
+	struct xpc_channel_uv *ch_uv = &ch->sn.uv;
+
+	DBUG_ON(ch_uv->cached_notify_gru_mq_desc == NULL);
+	return xpc_cache_remote_gru_mq_desc_uv(ch_uv->cached_notify_gru_mq_desc,
+					       gru_mq_desc_gpa); /* copy in */
 }
 
 static void
@@ -1339,7 +1469,8 @@ xpc_handle_notify_IRQ_uv(int irq, void *dev_id)
 	short partid;
 	struct xpc_partition *part;
 
-	while ((msg = gru_get_next_message(xpc_notify_mq_uv)) != NULL) {
+	while ((msg = gru_get_next_message(xpc_notify_mq_uv->gru_mq_desc)) !=
+	       NULL) {
 
 		partid = msg->hdr.partid;
 		if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) {
@@ -1354,7 +1485,7 @@ xpc_handle_notify_IRQ_uv(int irq, void *dev_id)
 			}
 		}
 
-		gru_free_message(xpc_notify_mq_uv, msg);
+		gru_free_message(xpc_notify_mq_uv->gru_mq_desc, msg);
 	}
 
 	return IRQ_HANDLED;
@@ -1438,7 +1569,8 @@ xpc_send_payload_uv(struct xpc_channel *ch, u32 flags, void *payload,
 	msg->hdr.msg_slot_number = msg_slot->msg_slot_number;
 	memcpy(&msg->payload, payload, payload_size);
 
-	ret = xpc_send_gru_msg(ch->sn.uv.remote_notify_mq_gpa, msg, msg_size);
+	ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg,
+			       msg_size);
 	if (ret == xpSuccess)
 		goto out_1;
 
@@ -1529,7 +1661,7 @@ xpc_received_payload_uv(struct xpc_channel *ch, void *payload)
 	msg->hdr.partid = xp_partition_id;
 	msg->hdr.size = 0;	/* size of zero indicates this is an ACK */
 
-	ret = xpc_send_gru_msg(ch->sn.uv.remote_notify_mq_gpa, msg,
+	ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg,
 			       sizeof(struct xpc_notify_mq_msghdr_uv));
 	if (ret != xpSuccess)
 		XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret);
@@ -1541,6 +1673,7 @@ int
 xpc_init_uv(void)
 {
 	xpc_setup_partitions_sn = xpc_setup_partitions_sn_uv;
+	xpc_teardown_partitions_sn = xpc_teardown_partitions_sn_uv;
 	xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv;
 	xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv;
 	xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_uv;