diff options
Diffstat (limited to 'drivers/misc')
-rw-r--r-- | drivers/misc/Kconfig | 4 | ||||
-rw-r--r-- | drivers/misc/eeprom/at24.c | 67 | ||||
-rw-r--r-- | drivers/misc/eeprom/at25.c | 58 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/Makefile | 2 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/gru_instructions.h | 22 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/grufault.c | 130 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/grufile.c | 36 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/gruhandles.c | 183 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/gruhandles.h | 178 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/grukservices.c | 131 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/grukservices.h | 33 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/grumain.c | 84 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/gruprocfs.c | 45 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/grutables.h | 41 | ||||
-rw-r--r-- | drivers/misc/sgi-gru/grutlbpurge.c | 7 | ||||
-rw-r--r-- | drivers/misc/sgi-xp/xpc.h | 33 | ||||
-rw-r--r-- | drivers/misc/sgi-xp/xpc_channel.c | 8 | ||||
-rw-r--r-- | drivers/misc/sgi-xp/xpc_main.c | 6 | ||||
-rw-r--r-- | drivers/misc/sgi-xp/xpc_sn2.c | 20 | ||||
-rw-r--r-- | drivers/misc/sgi-xp/xpc_uv.c | 229 |
20 files changed, 899 insertions, 418 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 5f3bff434621..0b92b2f6ea68 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -165,7 +165,7 @@ config SGI_XP depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_UV) && SMP select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 - select SGI_GRU if (IA64_GENERIC || IA64_SGI_UV || X86_64) && SMP + select SGI_GRU if X86_64 && SMP ---help--- An SGI machine can be divided into multiple Single System Images which act independently of each other and have @@ -189,7 +189,7 @@ config HP_ILO config SGI_GRU tristate "SGI GRU driver" - depends on (X86_UV || IA64_SGI_UV || IA64_GENERIC) && SMP + depends on X86_UV && SMP default n select MMU_NOTIFIER ---help--- diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index d4775528abc6..d184dfab9631 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -53,6 +53,7 @@ struct at24_data { struct at24_platform_data chip; + struct memory_accessor macc; bool use_smbus; /* @@ -225,14 +226,11 @@ static ssize_t at24_eeprom_read(struct at24_data *at24, char *buf, return status; } -static ssize_t at24_bin_read(struct kobject *kobj, struct bin_attribute *attr, +static ssize_t at24_read(struct at24_data *at24, char *buf, loff_t off, size_t count) { - struct at24_data *at24; ssize_t retval = 0; - at24 = dev_get_drvdata(container_of(kobj, struct device, kobj)); - if (unlikely(!count)) return count; @@ -262,12 +260,14 @@ static ssize_t at24_bin_read(struct kobject *kobj, struct bin_attribute *attr, return retval; } +static ssize_t at24_bin_read(struct kobject *kobj, struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + struct at24_data *at24; -/* - * REVISIT: export at24_bin{read,write}() to let other kernel code use - * eeprom data. 
For example, it might hold a board's Ethernet address, or - * board-specific calibration data generated on the manufacturing floor. - */ + at24 = dev_get_drvdata(container_of(kobj, struct device, kobj)); + return at24_read(at24, buf, off, count); +} /* @@ -347,14 +347,11 @@ static ssize_t at24_eeprom_write(struct at24_data *at24, char *buf, return -ETIMEDOUT; } -static ssize_t at24_bin_write(struct kobject *kobj, struct bin_attribute *attr, +static ssize_t at24_write(struct at24_data *at24, char *buf, loff_t off, size_t count) { - struct at24_data *at24; ssize_t retval = 0; - at24 = dev_get_drvdata(container_of(kobj, struct device, kobj)); - if (unlikely(!count)) return count; @@ -384,6 +381,39 @@ static ssize_t at24_bin_write(struct kobject *kobj, struct bin_attribute *attr, return retval; } +static ssize_t at24_bin_write(struct kobject *kobj, struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + struct at24_data *at24; + + at24 = dev_get_drvdata(container_of(kobj, struct device, kobj)); + return at24_write(at24, buf, off, count); +} + +/*-------------------------------------------------------------------------*/ + +/* + * This lets other kernel code access the eeprom data. For example, it + * might hold a board's Ethernet address, or board-specific calibration + * data generated on the manufacturing floor. 
+ */ + +static ssize_t at24_macc_read(struct memory_accessor *macc, char *buf, + off_t offset, size_t count) +{ + struct at24_data *at24 = container_of(macc, struct at24_data, macc); + + return at24_read(at24, buf, offset, count); +} + +static ssize_t at24_macc_write(struct memory_accessor *macc, char *buf, + off_t offset, size_t count) +{ + struct at24_data *at24 = container_of(macc, struct at24_data, macc); + + return at24_write(at24, buf, offset, count); +} + /*-------------------------------------------------------------------------*/ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id) @@ -413,6 +443,9 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id) * is recommended anyhow. */ chip.page_size = 1; + + chip.setup = NULL; + chip.context = NULL; } if (!is_power_of_2(chip.byte_len)) @@ -463,6 +496,8 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id) at24->bin.read = at24_bin_read; at24->bin.size = chip.byte_len; + at24->macc.read = at24_macc_read; + writable = !(chip.flags & AT24_FLAG_READONLY); if (writable) { if (!use_smbus || i2c_check_functionality(client->adapter, @@ -470,6 +505,8 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id) unsigned write_max = chip.page_size; + at24->macc.write = at24_macc_write; + at24->bin.write = at24_bin_write; at24->bin.attr.mode |= S_IWUSR; @@ -520,6 +557,10 @@ static int at24_probe(struct i2c_client *client, const struct i2c_device_id *id) at24->write_max, use_smbus ? 
", use_smbus" : ""); + /* export data to kernel code */ + if (chip.setup) + chip.setup(&at24->macc, chip.context); + return 0; err_clients: diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c index 290dbe99647a..6bc0dac5c1e8 100644 --- a/drivers/misc/eeprom/at25.c +++ b/drivers/misc/eeprom/at25.c @@ -30,6 +30,7 @@ struct at25_data { struct spi_device *spi; + struct memory_accessor mem; struct mutex lock; struct spi_eeprom chip; struct bin_attribute bin; @@ -75,6 +76,13 @@ at25_ee_read( struct spi_transfer t[2]; struct spi_message m; + if (unlikely(offset >= at25->bin.size)) + return 0; + if ((offset + count) > at25->bin.size) + count = at25->bin.size - offset; + if (unlikely(!count)) + return count; + cp = command; *cp++ = AT25_READ; @@ -127,13 +135,6 @@ at25_bin_read(struct kobject *kobj, struct bin_attribute *bin_attr, dev = container_of(kobj, struct device, kobj); at25 = dev_get_drvdata(dev); - if (unlikely(off >= at25->bin.size)) - return 0; - if ((off + count) > at25->bin.size) - count = at25->bin.size - off; - if (unlikely(!count)) - return count; - return at25_ee_read(at25, buf, off, count); } @@ -146,6 +147,13 @@ at25_ee_write(struct at25_data *at25, char *buf, loff_t off, size_t count) unsigned buf_size; u8 *bounce; + if (unlikely(off >= at25->bin.size)) + return -EFBIG; + if ((off + count) > at25->bin.size) + count = at25->bin.size - off; + if (unlikely(!count)) + return count; + /* Temp buffer starts with command and address */ buf_size = at25->chip.page_size; if (buf_size > io_limit) @@ -253,18 +261,31 @@ at25_bin_write(struct kobject *kobj, struct bin_attribute *bin_attr, dev = container_of(kobj, struct device, kobj); at25 = dev_get_drvdata(dev); - if (unlikely(off >= at25->bin.size)) - return -EFBIG; - if ((off + count) > at25->bin.size) - count = at25->bin.size - off; - if (unlikely(!count)) - return count; - return at25_ee_write(at25, buf, off, count); } /*-------------------------------------------------------------------------*/ 
+/* Let in-kernel code access the eeprom data. */ + +static ssize_t at25_mem_read(struct memory_accessor *mem, char *buf, + off_t offset, size_t count) +{ + struct at25_data *at25 = container_of(mem, struct at25_data, mem); + + return at25_ee_read(at25, buf, offset, count); +} + +static ssize_t at25_mem_write(struct memory_accessor *mem, char *buf, + off_t offset, size_t count) +{ + struct at25_data *at25 = container_of(mem, struct at25_data, mem); + + return at25_ee_write(at25, buf, offset, count); +} + +/*-------------------------------------------------------------------------*/ + static int at25_probe(struct spi_device *spi) { struct at25_data *at25 = NULL; @@ -317,6 +338,10 @@ static int at25_probe(struct spi_device *spi) at25->addrlen = addrlen; /* Export the EEPROM bytes through sysfs, since that's convenient. + * And maybe to other kernel code; it might hold a board's Ethernet + * address, or board-specific calibration data generated on the + * manufacturing floor. + * * Default to root-only access to the data; EEPROMs often hold data * that's sensitive for read and/or write, like ethernet addresses, * security codes, board-specific manufacturing calibrations, etc. @@ -324,17 +349,22 @@ static int at25_probe(struct spi_device *spi) at25->bin.attr.name = "eeprom"; at25->bin.attr.mode = S_IRUSR; at25->bin.read = at25_bin_read; + at25->mem.read = at25_mem_read; at25->bin.size = at25->chip.byte_len; if (!(chip->flags & EE_READONLY)) { at25->bin.write = at25_bin_write; at25->bin.attr.mode |= S_IWUSR; + at25->mem.write = at25_mem_write; } err = sysfs_create_bin_file(&spi->dev.kobj, &at25->bin); if (err) goto fail; + if (chip->setup) + chip->setup(&at25->mem, chip->context); + dev_info(&spi->dev, "%Zd %s %s eeprom%s, pagesize %u\n", (at25->bin.size < 1024) ? 
at25->bin.size diff --git a/drivers/misc/sgi-gru/Makefile b/drivers/misc/sgi-gru/Makefile index 9e9170b3599a..bcd8136d2f98 100644 --- a/drivers/misc/sgi-gru/Makefile +++ b/drivers/misc/sgi-gru/Makefile @@ -3,5 +3,5 @@ ifdef CONFIG_SGI_GRU_DEBUG endif obj-$(CONFIG_SGI_GRU) := gru.o -gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o +gru-y := grufile.o grumain.o grufault.o grutlbpurge.o gruprocfs.o grukservices.o gruhandles.o diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h index 48762e7b98be..3fde33c1e8f3 100644 --- a/drivers/misc/sgi-gru/gru_instructions.h +++ b/drivers/misc/sgi-gru/gru_instructions.h @@ -19,8 +19,11 @@ #ifndef __GRU_INSTRUCTIONS_H__ #define __GRU_INSTRUCTIONS_H__ -#define gru_flush_cache_hook(p) -#define gru_emulator_wait_hook(p, w) +extern int gru_check_status_proc(void *cb); +extern int gru_wait_proc(void *cb); +extern void gru_wait_abort_proc(void *cb); + + /* * Architecture dependent functions @@ -29,16 +32,16 @@ #if defined(CONFIG_IA64) #include <linux/compiler.h> #include <asm/intrinsics.h> -#define __flush_cache(p) ia64_fc(p) +#define __flush_cache(p) ia64_fc((unsigned long)p) /* Use volatile on IA64 to ensure ordering via st4.rel */ -#define gru_ordered_store_int(p,v) \ +#define gru_ordered_store_int(p, v) \ do { \ barrier(); \ *((volatile int *)(p)) = v; /* force st.rel */ \ } while (0) #elif defined(CONFIG_X86_64) #define __flush_cache(p) clflush(p) -#define gru_ordered_store_int(p,v) \ +#define gru_ordered_store_int(p, v) \ do { \ barrier(); \ *(int *)p = v; \ @@ -558,20 +561,19 @@ extern int gru_get_cb_exception_detail(void *cb, #define GRU_EXC_STR_SIZE 256 -extern int gru_check_status_proc(void *cb); -extern int gru_wait_proc(void *cb); -extern void gru_wait_abort_proc(void *cb); /* * Control block definition for checking status */ struct gru_control_block_status { unsigned int icmd :1; - unsigned int unused1 :31; + unsigned int ima :3; + unsigned int 
reserved0 :4; + unsigned int unused1 :24; unsigned int unused2 :24; unsigned int istatus :2; unsigned int isubstatus :4; - unsigned int inused3 :2; + unsigned int unused3 :2; }; /* Get CB status */ diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c index 3ee698ad8599..ab118558552e 100644 --- a/drivers/misc/sgi-gru/grufault.c +++ b/drivers/misc/sgi-gru/grufault.c @@ -32,6 +32,7 @@ #include <linux/device.h> #include <linux/io.h> #include <linux/uaccess.h> +#include <linux/security.h> #include <asm/pgtable.h> #include "gru.h" #include "grutables.h" @@ -266,6 +267,44 @@ err: return 1; } +static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr, + int write, int atomic, unsigned long *gpa, int *pageshift) +{ + struct mm_struct *mm = gts->ts_mm; + struct vm_area_struct *vma; + unsigned long paddr; + int ret, ps; + + vma = find_vma(mm, vaddr); + if (!vma) + goto inval; + + /* + * Atomic lookup is faster & usually works even if called in non-atomic + * context. + */ + rmb(); /* Must/check ms_range_active before loading PTEs */ + ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &ps); + if (ret) { + if (atomic) + goto upm; + if (non_atomic_pte_lookup(vma, vaddr, write, &paddr, &ps)) + goto inval; + } + if (is_gru_paddr(paddr)) + goto inval; + paddr = paddr & ~((1UL << ps) - 1); + *gpa = uv_soc_phys_ram_to_gpa(paddr); + *pageshift = ps; + return 0; + +inval: + return -1; +upm: + return -2; +} + + /* * Drop a TLB entry into the GRU. The fault is described by info in an TFH. 
* Input: @@ -280,10 +319,8 @@ static int gru_try_dropin(struct gru_thread_state *gts, struct gru_tlb_fault_handle *tfh, unsigned long __user *cb) { - struct mm_struct *mm = gts->ts_mm; - struct vm_area_struct *vma; - int pageshift, asid, write, ret; - unsigned long paddr, gpa, vaddr; + int pageshift = 0, asid, write, ret, atomic = !cb; + unsigned long gpa = 0, vaddr = 0; /* * NOTE: The GRU contains magic hardware that eliminates races between @@ -317,28 +354,19 @@ static int gru_try_dropin(struct gru_thread_state *gts, if (atomic_read(&gts->ts_gms->ms_range_active)) goto failactive; - vma = find_vma(mm, vaddr); - if (!vma) + ret = gru_vtop(gts, vaddr, write, atomic, &gpa, &pageshift); + if (ret == -1) goto failinval; + if (ret == -2) + goto failupm; - /* - * Atomic lookup is faster & usually works even if called in non-atomic - * context. - */ - rmb(); /* Must/check ms_range_active before loading PTEs */ - ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &pageshift); - if (ret) { - if (!cb) + if (!(gts->ts_sizeavail & GRU_SIZEAVAIL(pageshift))) { + gts->ts_sizeavail |= GRU_SIZEAVAIL(pageshift); + if (atomic || !gru_update_cch(gts, 0)) { + gts->ts_force_cch_reload = 1; goto failupm; - if (non_atomic_pte_lookup(vma, vaddr, write, &paddr, - &pageshift)) - goto failinval; + } } - if (is_gru_paddr(paddr)) - goto failinval; - - paddr = paddr & ~((1UL << pageshift) - 1); - gpa = uv_soc_phys_ram_to_gpa(paddr); gru_cb_set_istatus_active(cb); tfh_write_restart(tfh, gpa, GAA_RAM, vaddr, asid, write, GRU_PAGESIZE(pageshift)); @@ -368,6 +396,7 @@ failupm: failfmm: /* FMM state on UPM call */ + gru_flush_cache(tfh); STAT(tlb_dropin_fail_fmm); gru_dbg(grudev, "FAILED fmm tfh: 0x%p, state %d\n", tfh, tfh->state); return 0; @@ -448,6 +477,7 @@ irqreturn_t gru_intr(int irq, void *dev_id) up_read(&gts->ts_mm->mmap_sem); } else { tfh_user_polling_mode(tfh); + STAT(intr_mm_lock_failed); } } return IRQ_HANDLED; @@ -497,10 +527,8 @@ int gru_handle_user_call_os(unsigned long cb) if (!gts) 
return -EINVAL; - if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) { - ret = -EINVAL; + if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) goto exit; - } /* * If force_unload is set, the UPM TLB fault is phony. The task @@ -508,6 +536,20 @@ int gru_handle_user_call_os(unsigned long cb) * unload the context. The task will page fault and assign a new * context. */ + if (gts->ts_tgid_owner == current->tgid && gts->ts_blade >= 0 && + gts->ts_blade != uv_numa_blade_id()) { + STAT(call_os_offnode_reference); + gts->ts_force_unload = 1; + } + + /* + * CCH may contain stale data if ts_force_cch_reload is set. + */ + if (gts->ts_gru && gts->ts_force_cch_reload) { + gru_update_cch(gts, 0); + gts->ts_force_cch_reload = 0; + } + ret = -EAGAIN; cbrnum = thread_cbr_number(gts, ucbnum); if (gts->ts_force_unload) { @@ -541,11 +583,13 @@ int gru_get_exception_detail(unsigned long arg) if (!gts) return -EINVAL; - if (gts->ts_gru) { - ucbnum = get_cb_number((void *)excdet.cb); + ucbnum = get_cb_number((void *)excdet.cb); + if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) { + ret = -EINVAL; + } else if (gts->ts_gru) { cbrnum = thread_cbr_number(gts, ucbnum); cbe = get_cbe_by_index(gts->ts_gru, cbrnum); - prefetchw(cbe); /* Harmless on hardware, required for emulator */ + prefetchw(cbe);/* Harmless on hardware, required for emulator */ excdet.opc = cbe->opccpy; excdet.exopc = cbe->exopccpy; excdet.ecause = cbe->ecause; @@ -567,6 +611,31 @@ int gru_get_exception_detail(unsigned long arg) /* * User request to unload a context. Content is saved for possible reload. 
*/ +static int gru_unload_all_contexts(void) +{ + struct gru_thread_state *gts; + struct gru_state *gru; + int gid, ctxnum; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + foreach_gid(gid) { + gru = GID_TO_GRU(gid); + spin_lock(&gru->gs_lock); + for (ctxnum = 0; ctxnum < GRU_NUM_CCH; ctxnum++) { + gts = gru->gs_gts[ctxnum]; + if (gts && mutex_trylock(&gts->ts_ctxlock)) { + spin_unlock(&gru->gs_lock); + gru_unload_context(gts, 1); + gru_unlock_gts(gts); + spin_lock(&gru->gs_lock); + } + } + spin_unlock(&gru->gs_lock); + } + return 0; +} + int gru_user_unload_context(unsigned long arg) { struct gru_thread_state *gts; @@ -578,6 +647,9 @@ int gru_user_unload_context(unsigned long arg) gru_dbg(grudev, "gseg 0x%lx\n", req.gseg); + if (!req.gseg) + return gru_unload_all_contexts(); + gts = gru_find_lock_gts(req.gseg); if (!gts) return -EINVAL; @@ -609,7 +681,7 @@ int gru_user_flush_tlb(unsigned long arg) if (!gts) return -EINVAL; - gru_flush_tlb_range(gts->ts_gms, req.vaddr, req.vaddr + req.len); + gru_flush_tlb_range(gts->ts_gms, req.vaddr, req.len); gru_unlock_gts(gts); return 0; diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c index c67e4e8bd62c..3e6e42d2f01b 100644 --- a/drivers/misc/sgi-gru/grufile.c +++ b/drivers/misc/sgi-gru/grufile.c @@ -45,7 +45,9 @@ #include <asm/uv/uv_mmrs.h> struct gru_blade_state *gru_base[GRU_MAX_BLADES] __read_mostly; -unsigned long gru_start_paddr, gru_end_paddr __read_mostly; +unsigned long gru_start_paddr __read_mostly; +unsigned long gru_end_paddr __read_mostly; +unsigned int gru_max_gids __read_mostly; struct gru_stats_s gru_stats; /* Guaranteed user available resources on each node */ @@ -101,7 +103,7 @@ static int gru_file_mmap(struct file *file, struct vm_area_struct *vma) return -EPERM; if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) || - vma->vm_end & (GRU_GSEG_PAGESIZE - 1)) + vma->vm_end & (GRU_GSEG_PAGESIZE - 1)) return -EINVAL; vma->vm_flags |= @@ -273,8 +275,11 @@ static void gru_init_chiplet(struct 
gru_state *gru, unsigned long paddr, gru->gs_blade_id = bid; gru->gs_cbr_map = (GRU_CBR_AU == 64) ? ~0 : (1UL << GRU_CBR_AU) - 1; gru->gs_dsr_map = (1UL << GRU_DSR_AU) - 1; + gru->gs_asid_limit = MAX_ASID; gru_tgh_flush_init(gru); - gru_dbg(grudev, "bid %d, nid %d, gru %x, vaddr %p (0x%lx)\n", + if (gru->gs_gid >= gru_max_gids) + gru_max_gids = gru->gs_gid + 1; + gru_dbg(grudev, "bid %d, nid %d, gid %d, vaddr %p (0x%lx)\n", bid, nid, gru->gs_gid, gru->gs_gru_base_vaddr, gru->gs_gru_base_paddr); gru_kservices_init(gru); @@ -295,7 +300,7 @@ static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr) for_each_online_node(nid) { bid = uv_node_to_blade_id(nid); pnode = uv_node_to_pnode(nid); - if (gru_base[bid]) + if (bid < 0 || gru_base[bid]) continue; page = alloc_pages_node(nid, GFP_KERNEL, order); if (!page) @@ -308,11 +313,11 @@ static int gru_init_tables(unsigned long gru_base_paddr, void *gru_base_vaddr) dsrbytes = 0; cbrs = 0; for (gru = gru_base[bid]->bs_grus, chip = 0; - chip < GRU_CHIPLETS_PER_BLADE; + chip < GRU_CHIPLETS_PER_BLADE; chip++, gru++) { paddr = gru_chiplet_paddr(gru_base_paddr, pnode, chip); vaddr = gru_chiplet_vaddr(gru_base_vaddr, pnode, chip); - gru_init_chiplet(gru, paddr, vaddr, bid, nid, chip); + gru_init_chiplet(gru, paddr, vaddr, nid, bid, chip); n = hweight64(gru->gs_cbr_map) * GRU_CBR_AU_SIZE; cbrs = max(cbrs, n); n = hweight64(gru->gs_dsr_map) * GRU_DSR_AU_BYTES; @@ -370,26 +375,26 @@ static int __init gru_init(void) void *gru_start_vaddr; if (!is_uv_system()) - return 0; + return -ENODEV; #if defined CONFIG_IA64 gru_start_paddr = 0xd000000000UL; /* ZZZZZZZZZZZZZZZZZZZ fixme */ #else gru_start_paddr = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR) & 0x7fffffffffffUL; - #endif gru_start_vaddr = __va(gru_start_paddr); - gru_end_paddr = gru_start_paddr + MAX_NUMNODES * GRU_SIZE; + gru_end_paddr = gru_start_paddr + GRU_MAX_BLADES * GRU_SIZE; printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n", gru_start_paddr, 
gru_end_paddr); irq = get_base_irq(); for (chip = 0; chip < GRU_CHIPLETS_PER_BLADE; chip++) { ret = request_irq(irq + chip, gru_intr, 0, id, NULL); - /* TODO: fix irq handling on x86. For now ignore failures because + /* TODO: fix irq handling on x86. For now ignore failure because * interrupts are not required & not yet fully supported */ if (ret) { - printk("!!!WARNING: GRU ignoring request failure!!!\n"); + printk(KERN_WARNING + "!!!WARNING: GRU ignoring request failure!!!\n"); ret = 0; } if (ret) { @@ -435,7 +440,7 @@ exit1: static void __exit gru_exit(void) { - int i, bid; + int i, bid, gid; int order = get_order(sizeof(struct gru_state) * GRU_CHIPLETS_PER_BLADE); @@ -445,6 +450,9 @@ static void __exit gru_exit(void) for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++) free_irq(IRQ_GRU + i, NULL); + foreach_gid(gid) + gru_kservices_exit(GID_TO_GRU(gid)); + for (bid = 0; bid < GRU_MAX_BLADES; bid++) free_pages((unsigned long)gru_base[bid], order); @@ -469,7 +477,11 @@ struct vm_operations_struct gru_vm_ops = { .fault = gru_fault, }; +#ifndef MODULE fs_initcall(gru_init); +#else +module_init(gru_init); +#endif module_exit(gru_exit); module_param(gru_options, ulong, 0644); diff --git a/drivers/misc/sgi-gru/gruhandles.c b/drivers/misc/sgi-gru/gruhandles.c new file mode 100644 index 000000000000..9b7ccb328697 --- /dev/null +++ b/drivers/misc/sgi-gru/gruhandles.c @@ -0,0 +1,183 @@ +/* + * GRU KERNEL MCS INSTRUCTIONS + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include "gru.h" +#include "grulib.h" +#include "grutables.h" + +/* 10 sec */ +#ifdef CONFIG_IA64 +#include <asm/processor.h> +#define GRU_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10) +#else +#include <asm/tsc.h> +#define GRU_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) +#endif + +/* Extract the status field from a kernel handle */ +#define GET_MSEG_HANDLE_STATUS(h) (((*(unsigned long *)(h)) >> 16) & 3) + +struct mcs_op_statistic mcs_op_statistics[mcsop_last]; + +static void update_mcs_stats(enum mcs_op op, unsigned long clks) +{ + atomic_long_inc(&mcs_op_statistics[op].count); + atomic_long_add(clks, &mcs_op_statistics[op].total); + if (mcs_op_statistics[op].max < clks) + mcs_op_statistics[op].max = clks; +} + +static void start_instruction(void *h) +{ + unsigned long *w0 = h; + + wmb(); /* setting CMD bit must be last */ + *w0 = *w0 | 1; + gru_flush_cache(h); +} + +static int wait_instruction_complete(void *h, enum mcs_op opc) +{ + int status; + cycles_t start_time = get_cycles(); + + while (1) { + cpu_relax(); + status = GET_MSEG_HANDLE_STATUS(h); + if (status != CCHSTATUS_ACTIVE) + break; + if (GRU_OPERATION_TIMEOUT < (get_cycles() - start_time)) + panic("GRU %p is malfunctioning\n", h); + } + if (gru_options & OPT_STATS) + update_mcs_stats(opc, get_cycles() - start_time); + return status; +} + +int cch_allocate(struct gru_context_configuration_handle *cch, + int asidval, int sizeavail, unsigned long cbrmap, + unsigned long dsrmap) +{ + int i; + + for (i = 0; i < 8; i++) { + cch->asid[i] = (asidval++); + cch->sizeavail[i] = sizeavail; + } + cch->dsr_allocation_map = dsrmap; + cch->cbr_allocation_map = cbrmap; + cch->opc = CCHOP_ALLOCATE; + 
start_instruction(cch); + return wait_instruction_complete(cch, cchop_allocate); +} + +int cch_start(struct gru_context_configuration_handle *cch) +{ + cch->opc = CCHOP_START; + start_instruction(cch); + return wait_instruction_complete(cch, cchop_start); +} + +int cch_interrupt(struct gru_context_configuration_handle *cch) +{ + cch->opc = CCHOP_INTERRUPT; + start_instruction(cch); + return wait_instruction_complete(cch, cchop_interrupt); +} + +int cch_deallocate(struct gru_context_configuration_handle *cch) +{ + cch->opc = CCHOP_DEALLOCATE; + start_instruction(cch); + return wait_instruction_complete(cch, cchop_deallocate); +} + +int cch_interrupt_sync(struct gru_context_configuration_handle + *cch) +{ + cch->opc = CCHOP_INTERRUPT_SYNC; + start_instruction(cch); + return wait_instruction_complete(cch, cchop_interrupt_sync); +} + +int tgh_invalidate(struct gru_tlb_global_handle *tgh, + unsigned long vaddr, unsigned long vaddrmask, + int asid, int pagesize, int global, int n, + unsigned short ctxbitmap) +{ + tgh->vaddr = vaddr; + tgh->asid = asid; + tgh->pagesize = pagesize; + tgh->n = n; + tgh->global = global; + tgh->vaddrmask = vaddrmask; + tgh->ctxbitmap = ctxbitmap; + tgh->opc = TGHOP_TLBINV; + start_instruction(tgh); + return wait_instruction_complete(tgh, tghop_invalidate); +} + +void tfh_write_only(struct gru_tlb_fault_handle *tfh, + unsigned long pfn, unsigned long vaddr, + int asid, int dirty, int pagesize) +{ + tfh->fillasid = asid; + tfh->fillvaddr = vaddr; + tfh->pfn = pfn; + tfh->dirty = dirty; + tfh->pagesize = pagesize; + tfh->opc = TFHOP_WRITE_ONLY; + start_instruction(tfh); +} + +void tfh_write_restart(struct gru_tlb_fault_handle *tfh, + unsigned long paddr, int gaa, + unsigned long vaddr, int asid, int dirty, + int pagesize) +{ + tfh->fillasid = asid; + tfh->fillvaddr = vaddr; + tfh->pfn = paddr >> GRU_PADDR_SHIFT; + tfh->gaa = gaa; + tfh->dirty = dirty; + tfh->pagesize = pagesize; + tfh->opc = TFHOP_WRITE_RESTART; + start_instruction(tfh); +} + 
+void tfh_restart(struct gru_tlb_fault_handle *tfh) +{ + tfh->opc = TFHOP_RESTART; + start_instruction(tfh); +} + +void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh) +{ + tfh->opc = TFHOP_USER_POLLING_MODE; + start_instruction(tfh); +} + +void tfh_exception(struct gru_tlb_fault_handle *tfh) +{ + tfh->opc = TFHOP_EXCEPTION; + start_instruction(tfh); +} + diff --git a/drivers/misc/sgi-gru/gruhandles.h b/drivers/misc/sgi-gru/gruhandles.h index b63018d60fe1..1ed74d7508c8 100644 --- a/drivers/misc/sgi-gru/gruhandles.h +++ b/drivers/misc/sgi-gru/gruhandles.h @@ -489,170 +489,28 @@ enum gru_cbr_state { * 64m 26 8 * ... */ -#define GRU_PAGESIZE(sh) ((((sh) > 20 ? (sh) + 2: (sh)) >> 1) - 6) +#define GRU_PAGESIZE(sh) ((((sh) > 20 ? (sh) + 2 : (sh)) >> 1) - 6) #define GRU_SIZEAVAIL(sh) (1UL << GRU_PAGESIZE(sh)) /* minimum TLB purge count to ensure a full purge */ #define GRUMAXINVAL 1024UL - -/* Extract the status field from a kernel handle */ -#define GET_MSEG_HANDLE_STATUS(h) (((*(unsigned long *)(h)) >> 16) & 3) - -static inline void start_instruction(void *h) -{ - unsigned long *w0 = h; - - wmb(); /* setting CMD bit must be last */ - *w0 = *w0 | 1; - gru_flush_cache(h); -} - -static inline int wait_instruction_complete(void *h) -{ - int status; - - do { - cpu_relax(); - barrier(); - status = GET_MSEG_HANDLE_STATUS(h); - } while (status == CCHSTATUS_ACTIVE); - return status; -} - -#if defined CONFIG_IA64 -static inline void cch_allocate_set_asids( - struct gru_context_configuration_handle *cch, int asidval) -{ - int i; - - for (i = 0; i <= RGN_HPAGE; i++) { /* assume HPAGE is last region */ - cch->asid[i] = (asidval++); -#if 0 - /* ZZZ hugepages not supported yet */ - if (i == RGN_HPAGE) - cch->sizeavail[i] = GRU_SIZEAVAIL(hpage_shift); - else -#endif - cch->sizeavail[i] = GRU_SIZEAVAIL(PAGE_SHIFT); - } -} -#elif defined CONFIG_X86_64 -static inline void cch_allocate_set_asids( - struct gru_context_configuration_handle *cch, int asidval) -{ - int i; - - for (i = 
0; i < 8; i++) { - cch->asid[i] = asidval++; - cch->sizeavail[i] = GRU_SIZEAVAIL(PAGE_SHIFT) | - GRU_SIZEAVAIL(21); - } -} -#endif - -static inline int cch_allocate(struct gru_context_configuration_handle *cch, - int asidval, unsigned long cbrmap, - unsigned long dsrmap) -{ - cch_allocate_set_asids(cch, asidval); - cch->dsr_allocation_map = dsrmap; - cch->cbr_allocation_map = cbrmap; - cch->opc = CCHOP_ALLOCATE; - start_instruction(cch); - return wait_instruction_complete(cch); -} - -static inline int cch_start(struct gru_context_configuration_handle *cch) -{ - cch->opc = CCHOP_START; - start_instruction(cch); - return wait_instruction_complete(cch); -} - -static inline int cch_interrupt(struct gru_context_configuration_handle *cch) -{ - cch->opc = CCHOP_INTERRUPT; - start_instruction(cch); - return wait_instruction_complete(cch); -} - -static inline int cch_deallocate(struct gru_context_configuration_handle *cch) -{ - cch->opc = CCHOP_DEALLOCATE; - start_instruction(cch); - return wait_instruction_complete(cch); -} - -static inline int cch_interrupt_sync(struct gru_context_configuration_handle - *cch) -{ - cch->opc = CCHOP_INTERRUPT_SYNC; - start_instruction(cch); - return wait_instruction_complete(cch); -} - -static inline int tgh_invalidate(struct gru_tlb_global_handle *tgh, - unsigned long vaddr, unsigned long vaddrmask, - int asid, int pagesize, int global, int n, - unsigned short ctxbitmap) -{ - tgh->vaddr = vaddr; - tgh->asid = asid; - tgh->pagesize = pagesize; - tgh->n = n; - tgh->global = global; - tgh->vaddrmask = vaddrmask; - tgh->ctxbitmap = ctxbitmap; - tgh->opc = TGHOP_TLBINV; - start_instruction(tgh); - return wait_instruction_complete(tgh); -} - -static inline void tfh_write_only(struct gru_tlb_fault_handle *tfh, - unsigned long pfn, unsigned long vaddr, - int asid, int dirty, int pagesize) -{ - tfh->fillasid = asid; - tfh->fillvaddr = vaddr; - tfh->pfn = pfn; - tfh->dirty = dirty; - tfh->pagesize = pagesize; - tfh->opc = TFHOP_WRITE_ONLY; - 
start_instruction(tfh); -} - -static inline void tfh_write_restart(struct gru_tlb_fault_handle *tfh, - unsigned long paddr, int gaa, - unsigned long vaddr, int asid, int dirty, - int pagesize) -{ - tfh->fillasid = asid; - tfh->fillvaddr = vaddr; - tfh->pfn = paddr >> GRU_PADDR_SHIFT; - tfh->gaa = gaa; - tfh->dirty = dirty; - tfh->pagesize = pagesize; - tfh->opc = TFHOP_WRITE_RESTART; - start_instruction(tfh); -} - -static inline void tfh_restart(struct gru_tlb_fault_handle *tfh) -{ - tfh->opc = TFHOP_RESTART; - start_instruction(tfh); -} - -static inline void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh) -{ - tfh->opc = TFHOP_USER_POLLING_MODE; - start_instruction(tfh); -} - -static inline void tfh_exception(struct gru_tlb_fault_handle *tfh) -{ - tfh->opc = TFHOP_EXCEPTION; - start_instruction(tfh); -} +int cch_allocate(struct gru_context_configuration_handle *cch, + int asidval, int sizeavail, unsigned long cbrmap, unsigned long dsrmap); + +int cch_start(struct gru_context_configuration_handle *cch); +int cch_interrupt(struct gru_context_configuration_handle *cch); +int cch_deallocate(struct gru_context_configuration_handle *cch); +int cch_interrupt_sync(struct gru_context_configuration_handle *cch); +int tgh_invalidate(struct gru_tlb_global_handle *tgh, unsigned long vaddr, + unsigned long vaddrmask, int asid, int pagesize, int global, int n, + unsigned short ctxbitmap); +void tfh_write_only(struct gru_tlb_fault_handle *tfh, unsigned long pfn, + unsigned long vaddr, int asid, int dirty, int pagesize); +void tfh_write_restart(struct gru_tlb_fault_handle *tfh, unsigned long paddr, + int gaa, unsigned long vaddr, int asid, int dirty, int pagesize); +void tfh_restart(struct gru_tlb_fault_handle *tfh); +void tfh_user_polling_mode(struct gru_tlb_fault_handle *tfh); +void tfh_exception(struct gru_tlb_fault_handle *tfh); #endif /* __GRUHANDLES_H__ */ diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c index 
880c55dfb662..d8bd7d84a7cf 100644 --- a/drivers/misc/sgi-gru/grukservices.c +++ b/drivers/misc/sgi-gru/grukservices.c @@ -52,8 +52,10 @@ */ /* Blade percpu resources PERMANENTLY reserved for kernel use */ -#define GRU_NUM_KERNEL_CBR 1 +#define GRU_NUM_KERNEL_CBR 1 #define GRU_NUM_KERNEL_DSR_BYTES 256 +#define GRU_NUM_KERNEL_DSR_CL (GRU_NUM_KERNEL_DSR_BYTES / \ + GRU_CACHE_LINE_BYTES) #define KERNEL_CTXNUM 15 /* GRU instruction attributes for all instructions */ @@ -94,7 +96,6 @@ struct message_header { char fill; }; -#define QLINES(mq) ((mq) + offsetof(struct message_queue, qlines)) #define HSTATUS(mq, h) ((mq) + offsetof(struct message_queue, hstatus[h])) static int gru_get_cpu_resources(int dsr_bytes, void **cb, void **dsr) @@ -122,7 +123,7 @@ int gru_get_cb_exception_detail(void *cb, struct gru_control_block_extended *cbe; cbe = get_cbe(GRUBASE(cb), get_cb_number(cb)); - prefetchw(cbe); /* Harmless on hardware, required for emulator */ + prefetchw(cbe); /* Harmless on hardware, required for emulator */ excdet->opc = cbe->opccpy; excdet->exopc = cbe->exopccpy; excdet->ecause = cbe->ecause; @@ -250,7 +251,8 @@ static inline void restore_present2(void *p, int val) * Create a message queue. * qlines - message queue size in cache lines. Includes 2-line header. 
*/ -int gru_create_message_queue(void *p, unsigned int bytes) +int gru_create_message_queue(struct gru_message_queue_desc *mqd, + void *p, unsigned int bytes, int nasid, int vector, int apicid) { struct message_queue *mq = p; unsigned int qlines; @@ -265,6 +267,12 @@ int gru_create_message_queue(void *p, unsigned int bytes) mq->hstatus[0] = 0; mq->hstatus[1] = 1; mq->head = gru_mesq_head(2, qlines / 2 + 1); + mqd->mq = mq; + mqd->mq_gpa = uv_gpa(mq); + mqd->qlines = qlines; + mqd->interrupt_pnode = UV_NASID_TO_PNODE(nasid); + mqd->interrupt_vector = vector; + mqd->interrupt_apicid = apicid; return 0; } EXPORT_SYMBOL_GPL(gru_create_message_queue); @@ -277,8 +285,8 @@ EXPORT_SYMBOL_GPL(gru_create_message_queue); * -1 - if mesq sent successfully but queue not full * >0 - unexpected error. MQE_xxx returned */ -static int send_noop_message(void *cb, - unsigned long mq, void *mesg) +static int send_noop_message(void *cb, struct gru_message_queue_desc *mqd, + void *mesg) { const struct message_header noop_header = { .present = MQS_NOOP, .lines = 1}; @@ -289,7 +297,7 @@ static int send_noop_message(void *cb, STAT(mesq_noop); save_mhdr = *mhdr; *mhdr = noop_header; - gru_mesq(cb, mq, gru_get_tri(mhdr), 1, IMA); + gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), 1, IMA); ret = gru_wait(cb); if (ret) { @@ -313,7 +321,7 @@ static int send_noop_message(void *cb, break; case CBSS_PUT_NACKED: STAT(mesq_noop_put_nacked); - m = mq + (gru_get_amo_value_head(cb) << 6); + m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, 1, 1, IMA); if (gru_wait(cb) == CBS_IDLE) @@ -333,30 +341,20 @@ static int send_noop_message(void *cb, /* * Handle a gru_mesq full. 
*/ -static int send_message_queue_full(void *cb, - unsigned long mq, void *mesg, int lines) +static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd, + void *mesg, int lines) { union gru_mesqhead mqh; unsigned int limit, head; unsigned long avalue; - int half, qlines, save; + int half, qlines; /* Determine if switching to first/second half of q */ avalue = gru_get_amo_value(cb); head = gru_get_amo_value_head(cb); limit = gru_get_amo_value_limit(cb); - /* - * Fetch "qlines" from the queue header. Since the queue may be - * in memory that can't be accessed using socket addresses, use - * the GRU to access the data. Use DSR space from the message. - */ - save = *(int *)mesg; - gru_vload(cb, QLINES(mq), gru_get_tri(mesg), XTYPE_W, 1, 1, IMA); - if (gru_wait(cb) != CBS_IDLE) - goto cberr; - qlines = *(int *)mesg; - *(int *)mesg = save; + qlines = mqd->qlines; half = (limit != qlines); if (half) @@ -365,7 +363,7 @@ static int send_message_queue_full(void *cb, mqh = gru_mesq_head(2, qlines / 2 + 1); /* Try to get lock for switching head pointer */ - gru_gamir(cb, EOP_IR_CLR, HSTATUS(mq, half), XTYPE_DW, IMA); + gru_gamir(cb, EOP_IR_CLR, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA); if (gru_wait(cb) != CBS_IDLE) goto cberr; if (!gru_get_amo_value(cb)) { @@ -375,8 +373,8 @@ static int send_message_queue_full(void *cb, /* Got the lock. Send optional NOP if queue not full, */ if (head != limit) { - if (send_noop_message(cb, mq, mesg)) { - gru_gamir(cb, EOP_IR_INC, HSTATUS(mq, half), + if (send_noop_message(cb, mqd, mesg)) { + gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, IMA); if (gru_wait(cb) != CBS_IDLE) goto cberr; @@ -387,14 +385,16 @@ static int send_message_queue_full(void *cb, } /* Then flip queuehead to other half of queue. 
*/ - gru_gamer(cb, EOP_ERR_CSWAP, mq, XTYPE_DW, mqh.val, avalue, IMA); + gru_gamer(cb, EOP_ERR_CSWAP, mqd->mq_gpa, XTYPE_DW, mqh.val, avalue, + IMA); if (gru_wait(cb) != CBS_IDLE) goto cberr; /* If not successfully in swapping queue head, clear the hstatus lock */ if (gru_get_amo_value(cb) != avalue) { STAT(mesq_qf_switch_head_failed); - gru_gamir(cb, EOP_IR_INC, HSTATUS(mq, half), XTYPE_DW, IMA); + gru_gamir(cb, EOP_IR_INC, HSTATUS(mqd->mq_gpa, half), XTYPE_DW, + IMA); if (gru_wait(cb) != CBS_IDLE) goto cberr; } @@ -404,15 +404,25 @@ cberr: return MQE_UNEXPECTED_CB_ERR; } +/* + * Send a cross-partition interrupt to the SSI that contains the target + * message queue. Normally, the interrupt is automatically delivered by hardware + * but some error conditions require explicit delivery. + */ +static void send_message_queue_interrupt(struct gru_message_queue_desc *mqd) +{ + if (mqd->interrupt_vector) + uv_hub_send_ipi(mqd->interrupt_pnode, mqd->interrupt_apicid, + mqd->interrupt_vector); +} + /* * Handle a gru_mesq failure. Some of these failures are software recoverable * or retryable. 
*/ -static int send_message_failure(void *cb, - unsigned long mq, - void *mesg, - int lines) +static int send_message_failure(void *cb, struct gru_message_queue_desc *mqd, + void *mesg, int lines) { int substatus, ret = 0; unsigned long m; @@ -429,7 +439,7 @@ static int send_message_failure(void *cb, break; case CBSS_QLIMIT_REACHED: STAT(mesq_send_qlimit_reached); - ret = send_message_queue_full(cb, mq, mesg, lines); + ret = send_message_queue_full(cb, mqd, mesg, lines); break; case CBSS_AMO_NACKED: STAT(mesq_send_amo_nacked); @@ -437,12 +447,14 @@ static int send_message_failure(void *cb, break; case CBSS_PUT_NACKED: STAT(mesq_send_put_nacked); - m =mq + (gru_get_amo_value_head(cb) << 6); + m = mqd->mq_gpa + (gru_get_amo_value_head(cb) << 6); gru_vstore(cb, m, gru_get_tri(mesg), XTYPE_CL, lines, 1, IMA); - if (gru_wait(cb) == CBS_IDLE) + if (gru_wait(cb) == CBS_IDLE) { ret = MQE_OK; - else + send_message_queue_interrupt(mqd); + } else { ret = MQE_UNEXPECTED_CB_ERR; + } break; default: BUG(); @@ -452,12 +464,12 @@ static int send_message_failure(void *cb, /* * Send a message to a message queue - * cb GRU control block to use to send message - * mq message queue + * mqd message queue descriptor * mesg message. 
ust be vaddr within a GSEG * bytes message size (<= 2 CL) */ -int gru_send_message_gpa(unsigned long mq, void *mesg, unsigned int bytes) +int gru_send_message_gpa(struct gru_message_queue_desc *mqd, void *mesg, + unsigned int bytes) { struct message_header *mhdr; void *cb; @@ -481,10 +493,10 @@ int gru_send_message_gpa(unsigned long mq, void *mesg, unsigned int bytes) do { ret = MQE_OK; - gru_mesq(cb, mq, gru_get_tri(mhdr), clines, IMA); + gru_mesq(cb, mqd->mq_gpa, gru_get_tri(mhdr), clines, IMA); istatus = gru_wait(cb); if (istatus != CBS_IDLE) - ret = send_message_failure(cb, mq, dsr, clines); + ret = send_message_failure(cb, mqd, dsr, clines); } while (ret == MQIE_AGAIN); gru_free_cpu_resources(cb, dsr); @@ -497,9 +509,9 @@ EXPORT_SYMBOL_GPL(gru_send_message_gpa); /* * Advance the receive pointer for the queue to the next message. */ -void gru_free_message(void *rmq, void *mesg) +void gru_free_message(struct gru_message_queue_desc *mqd, void *mesg) { - struct message_queue *mq = rmq; + struct message_queue *mq = mqd->mq; struct message_header *mhdr = mq->next; void *next, *pnext; int half = -1; @@ -529,16 +541,16 @@ EXPORT_SYMBOL_GPL(gru_free_message); * present. User must call next_message() to move to next message. 
* rmq message queue */ -void *gru_get_next_message(void *rmq) +void *gru_get_next_message(struct gru_message_queue_desc *mqd) { - struct message_queue *mq = rmq; + struct message_queue *mq = mqd->mq; struct message_header *mhdr = mq->next; int present = mhdr->present; /* skip NOOP messages */ STAT(mesq_receive); while (present == MQS_NOOP) { - gru_free_message(rmq, mhdr); + gru_free_message(mqd, mhdr); mhdr = mq->next; present = mhdr->present; } @@ -576,7 +588,7 @@ int gru_copy_gpa(unsigned long dest_gpa, unsigned long src_gpa, if (gru_get_cpu_resources(GRU_NUM_KERNEL_DSR_BYTES, &cb, &dsr)) return MQE_BUG_NO_RESOURCES; gru_bcopy(cb, src_gpa, dest_gpa, gru_get_tri(dsr), - XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_BYTES, IMA); + XTYPE_B, bytes, GRU_NUM_KERNEL_DSR_CL, IMA); ret = gru_wait(cb); gru_free_cpu_resources(cb, dsr); return ret; @@ -611,7 +623,7 @@ static int quicktest(struct gru_state *gru) if (word0 != word1 || word0 != MAGIC) { printk - ("GRU quicktest err: gru %d, found 0x%lx, expected 0x%lx\n", + ("GRU quicktest err: gid %d, found 0x%lx, expected 0x%lx\n", gru->gs_gid, word1, MAGIC); BUG(); /* ZZZ should not be fatal */ } @@ -660,15 +672,15 @@ int gru_kservices_init(struct gru_state *gru) cch->tlb_int_enable = 0; cch->tfm_done_bit_enable = 0; cch->unmap_enable = 1; - err = cch_allocate(cch, 0, cbr_map, dsr_map); + err = cch_allocate(cch, 0, 0, cbr_map, dsr_map); if (err) { gru_dbg(grudev, - "Unable to allocate kernel CCH: gru %d, err %d\n", + "Unable to allocate kernel CCH: gid %d, err %d\n", gru->gs_gid, err); BUG(); } if (cch_start(cch)) { - gru_dbg(grudev, "Unable to start kernel CCH: gru %d, err %d\n", + gru_dbg(grudev, "Unable to start kernel CCH: gid %d, err %d\n", gru->gs_gid, err); BUG(); } @@ -678,3 +690,22 @@ int gru_kservices_init(struct gru_state *gru) quicktest(gru); return 0; } + +void gru_kservices_exit(struct gru_state *gru) +{ + struct gru_context_configuration_handle *cch; + struct gru_blade_state *bs; + + bs = gru->gs_blade; + if (gru != 
&bs->bs_grus[1]) + return; + + cch = get_cch(gru->gs_gru_base_vaddr, KERNEL_CTXNUM); + lock_cch_handle(cch); + if (cch_interrupt_sync(cch)) + BUG(); + if (cch_deallocate(cch)) + BUG(); + unlock_cch_handle(cch); +} + diff --git a/drivers/misc/sgi-gru/grukservices.h b/drivers/misc/sgi-gru/grukservices.h index eb17e0a3ac61..747ed315d56f 100644 --- a/drivers/misc/sgi-gru/grukservices.h +++ b/drivers/misc/sgi-gru/grukservices.h @@ -41,6 +41,15 @@ * - gru_create_message_queue() needs interrupt vector info */ +struct gru_message_queue_desc { + void *mq; /* message queue vaddress */ + unsigned long mq_gpa; /* global address of mq */ + int qlines; /* queue size in CL */ + int interrupt_vector; /* interrupt vector */ + int interrupt_pnode; /* pnode for interrupt */ + int interrupt_apicid; /* lapicid for interrupt */ +}; + /* * Initialize a user allocated chunk of memory to be used as * a message queue. The caller must ensure that the queue is @@ -51,14 +60,19 @@ * to manage the queue. * * Input: - * p pointer to user allocated memory. + * mqd pointer to message queue descriptor + * p pointer to user allocated mesq memory. * bytes size of message queue in bytes + * vector interrupt vector (zero if no interrupts) + * nasid nasid of blade where interrupt is delivered + * apicid apicid of cpu for interrupt * * Errors: * 0 OK * >0 error */ -extern int gru_create_message_queue(void *p, unsigned int bytes); +extern int gru_create_message_queue(struct gru_message_queue_desc *mqd, + void *p, unsigned int bytes, int nasid, int vector, int apicid); /* * Send a message to a message queue. @@ -68,7 +82,7 @@ extern int gru_create_message_queue(void *p, unsigned int bytes); * * * Input: - * xmq message queue - must be a UV global physical address + * mqd pointer to message queue descriptor * mesg pointer to message. 
Must be 64-bit aligned * bytes size of message in bytes * @@ -77,8 +91,8 @@ extern int gru_create_message_queue(void *p, unsigned int bytes); * >0 Send failure - see error codes below * */ -extern int gru_send_message_gpa(unsigned long mq_gpa, void *mesg, - unsigned int bytes); +extern int gru_send_message_gpa(struct gru_message_queue_desc *mqd, + void *mesg, unsigned int bytes); /* Status values for gru_send_message() */ #define MQE_OK 0 /* message sent successfully */ @@ -94,10 +108,11 @@ extern int gru_send_message_gpa(unsigned long mq_gpa, void *mesg, * API extensions may allow for out-of-order freeing. * * Input - * mq message queue + * mqd pointer to message queue descriptor * mesq message being freed */ -extern void gru_free_message(void *mq, void *mesq); +extern void gru_free_message(struct gru_message_queue_desc *mqd, + void *mesq); /* * Get next message from message queue. Returns pointer to @@ -106,13 +121,13 @@ extern void gru_free_message(void *mq, void *mesq); * in order to move the queue pointers to next message. * * Input - * mq message queue + * mqd pointer to message queue descriptor * * Output: * p pointer to message * NULL no message available */ -extern void *gru_get_next_message(void *mq); +extern void *gru_get_next_message(struct gru_message_queue_desc *mqd); /* diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c index 3d2fc216bae5..ec3f7a17d221 100644 --- a/drivers/misc/sgi-gru/grumain.c +++ b/drivers/misc/sgi-gru/grumain.c @@ -76,10 +76,9 @@ int gru_cpu_fault_map_id(void) /* Hit the asid limit. 
Start over */ static int gru_wrap_asid(struct gru_state *gru) { - gru_dbg(grudev, "gru %p\n", gru); + gru_dbg(grudev, "gid %d\n", gru->gs_gid); STAT(asid_wrap); gru->gs_asid_gen++; - gru_flush_all_tlb(gru); return MIN_ASID; } @@ -88,19 +87,21 @@ static int gru_reset_asid_limit(struct gru_state *gru, int asid) { int i, gid, inuse_asid, limit; - gru_dbg(grudev, "gru %p, asid 0x%x\n", gru, asid); + gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid); STAT(asid_next); limit = MAX_ASID; if (asid >= limit) asid = gru_wrap_asid(gru); + gru_flush_all_tlb(gru); gid = gru->gs_gid; again: for (i = 0; i < GRU_NUM_CCH; i++) { if (!gru->gs_gts[i]) continue; inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid; - gru_dbg(grudev, "gru %p, inuse_asid 0x%x, cxtnum %d, gts %p\n", - gru, inuse_asid, i, gru->gs_gts[i]); + gru_dbg(grudev, "gid %d, gts %p, gms %p, inuse 0x%x, cxt %d\n", + gru->gs_gid, gru->gs_gts[i], gru->gs_gts[i]->ts_gms, + inuse_asid, i); if (inuse_asid == asid) { asid += ASID_INC; if (asid >= limit) { @@ -120,8 +121,8 @@ again: } gru->gs_asid_limit = limit; gru->gs_asid = asid; - gru_dbg(grudev, "gru %p, new asid 0x%x, new_limit 0x%x\n", gru, asid, - limit); + gru_dbg(grudev, "gid %d, new asid 0x%x, new_limit 0x%x\n", gru->gs_gid, + asid, limit); return asid; } @@ -130,14 +131,12 @@ static int gru_assign_asid(struct gru_state *gru) { int asid; - spin_lock(&gru->gs_asid_lock); gru->gs_asid += ASID_INC; asid = gru->gs_asid; if (asid >= gru->gs_asid_limit) asid = gru_reset_asid_limit(gru, asid); - spin_unlock(&gru->gs_asid_lock); - gru_dbg(grudev, "gru %p, asid 0x%x\n", gru, asid); + gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid); return asid; } @@ -215,17 +214,20 @@ static int check_gru_resources(struct gru_state *gru, int cbr_au_count, * TLB manangment requires tracking all GRU chiplets that have loaded a GSEG * context. 
*/ -static int gru_load_mm_tracker(struct gru_state *gru, struct gru_mm_struct *gms, - int ctxnum) +static int gru_load_mm_tracker(struct gru_state *gru, + struct gru_thread_state *gts) { + struct gru_mm_struct *gms = gts->ts_gms; struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid]; - unsigned short ctxbitmap = (1 << ctxnum); + unsigned short ctxbitmap = (1 << gts->ts_ctxnum); int asid; spin_lock(&gms->ms_asid_lock); asid = asids->mt_asid; - if (asid == 0 || asids->mt_asid_gen != gru->gs_asid_gen) { + spin_lock(&gru->gs_asid_lock); + if (asid == 0 || (asids->mt_ctxbitmap == 0 && asids->mt_asid_gen != + gru->gs_asid_gen)) { asid = gru_assign_asid(gru); asids->mt_asid = asid; asids->mt_asid_gen = gru->gs_asid_gen; @@ -233,6 +235,7 @@ static int gru_load_mm_tracker(struct gru_state *gru, struct gru_mm_struct *gms, } else { STAT(asid_reuse); } + spin_unlock(&gru->gs_asid_lock); BUG_ON(asids->mt_ctxbitmap & ctxbitmap); asids->mt_ctxbitmap |= ctxbitmap; @@ -241,24 +244,28 @@ static int gru_load_mm_tracker(struct gru_state *gru, struct gru_mm_struct *gms, spin_unlock(&gms->ms_asid_lock); gru_dbg(grudev, - "gru %x, gms %p, ctxnum 0x%d, asid 0x%x, asidmap 0x%lx\n", - gru->gs_gid, gms, ctxnum, asid, gms->ms_asidmap[0]); + "gid %d, gts %p, gms %p, ctxnum %d, asid 0x%x, asidmap 0x%lx\n", + gru->gs_gid, gts, gms, gts->ts_ctxnum, asid, + gms->ms_asidmap[0]); return asid; } static void gru_unload_mm_tracker(struct gru_state *gru, - struct gru_mm_struct *gms, int ctxnum) + struct gru_thread_state *gts) { + struct gru_mm_struct *gms = gts->ts_gms; struct gru_mm_tracker *asids; unsigned short ctxbitmap; asids = &gms->ms_asids[gru->gs_gid]; - ctxbitmap = (1 << ctxnum); + ctxbitmap = (1 << gts->ts_ctxnum); spin_lock(&gms->ms_asid_lock); + spin_lock(&gru->gs_asid_lock); BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap); asids->mt_ctxbitmap ^= ctxbitmap; - gru_dbg(grudev, "gru %x, gms %p, ctxnum 0x%d, asidmap 0x%lx\n", - gru->gs_gid, gms, ctxnum, gms->ms_asidmap[0]); + 
gru_dbg(grudev, "gid %d, gts %p, gms %p, ctxnum 0x%d, asidmap 0x%lx\n", + gru->gs_gid, gts, gms, gts->ts_ctxnum, gms->ms_asidmap[0]); + spin_unlock(&gru->gs_asid_lock); spin_unlock(&gms->ms_asid_lock); } @@ -319,6 +326,7 @@ static struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma, gts->ts_vma = vma; gts->ts_tlb_int_select = -1; gts->ts_gms = gru_register_mmu_notifier(); + gts->ts_sizeavail = GRU_SIZEAVAIL(PAGE_SHIFT); if (!gts->ts_gms) goto err; @@ -399,7 +407,7 @@ static void gru_free_gru_context(struct gru_thread_state *gts) struct gru_state *gru; gru = gts->ts_gru; - gru_dbg(grudev, "gts %p, gru %p\n", gts, gru); + gru_dbg(grudev, "gts %p, gid %d\n", gts, gru->gs_gid); spin_lock(&gru->gs_lock); gru->gs_gts[gts->ts_ctxnum] = NULL; @@ -408,6 +416,7 @@ static void gru_free_gru_context(struct gru_thread_state *gts) __clear_bit(gts->ts_ctxnum, &gru->gs_context_map); gts->ts_ctxnum = NULLCTX; gts->ts_gru = NULL; + gts->ts_blade = -1; spin_unlock(&gru->gs_lock); gts_drop(gts); @@ -432,8 +441,8 @@ static inline long gru_copy_handle(void *d, void *s) return GRU_HANDLE_BYTES; } -static void gru_prefetch_context(void *gseg, void *cb, void *cbe, unsigned long cbrmap, - unsigned long length) +static void gru_prefetch_context(void *gseg, void *cb, void *cbe, + unsigned long cbrmap, unsigned long length) { int i, scr; @@ -500,12 +509,12 @@ void gru_unload_context(struct gru_thread_state *gts, int savestate) zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE); cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); + gru_dbg(grudev, "gts %p\n", gts); lock_cch_handle(cch); if (cch_interrupt_sync(cch)) BUG(); - gru_dbg(grudev, "gts %p\n", gts); - gru_unload_mm_tracker(gru, gts->ts_gms, gts->ts_ctxnum); + gru_unload_mm_tracker(gru, gts); if (savestate) gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum, gts->ts_cbr_map, @@ -534,7 +543,7 @@ static void gru_load_context(struct gru_thread_state *gts) cch = get_cch(gru->gs_gru_base_vaddr, ctxnum); 
lock_cch_handle(cch); - asid = gru_load_mm_tracker(gru, gts->ts_gms, gts->ts_ctxnum); + asid = gru_load_mm_tracker(gru, gts); cch->tfm_fault_bit_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR); @@ -544,7 +553,8 @@ static void gru_load_context(struct gru_thread_state *gts) cch->tlb_int_select = gts->ts_tlb_int_select; } cch->tfm_done_bit_enable = 0; - err = cch_allocate(cch, asid, gts->ts_cbr_map, gts->ts_dsr_map); + err = cch_allocate(cch, asid, gts->ts_sizeavail, gts->ts_cbr_map, + gts->ts_dsr_map); if (err) { gru_dbg(grudev, "err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n", @@ -565,11 +575,12 @@ static void gru_load_context(struct gru_thread_state *gts) /* * Update fields in an active CCH: * - retarget interrupts on local blade + * - update sizeavail mask * - force a delayed context unload by clearing the CCH asids. This * forces TLB misses for new GRU instructions. The context is unloaded * when the next TLB miss occurs. */ -static int gru_update_cch(struct gru_thread_state *gts, int int_select) +int gru_update_cch(struct gru_thread_state *gts, int force_unload) { struct gru_context_configuration_handle *cch; struct gru_state *gru = gts->ts_gru; @@ -583,9 +594,11 @@ static int gru_update_cch(struct gru_thread_state *gts, int int_select) goto exit; if (cch_interrupt(cch)) BUG(); - if (int_select >= 0) { - gts->ts_tlb_int_select = int_select; - cch->tlb_int_select = int_select; + if (!force_unload) { + for (i = 0; i < 8; i++) + cch->sizeavail[i] = gts->ts_sizeavail; + gts->ts_tlb_int_select = gru_cpu_fault_map_id(); + cch->tlb_int_select = gru_cpu_fault_map_id(); } else { for (i = 0; i < 8; i++) cch->asid[i] = 0; @@ -617,7 +630,7 @@ static int gru_retarget_intr(struct gru_thread_state *gts) gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select, gru_cpu_fault_map_id()); - return gru_update_cch(gts, gru_cpu_fault_map_id()); + return gru_update_cch(gts, 0); } @@ -688,7 +701,7 @@ static void 
gru_steal_context(struct gru_thread_state *gts) STAT(steal_context_failed); } gru_dbg(grudev, - "stole gru %x, ctxnum %d from gts %p. Need cb %d, ds %d;" + "stole gid %d, ctxnum %d from gts %p. Need cb %d, ds %d;" " avail cb %ld, ds %ld\n", gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map), hweight64(gru->gs_dsr_map)); @@ -727,6 +740,7 @@ again: } reserve_gru_resources(gru, gts); gts->ts_gru = gru; + gts->ts_blade = gru->gs_blade_id; gts->ts_ctxnum = find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH); BUG_ON(gts->ts_ctxnum == GRU_NUM_CCH); @@ -737,7 +751,7 @@ again: STAT(assign_context); gru_dbg(grudev, - "gseg %p, gts %p, gru %x, ctx %d, cbr %d, dsr %d\n", + "gseg %p, gts %p, gid %d, ctx %d, cbr %d, dsr %d\n", gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts, gts->ts_gru->gs_gid, gts->ts_ctxnum, gts->ts_cbr_au_count, gts->ts_dsr_au_count); @@ -773,8 +787,8 @@ int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return VM_FAULT_SIGBUS; again: - preempt_disable(); mutex_lock(&gts->ts_ctxlock); + preempt_disable(); if (gts->ts_gru) { if (gts->ts_gru->gs_blade_id != uv_numa_blade_id()) { STAT(migrated_nopfn_unload); diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c index 73b0ca061bb5..ee74821b171c 100644 --- a/drivers/misc/sgi-gru/gruprocfs.c +++ b/drivers/misc/sgi-gru/gruprocfs.c @@ -62,7 +62,9 @@ static int statistics_show(struct seq_file *s, void *p) printstat(s, asid_wrap); printstat(s, asid_reuse); printstat(s, intr); + printstat(s, intr_mm_lock_failed); printstat(s, call_os); + printstat(s, call_os_offnode_reference); printstat(s, call_os_check_for_bug); printstat(s, call_os_wait_queue); printstat(s, user_flush_tlb); @@ -120,6 +122,30 @@ static ssize_t statistics_write(struct file *file, const char __user *userbuf, return count; } +static int mcs_statistics_show(struct seq_file *s, void *p) +{ + int op; + unsigned long total, count, max; + static char *id[] = {"cch_allocate", "cch_start", 
"cch_interrupt", + "cch_interrupt_sync", "cch_deallocate", "tgh_invalidate"}; + + for (op = 0; op < mcsop_last; op++) { + count = atomic_long_read(&mcs_op_statistics[op].count); + total = atomic_long_read(&mcs_op_statistics[op].total); + max = mcs_op_statistics[op].max; + seq_printf(s, "%-20s%12ld%12ld%12ld\n", id[op], count, + count ? total / count : 0, max); + } + return 0; +} + +static ssize_t mcs_statistics_write(struct file *file, + const char __user *userbuf, size_t count, loff_t *data) +{ + memset(mcs_op_statistics, 0, sizeof(mcs_op_statistics)); + return count; +} + static int options_show(struct seq_file *s, void *p) { seq_printf(s, "0x%lx\n", gru_options); @@ -135,6 +161,7 @@ static ssize_t options_write(struct file *file, const char __user *userbuf, if (copy_from_user (buf, userbuf, count < sizeof(buf) ? count : sizeof(buf))) return -EFAULT; + buf[count - 1] = '\0'; if (!strict_strtoul(buf, 10, &val)) gru_options = val; @@ -199,7 +226,7 @@ static void seq_stop(struct seq_file *file, void *data) static void *seq_start(struct seq_file *file, loff_t *gid) { - if (*gid < GRU_MAX_GRUS) + if (*gid < gru_max_gids) return gid; return NULL; } @@ -207,7 +234,7 @@ static void *seq_start(struct seq_file *file, loff_t *gid) static void *seq_next(struct seq_file *file, void *data, loff_t *gid) { (*gid)++; - if (*gid < GRU_MAX_GRUS) + if (*gid < gru_max_gids) return gid; return NULL; } @@ -231,6 +258,11 @@ static int statistics_open(struct inode *inode, struct file *file) return single_open(file, statistics_show, NULL); } +static int mcs_statistics_open(struct inode *inode, struct file *file) +{ + return single_open(file, mcs_statistics_show, NULL); +} + static int options_open(struct inode *inode, struct file *file) { return single_open(file, options_show, NULL); @@ -255,6 +287,14 @@ static const struct file_operations statistics_fops = { .release = single_release, }; +static const struct file_operations mcs_statistics_fops = { + .open = mcs_statistics_open, + .read = 
seq_read, + .write = mcs_statistics_write, + .llseek = seq_lseek, + .release = single_release, +}; + static const struct file_operations options_fops = { .open = options_open, .read = seq_read, @@ -283,6 +323,7 @@ static struct proc_entry { struct proc_dir_entry *entry; } proc_files[] = { {"statistics", 0644, &statistics_fops}, + {"mcs_statistics", 0644, &mcs_statistics_fops}, {"debug_options", 0644, &options_fops}, {"cch_status", 0444, &cch_fops}, {"gru_status", 0444, &gru_fops}, diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h index a78f70deeb59..bf1eeb7553ed 100644 --- a/drivers/misc/sgi-gru/grutables.h +++ b/drivers/misc/sgi-gru/grutables.h @@ -153,6 +153,7 @@ extern struct gru_stats_s gru_stats; extern struct gru_blade_state *gru_base[]; extern unsigned long gru_start_paddr, gru_end_paddr; +extern unsigned int gru_max_gids; #define GRU_MAX_BLADES MAX_NUMNODES #define GRU_MAX_GRUS (GRU_MAX_BLADES * GRU_CHIPLETS_PER_BLADE) @@ -184,7 +185,9 @@ struct gru_stats_s { atomic_long_t asid_wrap; atomic_long_t asid_reuse; atomic_long_t intr; + atomic_long_t intr_mm_lock_failed; atomic_long_t call_os; + atomic_long_t call_os_offnode_reference; atomic_long_t call_os_check_for_bug; atomic_long_t call_os_wait_queue; atomic_long_t user_flush_tlb; @@ -237,6 +240,17 @@ struct gru_stats_s { }; +enum mcs_op {cchop_allocate, cchop_start, cchop_interrupt, cchop_interrupt_sync, + cchop_deallocate, tghop_invalidate, mcsop_last}; + +struct mcs_op_statistic { + atomic_long_t count; + atomic_long_t total; + unsigned long max; +}; + +extern struct mcs_op_statistic mcs_op_statistics[mcsop_last]; + #define OPT_DPRINT 1 #define OPT_STATS 2 #define GRU_QUICKLOOK 4 @@ -278,13 +292,12 @@ struct gru_stats_s { /* Generate a GRU asid value from a GRU base asid & a virtual address. 
*/ #if defined CONFIG_IA64 #define VADDR_HI_BIT 64 -#define GRUREGION(addr) ((addr) >> (VADDR_HI_BIT - 3) & 3) #elif defined CONFIG_X86_64 #define VADDR_HI_BIT 48 -#define GRUREGION(addr) (0) /* ZZZ could do better */ #else #error "Unsupported architecture" #endif +#define GRUREGION(addr) ((addr) >> (VADDR_HI_BIT - 3) & 3) #define GRUASID(asid, addr) ((asid) + GRUREGION(addr)) /*------------------------------------------------------------------------------ @@ -297,12 +310,12 @@ struct gru_state; * This structure is pointed to from the mmstruct via the notifier pointer. * There is one of these per address space. */ -struct gru_mm_tracker { - unsigned int mt_asid_gen; /* ASID wrap count */ - int mt_asid; /* current base ASID for gru */ - unsigned short mt_ctxbitmap; /* bitmap of contexts using +struct gru_mm_tracker { /* pack to reduce size */ + unsigned int mt_asid_gen:24; /* ASID wrap count */ + unsigned int mt_asid:24; /* current base ASID for gru */ + unsigned short mt_ctxbitmap:16;/* bitmap of contexts using asid */ -}; +} __attribute__ ((packed)); struct gru_mm_struct { struct mmu_notifier ms_notifier; @@ -348,6 +361,7 @@ struct gru_thread_state { long ts_user_options;/* misc user option flags */ pid_t ts_tgid_owner; /* task that is using the context - for migration */ + unsigned short ts_sizeavail; /* Pagesizes in use */ int ts_tsid; /* thread that owns the structure */ int ts_tlb_int_select;/* target cpu if interrupts @@ -359,6 +373,9 @@ struct gru_thread_state { required for contest */ unsigned char ts_cbr_au_count;/* Number of CBR resources required for contest */ + char ts_blade; /* If >= 0, migrate context if + ref from diferent blade */ + char ts_force_cch_reload; char ts_force_unload;/* force context to be unloaded after migration */ char ts_cbr_idx[GRU_CBR_AU];/* CBR numbers of each @@ -392,12 +409,12 @@ struct gru_state { gru segments (64) */ void *gs_gru_base_vaddr; /* Virtual address of gru segments (64) */ - unsigned char gs_gid; /* unique GRU 
number */ + unsigned short gs_gid; /* unique GRU number */ + unsigned short gs_blade_id; /* blade of GRU */ unsigned char gs_tgh_local_shift; /* used to pick TGH for local flush */ unsigned char gs_tgh_first_remote; /* starting TGH# for remote flush */ - unsigned short gs_blade_id; /* blade of GRU */ spinlock_t gs_asid_lock; /* lock used for assigning asids */ spinlock_t gs_lock; /* lock used for @@ -492,6 +509,10 @@ struct gru_blade_state { (i) < GRU_CHIPLETS_PER_BLADE; \ (i)++, (gru)++) +/* Scan all GRUs */ +#define foreach_gid(gid) \ + for ((gid) = 0; (gid) < gru_max_gids; (gid)++) + /* Scan all active GTSs on a gru. Note: must hold ss_lock to use this macro. */ #define for_each_gts_on_gru(gts, gru, ctxnum) \ for ((ctxnum) = 0; (ctxnum) < GRU_NUM_CCH; (ctxnum)++) \ @@ -578,9 +599,11 @@ extern struct gru_thread_state *gru_find_thread_state(struct vm_area_struct extern struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma, int tsid); extern void gru_unload_context(struct gru_thread_state *gts, int savestate); +extern int gru_update_cch(struct gru_thread_state *gts, int force_unload); extern void gts_drop(struct gru_thread_state *gts); extern void gru_tgh_flush_init(struct gru_state *gru); extern int gru_kservices_init(struct gru_state *gru); +extern void gru_kservices_exit(struct gru_state *gru); extern irqreturn_t gru_intr(int irq, void *dev_id); extern int gru_handle_user_call_os(unsigned long address); extern int gru_user_flush_tlb(unsigned long arg); diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c index c84496a77691..1d125091f5e7 100644 --- a/drivers/misc/sgi-gru/grutlbpurge.c +++ b/drivers/misc/sgi-gru/grutlbpurge.c @@ -187,7 +187,7 @@ void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start, " FLUSH gruid %d, asid 0x%x, num %ld, cbmap 0x%x\n", gid, asid, num, asids->mt_ctxbitmap); tgh = get_lock_tgh_handle(gru); - tgh_invalidate(tgh, start, 0, asid, grupagesize, 0, + tgh_invalidate(tgh, 
start, ~0, asid, grupagesize, 0, num - 1, asids->mt_ctxbitmap); get_unlock_tgh_handle(tgh); } else { @@ -210,11 +210,10 @@ void gru_flush_all_tlb(struct gru_state *gru) { struct gru_tlb_global_handle *tgh; - gru_dbg(grudev, "gru %p, gid %d\n", gru, gru->gs_gid); + gru_dbg(grudev, "gid %d\n", gru->gs_gid); tgh = get_lock_tgh_handle(gru); - tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0); + tgh_invalidate(tgh, 0, ~0, 0, 1, 1, GRUMAXINVAL - 1, 0xffff); get_unlock_tgh_handle(tgh); - preempt_enable(); } /* diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h index 275b78896a73..114444cfd496 100644 --- a/drivers/misc/sgi-xp/xpc.h +++ b/drivers/misc/sgi-xp/xpc.h @@ -92,7 +92,9 @@ struct xpc_rsvd_page { u8 pad1[3]; /* align to next u64 in 1st 64-byte cacheline */ union { unsigned long vars_pa; /* phys address of struct xpc_vars */ - unsigned long activate_mq_gpa; /* gru phy addr of activate_mq */ + unsigned long activate_gru_mq_desc_gpa; /* phys addr of */ + /* activate mq's */ + /* gru mq descriptor */ } sn; unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */ u64 pad2[10]; /* align to last u64 in 2nd 64-byte cacheline */ @@ -189,7 +191,9 @@ struct xpc_gru_mq_uv { int irq; /* irq raised when message is received in mq */ int mmr_blade; /* blade where watchlist was allocated from */ unsigned long mmr_offset; /* offset of irq mmr located on mmr_blade */ + unsigned long mmr_value; /* value of irq mmr located on mmr_blade */ int watchlist_num; /* number of watchlist allocatd by BIOS */ + void *gru_mq_desc; /* opaque structure used by the GRU driver */ }; /* @@ -197,6 +201,7 @@ struct xpc_gru_mq_uv { * heartbeat, partition active state, and channel state. This is UV only. 
*/ struct xpc_activate_mq_msghdr_uv { + unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */ short partid; /* sender's partid */ u8 act_state; /* sender's act_state at time msg sent */ u8 type; /* message's type */ @@ -232,7 +237,7 @@ struct xpc_activate_mq_msg_heartbeat_req_uv { struct xpc_activate_mq_msg_activate_req_uv { struct xpc_activate_mq_msghdr_uv hdr; unsigned long rp_gpa; - unsigned long activate_mq_gpa; + unsigned long activate_gru_mq_desc_gpa; }; struct xpc_activate_mq_msg_deactivate_req_uv { @@ -263,7 +268,7 @@ struct xpc_activate_mq_msg_chctl_openreply_uv { short ch_number; short remote_nentries; /* ??? Is this needed? What is? */ short local_nentries; /* ??? Is this needed? What is? */ - unsigned long local_notify_mq_gpa; + unsigned long notify_gru_mq_desc_gpa; }; /* @@ -510,8 +515,8 @@ struct xpc_channel_sn2 { }; struct xpc_channel_uv { - unsigned long remote_notify_mq_gpa; /* gru phys address of remote */ - /* partition's notify mq */ + void *cached_notify_gru_mq_desc; /* remote partition's notify mq's */ + /* gru mq descriptor */ struct xpc_send_msg_slot_uv *send_msg_slots; void *recv_msg_slots; /* each slot will hold a xpc_notify_mq_msg_uv */ @@ -682,8 +687,12 @@ struct xpc_partition_sn2 { }; struct xpc_partition_uv { - unsigned long remote_activate_mq_gpa; /* gru phys address of remote */ - /* partition's activate mq */ + unsigned long activate_gru_mq_desc_gpa; /* phys addr of parititon's */ + /* activate mq's gru mq */ + /* descriptor */ + void *cached_activate_gru_mq_desc; /* cached copy of partition's */ + /* activate mq's gru mq descriptor */ + struct mutex cached_activate_gru_mq_desc_mutex; spinlock_t flags_lock; /* protect updating of flags */ unsigned int flags; /* general flags */ u8 remote_act_state; /* remote partition's act_state */ @@ -694,8 +703,9 @@ struct xpc_partition_uv { /* struct xpc_partition_uv flags */ -#define XPC_P_HEARTBEAT_OFFLINE_UV 0x00000001 -#define XPC_P_ENGAGED_UV 0x00000002 +#define 
XPC_P_HEARTBEAT_OFFLINE_UV 0x00000001 +#define XPC_P_ENGAGED_UV 0x00000002 +#define XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV 0x00000004 /* struct xpc_partition_uv act_state change requests */ @@ -804,6 +814,7 @@ extern void xpc_activate_kthreads(struct xpc_channel *, int); extern void xpc_create_kthreads(struct xpc_channel *, int, int); extern void xpc_disconnect_wait(int); extern int (*xpc_setup_partitions_sn) (void); +extern void (*xpc_teardown_partitions_sn) (void); extern enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *, u64 *, unsigned long *, size_t *); @@ -846,8 +857,8 @@ extern void (*xpc_send_chctl_openrequest) (struct xpc_channel *, unsigned long *); extern void (*xpc_send_chctl_openreply) (struct xpc_channel *, unsigned long *); -extern void (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *, - unsigned long); +extern enum xp_retval (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *, + unsigned long); extern enum xp_retval (*xpc_send_payload) (struct xpc_channel *, u32, void *, u16, u8, xpc_notify_func, void *); diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c index 45fd653dbe31..99a2534c38a1 100644 --- a/drivers/misc/sgi-xp/xpc_channel.c +++ b/drivers/misc/sgi-xp/xpc_channel.c @@ -183,6 +183,7 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, &part->remote_openclose_args[ch_number]; struct xpc_channel *ch = &part->channels[ch_number]; enum xp_retval reason; + enum xp_retval ret; spin_lock_irqsave(&ch->lock, irq_flags); @@ -399,8 +400,13 @@ again: DBUG_ON(args->local_nentries == 0); DBUG_ON(args->remote_nentries == 0); + ret = xpc_save_remote_msgqueue_pa(ch, args->local_msgqueue_pa); + if (ret != xpSuccess) { + XPC_DISCONNECT_CHANNEL(ch, ret, &irq_flags); + spin_unlock_irqrestore(&ch->lock, irq_flags); + return; + } ch->flags |= XPC_C_ROPENREPLY; - xpc_save_remote_msgqueue_pa(ch, args->local_msgqueue_pa); if (args->local_nentries < ch->remote_nentries) { dev_dbg(xpc_chan, 
"XPC_CHCTL_OPENREPLY: new " diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c index 6576170de962..1ab9fda87fab 100644 --- a/drivers/misc/sgi-xp/xpc_main.c +++ b/drivers/misc/sgi-xp/xpc_main.c @@ -171,6 +171,7 @@ static struct notifier_block xpc_die_notifier = { }; int (*xpc_setup_partitions_sn) (void); +void (*xpc_teardown_partitions_sn) (void); enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *buf, u64 *cookie, unsigned long *rp_pa, size_t *len); @@ -217,8 +218,8 @@ void (*xpc_send_chctl_openrequest) (struct xpc_channel *ch, void (*xpc_send_chctl_openreply) (struct xpc_channel *ch, unsigned long *irq_flags); -void (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *ch, - unsigned long msgqueue_pa); +enum xp_retval (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *ch, + unsigned long msgqueue_pa); enum xp_retval (*xpc_send_payload) (struct xpc_channel *ch, u32 flags, void *payload, u16 payload_size, @@ -998,6 +999,7 @@ xpc_setup_partitions(void) static void xpc_teardown_partitions(void) { + xpc_teardown_partitions_sn(); kfree(xpc_partitions); } diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c index 2e975762c32b..eaaa964942de 100644 --- a/drivers/misc/sgi-xp/xpc_sn2.c +++ b/drivers/misc/sgi-xp/xpc_sn2.c @@ -66,6 +66,12 @@ xpc_setup_partitions_sn_sn2(void) return 0; } +static void +xpc_teardown_partitions_sn_sn2(void) +{ + /* nothing needs to be done */ +} + /* SH_IPI_ACCESS shub register value on startup */ static u64 xpc_sh1_IPI_access_sn2; static u64 xpc_sh2_IPI_access0_sn2; @@ -436,11 +442,12 @@ xpc_send_chctl_local_msgrequest_sn2(struct xpc_channel *ch) XPC_SEND_LOCAL_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST); } -static void +static enum xp_retval xpc_save_remote_msgqueue_pa_sn2(struct xpc_channel *ch, unsigned long msgqueue_pa) { ch->sn.sn2.remote_msgqueue_pa = msgqueue_pa; + return xpSuccess; } /* @@ -1737,20 +1744,20 @@ xpc_clear_remote_msgqueue_flags_sn2(struct xpc_channel *ch) { struct 
xpc_channel_sn2 *ch_sn2 = &ch->sn.sn2; struct xpc_msg_sn2 *msg; - s64 put; + s64 put, remote_nentries = ch->remote_nentries; /* flags are zeroed when the buffer is allocated */ - if (ch_sn2->remote_GP.put < ch->remote_nentries) + if (ch_sn2->remote_GP.put < remote_nentries) return; - put = max(ch_sn2->w_remote_GP.put, ch->remote_nentries); + put = max(ch_sn2->w_remote_GP.put, remote_nentries); do { msg = (struct xpc_msg_sn2 *)((u64)ch_sn2->remote_msgqueue + - (put % ch->remote_nentries) * + (put % remote_nentries) * ch->entry_size); DBUG_ON(!(msg->flags & XPC_M_SN2_READY)); DBUG_ON(!(msg->flags & XPC_M_SN2_DONE)); - DBUG_ON(msg->number != put - ch->remote_nentries); + DBUG_ON(msg->number != put - remote_nentries); msg->flags = 0; } while (++put < ch_sn2->remote_GP.put); } @@ -2315,6 +2322,7 @@ xpc_init_sn2(void) size_t buf_size; xpc_setup_partitions_sn = xpc_setup_partitions_sn_sn2; + xpc_teardown_partitions_sn = xpc_teardown_partitions_sn_sn2; xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_sn2; xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_sn2; xpc_increment_heartbeat = xpc_increment_heartbeat_sn2; diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c index 29c0502a96b2..f7fff4727edb 100644 --- a/drivers/misc/sgi-xp/xpc_uv.c +++ b/drivers/misc/sgi-xp/xpc_uv.c @@ -31,6 +31,21 @@ #include "../sgi-gru/grukservices.h" #include "xpc.h" +#if defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV +struct uv_IO_APIC_route_entry { + __u64 vector : 8, + delivery_mode : 3, + dest_mode : 1, + delivery_status : 1, + polarity : 1, + __reserved_1 : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15, + dest : 32; +}; +#endif + static atomic64_t xpc_heartbeat_uv; static DECLARE_BITMAP(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV); @@ -56,26 +71,52 @@ xpc_setup_partitions_sn_uv(void) for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) { part_uv = &xpc_partitions[partid].sn.uv; + 
mutex_init(&part_uv->cached_activate_gru_mq_desc_mutex); spin_lock_init(&part_uv->flags_lock); part_uv->remote_act_state = XPC_P_AS_INACTIVE; } return 0; } +static void +xpc_teardown_partitions_sn_uv(void) +{ + short partid; + struct xpc_partition_uv *part_uv; + unsigned long irq_flags; + + for (partid = 0; partid < XP_MAX_NPARTITIONS_UV; partid++) { + part_uv = &xpc_partitions[partid].sn.uv; + + if (part_uv->cached_activate_gru_mq_desc != NULL) { + mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex); + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + kfree(part_uv->cached_activate_gru_mq_desc); + part_uv->cached_activate_gru_mq_desc = NULL; + mutex_unlock(&part_uv-> + cached_activate_gru_mq_desc_mutex); + } + } +} + static int xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name) { + int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); + #if defined CONFIG_X86_64 mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset); if (mq->irq < 0) { dev_err(xpc_part, "uv_setup_irq() returned error=%d\n", - mq->irq); + -mq->irq); + return mq->irq; } -#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV - int mmr_pnode; - unsigned long mmr_value; + mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset); +#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV if (strcmp(irq_name, XPC_ACTIVATE_IRQ_NAME) == 0) mq->irq = SGI_XPC_ACTIVATE; else if (strcmp(irq_name, XPC_NOTIFY_IRQ_NAME) == 0) @@ -83,10 +124,8 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name) else return -EINVAL; - mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); - mmr_value = (unsigned long)cpu_physical_id(cpu) << 32 | mq->irq; - - uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mmr_value); + mq->mmr_value = (unsigned long)cpu_physical_id(cpu) << 32 | mq->irq; + uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, 
mq->mmr_value); #else #error not a supported configuration #endif @@ -127,7 +166,7 @@ xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq) return ret; } #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV - ret = sn_mq_watchlist_alloc(mq->mmr_blade, uv_gpa(mq->address), + ret = sn_mq_watchlist_alloc(mq->mmr_blade, (void *)uv_gpa(mq->address), mq->order, &mq->mmr_offset); if (ret < 0) { dev_err(xpc_part, "sn_mq_watchlist_alloc() failed, ret=%d\n", @@ -168,12 +207,22 @@ xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name, int pg_order; struct page *page; struct xpc_gru_mq_uv *mq; + struct uv_IO_APIC_route_entry *mmr_value; mq = kmalloc(sizeof(struct xpc_gru_mq_uv), GFP_KERNEL); if (mq == NULL) { dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() " "a xpc_gru_mq_uv structure\n"); ret = -ENOMEM; + goto out_0; + } + + mq->gru_mq_desc = kzalloc(sizeof(struct gru_message_queue_desc), + GFP_KERNEL); + if (mq->gru_mq_desc == NULL) { + dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to kmalloc() " + "a gru_message_queue_desc structure\n"); + ret = -ENOMEM; goto out_1; } @@ -194,14 +243,6 @@ xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name, } mq->address = page_address(page); - ret = gru_create_message_queue(mq->address, mq_size); - if (ret != 0) { - dev_err(xpc_part, "gru_create_message_queue() returned " - "error=%d\n", ret); - ret = -EINVAL; - goto out_3; - } - /* enable generation of irq when GRU mq operation occurs to this mq */ ret = xpc_gru_mq_watchlist_alloc_uv(mq); if (ret != 0) @@ -214,10 +255,20 @@ xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name, ret = request_irq(mq->irq, irq_handler, 0, irq_name, NULL); if (ret != 0) { dev_err(xpc_part, "request_irq(irq=%d) returned error=%d\n", - mq->irq, ret); + mq->irq, -ret); goto out_5; } + mmr_value = (struct uv_IO_APIC_route_entry *)&mq->mmr_value; + ret = gru_create_message_queue(mq->gru_mq_desc, mq->address, mq_size, + nid, mmr_value->vector, 
mmr_value->dest); + if (ret != 0) { + dev_err(xpc_part, "gru_create_message_queue() returned " + "error=%d\n", ret); + ret = -EINVAL; + goto out_6; + } + /* allow other partitions to access this GRU mq */ xp_ret = xp_expand_memprotect(xp_pa(mq->address), mq_size); if (xp_ret != xpSuccess) { @@ -237,8 +288,10 @@ out_4: out_3: free_pages((unsigned long)mq->address, pg_order); out_2: - kfree(mq); + kfree(mq->gru_mq_desc); out_1: + kfree(mq); +out_0: return ERR_PTR(ret); } @@ -268,13 +321,14 @@ xpc_destroy_gru_mq_uv(struct xpc_gru_mq_uv *mq) } static enum xp_retval -xpc_send_gru_msg(unsigned long mq_gpa, void *msg, size_t msg_size) +xpc_send_gru_msg(struct gru_message_queue_desc *gru_mq_desc, void *msg, + size_t msg_size) { enum xp_retval xp_ret; int ret; while (1) { - ret = gru_send_message_gpa(mq_gpa, msg, msg_size); + ret = gru_send_message_gpa(gru_mq_desc, msg, msg_size); if (ret == MQE_OK) { xp_ret = xpSuccess; break; @@ -421,7 +475,15 @@ xpc_handle_activate_mq_msg_uv(struct xpc_partition *part, part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV; part->remote_rp_pa = msg->rp_gpa; /* !!! 
_pa is _gpa */ part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies; - part_uv->remote_activate_mq_gpa = msg->activate_mq_gpa; + + if (msg->activate_gru_mq_desc_gpa != + part_uv->activate_gru_mq_desc_gpa) { + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags &= ~XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + part_uv->activate_gru_mq_desc_gpa = + msg->activate_gru_mq_desc_gpa; + } spin_unlock_irqrestore(&xpc_activate_IRQ_rcvd_lock, irq_flags); (*wakeup_hb_checker)++; @@ -498,7 +560,7 @@ xpc_handle_activate_mq_msg_uv(struct xpc_partition *part, args = &part->remote_openclose_args[msg->ch_number]; args->remote_nentries = msg->remote_nentries; args->local_nentries = msg->local_nentries; - args->local_msgqueue_pa = msg->local_notify_mq_gpa; + args->local_msgqueue_pa = msg->notify_gru_mq_desc_gpa; spin_lock_irqsave(&part->chctl_lock, irq_flags); part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENREPLY; @@ -558,9 +620,10 @@ xpc_handle_activate_IRQ_uv(int irq, void *dev_id) short partid; struct xpc_partition *part; int wakeup_hb_checker = 0; + int part_referenced; while (1) { - msg_hdr = gru_get_next_message(xpc_activate_mq_uv->address); + msg_hdr = gru_get_next_message(xpc_activate_mq_uv->gru_mq_desc); if (msg_hdr == NULL) break; @@ -571,14 +634,15 @@ xpc_handle_activate_IRQ_uv(int irq, void *dev_id) partid); } else { part = &xpc_partitions[partid]; - if (xpc_part_ref(part)) { - xpc_handle_activate_mq_msg_uv(part, msg_hdr, - &wakeup_hb_checker); + + part_referenced = xpc_part_ref(part); + xpc_handle_activate_mq_msg_uv(part, msg_hdr, + &wakeup_hb_checker); + if (part_referenced) xpc_part_deref(part); - } } - gru_free_message(xpc_activate_mq_uv->address, msg_hdr); + gru_free_message(xpc_activate_mq_uv->gru_mq_desc, msg_hdr); } if (wakeup_hb_checker) @@ -588,21 +652,73 @@ xpc_handle_activate_IRQ_uv(int irq, void *dev_id) } static enum xp_retval +xpc_cache_remote_gru_mq_desc_uv(struct gru_message_queue_desc 
*gru_mq_desc, + unsigned long gru_mq_desc_gpa) +{ + enum xp_retval ret; + + ret = xp_remote_memcpy(uv_gpa(gru_mq_desc), gru_mq_desc_gpa, + sizeof(struct gru_message_queue_desc)); + if (ret == xpSuccess) + gru_mq_desc->mq = NULL; + + return ret; +} + +static enum xp_retval xpc_send_activate_IRQ_uv(struct xpc_partition *part, void *msg, size_t msg_size, int msg_type) { struct xpc_activate_mq_msghdr_uv *msg_hdr = msg; + struct xpc_partition_uv *part_uv = &part->sn.uv; + struct gru_message_queue_desc *gru_mq_desc; + unsigned long irq_flags; + enum xp_retval ret; DBUG_ON(msg_size > XPC_ACTIVATE_MSG_SIZE_UV); msg_hdr->type = msg_type; - msg_hdr->partid = XPC_PARTID(part); + msg_hdr->partid = xp_partition_id; msg_hdr->act_state = part->act_state; msg_hdr->rp_ts_jiffies = xpc_rsvd_page->ts_jiffies; + mutex_lock(&part_uv->cached_activate_gru_mq_desc_mutex); +again: + if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV)) { + gru_mq_desc = part_uv->cached_activate_gru_mq_desc; + if (gru_mq_desc == NULL) { + gru_mq_desc = kmalloc(sizeof(struct + gru_message_queue_desc), + GFP_KERNEL); + if (gru_mq_desc == NULL) { + ret = xpNoMemory; + goto done; + } + part_uv->cached_activate_gru_mq_desc = gru_mq_desc; + } + + ret = xpc_cache_remote_gru_mq_desc_uv(gru_mq_desc, + part_uv-> + activate_gru_mq_desc_gpa); + if (ret != xpSuccess) + goto done; + + spin_lock_irqsave(&part_uv->flags_lock, irq_flags); + part_uv->flags |= XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV; + spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); + } + /* ??? Is holding a spin_lock (ch->lock) during this call a bad idea? 
*/ - return xpc_send_gru_msg(part->sn.uv.remote_activate_mq_gpa, msg, - msg_size); + ret = xpc_send_gru_msg(part_uv->cached_activate_gru_mq_desc, msg, + msg_size); + if (ret != xpSuccess) { + smp_rmb(); /* ensure a fresh copy of part_uv->flags */ + if (!(part_uv->flags & XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV)) + goto again; + } +done: + mutex_unlock(&part_uv->cached_activate_gru_mq_desc_mutex); + return ret; } static void @@ -620,7 +736,7 @@ static void xpc_send_activate_IRQ_ch_uv(struct xpc_channel *ch, unsigned long *irq_flags, void *msg, size_t msg_size, int msg_type) { - struct xpc_partition *part = &xpc_partitions[ch->number]; + struct xpc_partition *part = &xpc_partitions[ch->partid]; enum xp_retval ret; ret = xpc_send_activate_IRQ_uv(part, msg, msg_size, msg_type); @@ -692,7 +808,8 @@ xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa, static int xpc_setup_rsvd_page_sn_uv(struct xpc_rsvd_page *rp) { - rp->sn.activate_mq_gpa = uv_gpa(xpc_activate_mq_uv->address); + rp->sn.activate_gru_mq_desc_gpa = + uv_gpa(xpc_activate_mq_uv->gru_mq_desc); return 0; } @@ -787,7 +904,8 @@ xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp, part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */ part->remote_rp_ts_jiffies = remote_rp->ts_jiffies; - part->sn.uv.remote_activate_mq_gpa = remote_rp->sn.activate_mq_gpa; + part->sn.uv.activate_gru_mq_desc_gpa = + remote_rp->sn.activate_gru_mq_desc_gpa; /* * ??? 
Is it a good idea to make this conditional on what is @@ -795,7 +913,8 @@ xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp, */ if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) { msg.rp_gpa = uv_gpa(xpc_rsvd_page); - msg.activate_mq_gpa = xpc_rsvd_page->sn.activate_mq_gpa; + msg.activate_gru_mq_desc_gpa = + xpc_rsvd_page->sn.activate_gru_mq_desc_gpa; xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV); } @@ -857,7 +976,8 @@ xpc_get_fifo_entry_uv(struct xpc_fifo_head_uv *head) if (head->first == NULL) head->last = NULL; } - head->n_entries++; + head->n_entries--; + BUG_ON(head->n_entries < 0); spin_unlock_irqrestore(&head->lock, irq_flags); first->next = NULL; return first; @@ -876,8 +996,7 @@ xpc_put_fifo_entry_uv(struct xpc_fifo_head_uv *head, else head->first = last; head->last = last; - head->n_entries--; - BUG_ON(head->n_entries < 0); + head->n_entries++; spin_unlock_irqrestore(&head->lock, irq_flags); } @@ -1037,6 +1156,12 @@ xpc_setup_msg_structures_uv(struct xpc_channel *ch) DBUG_ON(ch->flags & XPC_C_SETUP); + ch_uv->cached_notify_gru_mq_desc = kmalloc(sizeof(struct + gru_message_queue_desc), + GFP_KERNEL); + if (ch_uv->cached_notify_gru_mq_desc == NULL) + return xpNoMemory; + ret = xpc_allocate_send_msg_slot_uv(ch); if (ret == xpSuccess) { @@ -1060,7 +1185,8 @@ xpc_teardown_msg_structures_uv(struct xpc_channel *ch) DBUG_ON(!spin_is_locked(&ch->lock)); - ch_uv->remote_notify_mq_gpa = 0; + kfree(ch_uv->cached_notify_gru_mq_desc); + ch_uv->cached_notify_gru_mq_desc = NULL; if (ch->flags & XPC_C_SETUP) { xpc_init_fifo_uv(&ch_uv->msg_slot_free_list); @@ -1111,7 +1237,7 @@ xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags) msg.ch_number = ch->number; msg.local_nentries = ch->local_nentries; msg.remote_nentries = ch->remote_nentries; - msg.local_notify_mq_gpa = uv_gpa(xpc_notify_mq_uv); + msg.notify_gru_mq_desc_gpa = uv_gpa(xpc_notify_mq_uv->gru_mq_desc); 
xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV); } @@ -1128,11 +1254,15 @@ xpc_send_chctl_local_msgrequest_uv(struct xpc_partition *part, int ch_number) xpc_wakeup_channel_mgr(part); } -static void +static enum xp_retval xpc_save_remote_msgqueue_pa_uv(struct xpc_channel *ch, - unsigned long msgqueue_pa) + unsigned long gru_mq_desc_gpa) { - ch->sn.uv.remote_notify_mq_gpa = msgqueue_pa; + struct xpc_channel_uv *ch_uv = &ch->sn.uv; + + DBUG_ON(ch_uv->cached_notify_gru_mq_desc == NULL); + return xpc_cache_remote_gru_mq_desc_uv(ch_uv->cached_notify_gru_mq_desc, + gru_mq_desc_gpa); } static void @@ -1339,7 +1469,8 @@ xpc_handle_notify_IRQ_uv(int irq, void *dev_id) short partid; struct xpc_partition *part; - while ((msg = gru_get_next_message(xpc_notify_mq_uv)) != NULL) { + while ((msg = gru_get_next_message(xpc_notify_mq_uv->gru_mq_desc)) != + NULL) { partid = msg->hdr.partid; if (partid < 0 || partid >= XP_MAX_NPARTITIONS_UV) { @@ -1354,7 +1485,7 @@ xpc_handle_notify_IRQ_uv(int irq, void *dev_id) } } - gru_free_message(xpc_notify_mq_uv, msg); + gru_free_message(xpc_notify_mq_uv->gru_mq_desc, msg); } return IRQ_HANDLED; @@ -1438,7 +1569,8 @@ xpc_send_payload_uv(struct xpc_channel *ch, u32 flags, void *payload, msg->hdr.msg_slot_number = msg_slot->msg_slot_number; memcpy(&msg->payload, payload, payload_size); - ret = xpc_send_gru_msg(ch->sn.uv.remote_notify_mq_gpa, msg, msg_size); + ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg, + msg_size); if (ret == xpSuccess) goto out_1; @@ -1529,7 +1661,7 @@ xpc_received_payload_uv(struct xpc_channel *ch, void *payload) msg->hdr.partid = xp_partition_id; msg->hdr.size = 0; /* size of zero indicates this is an ACK */ - ret = xpc_send_gru_msg(ch->sn.uv.remote_notify_mq_gpa, msg, + ret = xpc_send_gru_msg(ch->sn.uv.cached_notify_gru_mq_desc, msg, sizeof(struct xpc_notify_mq_msghdr_uv)); if (ret != xpSuccess) XPC_DEACTIVATE_PARTITION(&xpc_partitions[ch->partid], ret); 
@@ -1541,6 +1673,7 @@ int xpc_init_uv(void) { xpc_setup_partitions_sn = xpc_setup_partitions_sn_uv; + xpc_teardown_partitions_sn = xpc_teardown_partitions_sn_uv; xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv; xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv; xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_uv; |