diff options
364 files changed, 5858 insertions, 2441 deletions
diff --git a/Documentation/dontdiff b/Documentation/dontdiff index 9a33bb94f74f..d4fda25db868 100644 --- a/Documentation/dontdiff +++ b/Documentation/dontdiff @@ -111,6 +111,7 @@ mkdep mktables modpost modversions.h* +offset.h offsets.h oui.c* parse.c* diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 26414bc87c65..e67c90d4ee17 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -66,6 +66,14 @@ Who: Paul E. McKenney <paulmck@us.ibm.com> --------------------------- +What: remove verify_area() +When: July 2006 +Files: Various uaccess.h headers. +Why: Deprecated and redundant. access_ok() should be used instead. +Who: Jesper Juhl <juhl-lkml@dif.dk> + +--------------------------- + What: IEEE1394 Audio and Music Data Transmission Protocol driver, Connection Management Procedures driver When: November 2005 diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 35159176997b..9f11d36a8c10 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -49,6 +49,7 @@ show up in /proc/sys/kernel: - shmmax [ sysv ipc ] - shmmni - stop-a [ SPARC only ] +- suid_dumpable - sysrq ==> Documentation/sysrq.txt - tainted - threads-max @@ -300,6 +301,25 @@ kernel. This value defaults to SHMMAX. ============================================================== +suid_dumpable: + +This value can be used to query and set the core dump mode for setuid +or otherwise protected/tainted binaries. The modes are + +0 - (default) - traditional behaviour. Any process which has changed + privilege levels or is execute only will not be dumped +1 - (debug) - all processes dump core when possible. The core dump is + owned by the current user and no security is applied. This is + intended for system debugging situations only. Ptrace is unchecked. +2 - (suidsafe) - any binary which normally would not be dumped is dumped + readable by root only. This allows the end user to remove + such a dump but not access it directly. For security reasons + core dumps in this mode will not overwrite one another or + other files. This mode is appropriate when adminstrators are + attempting to debug problems in a normal environment. + +============================================================== + tainted: Non-zero if the kernel has been tainted. Numeric values, which diff --git a/Documentation/tty.txt b/Documentation/tty.txt index 3958cf746dde..8ff7bc2a0811 100644 --- a/Documentation/tty.txt +++ b/Documentation/tty.txt @@ -22,7 +22,7 @@ copy of the structure. You must not re-register over the top of the line discipline even with the same data or your computer again will be eaten by demons. -In order to remove a line discipline call tty_register_ldisc passing NULL. +In order to remove a line discipline call tty_unregister_ldisc(). In ancient times this always worked. In modern times the function will return -EBUSY if the ldisc is currently in use. Since the ldisc referencing code manages the module counts this should not usually be a concern. diff --git a/MAINTAINERS b/MAINTAINERS index 5eaa6807cdb7..651af5012c98 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -304,7 +304,7 @@ S: Maintained ARM/PT DIGITAL BOARD PORT P: Stefan Eletzhofer M: stefan.eletzhofer@eletztrick.de -L: linux-arm-kernel@lists.arm.linux.org.uk +L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) W: http://www.arm.linux.org.uk/ S: Maintained @@ -317,21 +317,21 @@ S: Maintained ARM/STRONGARM110 PORT P: Russell King M: rmk@arm.linux.org.uk -L: linux-arm-kernel@lists.arm.linux.org.uk +L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) W: http://www.arm.linux.org.uk/ S: Maintained ARM/S3C2410 ARM ARCHITECTURE P: Ben Dooks M: ben-s3c2410@fluff.org -L: linux-arm-kernel@lists.arm.linux.org.uk +L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) W: http://www.fluff.org/ben/linux/ S: Maintained ARM/S3C2440 ARM ARCHITECTURE P: Ben Dooks M: ben-s3c2440@fluff.org -L: linux-arm-kernel@lists.arm.linux.org.uk +L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) W: http://www.fluff.org/ben/linux/ S: Maintained @@ -1860,7 +1860,7 @@ S: Maintained PXA2xx SUPPORT P: Nicolas Pitre M: nico@cam.org -L: linux-arm-kernel@lists.arm.linux.org.uk +L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) S: Maintained QLOGIC QLA2XXX FC-SCSI DRIVER @@ -2162,7 +2162,7 @@ SHARP LH SUPPORT (LH7952X & LH7A40X) P: Marc Singer M: elf@buici.com W: http://projects.buici.com/arm -L: linux-arm-kernel@lists.arm.linux.org.uk +L: linux-arm-kernel@lists.arm.linux.org.uk (subscribers-only) S: Maintained SPARC (sparc32): @@ -518,7 +518,7 @@ CFLAGS += $(call add-align,CONFIG_CC_ALIGN_LOOPS,-loops) CFLAGS += $(call add-align,CONFIG_CC_ALIGN_JUMPS,-jumps) ifdef CONFIG_FRAME_POINTER -CFLAGS += -fno-omit-frame-pointer +CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,) else CFLAGS += -fomit-frame-pointer endif diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index f7c96635d3b4..c5739d6309df 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -509,7 +509,7 @@ config NR_CPUS depends on SMP default "64" -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool "Discontiguous Memory Support (EXPERIMENTAL)" depends on EXPERIMENTAL help @@ -518,6 +518,8 @@ config DISCONTIGMEM or have huge holes in the physical address space for other reasons. See <file:Documentation/vm/numa> for more. +source "mm/Kconfig" + config NUMA bool "NUMA Support (EXPERIMENTAL)" depends on DISCONTIGMEM diff --git a/arch/alpha/defconfig b/arch/alpha/defconfig index 5e39b7a7c8f4..6da9c3dbde44 100644 --- a/arch/alpha/defconfig +++ b/arch/alpha/defconfig @@ -96,7 +96,7 @@ CONFIG_ALPHA_CORE_AGP=y CONFIG_ALPHA_BROKEN_IRQ_MASK=y CONFIG_EISA=y # CONFIG_SMP is not set -# CONFIG_DISCONTIGMEM is not set +# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set CONFIG_VERBOSE_MCHECK=y CONFIG_VERBOSE_MCHECK_ON=1 CONFIG_PCI_LEGACY_PROC=y diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index ba81c4422aaf..c7481d59b6df 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c @@ -327,8 +327,6 @@ void __init mem_init(void) extern char _text, _etext, _data, _edata; extern char __init_begin, __init_end; unsigned long nid, i; - struct page * lmem_map; - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); reservedpages = 0; @@ -338,10 +336,10 @@ void __init mem_init(void) */ totalram_pages += free_all_bootmem_node(NODE_DATA(nid)); - lmem_map = node_mem_map(nid); pfn = NODE_DATA(nid)->node_start_pfn; for (i = 0; i < node_spanned_pages(nid); i++, pfn++) - if (page_is_ram(pfn) && PageReserved(lmem_map+i)) + if (page_is_ram(pfn) && + PageReserved(nid_page_nr(nid, i))) reservedpages++; } @@ -373,18 +371,18 @@ show_mem(void) show_free_areas(); printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_online_node(nid) { - struct page * lmem_map = node_mem_map(nid); i = node_spanned_pages(nid); while (i-- > 0) { + struct page *page = nid_page_nr(nid, i); total++; - if (PageReserved(lmem_map+i)) + if (PageReserved(page)) reserved++; - else if (PageSwapCache(lmem_map+i)) + else if (PageSwapCache(page)) cached++; - else if (!page_count(lmem_map+i)) + else if (!page_count(page)) free++; else - shared += page_count(lmem_map + i) - 1; + shared += page_count(page) - 1; } } printk("%ld pages of RAM\n",total); diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index ee8a9ad7bbd9..07ba77c19f6c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -346,7 +346,7 @@ config PREEMPT Say Y here if you are building a kernel for a desktop, embedded or real-time system. Say N if you are unsure. -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool default (ARCH_LH7A40X && !LH7A40X_CONTIGMEM) help @@ -355,6 +355,8 @@ config DISCONTIGMEM or have huge holes in the physical address space for other reasons. See <file:Documentation/vm/numa> for more. +source "mm/Kconfig" + config LEDS bool "Timer and CPU usage LEDs" depends on ARCH_CDB89712 || ARCH_CO285 || ARCH_EBSA110 || \ diff --git a/arch/arm/boot/install.sh b/arch/arm/boot/install.sh index 935bb27369e9..9f9bed207345 100644 --- a/arch/arm/boot/install.sh +++ b/arch/arm/boot/install.sh @@ -21,8 +21,8 @@ # # User may have a custom install script -if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi -if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi +if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi if [ "$(basename $2)" = "zImage" ]; then # Compressed install diff --git a/arch/arm26/Kconfig b/arch/arm26/Kconfig index 6caed90661fc..dc0c1936969b 100644 --- a/arch/arm26/Kconfig +++ b/arch/arm26/Kconfig @@ -179,6 +179,8 @@ config CMDLINE time by entering them here. As a minimum, you should specify the memory size and the root device (e.g., mem=64M root=/dev/nfs). +source "mm/Kconfig" + endmenu source "drivers/base/Kconfig" diff --git a/arch/arm26/boot/install.sh b/arch/arm26/boot/install.sh index c628328dd9ec..8a8399b26cf7 100644 --- a/arch/arm26/boot/install.sh +++ b/arch/arm26/boot/install.sh @@ -23,8 +23,8 @@ # User may have a custom install script -if [ -x /sbin/installkernel ]; then - exec /sbin/installkernel "$@" +if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then + exec /sbin/${CROSS_COMPILE}installkernel "$@" fi if [ "$2" = "zImage" ]; then diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index 4332ca348d51..f848e3761491 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -74,6 +74,8 @@ config PREEMPT Say Y here if you are building a kernel for a desktop, embedded or real-time system. Say N if you are unsure. +source mm/Kconfig + endmenu menu "Hardware setup" diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index 2b19372767eb..c93f95146cc2 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -74,6 +74,8 @@ config HIGHPTE with a lot of RAM, this can be wasteful of precious low memory. Setting this option will put user-space page tables in high memory. +source "mm/Kconfig" + choice prompt "uClinux kernel load address" depends on !MMU diff --git a/arch/h8300/Kconfig.cpu b/arch/h8300/Kconfig.cpu index d9dd62a565a9..a380167a13cf 100644 --- a/arch/h8300/Kconfig.cpu +++ b/arch/h8300/Kconfig.cpu @@ -180,4 +180,7 @@ config CPU_H8S config PREEMPT bool "Preemptible Kernel" default n + +source "mm/Kconfig" + endmenu diff --git a/arch/h8300/platform/h8300h/ptrace_h8300h.c b/arch/h8300/platform/h8300h/ptrace_h8300h.c index 18e51a7167d3..6ac93c05a1ae 100644 --- a/arch/h8300/platform/h8300h/ptrace_h8300h.c +++ b/arch/h8300/platform/h8300h/ptrace_h8300h.c @@ -245,12 +245,12 @@ static unsigned short *getnextpc(struct task_struct *child, unsigned short *pc) addr = h8300_get_reg(child, regno-1+PT_ER1); return (unsigned short *)addr; case relb: - if ((inst = 0x55) || isbranch(child,inst & 0x0f)) + if (inst == 0x55 || isbranch(child,inst & 0x0f)) pc = (unsigned short *)((unsigned long)pc + ((signed char)(*fetch_p))); return pc+1; /* skip myself */ case relw: - if ((inst = 0x5c) || isbranch(child,(*fetch_p & 0xf0) >> 4)) + if (inst == 0x5c || isbranch(child,(*fetch_p & 0xf0) >> 4)) pc = (unsigned short *)((unsigned long)pc + ((signed short)(*(pc+1)))); return pc+2; /* skip myself */ diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index dfd904f6883b..d4ae5f9ceae6 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -68,7 +68,6 @@ config X86_VOYAGER config X86_NUMAQ bool "NUMAQ (IBM/Sequent)" - select DISCONTIGMEM select NUMA help This option is used for getting Linux to run on a (IBM/Sequent) NUMA @@ -783,25 +782,48 @@ comment "NUMA (NUMA-Q) requires SMP, 64GB highmem support" comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" depends on X86_SUMMIT && (!HIGHMEM64G || !ACPI) -config DISCONTIGMEM - bool - depends on NUMA - default y - config HAVE_ARCH_BOOTMEM_NODE bool depends on NUMA default y -config HAVE_MEMORY_PRESENT +config ARCH_HAVE_MEMORY_PRESENT bool depends on DISCONTIGMEM default y config NEED_NODE_MEMMAP_SIZE bool - depends on DISCONTIGMEM + depends on DISCONTIGMEM || SPARSEMEM + default y + +config HAVE_ARCH_ALLOC_REMAP + bool + depends on NUMA + default y + +config ARCH_DISCONTIGMEM_ENABLE + def_bool y + depends on NUMA + +config ARCH_DISCONTIGMEM_DEFAULT + def_bool y + depends on NUMA + +config ARCH_SPARSEMEM_ENABLE + def_bool y + depends on NUMA + +config ARCH_SELECT_MEMORY_MODEL + def_bool y + depends on ARCH_SPARSEMEM_ENABLE + +source "mm/Kconfig" + +config HAVE_ARCH_EARLY_PFN_TO_NID + bool default y + depends on NUMA config HIGHPTE bool "Allocate 3rd-level pagetables from highmem" @@ -939,6 +961,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source kernel/Kconfig.hz + endmenu diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 1c36ca332a96..bf7c9ba709f3 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -17,6 +17,13 @@ # 20050320 Kianusch Sayah Karadji <kianusch@sk-tech.net> # Added support for GEODE CPU +HAS_BIARCH := $(call cc-option-yn, -m32) +ifeq ($(HAS_BIARCH),y) +AS := $(AS) --32 +LD := $(LD) -m elf_i386 +CC := $(CC) -m32 +endif + LDFLAGS := -m elf_i386 OBJCOPYFLAGS := -O binary -R .note -R .comment -S LDFLAGS_vmlinux := diff --git a/arch/i386/boot/install.sh b/arch/i386/boot/install.sh index 90f2452b3b9e..f17b40dfc0f4 100644 --- a/arch/i386/boot/install.sh +++ b/arch/i386/boot/install.sh @@ -21,8 +21,8 @@ # User may have a custom install script -if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi -if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi +if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi # Default install - same as make zlilo diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index d509836b70c3..8d993fa71754 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -1133,7 +1133,7 @@ inline void smp_local_timer_interrupt(struct pt_regs * regs) } #ifdef CONFIG_SMP - update_process_times(user_mode(regs)); + update_process_times(user_mode_vm(regs)); #endif } diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index d199e525680a..b9954248d0aa 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -635,7 +635,7 @@ void __init cpu_init (void) /* Clear all 6 debug registers: */ -#define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) ); +#define CD(register) set_debugreg(0, register) CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7); diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index e1c2042b9b7e..d66b09e0c820 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c @@ -375,6 +375,19 @@ int mtrr_add_page(unsigned long base, unsigned long size, return error; } +static int mtrr_check(unsigned long base, unsigned long size) +{ + if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { + printk(KERN_WARNING + "mtrr: size and base must be multiples of 4 kiB\n"); + printk(KERN_DEBUG + "mtrr: size: 0x%lx base: 0x%lx\n", size, base); + dump_stack(); + return -1; + } + return 0; +} + /** * mtrr_add - Add a memory type region * @base: Physical base address of region @@ -415,11 +428,8 @@ int mtrr_add(unsigned long base, unsigned long size, unsigned int type, char increment) { - if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { - printk(KERN_WARNING "mtrr: size and base must be multiples of 4 kiB\n"); - printk(KERN_DEBUG "mtrr: size: 0x%lx base: 0x%lx\n", size, base); + if (mtrr_check(base, size)) return -EINVAL; - } return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, increment); } @@ -511,11 +521,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) int mtrr_del(int reg, unsigned long base, unsigned long size) { - if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { - printk(KERN_INFO "mtrr: size and base must be multiples of 4 kiB\n"); - printk(KERN_DEBUG "mtrr: size: 0x%lx base: 0x%lx\n", size, base); + if (mtrr_check(base, size)) return -EINVAL; - } return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); } diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 7323c19f354e..8bd77d948a84 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -86,7 +86,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "stepping\t: unknown\n"); if ( cpu_has(c, X86_FEATURE_TSC) ) { - seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n", + seq_printf(m, "cpu MHz\t\t: %u.%03u\n", cpu_khz / 1000, (cpu_khz % 1000)); } diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c index 903190a4b3ff..180f070d03cb 100644 --- a/arch/i386/kernel/i386_ksyms.c +++ b/arch/i386/kernel/i386_ksyms.c @@ -1,97 +1,17 @@ #include <linux/config.h> #include <linux/module.h> -#include <linux/smp.h> -#include <linux/user.h> -#include <linux/elfcore.h> -#include <linux/mca.h> -#include <linux/sched.h> -#include <linux/in6.h> -#include <linux/interrupt.h> -#include <linux/smp_lock.h> -#include <linux/pm.h> -#include <linux/pci.h> -#include <linux/apm_bios.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/tty.h> -#include <linux/highmem.h> -#include <linux/time.h> - -#include <asm/semaphore.h> -#include <asm/processor.h> -#include <asm/i387.h> -#include <asm/uaccess.h> #include <asm/checksum.h> -#include <asm/io.h> -#include <asm/delay.h> -#include <asm/irq.h> -#include <asm/mmx.h> #include <asm/desc.h> -#include <asm/pgtable.h> -#include <asm/tlbflush.h> -#include <asm/nmi.h> -#include <asm/ist.h> -#include <asm/kdebug.h> - -extern void dump_thread(struct pt_regs *, struct user *); -extern spinlock_t rtc_lock; /* This is definitely a GPL-only symbol */ EXPORT_SYMBOL_GPL(cpu_gdt_table); -#if defined(CONFIG_APM_MODULE) -extern void machine_real_restart(unsigned char *, int); -EXPORT_SYMBOL(machine_real_restart); -extern void default_idle(void); -EXPORT_SYMBOL(default_idle); -#endif - -#ifdef CONFIG_SMP -extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); -extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); -#endif - -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) -extern struct drive_info_struct drive_info; -EXPORT_SYMBOL(drive_info); -#endif - -extern unsigned long cpu_khz; -extern unsigned long get_cmos_time(void); - -/* platform dependent support */ -EXPORT_SYMBOL(boot_cpu_data); -#ifdef CONFIG_DISCONTIGMEM -EXPORT_SYMBOL(node_data); -EXPORT_SYMBOL(physnode_map); -#endif -#ifdef CONFIG_X86_NUMAQ -EXPORT_SYMBOL(xquad_portio); -#endif -EXPORT_SYMBOL(dump_thread); -EXPORT_SYMBOL(dump_fpu); -EXPORT_SYMBOL_GPL(kernel_fpu_begin); -EXPORT_SYMBOL(__ioremap); -EXPORT_SYMBOL(ioremap_nocache); -EXPORT_SYMBOL(iounmap); -EXPORT_SYMBOL(kernel_thread); -EXPORT_SYMBOL(pm_idle); -EXPORT_SYMBOL(pm_power_off); -EXPORT_SYMBOL(get_cmos_time); -EXPORT_SYMBOL(cpu_khz); -EXPORT_SYMBOL(apm_info); - EXPORT_SYMBOL(__down_failed); EXPORT_SYMBOL(__down_failed_interruptible); EXPORT_SYMBOL(__down_failed_trylock); EXPORT_SYMBOL(__up_wakeup); /* Networking helper routines. */ EXPORT_SYMBOL(csum_partial_copy_generic); -/* Delay loops */ -EXPORT_SYMBOL(__ndelay); -EXPORT_SYMBOL(__udelay); -EXPORT_SYMBOL(__delay); -EXPORT_SYMBOL(__const_udelay); EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); @@ -105,87 +25,11 @@ EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(strpbrk); EXPORT_SYMBOL(strstr); -EXPORT_SYMBOL(strncpy_from_user); -EXPORT_SYMBOL(__strncpy_from_user); -EXPORT_SYMBOL(clear_user); -EXPORT_SYMBOL(__clear_user); -EXPORT_SYMBOL(__copy_from_user_ll); -EXPORT_SYMBOL(__copy_to_user_ll); -EXPORT_SYMBOL(strnlen_user); - -EXPORT_SYMBOL(dma_alloc_coherent); -EXPORT_SYMBOL(dma_free_coherent); - -#ifdef CONFIG_PCI -EXPORT_SYMBOL(pci_mem_start); -#endif - -#ifdef CONFIG_PCI_BIOS -EXPORT_SYMBOL(pcibios_set_irq_routing); -EXPORT_SYMBOL(pcibios_get_irq_routing_table); -#endif - -#ifdef CONFIG_X86_USE_3DNOW -EXPORT_SYMBOL(_mmx_memcpy); -EXPORT_SYMBOL(mmx_clear_page); -EXPORT_SYMBOL(mmx_copy_page); -#endif - -#ifdef CONFIG_X86_HT -EXPORT_SYMBOL(smp_num_siblings); -EXPORT_SYMBOL(cpu_sibling_map); -#endif - #ifdef CONFIG_SMP -EXPORT_SYMBOL(cpu_data); -EXPORT_SYMBOL(cpu_online_map); -EXPORT_SYMBOL(cpu_callout_map); +extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); +extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); EXPORT_SYMBOL(__write_lock_failed); EXPORT_SYMBOL(__read_lock_failed); - -/* Global SMP stuff */ -EXPORT_SYMBOL(smp_call_function); - -/* TLB flushing */ -EXPORT_SYMBOL(flush_tlb_page); -#endif - -#ifdef CONFIG_X86_IO_APIC -EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); -#endif - -#ifdef CONFIG_MCA -EXPORT_SYMBOL(machine_id); -#endif - -#ifdef CONFIG_VT -EXPORT_SYMBOL(screen_info); -#endif - -EXPORT_SYMBOL(get_wchan); - -EXPORT_SYMBOL(rtc_lock); - -EXPORT_SYMBOL_GPL(set_nmi_callback); -EXPORT_SYMBOL_GPL(unset_nmi_callback); - -EXPORT_SYMBOL(register_die_notifier); -#ifdef CONFIG_HAVE_DEC_LOCK -EXPORT_SYMBOL(_atomic_dec_and_lock); -#endif - -EXPORT_SYMBOL(__PAGE_KERNEL); - -#ifdef CONFIG_HIGHMEM -EXPORT_SYMBOL(kmap); -EXPORT_SYMBOL(kunmap); -EXPORT_SYMBOL(kmap_atomic); -EXPORT_SYMBOL(kunmap_atomic); -EXPORT_SYMBOL(kmap_atomic_to_page); -#endif - -#if defined(CONFIG_X86_SPEEDSTEP_SMI) || defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) -EXPORT_SYMBOL(ist_info); #endif EXPORT_SYMBOL(csum_partial); diff --git a/arch/i386/kernel/i387.c b/arch/i386/kernel/i387.c index c55e037f08f7..b817168d9c62 100644 --- a/arch/i386/kernel/i387.c +++ b/arch/i386/kernel/i387.c @@ -10,6 +10,7 @@ #include <linux/config.h> #include <linux/sched.h> +#include <linux/module.h> #include <asm/processor.h> #include <asm/i387.h> #include <asm/math_emu.h> @@ -79,6 +80,7 @@ void kernel_fpu_begin(void) } clts(); } +EXPORT_SYMBOL_GPL(kernel_fpu_begin); void restore_fpu( struct task_struct *tsk ) { @@ -526,6 +528,7 @@ int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu ) return fpvalid; } +EXPORT_SYMBOL(dump_fpu); int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu) { diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 7a324e8b86f9..08540bc4ba3e 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -31,7 +31,7 @@ #include <linux/mc146818rtc.h> #include <linux/compiler.h> #include <linux/acpi.h> - +#include <linux/module.h> #include <linux/sysdev.h> #include <asm/io.h> #include <asm/smp.h> @@ -812,6 +812,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) } return best_guess; } +EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); /* * This function currently is only a helper for the i386 smp boot process where @@ -1659,6 +1660,12 @@ static void __init setup_ioapic_ids_from_mpc(void) unsigned long flags; /* + * Don't check I/O APIC IDs for xAPIC systems. They have + * no meaning without the serial APIC bus. + */ + if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 < 15)) + return; + /* * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. */ @@ -1684,10 +1691,6 @@ static void __init setup_ioapic_ids_from_mpc(void) mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; } - /* Don't check I/O APIC IDs for some xAPIC systems. They have - * no meaning without the serial APIC bus. */ - if (NO_IOAPIC_CHECK) - continue; /* * Sanity check, is the ID really free? Every APIC in a * system must have a unique ID or we get lots of nice diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 59ff9b455069..3762f6b35ab2 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -23,6 +23,9 @@ * Rusty Russell). * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes * interface to access function arguments. + * 2005-May Hien Nguyen <hien@us.ibm.com>, Jim Keniston + * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi + * <prasanna@in.ibm.com> added function-return probes. */ #include <linux/config.h> @@ -30,15 +33,14 @@ #include <linux/ptrace.h> #include <linux/spinlock.h> #include <linux/preempt.h> +#include <asm/cacheflush.h> #include <asm/kdebug.h> #include <asm/desc.h> -/* kprobe_status settings */ -#define KPROBE_HIT_ACTIVE 0x00000001 -#define KPROBE_HIT_SS 0x00000002 - static struct kprobe *current_kprobe; static unsigned long kprobe_status, kprobe_old_eflags, kprobe_saved_eflags; +static struct kprobe *kprobe_prev; +static unsigned long kprobe_status_prev, kprobe_old_eflags_prev, kprobe_saved_eflags_prev; static struct pt_regs jprobe_saved_regs; static long *jprobe_saved_esp; /* copy of the kernel stack at the probe fire time */ @@ -68,16 +70,50 @@ int arch_prepare_kprobe(struct kprobe *p) void arch_copy_kprobe(struct kprobe *p) { memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + p->opcode = *p->addr; } -void arch_remove_kprobe(struct kprobe *p) +void arch_arm_kprobe(struct kprobe *p) { + *p->addr = BREAKPOINT_INSTRUCTION; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs) +void arch_disarm_kprobe(struct kprobe *p) { *p->addr = p->opcode; - regs->eip = (unsigned long)p->addr; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); +} + +void arch_remove_kprobe(struct kprobe *p) +{ +} + +static inline void save_previous_kprobe(void) +{ + kprobe_prev = current_kprobe; + kprobe_status_prev = kprobe_status; + kprobe_old_eflags_prev = kprobe_old_eflags; + kprobe_saved_eflags_prev = kprobe_saved_eflags; +} + +static inline void restore_previous_kprobe(void) +{ + current_kprobe = kprobe_prev; + kprobe_status = kprobe_status_prev; + kprobe_old_eflags = kprobe_old_eflags_prev; + kprobe_saved_eflags = kprobe_saved_eflags_prev; +} + +static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs) +{ + current_kprobe = p; + kprobe_saved_eflags = kprobe_old_eflags + = (regs->eflags & (TF_MASK | IF_MASK)); + if (is_IF_modifier(p->opcode)) + kprobe_saved_eflags &= ~IF_MASK; } static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) @@ -91,6 +127,50 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->eip = (unsigned long)&p->ainsn.insn; } +struct task_struct *arch_get_kprobe_task(void *ptr) +{ + return ((struct thread_info *) (((unsigned long) ptr) & + (~(THREAD_SIZE -1))))->task; +} + +void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) +{ + unsigned long *sara = (unsigned long *)®s->esp; + struct kretprobe_instance *ri; + static void *orig_ret_addr; + + /* + * Save the return address when the return probe hits + * the first time, and use it to populate the (krprobe + * instance)->ret_addr for subsequent return probes at + * the same addrress since stack address would have + * the kretprobe_trampoline by then. + */ + if (((void*) *sara) != kretprobe_trampoline) + orig_ret_addr = (void*) *sara; + + if ((ri = get_free_rp_inst(rp)) != NULL) { + ri->rp = rp; + ri->stack_addr = sara; + ri->ret_addr = orig_ret_addr; + add_rp_inst(ri); + /* Replace the return addr with trampoline addr */ + *sara = (unsigned long) &kretprobe_trampoline; + } else { + rp->nmissed++; + } +} + +void arch_kprobe_flush_task(struct task_struct *tk) +{ + struct kretprobe_instance *ri; + while ((ri = get_rp_inst_tsk(tk)) != NULL) { + *((unsigned long *)(ri->stack_addr)) = + (unsigned long) ri->ret_addr; + recycle_rp_inst(ri); + } +} + /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled thorough out this function. @@ -127,8 +207,18 @@ static int kprobe_handler(struct pt_regs *regs) unlock_kprobes(); goto no_kprobe; } - disarm_kprobe(p, regs); - ret = 1; + /* We have reentered the kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(); + set_current_kprobe(p, regs); + p->nmissed++; + prepare_singlestep(p, regs); + kprobe_status = KPROBE_REENTER; + return 1; } else { p = current_kprobe; if (p->break_handler && p->break_handler(p, regs)) { @@ -163,11 +253,7 @@ static int kprobe_handler(struct pt_regs *regs) } kprobe_status = KPROBE_HIT_ACTIVE; - current_kprobe = p; - kprobe_saved_eflags = kprobe_old_eflags - = (regs->eflags & (TF_MASK | IF_MASK)); - if (is_IF_modifier(p->opcode)) - kprobe_saved_eflags &= ~IF_MASK; + set_current_kprobe(p, regs); if (p->pre_handler && p->pre_handler(p, regs)) /* handler has already set things up, so skip ss setup */ @@ -184,6 +270,55 @@ no_kprobe: } /* + * For function-return probes, init_kprobes() establishes a probepoint + * here. When a retprobed function returns, this probe is hit and + * trampoline_probe_handler() runs, calling the kretprobe's handler. + */ + void kretprobe_trampoline_holder(void) + { + asm volatile ( ".global kretprobe_trampoline\n" + "kretprobe_trampoline: \n" + "nop\n"); + } + +/* + * Called when we hit the probe point at kretprobe_trampoline + */ +int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct task_struct *tsk; + struct kretprobe_instance *ri; + struct hlist_head *head; + struct hlist_node *node; + unsigned long *sara = ((unsigned long *) ®s->esp) - 1; + + tsk = arch_get_kprobe_task(sara); + head = kretprobe_inst_table_head(tsk); + + hlist_for_each_entry(ri, node, head, hlist) { + if (ri->stack_addr == sara && ri->rp) { + if (ri->rp->handler) + ri->rp->handler(ri, regs); + } + } + return 0; +} + +void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) +{ + struct kretprobe_instance *ri; + /* RA already popped */ + unsigned long *sara = ((unsigned long *)®s->esp) - 1; + + while ((ri = get_rp_inst(sara))) { + regs->eip = (unsigned long)ri->ret_addr; + recycle_rp_inst(ri); + } + regs->eflags &= ~TF_MASK; +} + +/* * Called after single-stepping. p->addr is the address of the * instruction whose first byte has been replaced by the "int 3" * instruction. To avoid the SMP problems that can occur when we @@ -263,13 +398,22 @@ static inline int post_kprobe_handler(struct pt_regs *regs) if (!kprobe_running()) return 0; - if (current_kprobe->post_handler) + if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { + kprobe_status = KPROBE_HIT_SSDONE; current_kprobe->post_handler(current_kprobe, regs, 0); + } - resume_execution(current_kprobe, regs); + if (current_kprobe->post_handler != trampoline_post_handler) + resume_execution(current_kprobe, regs); regs->eflags |= kprobe_saved_eflags; + /*Restore back the original saved kprobes variables and continue. */ + if (kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(); + goto out; + } unlock_kprobes(); +out: preempt_enable_no_resched(); /* diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 1347ab4939e7..383a11600d2c 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -914,7 +914,10 @@ void __init mp_register_ioapic ( mp_ioapics[idx].mpc_apicaddr = address; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15)) + mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); + else + mp_ioapics[idx].mpc_apicid = id; mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); /* @@ -1055,11 +1058,20 @@ void __init mp_config_acpi_legacy_irqs (void) } } +#define MAX_GSI_NUM 4096 + int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) { int ioapic = -1; int ioapic_pin = 0; int idx, bit = 0; + static int pci_irq = 16; + /* + * Mapping between Global System Interrups, which + * represent all possible interrupts, and IRQs + * assigned to actual devices. + */ + static int gsi_to_irq[MAX_GSI_NUM]; #ifdef CONFIG_ACPI_BUS /* Don't set up the ACPI SCI because it's already set up */ @@ -1094,11 +1106,26 @@ int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", mp_ioapic_routing[ioapic].apic_id, ioapic_pin); - return gsi; + return gsi_to_irq[gsi]; } mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit); + if (edge_level) { + /* + * For PCI devices assign IRQs in order, avoiding gaps + * due to unused I/O APIC pins. + */ + int irq = gsi; + if (gsi < MAX_GSI_NUM) { + gsi = pci_irq++; + gsi_to_irq[irq] = gsi; + } else { + printk(KERN_ERR "GSI %u is too high\n", gsi); + return gsi; + } + } + io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1, active_high_low == ACPI_ACTIVE_HIGH ? 0 : 1); diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 2c0ee9c2d020..da6c46d667cb 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -28,8 +28,7 @@ #include <linux/sysctl.h> #include <asm/smp.h> -#include <asm/mtrr.h> -#include <asm/mpspec.h> +#include <asm/div64.h> #include <asm/nmi.h> #include "mach_traps.h" @@ -324,6 +323,16 @@ static void clear_msr_range(unsigned int base, unsigned int n) wrmsr(base+i, 0, 0); } +static inline void write_watchdog_counter(const char *descr) +{ + u64 count = (u64)cpu_khz * 1000; + + do_div(count, nmi_hz); + if(descr) + Dprintk("setting %s to -0x%08Lx\n", descr, count); + wrmsrl(nmi_perfctr_msr, 0 - count); +} + static void setup_k7_watchdog(void) { unsigned int evntsel; @@ -339,8 +348,7 @@ static void setup_k7_watchdog(void) | K7_NMI_EVENT; wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); - Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000)); - wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1); + write_watchdog_counter("K7_PERFCTR0"); apic_write(APIC_LVTPC, APIC_DM_NMI); evntsel |= K7_EVNTSEL_ENABLE; wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); @@ -361,8 +369,7 @@ static void setup_p6_watchdog(void) | P6_NMI_EVENT; wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); - Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000)); - wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0); + write_watchdog_counter("P6_PERFCTR0"); apic_write(APIC_LVTPC, APIC_DM_NMI); evntsel |= P6_EVNTSEL0_ENABLE; wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); @@ -402,8 +409,7 @@ static int setup_p4_watchdog(void) wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); - Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000)); - wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1); + write_watchdog_counter("P4_IQ_COUNTER0"); apic_write(APIC_LVTPC, APIC_DM_NMI); wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); return 1; @@ -518,7 +524,7 @@ void nmi_watchdog_tick (struct pt_regs * regs) * other P6 variant */ apic_write(APIC_LVTPC, APIC_DM_NMI); } - wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1); + write_watchdog_counter(NULL); } } diff --git a/arch/i386/kernel/pci-dma.c b/arch/i386/kernel/pci-dma.c index 4de2e03c7b45..1e51427cc9eb 100644 --- a/arch/i386/kernel/pci-dma.c +++ b/arch/i386/kernel/pci-dma.c @@ -11,6 +11,7 @@ #include <linux/mm.h> #include <linux/string.h> #include <linux/pci.h> +#include <linux/module.h> #include <asm/io.h> struct dma_coherent_mem { @@ -54,6 +55,7 @@ void *dma_alloc_coherent(struct device *dev, size_t size, } return ret; } +EXPORT_SYMBOL(dma_alloc_coherent); void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) @@ -68,6 +70,7 @@ void dma_free_coherent(struct device *dev, size_t size, } else free_pages((unsigned long)vaddr, order); } +EXPORT_SYMBOL(dma_free_coherent); int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, dma_addr_t device_addr, size_t size, int flags) diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 96e3ea6b17c7..aea2ce1145df 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -37,6 +37,7 @@ #include <linux/kallsyms.h> #include <linux/ptrace.h> #include <linux/random.h> +#include <linux/kprobes.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -73,6 +74,7 @@ unsigned long thread_saved_pc(struct task_struct *tsk) * Powermanagement idle function, if any.. */ void (*pm_idle)(void); +EXPORT_SYMBOL(pm_idle); static DEFINE_PER_CPU(unsigned int, cpu_idle_state); void disable_hlt(void) @@ -105,6 +107,9 @@ void default_idle(void) cpu_relax(); } } +#ifdef CONFIG_APM_MODULE +EXPORT_SYMBOL(default_idle); +#endif /* * On SMP it's slightly faster (but much more power-consuming!) @@ -262,7 +267,7 @@ void show_regs(struct pt_regs * regs) printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); print_symbol("EIP is at %s\n", regs->eip); - if (regs->xcs & 3) + if (user_mode(regs)) printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); printk(" EFLAGS: %08lx %s (%s)\n", regs->eflags, print_tainted(), system_utsname.release); @@ -325,6 +330,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) /* Ok, create the new process.. */ return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); } +EXPORT_SYMBOL(kernel_thread); /* * Free current thread data structures etc.. @@ -334,6 +340,13 @@ void exit_thread(void) struct task_struct *tsk = current; struct thread_struct *t = &tsk->thread; + /* + * Remove function-return probe instances associated with this task + * and put them back on the free list. Do not insert an exit probe for + * this function, it will be disabled by kprobe_flush_task if you do. + */ + kprobe_flush_task(tsk); + /* The process may have allocated an io port bitmap... nuke it. */ if (unlikely(NULL != t->io_bitmap_ptr)) { int cpu = get_cpu(); @@ -357,6 +370,13 @@ void flush_thread(void) { struct task_struct *tsk = current; + /* + * Remove function-return probe instances associated with this task + * and put them back on the free list. Do not insert an exit probe for + * this function, it will be disabled by kprobe_flush_task if you do. + */ + kprobe_flush_task(tsk); + memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* @@ -508,6 +528,7 @@ void dump_thread(struct pt_regs * regs, struct user * dump) dump->u_fpvalid = dump_fpu (regs, &dump->i387); } +EXPORT_SYMBOL(dump_thread); /* * Capture the user space registers if the task is not running (in user space) @@ -627,13 +648,13 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas * Now maybe reload the debug registers */ if (unlikely(next->debugreg[7])) { - loaddebug(next, 0); - loaddebug(next, 1); - loaddebug(next, 2); - loaddebug(next, 3); + set_debugreg(current->thread.debugreg[0], 0); + set_debugreg(current->thread.debugreg[1], 1); + set_debugreg(current->thread.debugreg[2], 2); + set_debugreg(current->thread.debugreg[3], 3); /* no 4 and 5 */ - loaddebug(next, 6); - loaddebug(next, 7); + set_debugreg(current->thread.debugreg[6], 6); + set_debugreg(current->thread.debugreg[7], 7); } if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) @@ -731,6 +752,7 @@ unsigned long get_wchan(struct task_struct *p) } while (count++ < 16); return 0; } +EXPORT_SYMBOL(get_wchan); /* * sys_alloc_thread_area: get a yet unused TLS descriptor index. diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index e34f651fa13c..0da59b42843c 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -668,7 +668,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) info.si_code = TRAP_BRKPT; /* User-mode eip? */ - info.si_addr = user_mode(regs) ? (void __user *) regs->eip : NULL; + info.si_addr = user_mode_vm(regs) ? (void __user *) regs->eip : NULL; /* Send us the fakey SIGTRAP */ force_sig_info(SIGTRAP, &info, tsk); diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c index 6dc27eb70ee7..db912209a8d3 100644 --- a/arch/i386/kernel/reboot.c +++ b/arch/i386/kernel/reboot.c @@ -2,6 +2,7 @@ * linux/arch/i386/kernel/reboot.c */ +#include <linux/config.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/delay.h> @@ -19,6 +20,7 @@ * Power off function, if any */ void (*pm_power_off)(void); +EXPORT_SYMBOL(pm_power_off); static int reboot_mode; static int reboot_thru_bios; @@ -295,6 +297,9 @@ void machine_real_restart(unsigned char *code, int length) : : "i" ((void *) (0x1000 - sizeof (real_mode_switch) - 100))); } +#ifdef CONFIG_APM_MODULE +EXPORT_SYMBOL(machine_real_restart); +#endif void machine_restart(char * __unused) { diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 2bfbddebdbf8..30406fd0b64c 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -23,8 +23,10 @@ * This file handles the architecture-dependent parts of initialization */ +#include <linux/config.h> #include <linux/sched.h> #include <linux/mm.h> +#include <linux/mmzone.h> #include <linux/tty.h> #include <linux/ioport.h> #include <linux/acpi.h> @@ -73,6 +75,7 @@ EXPORT_SYMBOL(efi_enabled); struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; /* common cpu data for all cpus */ struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; +EXPORT_SYMBOL(boot_cpu_data); unsigned long mmu_cr4_features; @@ -90,12 +93,18 @@ extern acpi_interrupt_flags acpi_sci_flags; /* for MCA, but anyone else can use it if they want */ unsigned int machine_id; +#ifdef CONFIG_MCA +EXPORT_SYMBOL(machine_id); +#endif unsigned int machine_submodel_id; unsigned int BIOS_revision; unsigned int mca_pentium_flag; /* For PCI or other memory-mapped resources */ unsigned long pci_mem_start = 0x10000000; +#ifdef CONFIG_PCI +EXPORT_SYMBOL(pci_mem_start); +#endif /* Boot loader ID as an integer, for the benefit of proc_dointvec */ int bootloader_type; @@ -107,14 +116,26 @@ static unsigned int highmem_pages = -1; * Setup options */ struct drive_info_struct { char dummy[32]; } drive_info; +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \ + defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) +EXPORT_SYMBOL(drive_info); +#endif struct screen_info screen_info; +#ifdef CONFIG_VT +EXPORT_SYMBOL(screen_info); +#endif struct apm_info apm_info; +EXPORT_SYMBOL(apm_info); struct sys_desc_table_struct { unsigned short length; unsigned char table[0]; }; struct edid_info edid_info; struct ist_info ist_info; +#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \ + defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) +EXPORT_SYMBOL(ist_info); +#endif struct e820map e820; extern void early_cpu_init(void); @@ -1022,7 +1043,7 @@ static void __init reserve_ebda_region(void) reserve_bootmem(addr, PAGE_SIZE); } -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_NEED_MULTIPLE_NODES void __init setup_bootmem_allocator(void); static unsigned long __init setup_memory(void) { @@ -1072,9 +1093,9 @@ void __init zone_sizes_init(void) free_area_init(zones_size); } #else -extern unsigned long setup_memory(void); +extern unsigned long __init setup_memory(void); extern void zone_sizes_init(void); -#endif /* !CONFIG_DISCONTIGMEM */ +#endif /* !CONFIG_NEED_MULTIPLE_NODES */ void __init setup_bootmem_allocator(void) { @@ -1475,6 +1496,7 @@ void __init setup_arch(char **cmdline_p) #endif paging_init(); remapped_pgdat_init(); + sparse_init(); zone_sizes_init(); /* diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index ea46d028af08..b9b8f4e20fad 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -346,8 +346,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) extern void __user __kernel_sigreturn; extern void __user __kernel_rt_sigreturn; -static void setup_frame(int sig, struct k_sigaction *ka, - sigset_t *set, struct pt_regs * regs) +static int setup_frame(int sig, struct k_sigaction *ka, + sigset_t *set, struct pt_regs * regs) { void __user *restorer; struct sigframe __user *frame; @@ -429,13 +429,14 @@ static void setup_frame(int sig, struct k_sigaction *ka, current->comm, current->pid, frame, regs->eip, frame->pretcode); #endif - return; + return 1; give_sigsegv: force_sigsegv(sig, current); + return 0; } -static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs * regs) { void __user *restorer; @@ -522,20 +523,23 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, current->comm, current->pid, frame, regs->eip, frame->pretcode); #endif - return; + return 1; give_sigsegv: force_sigsegv(sig, current); + return 0; } /* * OK, we're invoking a handler */ -static void +static int handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, sigset_t *oldset, struct pt_regs * regs) { + int ret; + /* Are we from a system call? */ if (regs->orig_eax >= 0) { /* If so, check system call restarting.. */ @@ -569,17 +573,19 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, /* Set up the stack frame */ if (ka->sa.sa_flags & SA_SIGINFO) - setup_rt_frame(sig, ka, info, oldset, regs); + ret = setup_rt_frame(sig, ka, info, oldset, regs); else - setup_frame(sig, ka, oldset, regs); + ret = setup_frame(sig, ka, oldset, regs); - if (!(ka->sa.sa_flags & SA_NODEFER)) { + if (ret && !(ka->sa.sa_flags & SA_NODEFER)) { spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); sigaddset(¤t->blocked,sig); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); } + + return ret; } /* @@ -599,7 +605,7 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset) * kernel mode. Just return without doing anything * if so. */ - if ((regs->xcs & 3) != 3) + if (!user_mode(regs)) return 1; if (current->flags & PF_FREEZE) { @@ -618,12 +624,11 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset) * inside the kernel. */ if (unlikely(current->thread.debugreg[7])) { - loaddebug(¤t->thread, 7); + set_debugreg(current->thread.debugreg[7], 7); } /* Whee! Actually deliver the signal. */ - handle_signal(signr, &info, &ka, oldset, regs); - return 1; + return handle_signal(signr, &info, &ka, oldset, regs); } no_signal: diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 6223c33ac91c..68be7d0c7238 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -19,6 +19,7 @@ #include <linux/mc146818rtc.h> #include <linux/cache.h> #include <linux/interrupt.h> +#include <linux/module.h> #include <asm/mtrr.h> #include <asm/tlbflush.h> @@ -452,6 +453,7 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) preempt_enable(); } +EXPORT_SYMBOL(flush_tlb_page); static void do_flush_tlb_all(void* info) { @@ -547,6 +549,7 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, return 0; } +EXPORT_SYMBOL(smp_call_function); static void stop_this_cpu (void * dummy) { diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index bc1bb6919e6a..c20d96d5c15c 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -60,6 +60,9 @@ static int __initdata smp_b_stepping; /* Number of siblings per CPU package */ int smp_num_siblings = 1; +#ifdef CONFIG_X86_HT +EXPORT_SYMBOL(smp_num_siblings); +#endif int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */ EXPORT_SYMBOL(phys_proc_id); int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */ @@ -67,13 +70,16 @@ EXPORT_SYMBOL(cpu_core_id); /* bitmap of online cpus */ cpumask_t cpu_online_map; +EXPORT_SYMBOL(cpu_online_map); cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; +EXPORT_SYMBOL(cpu_callout_map); static cpumask_t smp_commenced_mask; /* Per CPU bogomips and other parameters */ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; +EXPORT_SYMBOL(cpu_data); u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff }; @@ -199,7 +205,7 @@ static void __init synchronize_tsc_bp (void) unsigned long long t0; unsigned long long sum, avg; long long delta; - unsigned long one_usec; + unsigned int one_usec; int buggy = 0; printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus()); @@ -885,8 +891,14 @@ static void smp_tune_scheduling (void) static int boot_cpu_logical_apicid; /* Where the IO area was mapped on multiquad, always 0 otherwise */ void *xquad_portio; +#ifdef CONFIG_X86_NUMAQ +EXPORT_SYMBOL(xquad_portio); +#endif cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; +#ifdef CONFIG_X86_HT +EXPORT_SYMBOL(cpu_sibling_map); +#endif cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; EXPORT_SYMBOL(cpu_core_map); diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index a0dcb7c87c30..e68d9fdb0759 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -77,11 +77,13 @@ u64 jiffies_64 = INITIAL_JIFFIES; EXPORT_SYMBOL(jiffies_64); -unsigned long cpu_khz; /* Detected as we calibrate the TSC */ +unsigned int cpu_khz; /* Detected as we calibrate the TSC */ +EXPORT_SYMBOL(cpu_khz); extern unsigned long wall_jiffies; DEFINE_SPINLOCK(rtc_lock); +EXPORT_SYMBOL(rtc_lock); DEFINE_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); @@ -324,6 +326,8 @@ unsigned long get_cmos_time(void) return retval; } +EXPORT_SYMBOL(get_cmos_time); + static void sync_cmos_clock(unsigned long dummy); static struct timer_list sync_cmos_timer = diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c index 8e201219f525..37353bd31803 100644 --- a/arch/i386/kernel/timers/common.c +++ b/arch/i386/kernel/timers/common.c @@ -139,6 +139,15 @@ bad_calibration: } #endif + +unsigned long read_timer_tsc(void) +{ + unsigned long retval; + rdtscl(retval); + return retval; +} + + /* calculate cpu_khz */ void init_cpu_khz(void) { @@ -154,7 +163,8 @@ void init_cpu_khz(void) :"=a" (cpu_khz), "=d" (edx) :"r" (tsc_quotient), "0" (eax), "1" (edx)); - printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); + printk("Detected %u.%03u MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); } } } diff --git a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c index a3d6a288088b..7e39ed8e33f8 100644 --- a/arch/i386/kernel/timers/timer.c +++ b/arch/i386/kernel/timers/timer.c @@ -64,3 +64,12 @@ struct timer_opts* __init select_timer(void) panic("select_timer: Cannot find a suitable timer\n"); return NULL; } + +int read_current_timer(unsigned long *timer_val) +{ + if (cur_timer->read_timer) { + *timer_val = cur_timer->read_timer(); + return 0; + } + return -1; +} diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c index f778f471a09a..d766e0963ac1 100644 --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c @@ -158,7 +158,7 @@ static int __init init_hpet(char* override) { unsigned long eax=0, edx=1000; ASM_DIV64_REG(cpu_khz, edx, tsc_quotient, eax, edx); - printk("Detected %lu.%03lu MHz processor.\n", + printk("Detected %u.%03u MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); } set_cyc2ns_scale(cpu_khz/1000); @@ -186,6 +186,7 @@ static struct timer_opts timer_hpet = { .get_offset = get_offset_hpet, .monotonic_clock = monotonic_clock_hpet, .delay = delay_hpet, + .read_timer = read_timer_tsc, }; struct init_timer_opts __initdata timer_hpet_init = { diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c index d77f22030fe6..4ef20e663498 100644 --- a/arch/i386/kernel/timers/timer_pm.c +++ b/arch/i386/kernel/timers/timer_pm.c @@ -246,6 +246,7 @@ static struct timer_opts timer_pmtmr = { .get_offset = get_offset_pmtmr, .monotonic_clock = monotonic_clock_pmtmr, .delay = delay_pmtmr, + .read_timer = read_timer_tsc, }; struct init_timer_opts __initdata timer_pmtmr_init = { diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index 180444d87824..54c36b182021 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -256,7 +256,7 @@ static unsigned long loops_per_jiffy_ref = 0; #ifndef CONFIG_SMP static unsigned long fast_gettimeoffset_ref = 0; -static unsigned long cpu_khz_ref = 0; +static unsigned int cpu_khz_ref = 0; #endif static int @@ -323,7 +323,7 @@ static inline void cpufreq_delayed_get(void) { return; } int recalibrate_cpu_khz(void) { #ifndef CONFIG_SMP - unsigned long cpu_khz_old = cpu_khz; + unsigned int cpu_khz_old = cpu_khz; if (cpu_has_tsc) { init_cpu_khz(); @@ -534,7 +534,8 @@ static int __init init_tsc(char* override) :"=a" (cpu_khz), "=d" (edx) :"r" (tsc_quotient), "0" (eax), "1" (edx)); - printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); + printk("Detected %u.%03u MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); } set_cyc2ns_scale(cpu_khz/1000); return 0; @@ -572,6 +573,7 @@ static struct timer_opts timer_tsc = { .get_offset = get_offset_tsc, .monotonic_clock = monotonic_clock_tsc, .delay = delay_tsc, + .read_timer = read_timer_tsc, }; struct init_timer_opts __initdata timer_tsc_init = { diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 83c579e82a81..e4d4e2162c7a 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -104,6 +104,7 @@ int register_die_notifier(struct notifier_block *nb) spin_unlock_irqrestore(&die_notifier_lock, flags); return err; } +EXPORT_SYMBOL(register_die_notifier); static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) { @@ -209,7 +210,7 @@ void show_registers(struct pt_regs *regs) esp = (unsigned long) (®s->esp); ss = __KERNEL_DS; - if (regs->xcs & 3) { + if (user_mode(regs)) { in_kernel = 0; esp = regs->esp; ss = regs->xss & 0xffff; @@ -265,7 +266,7 @@ static void handle_BUG(struct pt_regs *regs) char c; unsigned long eip; - if (regs->xcs & 3) + if (user_mode(regs)) goto no_bug; /* Not in kernel */ eip = regs->eip; @@ -353,7 +354,7 @@ void die(const char * str, struct pt_regs * regs, long err) static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) { - if (!(regs->eflags & VM_MASK) && !(3 & regs->xcs)) + if (!user_mode_vm(regs)) die(str, regs, err); } @@ -366,7 +367,7 @@ static void do_trap(int trapnr, int signr, char *str, int vm86, goto trap_signal; } - if (!(regs->xcs & 3)) + if (!user_mode(regs)) goto kernel_trap; trap_signal: { @@ -488,7 +489,7 @@ fastcall void do_general_protection(struct pt_regs * regs, long error_code) if (regs->eflags & VM_MASK) goto gp_in_vm86; - if (!(regs->xcs & 3)) + if (!user_mode(regs)) goto gp_in_kernel; current->thread.error_code = error_code; @@ -636,11 +637,13 @@ void set_nmi_callback(nmi_callback_t callback) { nmi_callback = callback; } +EXPORT_SYMBOL_GPL(set_nmi_callback); void unset_nmi_callback(void) { nmi_callback = dummy_nmi_callback; } +EXPORT_SYMBOL_GPL(unset_nmi_callback); #ifdef CONFIG_KPROBES fastcall void do_int3(struct pt_regs *regs, long error_code) @@ -682,7 +685,7 @@ fastcall void do_debug(struct pt_regs * regs, long error_code) unsigned int condition; struct task_struct *tsk = current; - __asm__ __volatile__("movl %%db6,%0" : "=r" (condition)); + get_debugreg(condition, 6); if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, SIGTRAP) == NOTIFY_STOP) @@ -713,7 +716,7 @@ fastcall void do_debug(struct pt_regs * regs, long error_code) * check for kernel mode by just checking the CPL * of CS. */ - if ((regs->xcs & 3) == 0) + if (!user_mode(regs)) goto clear_TF_reenable; } @@ -724,9 +727,7 @@ fastcall void do_debug(struct pt_regs * regs, long error_code) * the signal is delivered. */ clear_dr7: - __asm__("movl %0,%%db7" - : /* no output */ - : "r" (0)); + set_debugreg(0, 7); return; debug_vm86: diff --git a/arch/i386/lib/dec_and_lock.c b/arch/i386/lib/dec_and_lock.c index ab43394dc775..8b81b2524fa6 100644 --- a/arch/i386/lib/dec_and_lock.c +++ b/arch/i386/lib/dec_and_lock.c @@ -8,6 +8,7 @@ */ #include <linux/spinlock.h> +#include <linux/module.h> #include <asm/atomic.h> int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) @@ -38,3 +39,4 @@ slow_path: spin_unlock(lock); return 0; } +EXPORT_SYMBOL(_atomic_dec_and_lock); diff --git a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c index eb0cdfe9280f..c49a6acbee56 100644 --- a/arch/i386/lib/delay.c +++ b/arch/i386/lib/delay.c @@ -13,6 +13,7 @@ #include <linux/config.h> #include <linux/sched.h> #include <linux/delay.h> +#include <linux/module.h> #include <asm/processor.h> #include <asm/delay.h> #include <asm/timer.h> @@ -47,3 +48,8 @@ void __ndelay(unsigned long nsecs) { __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ } + +EXPORT_SYMBOL(__delay); +EXPORT_SYMBOL(__const_udelay); +EXPORT_SYMBOL(__udelay); +EXPORT_SYMBOL(__ndelay); diff --git a/arch/i386/lib/mmx.c b/arch/i386/lib/mmx.c index 01f8b1a2cc84..2afda94dffd3 100644 --- a/arch/i386/lib/mmx.c +++ b/arch/i386/lib/mmx.c @@ -3,6 +3,7 @@ #include <linux/string.h> #include <linux/sched.h> #include <linux/hardirq.h> +#include <linux/module.h> #include <asm/i387.h> @@ -397,3 +398,7 @@ void mmx_copy_page(void *to, void *from) else fast_copy_page(to, from); } + +EXPORT_SYMBOL(_mmx_memcpy); +EXPORT_SYMBOL(mmx_clear_page); +EXPORT_SYMBOL(mmx_copy_page); diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c index 51aa2bbb0269..4cf981d70f45 100644 --- a/arch/i386/lib/usercopy.c +++ b/arch/i386/lib/usercopy.c @@ -84,6 +84,7 @@ __strncpy_from_user(char *dst, const char __user *src, long count) __do_strncpy_from_user(dst, src, count, res); return res; } +EXPORT_SYMBOL(__strncpy_from_user); /** * strncpy_from_user: - Copy a NUL terminated string from userspace. @@ -111,7 +112,7 @@ strncpy_from_user(char *dst, const char __user *src, long count) __do_strncpy_from_user(dst, src, count, res); return res; } - +EXPORT_SYMBOL(strncpy_from_user); /* * Zero Userspace @@ -157,6 +158,7 @@ clear_user(void __user *to, unsigned long n) __do_clear_user(to, n); return n; } +EXPORT_SYMBOL(clear_user); /** * __clear_user: - Zero a block of memory in user space, with less checking. @@ -175,6 +177,7 @@ __clear_user(void __user *to, unsigned long n) __do_clear_user(to, n); return n; } +EXPORT_SYMBOL(__clear_user); /** * strlen_user: - Get the size of a string in user space. @@ -218,6 +221,7 @@ long strnlen_user(const char __user *s, long n) :"cc"); return res & mask; } +EXPORT_SYMBOL(strnlen_user); #ifdef CONFIG_X86_INTEL_USERCOPY static unsigned long @@ -570,6 +574,7 @@ survive: n = __copy_user_intel(to, from, n); return n; } +EXPORT_SYMBOL(__copy_to_user_ll); unsigned long __copy_from_user_ll(void *to, const void __user *from, unsigned long n) @@ -581,6 +586,7 @@ __copy_from_user_ll(void *to, const void __user *from, unsigned long n) n = __copy_user_zeroing_intel(to, from, n); return n; } +EXPORT_SYMBOL(__copy_from_user_ll); /** * copy_to_user: - Copy a block of data into user space. diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c index a6e0ddd65bd0..8c8527593da0 100644 --- a/arch/i386/mach-voyager/voyager_smp.c +++ b/arch/i386/mach-voyager/voyager_smp.c @@ -1288,7 +1288,7 @@ smp_local_timer_interrupt(struct pt_regs * regs) per_cpu(prof_counter, cpu); } - update_process_times(user_mode(regs)); + update_process_times(user_mode_vm(regs)); } if( ((1<<cpu) & voyager_extended_vic_processors) == 0) diff --git a/arch/i386/mm/Makefile b/arch/i386/mm/Makefile index fc3272506846..80908b5aa60f 100644 --- a/arch/i386/mm/Makefile +++ b/arch/i386/mm/Makefile @@ -4,7 +4,7 @@ obj-y := init.o pgtable.o fault.o ioremap.o extable.o pageattr.o mmap.o -obj-$(CONFIG_DISCONTIGMEM) += discontig.o +obj-$(CONFIG_NUMA) += discontig.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_BOOT_IOREMAP) += boot_ioremap.o diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 1726b4096b10..f429c871e845 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -29,12 +29,14 @@ #include <linux/highmem.h> #include <linux/initrd.h> #include <linux/nodemask.h> +#include <linux/module.h> #include <asm/e820.h> #include <asm/setup.h> #include <asm/mmzone.h> #include <bios_ebda.h> struct pglist_data *node_data[MAX_NUMNODES]; +EXPORT_SYMBOL(node_data); bootmem_data_t node0_bdata; /* @@ -42,12 +44,16 @@ bootmem_data_t node0_bdata; * populated the following initialisation. * * 1) node_online_map - the map of all nodes configured (online) in the system - * 2) physnode_map - the mapping between a pfn and owning node - * 3) node_start_pfn - the starting page frame number for a node + * 2) node_start_pfn - the starting page frame number for a node * 3) node_end_pfn - the ending page fram number for a node */ +unsigned long node_start_pfn[MAX_NUMNODES]; +unsigned long node_end_pfn[MAX_NUMNODES]; + +#ifdef CONFIG_DISCONTIGMEM /* + * 4) physnode_map - the mapping between a pfn and owning node * physnode_map keeps track of the physical memory layout of a generic * numa node on a 256Mb break (each element of the array will * represent 256Mb of memory and will be marked by the node id. so, @@ -59,6 +65,7 @@ bootmem_data_t node0_bdata; * physnode_map[8- ] = -1; */ s8 physnode_map[MAX_ELEMENTS] = { [0 ... (MAX_ELEMENTS - 1)] = -1}; +EXPORT_SYMBOL(physnode_map); void memory_present(int nid, unsigned long start, unsigned long end) { @@ -85,9 +92,7 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, return (nr_pages + 1) * sizeof(struct page); } - -unsigned long node_start_pfn[MAX_NUMNODES]; -unsigned long node_end_pfn[MAX_NUMNODES]; +#endif extern unsigned long find_max_low_pfn(void); extern void find_max_pfn(void); @@ -108,6 +113,9 @@ unsigned long node_remap_offset[MAX_NUMNODES]; void *node_remap_start_vaddr[MAX_NUMNODES]; void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); +void *node_remap_end_vaddr[MAX_NUMNODES]; +void *node_remap_alloc_vaddr[MAX_NUMNODES]; + /* * FLAT - support for basic PC memory model with discontig enabled, essentially * a single node with all available processors in it with a flat @@ -146,6 +154,21 @@ static void __init find_max_pfn_node(int nid) BUG(); } +/* Find the owning node for a pfn. */ +int early_pfn_to_nid(unsigned long pfn) +{ + int nid; + + for_each_node(nid) { + if (node_end_pfn[nid] == 0) + break; + if (node_start_pfn[nid] <= pfn && node_end_pfn[nid] >= pfn) + return nid; + } + + return 0; +} + /* * Allocate memory for the pg_data_t for this node via a crude pre-bootmem * method. For node zero take this from the bottom of memory, for @@ -163,6 +186,21 @@ static void __init allocate_pgdat(int nid) } } +void *alloc_remap(int nid, unsigned long size) +{ + void *allocation = node_remap_alloc_vaddr[nid]; + + size = ALIGN(size, L1_CACHE_BYTES); + + if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid]) + return 0; + + node_remap_alloc_vaddr[nid] += size; + memset(allocation, 0, size); + + return allocation; +} + void __init remap_numa_kva(void) { void *vaddr; @@ -170,8 +208,6 @@ void __init remap_numa_kva(void) int node; for_each_online_node(node) { - if (node == 0) - continue; for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); set_pmd_pfn((ulong) vaddr, @@ -185,13 +221,9 @@ static unsigned long calculate_numa_remap_pages(void) { int nid; unsigned long size, reserve_pages = 0; + unsigned long pfn; for_each_online_node(nid) { - if (nid == 0) - continue; - if (!node_remap_size[nid]) - continue; - /* * The acpi/srat node info can show hot-add memroy zones * where memory could be added but not currently present. @@ -208,11 +240,24 @@ static unsigned long calculate_numa_remap_pages(void) size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES; /* now the roundup is correct, convert to PAGE_SIZE pages */ size = size * PTRS_PER_PTE; + + /* + * Validate the region we are allocating only contains valid + * pages. + */ + for (pfn = node_end_pfn[nid] - size; + pfn < node_end_pfn[nid]; pfn++) + if (!page_is_ram(pfn)) + break; + + if (pfn != node_end_pfn[nid]) + size = 0; + printk("Reserving %ld pages of KVA for lmem_map of node %d\n", size, nid); node_remap_size[nid] = size; - reserve_pages += size; node_remap_offset[nid] = reserve_pages; + reserve_pages += size; printk("Shrinking node %d from %ld pages to %ld pages\n", nid, node_end_pfn[nid], node_end_pfn[nid] - size); node_end_pfn[nid] -= size; @@ -265,12 +310,18 @@ unsigned long __init setup_memory(void) (ulong) pfn_to_kaddr(max_low_pfn)); for_each_online_node(nid) { node_remap_start_vaddr[nid] = pfn_to_kaddr( - (highstart_pfn + reserve_pages) - node_remap_offset[nid]); + highstart_pfn + node_remap_offset[nid]); + /* Init the node remap allocator */ + node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + + (node_remap_size[nid] * PAGE_SIZE); + node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] + + ALIGN(sizeof(pg_data_t), PAGE_SIZE); + allocate_pgdat(nid); printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, (ulong) node_remap_start_vaddr[nid], - (ulong) pfn_to_kaddr(highstart_pfn + reserve_pages - - node_remap_offset[nid] + node_remap_size[nid])); + (ulong) pfn_to_kaddr(highstart_pfn + + node_remap_offset[nid] + node_remap_size[nid])); } printk("High memory starts at vaddr %08lx\n", (ulong) pfn_to_kaddr(highstart_pfn)); @@ -333,23 +384,9 @@ void __init zone_sizes_init(void) } zholes_size = get_zholes_size(nid); - /* - * We let the lmem_map for node 0 be allocated from the - * normal bootmem allocator, but other nodes come from the - * remapped KVA area - mbligh - */ - if (!nid) - free_area_init_node(nid, NODE_DATA(nid), - zones_size, start, zholes_size); - else { - unsigned long lmem_map; - lmem_map = (unsigned long)node_remap_start_vaddr[nid]; - lmem_map += sizeof(pg_data_t) + PAGE_SIZE - 1; - lmem_map &= PAGE_MASK; - NODE_DATA(nid)->node_mem_map = (struct page *)lmem_map; - free_area_init_node(nid, NODE_DATA(nid), zones_size, - start, zholes_size); - } + + free_area_init_node(nid, NODE_DATA(nid), zones_size, start, + zholes_size); } return; } @@ -358,24 +395,26 @@ void __init set_highmem_pages_init(int bad_ppro) { #ifdef CONFIG_HIGHMEM struct zone *zone; + struct page *page; for_each_zone(zone) { - unsigned long node_pfn, node_high_size, zone_start_pfn; - struct page * zone_mem_map; - + unsigned long node_pfn, zone_start_pfn, zone_end_pfn; + if (!is_highmem(zone)) continue; - printk("Initializing %s for node %d\n", zone->name, - zone->zone_pgdat->node_id); - - node_high_size = zone->spanned_pages; - zone_mem_map = zone->zone_mem_map; zone_start_pfn = zone->zone_start_pfn; + zone_end_pfn = zone_start_pfn + zone->spanned_pages; + + printk("Initializing %s for node %d (%08lx:%08lx)\n", + zone->name, zone->zone_pgdat->node_id, + zone_start_pfn, zone_end_pfn); - for (node_pfn = 0; node_pfn < node_high_size; node_pfn++) { - one_highpage_init((struct page *)(zone_mem_map + node_pfn), - zone_start_pfn + node_pfn, bad_ppro); + for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) { + if (!pfn_valid(node_pfn)) + continue; + page = pfn_to_page(node_pfn); + one_highpage_init(page, node_pfn, bad_ppro); } } totalram_pages += totalhigh_pages; diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c index fc4c4cad4e98..4b7aaf99d7ea 100644 --- a/arch/i386/mm/highmem.c +++ b/arch/i386/mm/highmem.c @@ -1,4 +1,5 @@ #include <linux/highmem.h> +#include <linux/module.h> void *kmap(struct page *page) { @@ -87,3 +88,8 @@ struct page *kmap_atomic_to_page(void *ptr) return pte_page(*pte); } +EXPORT_SYMBOL(kmap); +EXPORT_SYMBOL(kunmap); +EXPORT_SYMBOL(kmap_atomic); +EXPORT_SYMBOL(kunmap_atomic); +EXPORT_SYMBOL(kmap_atomic_to_page); diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 8766c771bb45..3672e2ef51ae 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -191,7 +191,7 @@ static inline int page_kills_ppro(unsigned long pagenr) extern int is_available_memory(efi_memory_desc_t *); -static inline int page_is_ram(unsigned long pagenr) +int page_is_ram(unsigned long pagenr) { int i; unsigned long addr, end; @@ -276,7 +276,9 @@ void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) SetPageReserved(page); } -#ifndef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NUMA +extern void set_highmem_pages_init(int); +#else static void __init set_highmem_pages_init(int bad_ppro) { int pfn; @@ -284,9 +286,7 @@ static void __init set_highmem_pages_init(int bad_ppro) one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); totalram_pages += totalhigh_pages; } -#else -extern void set_highmem_pages_init(int); -#endif /* !CONFIG_DISCONTIGMEM */ +#endif /* CONFIG_FLATMEM */ #else #define kmap_init() do { } while (0) @@ -295,12 +295,13 @@ extern void set_highmem_pages_init(int); #endif /* CONFIG_HIGHMEM */ unsigned long long __PAGE_KERNEL = _PAGE_KERNEL; +EXPORT_SYMBOL(__PAGE_KERNEL); unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC; -#ifndef CONFIG_DISCONTIGMEM -#define remap_numa_kva() do {} while (0) -#else +#ifdef CONFIG_NUMA extern void __init remap_numa_kva(void); +#else +#define remap_numa_kva() do {} while (0) #endif static void __init pagetable_init (void) @@ -525,7 +526,7 @@ static void __init set_max_mapnr_init(void) #else num_physpages = max_low_pfn; #endif -#ifndef CONFIG_DISCONTIGMEM +#ifdef CONFIG_FLATMEM max_mapnr = num_physpages; #endif } @@ -539,7 +540,7 @@ void __init mem_init(void) int tmp; int bad_ppro; -#ifndef CONFIG_DISCONTIGMEM +#ifdef CONFIG_FLATMEM if (!mem_map) BUG(); #endif diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c index ab542792b27b..d393eefc7052 100644 --- a/arch/i386/mm/ioremap.c +++ b/arch/i386/mm/ioremap.c @@ -11,6 +11,7 @@ #include <linux/vmalloc.h> #include <linux/init.h> #include <linux/slab.h> +#include <linux/module.h> #include <asm/io.h> #include <asm/fixmap.h> #include <asm/cacheflush.h> @@ -165,7 +166,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l } return (void __iomem *) (offset + (char __iomem *)addr); } - +EXPORT_SYMBOL(__ioremap); /** * ioremap_nocache - map bus memory into CPU space @@ -222,6 +223,7 @@ void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) return p; } +EXPORT_SYMBOL(ioremap_nocache); void iounmap(volatile void __iomem *addr) { @@ -255,6 +257,7 @@ out_unlock: write_unlock(&vmlist_lock); kfree(p); } +EXPORT_SYMBOL(iounmap); void __init *bt_ioremap(unsigned long phys_addr, unsigned long size) { diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index dd81479ff88a..270c59f099a4 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -30,13 +30,14 @@ void show_mem(void) struct page *page; pg_data_t *pgdat; unsigned long i; + struct page_state ps; printk("Mem-info:\n"); show_free_areas(); printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; ++i) { - page = pgdat->node_mem_map + i; + page = pgdat_page_nr(pgdat, i); total++; if (PageHighMem(page)) highmem++; @@ -53,6 +54,13 @@ void show_mem(void) printk("%d reserved pages\n",reserved); printk("%d pages shared\n",shared); printk("%d pages swap cached\n",cached); + + get_page_state(&ps); + printk("%lu pages dirty\n", ps.nr_dirty); + printk("%lu pages writeback\n", ps.nr_writeback); + printk("%lu pages mapped\n", ps.nr_mapped); + printk("%lu pages slab\n", ps.nr_slab); + printk("%lu pages pagetables\n", ps.nr_page_table_pages); } /* diff --git a/arch/i386/oprofile/backtrace.c b/arch/i386/oprofile/backtrace.c index 52d72e074f7f..65dfd2edb671 100644 --- a/arch/i386/oprofile/backtrace.c +++ b/arch/i386/oprofile/backtrace.c @@ -91,7 +91,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) head = (struct frame_head *)regs->ebp; #endif - if (!user_mode(regs)) { + if (!user_mode_vm(regs)) { while (depth-- && valid_kernel_stack(head, regs)) head = dump_backtrace(head); return; diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c index da21b1d07c15..83458f81e661 100644 --- a/arch/i386/pci/irq.c +++ b/arch/i386/pci/irq.c @@ -227,6 +227,24 @@ static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, i } /* + * The VIA pirq rules are nibble-based, like ALI, + * but without the ugly irq number munging. + * However, for 82C586, nibble map is different . + */ +static int pirq_via586_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + static unsigned int pirqmap[4] = { 3, 2, 5, 1 }; + return read_config_nybble(router, 0x55, pirqmap[pirq-1]); +} + +static int pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + static unsigned int pirqmap[4] = { 3, 2, 5, 1 }; + write_config_nybble(router, 0x55, pirqmap[pirq-1], irq); + return 1; +} + +/* * ITE 8330G pirq rules are nibble-based * FIXME: pirqmap may be { 1, 0, 3, 2 }, * 2+3 are both mapped to irq 9 on my system @@ -512,6 +530,10 @@ static __init int via_router_probe(struct irq_router *r, struct pci_dev *router, switch(device) { case PCI_DEVICE_ID_VIA_82C586_0: + r->name = "VIA"; + r->get = pirq_via586_get; + r->set = pirq_via586_set; + return 1; case PCI_DEVICE_ID_VIA_82C596: case PCI_DEVICE_ID_VIA_82C686: case PCI_DEVICE_ID_VIA_8231: diff --git a/arch/i386/pci/pcbios.c b/arch/i386/pci/pcbios.c index 141421b673b0..b9d65f0bc2d1 100644 --- a/arch/i386/pci/pcbios.c +++ b/arch/i386/pci/pcbios.c @@ -4,6 +4,7 @@ #include <linux/pci.h> #include <linux/init.h> +#include <linux/module.h> #include "pci.h" #include "pci-functions.h" @@ -456,7 +457,7 @@ struct irq_routing_table * __devinit pcibios_get_irq_routing_table(void) free_page(page); return rt; } - +EXPORT_SYMBOL(pcibios_get_irq_routing_table); int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq) { @@ -473,6 +474,7 @@ int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq) "S" (&pci_indirect)); return !(ret & 0xff00); } +EXPORT_SYMBOL(pcibios_set_irq_routing); static int __init pci_pcbios_init(void) { diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c index cf337c673d92..6f521cf19a13 100644 --- a/arch/i386/power/cpu.c +++ b/arch/i386/power/cpu.c @@ -94,13 +94,13 @@ static void fix_processor_context(void) * Now maybe reload the debug registers */ if (current->thread.debugreg[7]){ - loaddebug(¤t->thread, 0); - loaddebug(¤t->thread, 1); - loaddebug(¤t->thread, 2); - loaddebug(¤t->thread, 3); - /* no 4 and 5 */ - loaddebug(¤t->thread, 6); - loaddebug(¤t->thread, 7); + set_debugreg(current->thread.debugreg[0], 0); + set_debugreg(current->thread.debugreg[1], 1); + set_debugreg(current->thread.debugreg[2], 2); + set_debugreg(current->thread.debugreg[3], 3); + /* no 4 and 5 */ + set_debugreg(current->thread.debugreg[6], 6); + set_debugreg(current->thread.debugreg[7], 7); } } diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index ce4dfa8b834d..01b78e7f992e 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -161,6 +161,8 @@ config IA64_PAGE_SIZE_64KB endchoice +source kernel/Kconfig.hz + config IA64_BRL_EMU bool depends on ITANIUM @@ -197,7 +199,7 @@ config HOLES_IN_ZONE bool default y if VIRTUAL_MEM_MAP -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool "Discontiguous memory support" depends on (IA64_DIG || IA64_SGI_SN2 || IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB) && NUMA && VIRTUAL_MEM_MAP default y if (IA64_SGI_SN2 || IA64_GENERIC) && NUMA @@ -300,6 +302,8 @@ config PREEMPT Say Y here if you are building a kernel for a desktop, embedded or real-time system. Say N if you are unsure. +source "mm/Kconfig" + config HAVE_DEC_LOCK bool depends on (SMP || PREEMPT) diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug index de9d507ba0fd..fda67ac993d7 100644 --- a/arch/ia64/Kconfig.debug +++ b/arch/ia64/Kconfig.debug @@ -2,6 +2,17 @@ menu "Kernel hacking" source "lib/Kconfig.debug" +config KPROBES + bool "Kprobes" + depends on DEBUG_KERNEL + help + Kprobes allows you to trap at almost any kernel address and + execute a callback function. register_kprobe() establishes + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + + choice prompt "Physical memory granularity" default IA64_GRANULE_64MB diff --git a/arch/ia64/configs/sn2_defconfig b/arch/ia64/configs/sn2_defconfig index a01bb02d074d..487d2e36b0a6 100644 --- a/arch/ia64/configs/sn2_defconfig +++ b/arch/ia64/configs/sn2_defconfig @@ -78,7 +78,7 @@ CONFIG_IA64_L1_CACHE_SHIFT=7 CONFIG_NUMA=y CONFIG_VIRTUAL_MEM_MAP=y CONFIG_HOLES_IN_ZONE=y -CONFIG_DISCONTIGMEM=y +CONFIG_ARCH_DISCONTIGMEM_ENABLE=y # CONFIG_IA64_CYCLONE is not set CONFIG_IOSAPIC=y CONFIG_IA64_SGI_SN_SIM=y diff --git a/arch/ia64/defconfig b/arch/ia64/defconfig index 7be8096e0561..8444add76380 100644 --- a/arch/ia64/defconfig +++ b/arch/ia64/defconfig @@ -84,7 +84,7 @@ CONFIG_IA64_L1_CACHE_SHIFT=7 CONFIG_NUMA=y CONFIG_VIRTUAL_MEM_MAP=y CONFIG_HOLES_IN_ZONE=y -CONFIG_DISCONTIGMEM=y +CONFIG_ARCH_DISCONTIGMEM_ENABLE=y CONFIG_IA64_CYCLONE=y CONFIG_IOSAPIC=y CONFIG_FORCE_MAX_ZONEORDER=18 diff --git a/arch/ia64/ia32/ia32priv.h b/arch/ia64/ia32/ia32priv.h index b2de948bdaea..e3e9290e3ff2 100644 --- a/arch/ia64/ia32/ia32priv.h +++ b/arch/ia64/ia32/ia32priv.h @@ -241,7 +241,7 @@ typedef struct compat_siginfo { /* POSIX.1b timers */ struct { - timer_t _tid; /* timer id */ + compat_timer_t _tid; /* timer id */ int _overrun; /* overrun count */ char _pad[sizeof(unsigned int) - sizeof(int)]; compat_sigval_t _sigval; /* same as below */ diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 4c73d8ba2e3d..b2e2f6509eb0 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_SMP) += smp.o smpboot.o domain.o obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o obj-$(CONFIG_IA64_CYCLONE) += cyclone.o obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o +obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o mca_recovery-y += mca_drv.o mca_drv_asm.o diff --git a/arch/ia64/kernel/jprobes.S b/arch/ia64/kernel/jprobes.S new file mode 100644 index 000000000000..b7fa3ccd2b0f --- /dev/null +++ b/arch/ia64/kernel/jprobes.S @@ -0,0 +1,61 @@ +/* + * Jprobe specific operations + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) Intel Corporation, 2005 + * + * 2005-May Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy + * <anil.s.keshavamurthy@intel.com> initial implementation + * + * Jprobes (a.k.a. "jump probes" which is built on-top of kprobes) allow a + * probe to be inserted into the beginning of a function call. The fundamental + * difference between a jprobe and a kprobe is the jprobe handler is executed + * in the same context as the target function, while the kprobe handlers + * are executed in interrupt context. + * + * For jprobes we initially gain control by placing a break point in the + * first instruction of the targeted function. When we catch that specific + * break, we: + * * set the return address to our jprobe_inst_return() function + * * jump to the jprobe handler function + * + * Since we fixed up the return address, the jprobe handler will return to our + * jprobe_inst_return() function, giving us control again. At this point we + * are back in the parents frame marker, so we do yet another call to our + * jprobe_break() function to fix up the frame marker as it would normally + * exist in the target function. + * + * Our jprobe_return function then transfers control back to kprobes.c by + * executing a break instruction using one of our reserved numbers. When we + * catch that break in kprobes.c, we continue like we do for a normal kprobe + * by single stepping the emulated instruction, and then returning execution + * to the correct location. + */ +#include <asm/asmmacro.h> + + /* + * void jprobe_break(void) + */ +ENTRY(jprobe_break) + break.m 0x80300 +END(jprobe_break) + + /* + * void jprobe_inst_return(void) + */ +GLOBAL_ENTRY(jprobe_inst_return) + br.call.sptk.many b0=jprobe_break +END(jprobe_inst_return) diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c new file mode 100644 index 000000000000..5978823d5c63 --- /dev/null +++ b/arch/ia64/kernel/kprobes.c @@ -0,0 +1,601 @@ +/* + * Kernel Probes (KProbes) + * arch/ia64/kernel/kprobes.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004 + * Copyright (C) Intel Corporation, 2005 + * + * 2005-Apr Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy + * <anil.s.keshavamurthy@intel.com> adapted from i386 + */ + +#include <linux/config.h> +#include <linux/kprobes.h> +#include <linux/ptrace.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/preempt.h> +#include <linux/moduleloader.h> + +#include <asm/pgtable.h> +#include <asm/kdebug.h> + +extern void jprobe_inst_return(void); + +/* kprobe_status settings */ +#define KPROBE_HIT_ACTIVE 0x00000001 +#define KPROBE_HIT_SS 0x00000002 + +static struct kprobe *current_kprobe, *kprobe_prev; +static unsigned long kprobe_status, kprobe_status_prev; +static struct pt_regs jprobe_saved_regs; + +enum instruction_type {A, I, M, F, B, L, X, u}; +static enum instruction_type bundle_encoding[32][3] = { + { M, I, I }, /* 00 */ + { M, I, I }, /* 01 */ + { M, I, I }, /* 02 */ + { M, I, I }, /* 03 */ + { M, L, X }, /* 04 */ + { M, L, X }, /* 05 */ + { u, u, u }, /* 06 */ + { u, u, u }, /* 07 */ + { M, M, I }, /* 08 */ + { M, M, I }, /* 09 */ + { M, M, I }, /* 0A */ + { M, M, I }, /* 0B */ + { M, F, I }, /* 0C */ + { M, F, I }, /* 0D */ + { M, M, F }, /* 0E */ + { M, M, F }, /* 0F */ + { M, I, B }, /* 10 */ + { M, I, B }, /* 11 */ + { M, B, B }, /* 12 */ + { M, B, B }, /* 13 */ + { u, u, u }, /* 14 */ + { u, u, u }, /* 15 */ + { B, B, B }, /* 16 */ + { B, B, B }, /* 17 */ + { M, M, B }, /* 18 */ + { M, M, B }, /* 19 */ + { u, u, u }, /* 1A */ + { u, u, u }, /* 1B */ + { M, F, B }, /* 1C */ + { M, F, B }, /* 1D */ + { u, u, u }, /* 1E */ + { u, u, u }, /* 1F */ +}; + +/* + * In this function we check to see if the instruction + * is IP relative instruction and update the kprobe + * inst flag accordingly + */ +static void update_kprobe_inst_flag(uint template, uint slot, uint major_opcode, + unsigned long kprobe_inst, struct kprobe *p) +{ + p->ainsn.inst_flag = 0; + p->ainsn.target_br_reg = 0; + + if (bundle_encoding[template][slot] == B) { + switch (major_opcode) { + case INDIRECT_CALL_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; + p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); + break; + case IP_RELATIVE_PREDICT_OPCODE: + case IP_RELATIVE_BRANCH_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR; + break; + case IP_RELATIVE_CALL_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR; + p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; + p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); + break; + } + } else if (bundle_encoding[template][slot] == X) { + switch (major_opcode) { + case LONG_CALL_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; + p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); + break; + } + } + return; +} + +/* + * In this function we check to see if the instruction + * on which we are inserting kprobe is supported. + * Returns 0 if supported + * Returns -EINVAL if unsupported + */ +static int unsupported_inst(uint template, uint slot, uint major_opcode, + unsigned long kprobe_inst, struct kprobe *p) +{ + unsigned long addr = (unsigned long)p->addr; + + if (bundle_encoding[template][slot] == I) { + switch (major_opcode) { + case 0x0: //I_UNIT_MISC_OPCODE: + /* + * Check for Integer speculation instruction + * - Bit 33-35 to be equal to 0x1 + */ + if (((kprobe_inst >> 33) & 0x7) == 1) { + printk(KERN_WARNING + "Kprobes on speculation inst at <0x%lx> not supported\n", + addr); + return -EINVAL; + } + + /* + * IP relative mov instruction + * - Bit 27-35 to be equal to 0x30 + */ + if (((kprobe_inst >> 27) & 0x1FF) == 0x30) { + printk(KERN_WARNING + "Kprobes on \"mov r1=ip\" at <0x%lx> not supported\n", + addr); + return -EINVAL; + + } + } + } + return 0; +} + + +/* + * In this function we check to see if the instruction + * (qp) cmpx.crel.ctype p1,p2=r2,r3 + * on which we are inserting kprobe is cmp instruction + * with ctype as unc. + */ +static uint is_cmp_ctype_unc_inst(uint template, uint slot, uint major_opcode, +unsigned long kprobe_inst) +{ + cmp_inst_t cmp_inst; + uint ctype_unc = 0; + + if (!((bundle_encoding[template][slot] == I) || + (bundle_encoding[template][slot] == M))) + goto out; + + if (!((major_opcode == 0xC) || (major_opcode == 0xD) || + (major_opcode == 0xE))) + goto out; + + cmp_inst.l = kprobe_inst; + if ((cmp_inst.f.x2 == 0) || (cmp_inst.f.x2 == 1)) { + /* Integere compare - Register Register (A6 type)*/ + if ((cmp_inst.f.tb == 0) && (cmp_inst.f.ta == 0) + &&(cmp_inst.f.c == 1)) + ctype_unc = 1; + } else if ((cmp_inst.f.x2 == 2)||(cmp_inst.f.x2 == 3)) { + /* Integere compare - Immediate Register (A8 type)*/ + if ((cmp_inst.f.ta == 0) &&(cmp_inst.f.c == 1)) + ctype_unc = 1; + } +out: + return ctype_unc; +} + +/* + * In this function we override the bundle with + * the break instruction at the given slot. + */ +static void prepare_break_inst(uint template, uint slot, uint major_opcode, + unsigned long kprobe_inst, struct kprobe *p) +{ + unsigned long break_inst = BREAK_INST; + bundle_t *bundle = &p->ainsn.insn.bundle; + + /* + * Copy the original kprobe_inst qualifying predicate(qp) + * to the break instruction iff !is_cmp_ctype_unc_inst + * because for cmp instruction with ctype equal to unc, + * which is a special instruction always needs to be + * executed regradless of qp + */ + if (!is_cmp_ctype_unc_inst(template, slot, major_opcode, kprobe_inst)) + break_inst |= (0x3f & kprobe_inst); + + switch (slot) { + case 0: + bundle->quad0.slot0 = break_inst; + break; + case 1: + bundle->quad0.slot1_p0 = break_inst; + bundle->quad1.slot1_p1 = break_inst >> (64-46); + break; + case 2: + bundle->quad1.slot2 = break_inst; + break; + } + + /* + * Update the instruction flag, so that we can + * emulate the instruction properly after we + * single step on original instruction + */ + update_kprobe_inst_flag(template, slot, major_opcode, kprobe_inst, p); +} + +static inline void get_kprobe_inst(bundle_t *bundle, uint slot, + unsigned long *kprobe_inst, uint *major_opcode) +{ + unsigned long kprobe_inst_p0, kprobe_inst_p1; + unsigned int template; + + template = bundle->quad0.template; + + switch (slot) { + case 0: + *major_opcode = (bundle->quad0.slot0 >> SLOT0_OPCODE_SHIFT); + *kprobe_inst = bundle->quad0.slot0; + break; + case 1: + *major_opcode = (bundle->quad1.slot1_p1 >> SLOT1_p1_OPCODE_SHIFT); + kprobe_inst_p0 = bundle->quad0.slot1_p0; + kprobe_inst_p1 = bundle->quad1.slot1_p1; + *kprobe_inst = kprobe_inst_p0 | (kprobe_inst_p1 << (64-46)); + break; + case 2: + *major_opcode = (bundle->quad1.slot2 >> SLOT2_OPCODE_SHIFT); + *kprobe_inst = bundle->quad1.slot2; + break; + } +} + +static int valid_kprobe_addr(int template, int slot, unsigned long addr) +{ + if ((slot > 2) || ((bundle_encoding[template][1] == L) && slot > 1)) { + printk(KERN_WARNING "Attempting to insert unaligned kprobe at 0x%lx\n", + addr); + return -EINVAL; + } + return 0; +} + +static inline void save_previous_kprobe(void) +{ + kprobe_prev = current_kprobe; + kprobe_status_prev = kprobe_status; +} + +static inline void restore_previous_kprobe(void) +{ + current_kprobe = kprobe_prev; + kprobe_status = kprobe_status_prev; +} + +static inline void set_current_kprobe(struct kprobe *p) +{ + current_kprobe = p; +} + +int arch_prepare_kprobe(struct kprobe *p) +{ + unsigned long addr = (unsigned long) p->addr; + unsigned long *kprobe_addr = (unsigned long *)(addr & ~0xFULL); + unsigned long kprobe_inst=0; + unsigned int slot = addr & 0xf, template, major_opcode = 0; + bundle_t *bundle = &p->ainsn.insn.bundle; + + memcpy(&p->opcode.bundle, kprobe_addr, sizeof(bundle_t)); + memcpy(&p->ainsn.insn.bundle, kprobe_addr, sizeof(bundle_t)); + + template = bundle->quad0.template; + + if(valid_kprobe_addr(template, slot, addr)) + return -EINVAL; + + /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */ + if (slot == 1 && bundle_encoding[template][1] == L) + slot++; + + /* Get kprobe_inst and major_opcode from the bundle */ + get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode); + + if (unsupported_inst(template, slot, major_opcode, kprobe_inst, p)) + return -EINVAL; + + prepare_break_inst(template, slot, major_opcode, kprobe_inst, p); + + return 0; +} + +void arch_arm_kprobe(struct kprobe *p) +{ + unsigned long addr = (unsigned long)p->addr; + unsigned long arm_addr = addr & ~0xFULL; + + memcpy((char *)arm_addr, &p->ainsn.insn.bundle, sizeof(bundle_t)); + flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t)); +} + +void arch_disarm_kprobe(struct kprobe *p) +{ + unsigned long addr = (unsigned long)p->addr; + unsigned long arm_addr = addr & ~0xFULL; + + /* p->opcode contains the original unaltered bundle */ + memcpy((char *) arm_addr, (char *) &p->opcode.bundle, sizeof(bundle_t)); + flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t)); +} + +void arch_remove_kprobe(struct kprobe *p) +{ +} + +/* + * We are resuming execution after a single step fault, so the pt_regs + * structure reflects the register state after we executed the instruction + * located in the kprobe (p->ainsn.insn.bundle). We still need to adjust + * the ip to point back to the original stack address. To set the IP address + * to original stack address, handle the case where we need to fixup the + * relative IP address and/or fixup branch register. + */ +static void resume_execution(struct kprobe *p, struct pt_regs *regs) +{ + unsigned long bundle_addr = ((unsigned long) (&p->opcode.bundle)) & ~0xFULL; + unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL; + unsigned long template; + int slot = ((unsigned long)p->addr & 0xf); + + template = p->opcode.bundle.quad0.template; + + if (slot == 1 && bundle_encoding[template][1] == L) + slot = 2; + + if (p->ainsn.inst_flag) { + + if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) { + /* Fix relative IP address */ + regs->cr_iip = (regs->cr_iip - bundle_addr) + resume_addr; + } + + if (p->ainsn.inst_flag & INST_FLAG_FIX_BRANCH_REG) { + /* + * Fix target branch register, software convention is + * to use either b0 or b6 or b7, so just checking + * only those registers + */ + switch (p->ainsn.target_br_reg) { + case 0: + if ((regs->b0 == bundle_addr) || + (regs->b0 == bundle_addr + 0x10)) { + regs->b0 = (regs->b0 - bundle_addr) + + resume_addr; + } + break; + case 6: + if ((regs->b6 == bundle_addr) || + (regs->b6 == bundle_addr + 0x10)) { + regs->b6 = (regs->b6 - bundle_addr) + + resume_addr; + } + break; + case 7: + if ((regs->b7 == bundle_addr) || + (regs->b7 == bundle_addr + 0x10)) { + regs->b7 = (regs->b7 - bundle_addr) + + resume_addr; + } + break; + } /* end switch */ + } + goto turn_ss_off; + } + + if (slot == 2) { + if (regs->cr_iip == bundle_addr + 0x10) { + regs->cr_iip = resume_addr + 0x10; + } + } else { + if (regs->cr_iip == bundle_addr) { + regs->cr_iip = resume_addr; + } + } + +turn_ss_off: + /* Turn off Single Step bit */ + ia64_psr(regs)->ss = 0; +} + +static void prepare_ss(struct kprobe *p, struct pt_regs *regs) +{ + unsigned long bundle_addr = (unsigned long) &p->opcode.bundle; + unsigned long slot = (unsigned long)p->addr & 0xf; + + /* Update instruction pointer (IIP) and slot number (IPSR.ri) */ + regs->cr_iip = bundle_addr & ~0xFULL; + + if (slot > 2) + slot = 0; + + ia64_psr(regs)->ri = slot; + + /* turn on single stepping */ + ia64_psr(regs)->ss = 1; +} + +static int pre_kprobes_handler(struct die_args *args) +{ + struct kprobe *p; + int ret = 0; + struct pt_regs *regs = args->regs; + kprobe_opcode_t *addr = (kprobe_opcode_t *)instruction_pointer(regs); + + preempt_disable(); + + /* Handle recursion cases */ + if (kprobe_running()) { + p = get_kprobe(addr); + if (p) { + if (kprobe_status == KPROBE_HIT_SS) { + unlock_kprobes(); + goto no_kprobe; + } + /* We have reentered the pre_kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(); + set_current_kprobe(p); + p->nmissed++; + prepare_ss(p, regs); + kprobe_status = KPROBE_REENTER; + return 1; + } else if (args->err == __IA64_BREAK_JPROBE) { + /* + * jprobe instrumented function just completed + */ + p = current_kprobe; + if (p->break_handler && p->break_handler(p, regs)) { + goto ss_probe; + } + } else { + /* Not our break */ + goto no_kprobe; + } + } + + lock_kprobes(); + p = get_kprobe(addr); + if (!p) { + unlock_kprobes(); + goto no_kprobe; + } + + kprobe_status = KPROBE_HIT_ACTIVE; + set_current_kprobe(p); + + if (p->pre_handler && p->pre_handler(p, regs)) + /* + * Our pre-handler is specifically requesting that we just + * do a return. This is handling the case where the + * pre-handler is really our special jprobe pre-handler. + */ + return 1; + +ss_probe: + prepare_ss(p, regs); + kprobe_status = KPROBE_HIT_SS; + return 1; + +no_kprobe: + preempt_enable_no_resched(); + return ret; +} + +static int post_kprobes_handler(struct pt_regs *regs) +{ + if (!kprobe_running()) + return 0; + + if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { + kprobe_status = KPROBE_HIT_SSDONE; + current_kprobe->post_handler(current_kprobe, regs, 0); + } + + resume_execution(current_kprobe, regs); + + /*Restore back the original saved kprobes variables and continue. */ + if (kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(); + goto out; + } + + unlock_kprobes(); + +out: + preempt_enable_no_resched(); + return 1; +} + +static int kprobes_fault_handler(struct pt_regs *regs, int trapnr) +{ + if (!kprobe_running()) + return 0; + + if (current_kprobe->fault_handler && + current_kprobe->fault_handler(current_kprobe, regs, trapnr)) + return 1; + + if (kprobe_status & KPROBE_HIT_SS) { + resume_execution(current_kprobe, regs); + unlock_kprobes(); + preempt_enable_no_resched(); + } + + return 0; +} + +int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, + void *data) +{ + struct die_args *args = (struct die_args *)data; + switch(val) { + case DIE_BREAK: + if (pre_kprobes_handler(args)) + return NOTIFY_STOP; + break; + case DIE_SS: + if (post_kprobes_handler(args->regs)) + return NOTIFY_STOP; + break; + case DIE_PAGE_FAULT: + if (kprobes_fault_handler(args->regs, args->trapnr)) + return NOTIFY_STOP; + default: + break; + } + return NOTIFY_DONE; +} + +int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + unsigned long addr = ((struct fnptr *)(jp->entry))->ip; + + /* save architectural state */ + jprobe_saved_regs = *regs; + + /* after rfi, execute the jprobe instrumented function */ + regs->cr_iip = addr & ~0xFULL; + ia64_psr(regs)->ri = addr & 0xf; + regs->r1 = ((struct fnptr *)(jp->entry))->gp; + + /* + * fix the return address to our jprobe_inst_return() function + * in the jprobes.S file + */ + regs->b0 = ((struct fnptr *)(jprobe_inst_return))->ip; + + return 1; +} + +int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + *regs = jprobe_saved_regs; + return 1; +} diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 1861173bd4f6..e7e520d90f03 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -21,12 +21,26 @@ #include <asm/intrinsics.h> #include <asm/processor.h> #include <asm/uaccess.h> +#include <asm/kdebug.h> extern spinlock_t timerlist_lock; fpswa_interface_t *fpswa_interface; EXPORT_SYMBOL(fpswa_interface); +struct notifier_block *ia64die_chain; +static DEFINE_SPINLOCK(die_notifier_lock); + +int register_die_notifier(struct notifier_block *nb) +{ + int err = 0; + unsigned long flags; + spin_lock_irqsave(&die_notifier_lock, flags); + err = notifier_chain_register(&ia64die_chain, nb); + spin_unlock_irqrestore(&die_notifier_lock, flags); + return err; +} + void __init trap_init (void) { @@ -137,6 +151,10 @@ ia64_bad_break (unsigned long break_num, struct pt_regs *regs) switch (break_num) { case 0: /* unknown error (used by GCC for __builtin_abort()) */ + if (notify_die(DIE_BREAK, "break 0", regs, break_num, TRAP_BRKPT, SIGTRAP) + == NOTIFY_STOP) { + return; + } die_if_kernel("bugcheck!", regs, break_num); sig = SIGILL; code = ILL_ILLOPC; break; @@ -189,6 +207,15 @@ ia64_bad_break (unsigned long break_num, struct pt_regs *regs) sig = SIGILL; code = __ILL_BNDMOD; break; + case 0x80200: + case 0x80300: + if (notify_die(DIE_BREAK, "kprobe", regs, break_num, TRAP_BRKPT, SIGTRAP) + == NOTIFY_STOP) { + return; + } + sig = SIGTRAP; code = TRAP_BRKPT; + break; + default: if (break_num < 0x40000 || break_num > 0x100000) die_if_kernel("Bad break", regs, break_num); @@ -548,7 +575,11 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, #endif break; case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break; - case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break; + case 36: + if (notify_die(DIE_SS, "ss", ®s, vector, + vector, SIGTRAP) == NOTIFY_STOP) + return; + siginfo.si_code = TRAP_TRACE; ifa = 0; break; } siginfo.si_signo = SIGTRAP; siginfo.si_errno = 0; diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index c00710929390..f3fd528ead3b 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -560,14 +560,15 @@ void show_mem(void) int shared = 0, cached = 0, reserved = 0; printk("Node ID: %d\n", pgdat->node_id); for(i = 0; i < pgdat->node_spanned_pages; i++) { + struct page *page = pgdat_page_nr(pgdat, i); if (!ia64_pfn_valid(pgdat->node_start_pfn+i)) continue; - if (PageReserved(pgdat->node_mem_map+i)) + if (PageReserved(page)) reserved++; - else if (PageSwapCache(pgdat->node_mem_map+i)) + else if (PageSwapCache(page)) cached++; - else if (page_count(pgdat->node_mem_map+i)) - shared += page_count(pgdat->node_mem_map+i)-1; + else if (page_count(page)) + shared += page_count(page)-1; } total_present += present; total_reserved += reserved; diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 4174ec999dde..ff62551eb3a1 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -14,6 +14,7 @@ #include <asm/processor.h> #include <asm/system.h> #include <asm/uaccess.h> +#include <asm/kdebug.h> extern void die (char *, struct pt_regs *, long); @@ -102,6 +103,13 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re goto bad_area_no_up; #endif + /* + * This is to handle the kprobes on user space access instructions + */ + if (notify_die(DIE_PAGE_FAULT, "page fault", regs, code, TRAP_BRKPT, + SIGSEGV) == NOTIFY_STOP) + return; + down_read(&mm->mmap_sem); vma = find_vma_prev(mm, address, &prev_vma); diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index 64c133344afe..42ca8a39798d 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -172,11 +172,13 @@ config NOHIGHMEM bool default y -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool "Internal RAM Support" depends on CHIP_M32700 || CHIP_M32102 || CHIP_VDEC2 || CHIP_OPSP default y +source "mm/Kconfig" + config IRAM_START hex "Internal memory start address (hex)" default "00f00000" diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c index bc423d838fb8..d9a40b1fe8ba 100644 --- a/arch/m32r/mm/init.c +++ b/arch/m32r/mm/init.c @@ -49,7 +49,7 @@ void show_mem(void) printk("Free swap: %6ldkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; ++i) { - page = pgdat->node_mem_map + i; + page = pgdat_page_nr(pgdat, i); total++; if (PageHighMem(page)) highmem++; @@ -152,7 +152,7 @@ int __init reservedpages_count(void) reservedpages = 0; for_each_online_node(nid) for (i = 0 ; i < MAX_LOW_PFN(nid) - START_PFN(nid) ; i++) - if (PageReserved(NODE_DATA(nid)->node_mem_map + i)) + if (PageReserved(nid_page_nr(nid, i))) reservedpages++; return reservedpages; diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index d0713c7d9f0a..691a2469ff36 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -357,6 +357,8 @@ config 060_WRITETHROUGH is hardwired on. The 53c710 SCSI driver is known to suffer from this problem. +source "mm/Kconfig" + endmenu menu "General setup" diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig index e729bd280623..dbfcdc8e6087 100644 --- a/arch/m68knommu/Kconfig +++ b/arch/m68knommu/Kconfig @@ -532,6 +532,8 @@ config ROMKERNEL endchoice +source "mm/Kconfig" + endmenu config ISA_DMA_API diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index ab9944693f1f..94f5a8eb2c22 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -492,7 +492,7 @@ config SGI_SN0_N_MODE which allows for more memory. Your system is most probably running in M-Mode, so you should say N here. -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool default y if SGI_IP27 help diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig index 13472292d0ec..b5bab3a42fc4 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -82,7 +82,7 @@ CONFIG_STOP_MACHINE=y # CONFIG_SGI_IP22 is not set CONFIG_SGI_IP27=y # CONFIG_SGI_SN0_N_MODE is not set -CONFIG_DISCONTIGMEM=y +CONFIG_ARCH_DISCONTIGMEM_ENABLE=y CONFIG_NUMA=y # CONFIG_MAPPED_KERNEL is not set # CONFIG_REPLICATE_KTEXT is not set diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index 0a44a98d7adc..a160d04f7dbe 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -549,9 +549,8 @@ void __init mem_init(void) */ numslots = node_getlastslot(node); for (slot = 1; slot <= numslots; slot++) { - p = NODE_DATA(node)->node_mem_map + - (slot_getbasepfn(node, slot) - - slot_getbasepfn(node, 0)); + p = nid_page_nr(node, slot_getbasepfn(node, slot) - + slot_getbasepfn(node, 0)); /* * Free valid memory in current slot. diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index e7e7c56fc212..ce327c799b44 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -148,7 +148,7 @@ config HOTPLUG_CPU default y if SMP select HOTPLUG -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool "Discontiguous memory support (EXPERIMENTAL)" depends on EXPERIMENTAL help @@ -157,6 +157,8 @@ config DISCONTIGMEM or have huge holes in the physical address space for other reasons. See <file:Documentation/vm/numa> for more. +source "mm/Kconfig" + config PREEMPT bool # bool "Preemptible Kernel" diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index cac37589e35c..2886ad70db48 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -506,7 +506,7 @@ void show_mem(void) for (j = node_start_pfn(i); j < node_end_pfn(i); j++) { struct page *p; - p = node_mem_map(i) + j - node_start_pfn(i); + p = nid_page_nr(i, j) - node_start_pfn(i); total++; if (PageReserved(p)) diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig index 10162b187bcf..848f43970a4b 100644 --- a/arch/ppc/Kconfig +++ b/arch/ppc/Kconfig @@ -905,6 +905,8 @@ config PREEMPT config HIGHMEM bool "High memory support" +source "mm/Kconfig" + source "fs/Kconfig.binfmt" config PROC_DEVICETREE diff --git a/arch/ppc/boot/simple/misc.c b/arch/ppc/boot/simple/misc.c index ab0f9902cb67..e02de5b467a4 100644 --- a/arch/ppc/boot/simple/misc.c +++ b/arch/ppc/boot/simple/misc.c @@ -222,7 +222,7 @@ decompress_kernel(unsigned long load_addr, int num_words, unsigned long cksum) puts("\n"); puts("Uncompressing Linux..."); - gunzip(0x0, 0x400000, zimage_start, &zimage_size); + gunzip(NULL, 0x400000, zimage_start, &zimage_size); puts("done.\n"); /* get the bi_rec address */ diff --git a/arch/ppc/boot/simple/mpc10x_memory.c b/arch/ppc/boot/simple/mpc10x_memory.c index 977daedc14c0..20d92a34ceb8 100644 --- a/arch/ppc/boot/simple/mpc10x_memory.c +++ b/arch/ppc/boot/simple/mpc10x_memory.c @@ -33,7 +33,7 @@ #define MPC10X_PCI_OP(rw, size, type, op, mask) \ static void \ -mpc10x_##rw##_config_##size(unsigned int *cfg_addr, \ +mpc10x_##rw##_config_##size(unsigned int __iomem *cfg_addr, \ unsigned int *cfg_data, int devfn, int offset, \ type val) \ { \ diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig index bae56ec76ea7..cb27068bfcd4 100644 --- a/arch/ppc64/Kconfig +++ b/arch/ppc64/Kconfig @@ -217,13 +217,49 @@ config HMT This option enables hardware multithreading on RS64 cpus. pSeries systems p620 and p660 have such a cpu type. -config DISCONTIGMEM - bool "Discontiguous Memory Support" +config ARCH_SELECT_MEMORY_MODEL + def_bool y + +config ARCH_FLATMEM_ENABLE + def_bool y + depends on !NUMA + +config ARCH_DISCONTIGMEM_ENABLE + def_bool y depends on SMP && PPC_PSERIES +config ARCH_DISCONTIGMEM_DEFAULT + def_bool y + depends on ARCH_DISCONTIGMEM_ENABLE + +config ARCH_FLATMEM_ENABLE + def_bool y + +config ARCH_SPARSEMEM_ENABLE + def_bool y + depends on ARCH_DISCONTIGMEM_ENABLE + +source "mm/Kconfig" + +config HAVE_ARCH_EARLY_PFN_TO_NID + def_bool y + depends on NEED_MULTIPLE_NODES + +# Some NUMA nodes have memory ranges that span +# other nodes. Even though a pfn is valid and +# between a node's start and end pfns, it may not +# reside on that node. +# +# This is a relatively temporary hack that should +# be able to go away when sparsemem is fully in +# place +config NODES_SPAN_OTHER_NODES + def_bool y + depends on NEED_MULTIPLE_NODES + config NUMA bool "NUMA support" - depends on DISCONTIGMEM + default y if DISCONTIGMEM || SPARSEMEM config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" diff --git a/arch/ppc64/boot/install.sh b/arch/ppc64/boot/install.sh index 955c5681db6c..cb2d6626b555 100644 --- a/arch/ppc64/boot/install.sh +++ b/arch/ppc64/boot/install.sh @@ -22,8 +22,8 @@ # User may have a custom install script -if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi -if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi +if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi # Default install diff --git a/arch/ppc64/configs/pSeries_defconfig b/arch/ppc64/configs/pSeries_defconfig index 3eb5ef25d3a3..d0db8b5966c0 100644 --- a/arch/ppc64/configs/pSeries_defconfig +++ b/arch/ppc64/configs/pSeries_defconfig @@ -88,7 +88,7 @@ CONFIG_IBMVIO=y CONFIG_IOMMU_VMERGE=y CONFIG_SMP=y CONFIG_NR_CPUS=128 -CONFIG_DISCONTIGMEM=y +CONFIG_ARCH_DISCONTIGMEM_ENABLE=y CONFIG_NUMA=y CONFIG_SCHED_SMT=y # CONFIG_PREEMPT is not set diff --git a/arch/ppc64/defconfig b/arch/ppc64/defconfig index 2f31bf3046f9..b8e2066dde77 100644 --- a/arch/ppc64/defconfig +++ b/arch/ppc64/defconfig @@ -89,7 +89,7 @@ CONFIG_BOOTX_TEXT=y CONFIG_IOMMU_VMERGE=y CONFIG_SMP=y CONFIG_NR_CPUS=32 -CONFIG_DISCONTIGMEM=y +CONFIG_ARCH_DISCONTIGMEM_ENABLE=y # CONFIG_NUMA is not set # CONFIG_SCHED_SMT is not set # CONFIG_PREEMPT is not set diff --git a/arch/ppc64/kernel/kprobes.c b/arch/ppc64/kernel/kprobes.c index e950a2058a19..782ce3efa2c1 100644 --- a/arch/ppc64/kernel/kprobes.c +++ b/arch/ppc64/kernel/kprobes.c @@ -32,15 +32,14 @@ #include <linux/ptrace.h> #include <linux/spinlock.h> #include <linux/preempt.h> +#include <asm/cacheflush.h> #include <asm/kdebug.h> #include <asm/sstep.h> -/* kprobe_status settings */ -#define KPROBE_HIT_ACTIVE 0x00000001 -#define KPROBE_HIT_SS 0x00000002 - static struct kprobe *current_kprobe; static unsigned long kprobe_status, kprobe_saved_msr; +static struct kprobe *kprobe_prev; +static unsigned long kprobe_status_prev, kprobe_saved_msr_prev; static struct pt_regs jprobe_saved_regs; int arch_prepare_kprobe(struct kprobe *p) @@ -61,16 +60,25 @@ int arch_prepare_kprobe(struct kprobe *p) void arch_copy_kprobe(struct kprobe *p) { memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + p->opcode = *p->addr; } -void arch_remove_kprobe(struct kprobe *p) +void arch_arm_kprobe(struct kprobe *p) { + *p->addr = BREAKPOINT_INSTRUCTION; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs) +void arch_disarm_kprobe(struct kprobe *p) { *p->addr = p->opcode; - regs->nip = (unsigned long)p->addr; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); +} + +void arch_remove_kprobe(struct kprobe *p) +{ } static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) @@ -83,6 +91,20 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->nip = (unsigned long)&p->ainsn.insn; } +static inline void save_previous_kprobe(void) +{ + kprobe_prev = current_kprobe; + kprobe_status_prev = kprobe_status; + kprobe_saved_msr_prev = kprobe_saved_msr; +} + +static inline void restore_previous_kprobe(void) +{ + current_kprobe = kprobe_prev; + kprobe_status = kprobe_status_prev; + kprobe_saved_msr = kprobe_saved_msr_prev; +} + static inline int kprobe_handler(struct pt_regs *regs) { struct kprobe *p; @@ -101,8 +123,19 @@ static inline int kprobe_handler(struct pt_regs *regs) unlock_kprobes(); goto no_kprobe; } - disarm_kprobe(p, regs); - ret = 1; + /* We have reentered the kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(); + current_kprobe = p; + kprobe_saved_msr = regs->msr; + p->nmissed++; + prepare_singlestep(p, regs); + kprobe_status = KPROBE_REENTER; + return 1; } else { p = current_kprobe; if (p->break_handler && p->break_handler(p, regs)) { @@ -184,13 +217,21 @@ static inline int post_kprobe_handler(struct pt_regs *regs) if (!kprobe_running()) return 0; - if (current_kprobe->post_handler) + if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { + kprobe_status = KPROBE_HIT_SSDONE; current_kprobe->post_handler(current_kprobe, regs, 0); + } resume_execution(current_kprobe, regs); regs->msr |= kprobe_saved_msr; + /*Restore back the original saved kprobes variables and continue. */ + if (kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(); + goto out; + } unlock_kprobes(); +out: preempt_enable_no_resched(); /* diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c index 7d060ddb5e93..0a47a5ef428d 100644 --- a/arch/ppc64/kernel/setup.c +++ b/arch/ppc64/kernel/setup.c @@ -1074,6 +1074,7 @@ void __init setup_arch(char **cmdline_p) /* set up the bootmem stuff with available memory */ do_init_bootmem(); + sparse_init(); /* initialize the syscall map in systemcfg */ setup_syscall_map(); diff --git a/arch/ppc64/mm/Makefile b/arch/ppc64/mm/Makefile index ac522d57b2a7..3695d00d347f 100644 --- a/arch/ppc64/mm/Makefile +++ b/arch/ppc64/mm/Makefile @@ -6,6 +6,6 @@ EXTRA_CFLAGS += -mno-minimal-toc obj-y := fault.o init.o imalloc.o hash_utils.o hash_low.o tlb.o \ slb_low.o slb.o stab.o mmap.o -obj-$(CONFIG_DISCONTIGMEM) += numa.o +obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PPC_MULTIPLATFORM) += hash_native.o diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c index 6fa1e6490b57..b50b3a446dbe 100644 --- a/arch/ppc64/mm/init.c +++ b/arch/ppc64/mm/init.c @@ -98,7 +98,7 @@ void show_mem(void) printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; i++) { - page = pgdat->node_mem_map + i; + page = pgdat_page_nr(pgdat, i); total++; if (PageReserved(page)) reserved++; @@ -531,7 +531,7 @@ EXPORT_SYMBOL(page_is_ram); * Initialize the bootmem system and give it all the memory we * have available. */ -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_NEED_MULTIPLE_NODES void __init do_init_bootmem(void) { unsigned long i; @@ -553,12 +553,20 @@ void __init do_init_bootmem(void) max_pfn = max_low_pfn; - /* add all physical memory to the bootmem map. Also find the first */ + /* Add all physical memory to the bootmem map, mark each area + * present. + */ for (i=0; i < lmb.memory.cnt; i++) { unsigned long physbase, size; + unsigned long start_pfn, end_pfn; physbase = lmb.memory.region[i].physbase; size = lmb.memory.region[i].size; + + start_pfn = physbase >> PAGE_SHIFT; + end_pfn = start_pfn + (size >> PAGE_SHIFT); + memory_present(0, start_pfn, end_pfn); + free_bootmem(physbase, size); } @@ -597,7 +605,7 @@ void __init paging_init(void) free_area_init_node(0, NODE_DATA(0), zones_size, __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size); } -#endif /* CONFIG_DISCONTIGMEM */ +#endif /* ! CONFIG_NEED_MULTIPLE_NODES */ static struct kcore_list kcore_vmem; @@ -628,7 +636,7 @@ module_init(setup_kcore); void __init mem_init(void) { -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NEED_MULTIPLE_NODES int nid; #endif pg_data_t *pgdat; @@ -639,7 +647,7 @@ void __init mem_init(void) num_physpages = max_low_pfn; /* RAM is assumed contiguous */ high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NEED_MULTIPLE_NODES for_each_online_node(nid) { if (NODE_DATA(nid)->node_spanned_pages != 0) { printk("freeing bootmem node %x\n", nid); @@ -654,7 +662,7 @@ void __init mem_init(void) for_each_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; i++) { - page = pgdat->node_mem_map + i; + page = pgdat_page_nr(pgdat, i); if (PageReserved(page)) reservedpages++; } diff --git a/arch/ppc64/mm/numa.c b/arch/ppc64/mm/numa.c index ea862ec643d3..cafd91aef289 100644 --- a/arch/ppc64/mm/numa.c +++ b/arch/ppc64/mm/numa.c @@ -440,6 +440,8 @@ new_range: for (i = start ; i < (start+size); i += MEMORY_INCREMENT) numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = numa_domain; + memory_present(numa_domain, start >> PAGE_SHIFT, + (start + size) >> PAGE_SHIFT); if (--ranges) goto new_range; @@ -481,6 +483,7 @@ static void __init setup_nonnuma(void) for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT) numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0; + memory_present(0, 0, init_node_data[0].node_end_pfn); } static void __init dump_numa_topology(void) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index ab79af84699a..32696c1d9280 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -226,6 +226,8 @@ config WARN_STACK_SIZE This allows you to specify the maximum frame size a function may have without the compiler complaining about it. +source "mm/Kconfig" + comment "I/O subsystem configuration" config MACHCHK_WARNING diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh index 278a8139cb18..d4026f62cb06 100644 --- a/arch/s390/boot/install.sh +++ b/arch/s390/boot/install.sh @@ -21,8 +21,8 @@ # User may have a custom install script -if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi -if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi +if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi # Default install - same as make zlilo diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index bf33dcfec7db..3898f66d0b2f 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -45,7 +45,7 @@ typedef struct compat_siginfo { /* POSIX.1b timers */ struct { - timer_t _tid; /* timer id */ + compat_timer_t _tid; /* timer id */ int _overrun; /* overrun count */ compat_sigval_t _sigval; /* same as below */ int _sys_private; /* not to be passed to user */ diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 3468d5127223..a7c8bfc11604 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -486,7 +486,7 @@ config CPU_SUBTYPE_ST40 depends on CPU_SUBTYPE_ST40STB1 || CPU_SUBTYPE_ST40GX1 default y -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool depends on SH_HP690 default y @@ -496,6 +496,8 @@ config DISCONTIGMEM or have huge holes in the physical address space for other reasons. See <file:Documentation/vm/numa> for more. +source "mm/Kconfig" + config ZERO_PAGE_OFFSET hex "Zero page offset" default "0x00001000" if !(SH_MPC1211 || SH_SH03) diff --git a/arch/sh64/Kconfig b/arch/sh64/Kconfig index 76eb81fba45e..708e59736a4d 100644 --- a/arch/sh64/Kconfig +++ b/arch/sh64/Kconfig @@ -217,6 +217,8 @@ config PREEMPT bool "Preemptible Kernel (EXPERIMENTAL)" depends on EXPERIMENTAL +source "mm/Kconfig" + endmenu menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)" diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 237f922520fd..262e13d086fe 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -264,7 +264,11 @@ config SUNOS_EMUL want to run SunOS binaries on an Ultra you must also say Y to "Kernel support for 32-bit a.out binaries" above. -source "drivers/parport/Kconfig" +source "mm/Kconfig" + +endmenu + +source "drivers/Kconfig" config PRINTER tristate "Parallel printer support" @@ -291,6 +295,8 @@ config PRINTER If you have more than 8 printers, you need to increase the LP_NO macro in lp.c and the PARPORT_MAX macro in parport.h. +source "mm/Kconfig" + endmenu source "drivers/base/Kconfig" @@ -372,18 +378,8 @@ config UNIX98_PTY_COUNT endmenu -source "drivers/input/Kconfig" - source "fs/Kconfig" -source "sound/Kconfig" - -source "drivers/usb/Kconfig" - -source "drivers/infiniband/Kconfig" - -source "drivers/char/watchdog/Kconfig" - source "arch/sparc/Kconfig.debug" source "security/Kconfig" diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index a72fd15d5ea8..e2b050eb3b96 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -484,6 +484,8 @@ config CMDLINE NOTE: This option WILL override the PROM bootargs setting! +source "mm/Kconfig" + endmenu source "drivers/base/Kconfig" diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c index 7066d7ba667a..bdac631cf011 100644 --- a/arch/sparc64/kernel/kprobes.c +++ b/arch/sparc64/kernel/kprobes.c @@ -6,7 +6,6 @@ #include <linux/config.h> #include <linux/kernel.h> #include <linux/kprobes.h> - #include <asm/kdebug.h> #include <asm/signal.h> @@ -47,25 +46,59 @@ void arch_copy_kprobe(struct kprobe *p) { p->ainsn.insn[0] = *p->addr; p->ainsn.insn[1] = BREAKPOINT_INSTRUCTION_2; + p->opcode = *p->addr; } -void arch_remove_kprobe(struct kprobe *p) +void arch_arm_kprobe(struct kprobe *p) { + *p->addr = BREAKPOINT_INSTRUCTION; + flushi(p->addr); } -/* kprobe_status settings */ -#define KPROBE_HIT_ACTIVE 0x00000001 -#define KPROBE_HIT_SS 0x00000002 +void arch_disarm_kprobe(struct kprobe *p) +{ + *p->addr = p->opcode; + flushi(p->addr); +} + +void arch_remove_kprobe(struct kprobe *p) +{ +} static struct kprobe *current_kprobe; static unsigned long current_kprobe_orig_tnpc; static unsigned long current_kprobe_orig_tstate_pil; static unsigned int kprobe_status; +static struct kprobe *kprobe_prev; +static unsigned long kprobe_orig_tnpc_prev; +static unsigned long kprobe_orig_tstate_pil_prev; +static unsigned int kprobe_status_prev; -static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +static inline void save_previous_kprobe(void) +{ + kprobe_status_prev = kprobe_status; + kprobe_orig_tnpc_prev = current_kprobe_orig_tnpc; + kprobe_orig_tstate_pil_prev = current_kprobe_orig_tstate_pil; + kprobe_prev = current_kprobe; +} + +static inline void restore_previous_kprobe(void) +{ + kprobe_status = kprobe_status_prev; + current_kprobe_orig_tnpc = kprobe_orig_tnpc_prev; + current_kprobe_orig_tstate_pil = kprobe_orig_tstate_pil_prev; + current_kprobe = kprobe_prev; +} + +static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs) { current_kprobe_orig_tnpc = regs->tnpc; current_kprobe_orig_tstate_pil = (regs->tstate & TSTATE_PIL); + current_kprobe = p; +} + +static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +{ regs->tstate |= TSTATE_PIL; /*single step inline, if it a breakpoint instruction*/ @@ -78,17 +111,6 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) } } -static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs) -{ - *p->addr = p->opcode; - flushi(p->addr); - - regs->tpc = (unsigned long) p->addr; - regs->tnpc = current_kprobe_orig_tnpc; - regs->tstate = ((regs->tstate & ~TSTATE_PIL) | - current_kprobe_orig_tstate_pil); -} - static int kprobe_handler(struct pt_regs *regs) { struct kprobe *p; @@ -109,8 +131,18 @@ static int kprobe_handler(struct pt_regs *regs) unlock_kprobes(); goto no_kprobe; } - disarm_kprobe(p, regs); - ret = 1; + /* We have reentered the kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(); + set_current_kprobe(p, regs); + p->nmissed++; + kprobe_status = KPROBE_REENTER; + prepare_singlestep(p, regs); + return 1; } else { p = current_kprobe; if (p->break_handler && p->break_handler(p, regs)) @@ -138,8 +170,8 @@ static int kprobe_handler(struct pt_regs *regs) goto no_kprobe; } + set_current_kprobe(p, regs); kprobe_status = KPROBE_HIT_ACTIVE; - current_kprobe = p; if (p->pre_handler && p->pre_handler(p, regs)) return 1; @@ -245,12 +277,20 @@ static inline int post_kprobe_handler(struct pt_regs *regs) if (!kprobe_running()) return 0; - if (current_kprobe->post_handler) + if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { + kprobe_status = KPROBE_HIT_SSDONE; current_kprobe->post_handler(current_kprobe, regs, 0); + } resume_execution(current_kprobe, regs); + /*Restore back the original saved kprobes variables and continue. */ + if (kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(); + goto out; + } unlock_kprobes(); +out: preempt_enable_no_resched(); return 1; @@ -392,3 +432,4 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) } return 0; } + diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c index 9a375e975cff..f28428f4170e 100644 --- a/arch/sparc64/kernel/signal32.c +++ b/arch/sparc64/kernel/signal32.c @@ -102,7 +102,7 @@ typedef struct compat_siginfo{ /* POSIX.1b timers */ struct { - timer_t _tid; /* timer id */ + compat_timer_t _tid; /* timer id */ int _overrun; /* overrun count */ compat_sigval_t _sigval; /* same as below */ int _sys_private; /* not to be passed to user */ diff --git a/arch/um/Kconfig b/arch/um/Kconfig index b8e952c88fd1..9469e77303e6 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -74,6 +74,7 @@ config MODE_SKAS option will shrink the UML binary slightly. source "arch/um/Kconfig_arch" +source "mm/Kconfig" config LD_SCRIPT_STATIC bool diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index 804c6bbdf67c..157584ae4792 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -8,6 +8,7 @@ #include "linux/kernel.h" #include "linux/sched.h" #include "linux/interrupt.h" +#include "linux/string.h" #include "linux/mm.h" #include "linux/slab.h" #include "linux/utsname.h" @@ -322,12 +323,7 @@ void do_uml_exitcalls(void) char *uml_strdup(char *string) { - char *new; - - new = kmalloc(strlen(string) + 1, GFP_KERNEL); - if(new == NULL) return(NULL); - strcpy(new, string); - return(new); + return kstrdup(string, GFP_KERNEL); } int copy_to_user_proc(void __user *to, void *from, int size) diff --git a/arch/v850/Kconfig b/arch/v850/Kconfig index 90cd4baa75ee..27febd6ffa80 100644 --- a/arch/v850/Kconfig +++ b/arch/v850/Kconfig @@ -218,6 +218,8 @@ menu "Processor type and features" a lot of RAM, and you need to able to allocate very large contiguous chunks. If unsure, say N. +source "mm/Kconfig" + endmenu diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 289f448ac89c..db259757dc8a 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -265,7 +265,7 @@ config NUMA_EMU into virtual nodes when booted with "numa=fake=N", where N is the number of nodes. This is only useful for debugging. -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool depends on NUMA default y @@ -274,6 +274,27 @@ config NUMA bool default n +config ARCH_DISCONTIGMEM_ENABLE + def_bool y + depends on NUMA + +config ARCH_DISCONTIGMEM_DEFAULT + def_bool y + depends on NUMA + +config ARCH_SPARSEMEM_ENABLE + def_bool y + depends on NUMA + +config ARCH_FLATMEM_ENABLE + def_bool y + depends on !NUMA + +source "mm/Kconfig" + +config HAVE_ARCH_EARLY_PFN_TO_NID + def_bool y + config HAVE_DEC_LOCK bool depends on SMP @@ -381,6 +402,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source kernel/Kconfig.hz + endmenu # diff --git a/arch/x86_64/boot/install.sh b/arch/x86_64/boot/install.sh index 90f2452b3b9e..f17b40dfc0f4 100644 --- a/arch/x86_64/boot/install.sh +++ b/arch/x86_64/boot/install.sh @@ -21,8 +21,8 @@ # User may have a custom install script -if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi -if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi +if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi # Default install - same as make zlilo diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c index fbd09b5126ce..66e2821533db 100644 --- a/arch/x86_64/ia32/ia32_signal.c +++ b/arch/x86_64/ia32/ia32_signal.c @@ -428,8 +428,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) return (void __user *)((rsp - frame_size) & -8UL); } -void ia32_setup_frame(int sig, struct k_sigaction *ka, - compat_sigset_t *set, struct pt_regs * regs) +int ia32_setup_frame(int sig, struct k_sigaction *ka, + compat_sigset_t *set, struct pt_regs * regs) { struct sigframe __user *frame; int err = 0; @@ -514,14 +514,15 @@ void ia32_setup_frame(int sig, struct k_sigaction *ka, current->comm, current->pid, frame, regs->rip, frame->pretcode); #endif - return; + return 1; give_sigsegv: force_sigsegv(sig, current); + return 0; } -void ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - compat_sigset_t *set, struct pt_regs * regs) +int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + compat_sigset_t *set, struct pt_regs * regs) { struct rt_sigframe __user *frame; int err = 0; @@ -613,9 +614,9 @@ void ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, current->comm, current->pid, frame, regs->rip, frame->pretcode); #endif - return; + return 1; give_sigsegv: force_sigsegv(sig, current); + return 0; } - diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c index 504e63474993..c9a6b812e926 100644 --- a/arch/x86_64/kernel/aperture.c +++ b/arch/x86_64/kernel/aperture.c @@ -40,11 +40,7 @@ int fix_aperture __initdata = 1; static u32 __init allocate_aperture(void) { -#ifdef CONFIG_DISCONTIGMEM pg_data_t *nd0 = NODE_DATA(0); -#else - pg_data_t *nd0 = &contig_page_data; -#endif u32 aper_size; void *p; diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c index e3a19e8ebbf8..9631c747c5e3 100644 --- a/arch/x86_64/kernel/early_printk.c +++ b/arch/x86_64/kernel/early_printk.c @@ -2,20 +2,24 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/string.h> +#include <linux/tty.h> #include <asm/io.h> #include <asm/processor.h> /* Simple VGA output */ #ifdef __i386__ +#include <asm/setup.h> #define VGABASE (__ISA_IO_base + 0xb8000) #else +#include <asm/bootsetup.h> #define VGABASE ((void __iomem *)0xffffffff800b8000UL) #endif -#define MAX_YPOS 25 -#define MAX_XPOS 80 +#define MAX_YPOS max_ypos +#define MAX_XPOS max_xpos +static int max_ypos = 25, max_xpos = 80; static int current_ypos = 1, current_xpos = 0; static void early_vga_write(struct console *con, const char *str, unsigned n) @@ -196,7 +200,10 @@ int __init setup_early_printk(char *opt) } else if (!strncmp(buf, "ttyS", 4)) { early_serial_init(buf); early_console = &early_serial_console; - } else if (!strncmp(buf, "vga", 3)) { + } else if (!strncmp(buf, "vga", 3) + && SCREEN_INFO.orig_video_isVGA == 1) { + max_xpos = SCREEN_INFO.orig_video_cols; + max_ypos = SCREEN_INFO.orig_video_lines; early_console = &early_vga_console; } early_console_initialized = 1; diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c index 0f8c78dcd38c..cf6ab147a2a5 100644 --- a/arch/x86_64/kernel/head64.c +++ b/arch/x86_64/kernel/head64.c @@ -94,7 +94,7 @@ void __init x86_64_start_kernel(char * real_mode_data) s = strstr(saved_command_line, "earlyprintk="); if (s != NULL) setup_early_printk(s); -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NUMA s = strstr(saved_command_line, "numa="); if (s != NULL) numa_setup(s+5); diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c index 7873d9ba8814..19eafa0aa95c 100644 --- a/arch/x86_64/kernel/i8259.c +++ b/arch/x86_64/kernel/i8259.c @@ -157,14 +157,13 @@ static unsigned int startup_8259A_irq(unsigned int irq) } static struct hw_interrupt_type i8259A_irq_type = { - "XT-PIC", - startup_8259A_irq, - shutdown_8259A_irq, - enable_8259A_irq, - disable_8259A_irq, - mask_and_ack_8259A, - end_8259A_irq, - NULL + .typename = "XT-PIC", + .startup = startup_8259A_irq, + .shutdown = shutdown_8259A_irq, + .enable = enable_8259A_irq, + .disable = disable_8259A_irq, + .ack = mask_and_ack_8259A, + .end = end_8259A_irq, }; /* diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index f77f8a0ff187..4e680f87a75f 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -27,6 +27,8 @@ * <prasanna@in.ibm.com> adapted for x86_64 * 2005-Mar Roland McGrath <roland@redhat.com> * Fixed to handle %rip-relative addressing mode correctly. + * 2005-May Rusty Lynch <rusty.lynch@intel.com> + * Added function return probes functionality */ #include <linux/config.h> @@ -37,18 +39,16 @@ #include <linux/slab.h> #include <linux/preempt.h> #include <linux/moduleloader.h> - +#include <asm/cacheflush.h> #include <asm/pgtable.h> #include <asm/kdebug.h> static DECLARE_MUTEX(kprobe_mutex); -/* kprobe_status settings */ -#define KPROBE_HIT_ACTIVE 0x00000001 -#define KPROBE_HIT_SS 0x00000002 - static struct kprobe *current_kprobe; static unsigned long kprobe_status, kprobe_old_rflags, kprobe_saved_rflags; +static struct kprobe *kprobe_prev; +static unsigned long kprobe_status_prev, kprobe_old_rflags_prev, kprobe_saved_rflags_prev; static struct pt_regs jprobe_saved_regs; static long *jprobe_saved_rsp; static kprobe_opcode_t *get_insn_slot(void); @@ -214,6 +214,21 @@ void arch_copy_kprobe(struct kprobe *p) BUG_ON((s64) (s32) disp != disp); /* Sanity check. */ *ripdisp = disp; } + p->opcode = *p->addr; +} + +void arch_arm_kprobe(struct kprobe *p) +{ + *p->addr = BREAKPOINT_INSTRUCTION; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); +} + +void arch_disarm_kprobe(struct kprobe *p) +{ + *p->addr = p->opcode; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } void arch_remove_kprobe(struct kprobe *p) @@ -223,10 +238,29 @@ void arch_remove_kprobe(struct kprobe *p) down(&kprobe_mutex); } -static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs) +static inline void save_previous_kprobe(void) { - *p->addr = p->opcode; - regs->rip = (unsigned long)p->addr; + kprobe_prev = current_kprobe; + kprobe_status_prev = kprobe_status; + kprobe_old_rflags_prev = kprobe_old_rflags; + kprobe_saved_rflags_prev = kprobe_saved_rflags; +} + +static inline void restore_previous_kprobe(void) +{ + current_kprobe = kprobe_prev; + kprobe_status = kprobe_status_prev; + kprobe_old_rflags = kprobe_old_rflags_prev; + kprobe_saved_rflags = kprobe_saved_rflags_prev; +} + +static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs) +{ + current_kprobe = p; + kprobe_saved_rflags = kprobe_old_rflags + = (regs->eflags & (TF_MASK | IF_MASK)); + if (is_IF_modifier(p->ainsn.insn)) + kprobe_saved_rflags &= ~IF_MASK; } static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) @@ -240,6 +274,50 @@ static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->rip = (unsigned long)p->ainsn.insn; } +struct task_struct *arch_get_kprobe_task(void *ptr) +{ + return ((struct thread_info *) (((unsigned long) ptr) & + (~(THREAD_SIZE -1))))->task; +} + +void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) +{ + unsigned long *sara = (unsigned long *)regs->rsp; + struct kretprobe_instance *ri; + static void *orig_ret_addr; + + /* + * Save the return address when the return probe hits + * the first time, and use it to populate the (krprobe + * instance)->ret_addr for subsequent return probes at + * the same addrress since stack address would have + * the kretprobe_trampoline by then. + */ + if (((void*) *sara) != kretprobe_trampoline) + orig_ret_addr = (void*) *sara; + + if ((ri = get_free_rp_inst(rp)) != NULL) { + ri->rp = rp; + ri->stack_addr = sara; + ri->ret_addr = orig_ret_addr; + add_rp_inst(ri); + /* Replace the return addr with trampoline addr */ + *sara = (unsigned long) &kretprobe_trampoline; + } else { + rp->nmissed++; + } +} + +void arch_kprobe_flush_task(struct task_struct *tk) +{ + struct kretprobe_instance *ri; + while ((ri = get_rp_inst_tsk(tk)) != NULL) { + *((unsigned long *)(ri->stack_addr)) = + (unsigned long) ri->ret_addr; + recycle_rp_inst(ri); + } +} + /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled thorough out this function. @@ -264,9 +342,30 @@ int kprobe_handler(struct pt_regs *regs) regs->eflags |= kprobe_saved_rflags; unlock_kprobes(); goto no_kprobe; + } else if (kprobe_status == KPROBE_HIT_SSDONE) { + /* TODO: Provide re-entrancy from + * post_kprobes_handler() and avoid exception + * stack corruption while single-stepping on + * the instruction of the new probe. + */ + arch_disarm_kprobe(p); + regs->rip = (unsigned long)p->addr; + ret = 1; + } else { + /* We have reentered the kprobe_handler(), since + * another probe was hit while within the + * handler. We here save the original kprobe + * variables and just single step on instruction + * of the new probe without calling any user + * handlers. + */ + save_previous_kprobe(); + set_current_kprobe(p, regs); + p->nmissed++; + prepare_singlestep(p, regs); + kprobe_status = KPROBE_REENTER; + return 1; } - disarm_kprobe(p, regs); - ret = 1; } else { p = current_kprobe; if (p->break_handler && p->break_handler(p, regs)) { @@ -296,11 +395,7 @@ int kprobe_handler(struct pt_regs *regs) } kprobe_status = KPROBE_HIT_ACTIVE; - current_kprobe = p; - kprobe_saved_rflags = kprobe_old_rflags - = (regs->eflags & (TF_MASK | IF_MASK)); - if (is_IF_modifier(p->ainsn.insn)) - kprobe_saved_rflags &= ~IF_MASK; + set_current_kprobe(p, regs); if (p->pre_handler && p->pre_handler(p, regs)) /* handler has already set things up, so skip ss setup */ @@ -317,6 +412,55 @@ no_kprobe: } /* + * For function-return probes, init_kprobes() establishes a probepoint + * here. When a retprobed function returns, this probe is hit and + * trampoline_probe_handler() runs, calling the kretprobe's handler. + */ + void kretprobe_trampoline_holder(void) + { + asm volatile ( ".global kretprobe_trampoline\n" + "kretprobe_trampoline: \n" + "nop\n"); + } + +/* + * Called when we hit the probe point at kretprobe_trampoline + */ +int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct task_struct *tsk; + struct kretprobe_instance *ri; + struct hlist_head *head; + struct hlist_node *node; + unsigned long *sara = (unsigned long *)regs->rsp - 1; + + tsk = arch_get_kprobe_task(sara); + head = kretprobe_inst_table_head(tsk); + + hlist_for_each_entry(ri, node, head, hlist) { + if (ri->stack_addr == sara && ri->rp) { + if (ri->rp->handler) + ri->rp->handler(ri, regs); + } + } + return 0; +} + +void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) +{ + struct kretprobe_instance *ri; + /* RA already popped */ + unsigned long *sara = ((unsigned long *)regs->rsp) - 1; + + while ((ri = get_rp_inst(sara))) { + regs->rip = (unsigned long)ri->ret_addr; + recycle_rp_inst(ri); + } + regs->eflags &= ~TF_MASK; +} + +/* * Called after single-stepping. p->addr is the address of the * instruction whose first byte has been replaced by the "int 3" * instruction. To avoid the SMP problems that can occur when we @@ -401,13 +545,23 @@ int post_kprobe_handler(struct pt_regs *regs) if (!kprobe_running()) return 0; - if (current_kprobe->post_handler) + if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { + kprobe_status = KPROBE_HIT_SSDONE; current_kprobe->post_handler(current_kprobe, regs, 0); + } - resume_execution(current_kprobe, regs); + if (current_kprobe->post_handler != trampoline_post_handler) + resume_execution(current_kprobe, regs); regs->eflags |= kprobe_saved_rflags; - unlock_kprobes(); + /* Restore the original saved kprobes variables and continue. */ + if (kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(); + goto out; + } else { + unlock_kprobes(); + } +out: preempt_enable_no_resched(); /* diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index 61a63be6b294..9c5aa2a790c7 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -23,6 +23,7 @@ #include <linux/kernel_stat.h> #include <linux/mc146818rtc.h> #include <linux/acpi.h> +#include <linux/module.h> #include <asm/smp.h> #include <asm/mtrr.h> @@ -45,7 +46,8 @@ int acpi_found_madt; int apic_version [MAX_APICS]; unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; -cpumask_t pci_bus_to_cpumask [256] = { [0 ... 255] = CPU_MASK_ALL }; +unsigned char pci_bus_to_node [256]; +EXPORT_SYMBOL(pci_bus_to_node); static int mp_current_pci_id = 0; /* I/O APIC entries */ @@ -904,11 +906,20 @@ void __init mp_config_acpi_legacy_irqs (void) return; } +#define MAX_GSI_NUM 4096 + int mp_register_gsi(u32 gsi, int edge_level, int active_high_low) { int ioapic = -1; int ioapic_pin = 0; int idx, bit = 0; + static int pci_irq = 16; + /* + * Mapping between Global System Interrupts, which + * represent all possible interrupts, to the IRQs + * assigned to actual devices. + */ + static int gsi_to_irq[MAX_GSI_NUM]; if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) return gsi; @@ -943,11 +954,21 @@ int mp_register_gsi(u32 gsi, int edge_level, int active_high_low) if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", mp_ioapic_routing[ioapic].apic_id, ioapic_pin); - return gsi; + return gsi_to_irq[gsi]; } mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit); + if (edge_level) { + /* + * For PCI devices assign IRQs in order, avoiding gaps + * due to unused I/O APIC pins. + */ + int irq = gsi; + gsi = pci_irq++; + gsi_to_irq[irq] = gsi; + } + io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1, active_high_low == ACPI_ACTIVE_HIGH ? 0 : 1); diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index dce8bab4306c..e59d1f9d6163 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -34,6 +34,7 @@ #include <linux/ptrace.h> #include <linux/utsname.h> #include <linux/random.h> +#include <linux/kprobes.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -293,6 +294,14 @@ void exit_thread(void) { struct task_struct *me = current; struct thread_struct *t = &me->thread; + + /* + * Remove function-return probe instances associated with this task + * and put them back on the free list. Do not insert an exit probe for + * this function, it will be disabled by kprobe_flush_task if you do. + */ + kprobe_flush_task(me); + if (me->thread.io_bitmap_ptr) { struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); @@ -312,6 +321,13 @@ void flush_thread(void) struct task_struct *tsk = current; struct thread_info *t = current_thread_info(); + /* + * Remove function-return probe instances associated with this task + * and put them back on the free list. Do not insert an exit probe for + * this function, it will be disabled by kprobe_flush_task if you do. + */ + kprobe_flush_task(tsk); + if (t->flags & _TIF_ABI_PENDING) t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32); diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 99f038ede23c..000015dd5a8b 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -40,6 +40,8 @@ #include <linux/acpi.h> #include <linux/kallsyms.h> #include <linux/edd.h> +#include <linux/mmzone.h> + #include <asm/mtrr.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -348,7 +350,7 @@ static __init void parse_cmdline_early (char ** cmdline_p) if (!memcmp(from, "mem=", 4)) parse_memopt(from+4, &from); -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NUMA if (!memcmp(from, "numa=", 5)) numa_setup(from+5); #endif @@ -377,17 +379,20 @@ static __init void parse_cmdline_early (char ** cmdline_p) *cmdline_p = command_line; } -#ifndef CONFIG_DISCONTIGMEM -static void __init contig_initmem_init(void) +#ifndef CONFIG_NUMA +static void __init +contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) { - unsigned long bootmap_size, bootmap; - bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; - bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size); - if (bootmap == -1L) - panic("Cannot find bootmem map of size %ld\n",bootmap_size); - bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); - e820_bootmem_free(&contig_page_data, 0, end_pfn << PAGE_SHIFT); - reserve_bootmem(bootmap, bootmap_size); + unsigned long bootmap_size, bootmap; + + memory_present(0, start_pfn, end_pfn); + bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; + bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size); + if (bootmap == -1L) + panic("Cannot find bootmem map of size %ld\n",bootmap_size); + bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); + e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT); + reserve_bootmem(bootmap, bootmap_size); } #endif @@ -554,10 +559,10 @@ void __init setup_arch(char **cmdline_p) acpi_numa_init(); #endif -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NUMA numa_initmem_init(0, end_pfn); #else - contig_initmem_init(); + contig_initmem_init(0, end_pfn); #endif /* Reserve direct mapping */ @@ -618,6 +623,8 @@ void __init setup_arch(char **cmdline_p) } } #endif + + sparse_init(); paging_init(); check_ioapic(); diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c index 429c0269dc4e..98b7ba95d581 100644 --- a/arch/x86_64/kernel/signal.c +++ b/arch/x86_64/kernel/signal.c @@ -34,9 +34,9 @@ #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) -void ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs * regs); -void ia32_setup_frame(int sig, struct k_sigaction *ka, +int ia32_setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, struct pt_regs * regs); asmlinkage long @@ -238,7 +238,7 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) return (void __user *)round_down(rsp - size, 16); } -static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs * regs) { struct rt_sigframe __user *frame; @@ -327,20 +327,23 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, current->comm, current->pid, frame, regs->rip, frame->pretcode); #endif - return; + return 1; give_sigsegv: force_sigsegv(sig, current); + return 0; } /* * OK, we're invoking a handler */ -static void +static int handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, sigset_t *oldset, struct pt_regs *regs) { + int ret; + #ifdef DEBUG_SIG printk("handle_signal pid:%d sig:%lu rip:%lx rsp:%lx regs=%p\n", current->pid, sig, @@ -384,20 +387,22 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, #ifdef CONFIG_IA32_EMULATION if (test_thread_flag(TIF_IA32)) { if (ka->sa.sa_flags & SA_SIGINFO) - ia32_setup_rt_frame(sig, ka, info, oldset, regs); + ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs); else - ia32_setup_frame(sig, ka, oldset, regs); + ret = ia32_setup_frame(sig, ka, oldset, regs); } else #endif - setup_rt_frame(sig, ka, info, oldset, regs); + ret = setup_rt_frame(sig, ka, info, oldset, regs); - if (!(ka->sa.sa_flags & SA_NODEFER)) { + if (ret && !(ka->sa.sa_flags & SA_NODEFER)) { spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); sigaddset(¤t->blocked,sig); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); } + + return ret; } /* @@ -417,7 +422,7 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) * kernel mode. Just return without doing anything * if so. */ - if ((regs->cs & 3) != 3) + if (!user_mode(regs)) return 1; if (try_to_freeze(0)) @@ -434,11 +439,10 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) * inside the kernel. */ if (current->thread.debugreg7) - asm volatile("movq %0,%%db7" : : "r" (current->thread.debugreg7)); + set_debugreg(current->thread.debugreg7, 7); /* Whee! Actually deliver the signal. */ - handle_signal(signr, &info, &ka, oldset, regs); - return 1; + return handle_signal(signr, &info, &ka, oldset, regs); } no_signal: diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index fb8c809b4cd9..66bf6ddeb0c3 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -64,6 +64,7 @@ static int notsc __initdata = 0; unsigned int cpu_khz; /* TSC clocks / usec, not used here */ static unsigned long hpet_period; /* fsecs / HPET clock */ unsigned long hpet_tick; /* HPET clocks / interrupt */ +static int hpet_use_timer; unsigned long vxtime_hz = PIT_TICK_RATE; int report_lost_ticks; /* command line option */ unsigned long long monotonic_base; @@ -105,7 +106,9 @@ static inline unsigned int do_gettimeoffset_tsc(void) static inline unsigned int do_gettimeoffset_hpet(void) { - return ((hpet_readl(HPET_COUNTER) - vxtime.last) * vxtime.quot) >> 32; + /* cap counter read to one tick to avoid inconsistencies */ + unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last; + return (min(counter,hpet_tick) * vxtime.quot) >> 32; } unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc; @@ -301,7 +304,7 @@ unsigned long long monotonic_clock(void) last_offset = vxtime.last; base = monotonic_base; - this_offset = hpet_readl(HPET_T0_CMP) - hpet_tick; + this_offset = hpet_readl(HPET_COUNTER); } while (read_seqretry(&xtime_lock, seq)); offset = (this_offset - last_offset); @@ -377,7 +380,14 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) write_seqlock(&xtime_lock); - if (vxtime.hpet_address) { + if (vxtime.hpet_address) + offset = hpet_readl(HPET_COUNTER); + + if (hpet_use_timer) { + /* if we're using the hpet timer functionality, + * we can more accurately know the counter value + * when the timer interrupt occured. + */ offset = hpet_readl(HPET_T0_CMP) - hpet_tick; delay = hpet_readl(HPET_COUNTER) - offset; } else { @@ -803,17 +813,18 @@ static int hpet_timer_stop_set_go(unsigned long tick) * Set up timer 0, as periodic with first interrupt to happen at hpet_tick, * and period also hpet_tick. */ - - hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | + if (hpet_use_timer) { + hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT, HPET_T0_CFG); - hpet_writel(hpet_tick, HPET_T0_CMP); - hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */ - + hpet_writel(hpet_tick, HPET_T0_CMP); + hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */ + cfg |= HPET_CFG_LEGACY; + } /* * Go! */ - cfg |= HPET_CFG_ENABLE | HPET_CFG_LEGACY; + cfg |= HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); return 0; @@ -834,8 +845,7 @@ static int hpet_init(void) id = hpet_readl(HPET_ID); - if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER) || - !(id & HPET_ID_LEGSUP)) + if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER)) return -1; hpet_period = hpet_readl(HPET_PERIOD); @@ -845,6 +855,8 @@ static int hpet_init(void) hpet_tick = (1000000000L * (USEC_PER_SEC / HZ) + hpet_period / 2) / hpet_period; + hpet_use_timer = (id & HPET_ID_LEGSUP); + return hpet_timer_stop_set_go(hpet_tick); } @@ -901,9 +913,11 @@ void __init time_init(void) set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); - if (!hpet_init()) { + if (!hpet_init()) vxtime_hz = (1000000000000000L + hpet_period / 2) / hpet_period; + + if (hpet_use_timer) { cpu_khz = hpet_calibrate_tsc(); timename = "HPET"; #ifdef CONFIG_X86_PM_TIMER @@ -968,7 +982,7 @@ void __init time_init_gtod(void) if (unsynchronized_tsc()) notsc = 1; if (vxtime.hpet_address && notsc) { - timetype = "HPET"; + timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; vxtime.mode = VXTIME_HPET; do_gettimeoffset = do_gettimeoffset_hpet; @@ -983,7 +997,7 @@ void __init time_init_gtod(void) printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n"); #endif } else { - timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC"; + timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC"; vxtime.mode = VXTIME_TSC; } diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 3dfec8fdabcd..121646fc43f6 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -274,7 +274,7 @@ EXPORT_SYMBOL(dump_stack); void show_registers(struct pt_regs *regs) { int i; - int in_kernel = (regs->cs & 3) == 0; + int in_kernel = !user_mode(regs); unsigned long rsp; const int cpu = safe_smp_processor_id(); struct task_struct *cur = cpu_pda[cpu].pcurrent; @@ -318,7 +318,7 @@ void handle_BUG(struct pt_regs *regs) struct bug_frame f; char tmp; - if (regs->cs & 3) + if (user_mode(regs)) return; if (__copy_from_user(&f, (struct bug_frame *) regs->rip, sizeof(struct bug_frame))) @@ -437,7 +437,7 @@ static void do_trap(int trapnr, int signr, char *str, } #endif - if ((regs->cs & 3) != 0) { + if (user_mode(regs)) { struct task_struct *tsk = current; if (exception_trace && unhandled_signal(tsk, signr)) @@ -522,7 +522,7 @@ asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) } #endif - if ((regs->cs & 3)!=0) { + if (user_mode(regs)) { struct task_struct *tsk = current; if (exception_trace && unhandled_signal(tsk, SIGSEGV)) @@ -638,7 +638,7 @@ asmlinkage struct pt_regs *sync_regs(struct pt_regs *eregs) if (eregs == (struct pt_regs *)eregs->rsp) ; /* Exception from user space */ - else if (eregs->cs & 3) + else if (user_mode(eregs)) regs = ((struct pt_regs *)current->thread.rsp0) - 1; /* Exception from kernel and interrupts are enabled. Move to kernel process stack. */ @@ -669,7 +669,7 @@ asmlinkage void do_debug(struct pt_regs * regs, unsigned long error_code) } #endif - asm("movq %%db6,%0" : "=r" (condition)); + get_debugreg(condition, 6); if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, SIGTRAP) == NOTIFY_STOP) @@ -697,7 +697,7 @@ asmlinkage void do_debug(struct pt_regs * regs, unsigned long error_code) * allowing programs to debug themselves without the ptrace() * interface. */ - if ((regs->cs & 3) == 0) + if (!user_mode(regs)) goto clear_TF_reenable; /* * Was the TF flag set by a debugger? If so, clear it now, @@ -715,13 +715,13 @@ asmlinkage void do_debug(struct pt_regs * regs, unsigned long error_code) info.si_signo = SIGTRAP; info.si_errno = 0; info.si_code = TRAP_BRKPT; - if ((regs->cs & 3) == 0) + if (!user_mode(regs)) goto clear_dr7; info.si_addr = (void __user *)regs->rip; force_sig_info(SIGTRAP, &info, tsk); clear_dr7: - asm volatile("movq %0,%%db7"::"r"(0UL)); + set_debugreg(0UL, 7); return; clear_TF_reenable: @@ -756,7 +756,7 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs) unsigned short cwd, swd; conditional_sti(regs); - if ((regs->cs & 3) == 0 && + if (!user_mode(regs) && kernel_math_error(regs, "kernel x87 math error")) return; @@ -822,7 +822,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) unsigned short mxcsr; conditional_sti(regs); - if ((regs->cs & 3) == 0 && + if (!user_mode(regs) && kernel_math_error(regs, "kernel simd math error")) return; diff --git a/arch/x86_64/lib/delay.c b/arch/x86_64/lib/delay.c index aed61a668a1b..33a873a3c223 100644 --- a/arch/x86_64/lib/delay.c +++ b/arch/x86_64/lib/delay.c @@ -12,6 +12,7 @@ #include <linux/sched.h> #include <linux/delay.h> #include <asm/delay.h> +#include <asm/msr.h> #ifdef CONFIG_SMP #include <asm/smp.h> @@ -19,6 +20,12 @@ int x86_udelay_tsc = 0; /* Delay via TSC */ +int read_current_timer(unsigned long *timer_value) +{ + rdtscll(*timer_value); + return 0; +} + void __delay(unsigned long loops) { unsigned bclock, now; diff --git a/arch/x86_64/mm/Makefile b/arch/x86_64/mm/Makefile index 66c354ad80ca..1d232a87f113 100644 --- a/arch/x86_64/mm/Makefile +++ b/arch/x86_64/mm/Makefile @@ -4,7 +4,7 @@ obj-y := init.o fault.o ioremap.o extable.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_DISCONTIGMEM) += numa.o +obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_K8_NUMA) += k8topology.o obj-$(CONFIG_ACPI_NUMA) += srat.o diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 57d3ab15a5c7..2f187986f940 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -74,7 +74,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, instr = (unsigned char *)convert_rip_to_linear(current, regs); max_instr = instr + 15; - if ((regs->cs & 3) != 0 && instr >= (unsigned char *)TASK_SIZE) + if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) return 0; while (scan_more && instr < max_instr) { @@ -106,7 +106,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, /* Could check the LDT for lm, but for now it's good enough to assume that long mode only uses well known segments or kernel. */ - scan_more = ((regs->cs & 3) == 0) || (regs->cs == __USER_CS); + scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS); break; case 0x60: diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index dbe53b4c7e66..72e4b364ed73 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -318,7 +318,7 @@ void zap_low_mappings(void) flush_tlb_all(); } -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_NUMA void __init paging_init(void) { { @@ -427,13 +427,16 @@ void __init mem_init(void) reservedpages = 0; /* this will put all low memory onto the freelists */ -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NUMA totalram_pages += numa_free_all_bootmem(); tmp = 0; /* should count reserved pages here for all nodes */ #else + +#ifdef CONFIG_FLATMEM max_mapnr = end_pfn; if (!mem_map) BUG(); +#endif totalram_pages += free_all_bootmem(); @@ -515,7 +518,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) void __init reserve_bootmem_generic(unsigned long phys, unsigned len) { /* Should check here against the e820 map to avoid double free */ -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NUMA int nid = phys_to_nid(phys); reserve_bootmem_node(NODE_DATA(nid), phys, len); #else diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c index 58aac23760ef..6972df480d2b 100644 --- a/arch/x86_64/mm/ioremap.c +++ b/arch/x86_64/mm/ioremap.c @@ -178,7 +178,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS) return (__force void __iomem *)phys_to_virt(phys_addr); -#ifndef CONFIG_DISCONTIGMEM +#ifdef CONFIG_FLATMEM /* * Don't allow anybody to remap normal RAM that we're using.. */ diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index fd9f25d7a6c4..84cde796ecb1 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -66,6 +66,13 @@ int __init compute_hash_shift(struct node *nodes, int numnodes) return -1; } +#ifdef CONFIG_SPARSEMEM +int early_pfn_to_nid(unsigned long pfn) +{ + return phys_to_nid(pfn << PAGE_SHIFT); +} +#endif + /* Initialize bootmem allocator for a node */ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) { @@ -80,6 +87,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en start_pfn = start >> PAGE_SHIFT; end_pfn = end >> PAGE_SHIFT; + memory_present(nodeid, start_pfn, end_pfn); nodedata_phys = find_e820_area(start, end, pgdat_size); if (nodedata_phys == -1L) panic("Cannot find memory pgdat in node %d\n", nodeid); diff --git a/arch/x86_64/pci/k8-bus.c b/arch/x86_64/pci/k8-bus.c index 62349c78db57..7e7d0c2a0025 100644 --- a/arch/x86_64/pci/k8-bus.c +++ b/arch/x86_64/pci/k8-bus.c @@ -53,25 +53,11 @@ fill_mp_bus_to_cpumask(void) for (j = SECONDARY_LDT_BUS_NUMBER(ldtbus); j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus); j++) - pci_bus_to_cpumask[j] = - node_to_cpumask(NODE_ID(nid)); + pci_bus_to_node[j] = NODE_ID(nid); } } } - /* quick sanity check */ - printed = 0; - for (i = 0; i < 256; i++) { - if (cpus_empty(pci_bus_to_cpumask[i])) { - pci_bus_to_cpumask[i] = CPU_MASK_ALL; - if (printed) - continue; - printk(KERN_ERR - "k8-bus.c: some busses have empty cpu mask\n"); - printed = 1; - } - } - return 0; } diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 92b0352c8e92..bd7524cfff33 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1,4 +1,4 @@ -/* +/* * Quick & dirty crypto testing module. * * This will only exist until we have a better testing mechanism @@ -9,11 +9,12 @@ * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) + * Software Foundation; either version 2 of the License, or (at your option) * any later version. * - * 14 - 09 - 2003 - * Rewritten by Kartikey Mahendra Bhatt + * 2004-08-09 Added cipher speed tests (Reyk Floeter <reyk@vantronix.net>) + * 2003-09-14 Rewritten by Kartikey Mahendra Bhatt + * */ #include <linux/init.h> @@ -25,12 +26,15 @@ #include <linux/crypto.h> #include <linux/highmem.h> #include <linux/moduleparam.h> +#include <linux/jiffies.h> +#include <linux/timex.h> +#include <linux/interrupt.h> #include "tcrypt.h" /* * Need to kmalloc() memory for testing kmap(). */ -#define TVMEMSIZE 4096 +#define TVMEMSIZE 16384 #define XBUFSIZE 32768 /* @@ -55,19 +59,23 @@ static unsigned int IDX[8] = { IDX1, IDX2, IDX3, IDX4, IDX5, IDX6, IDX7, IDX8 }; +/* + * Used by test_cipher_speed() + */ +static unsigned int sec; + static int mode; static char *xbuf; static char *tvmem; static char *check[] = { "des", "md5", "des3_ede", "rot13", "sha1", "sha256", "blowfish", - "twofish", "serpent", "sha384", "sha512", "md4", "aes", "cast6", - "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea", + "twofish", "serpent", "sha384", "sha512", "md4", "aes", "cast6", + "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea", "khazad", "wp512", "wp384", "wp256", "tnepres", NULL }; -static void -hexdump(unsigned char *buf, unsigned int len) +static void hexdump(unsigned char *buf, unsigned int len) { while (len--) printk("%02x", *buf++); @@ -75,29 +83,29 @@ hexdump(unsigned char *buf, unsigned int len) printk("\n"); } -static void -test_hash (char * algo, struct hash_testvec * template, unsigned int tcount) +static void test_hash(char *algo, struct hash_testvec *template, + unsigned int tcount) { - char *p; - unsigned int i, j, k, temp; - struct scatterlist sg[8]; - char result[64]; - struct crypto_tfm *tfm; - struct hash_testvec *hash_tv; - unsigned int tsize; - - printk("\ntesting %s\n", algo); - - tsize = sizeof (struct hash_testvec); + char *p; + unsigned int i, j, k, temp; + struct scatterlist sg[8]; + char result[64]; + struct crypto_tfm *tfm; + struct hash_testvec *hash_tv; + unsigned int tsize; + + printk("\ntesting %s\n", algo); + + tsize = sizeof(struct hash_testvec); tsize *= tcount; - + if (tsize > TVMEMSIZE) { printk("template (%u) too big for tvmem (%u)\n", tsize, TVMEMSIZE); return; } memcpy(tvmem, template, tsize); - hash_tv = (void *) tvmem; + hash_tv = (void *)tvmem; tfm = crypto_alloc_tfm(algo, 0); if (tfm == NULL) { printk("failed to load transform for %s\n", algo); @@ -105,70 +113,71 @@ test_hash (char * algo, struct hash_testvec * template, unsigned int tcount) } for (i = 0; i < tcount; i++) { - printk ("test %u:\n", i + 1); - memset (result, 0, 64); + printk("test %u:\n", i + 1); + memset(result, 0, 64); p = hash_tv[i].plaintext; - sg[0].page = virt_to_page (p); - sg[0].offset = offset_in_page (p); + sg[0].page = virt_to_page(p); + sg[0].offset = offset_in_page(p); sg[0].length = hash_tv[i].psize; - crypto_digest_init (tfm); + crypto_digest_init(tfm); if (tfm->crt_u.digest.dit_setkey) { - crypto_digest_setkey (tfm, hash_tv[i].key, - hash_tv[i].ksize); + crypto_digest_setkey(tfm, hash_tv[i].key, + hash_tv[i].ksize); } - crypto_digest_update (tfm, sg, 1); - crypto_digest_final (tfm, result); + crypto_digest_update(tfm, sg, 1); + crypto_digest_final(tfm, result); - hexdump (result, crypto_tfm_alg_digestsize (tfm)); + hexdump(result, crypto_tfm_alg_digestsize(tfm)); printk("%s\n", - memcmp(result, hash_tv[i].digest, - crypto_tfm_alg_digestsize(tfm)) ? "fail" : - "pass"); + memcmp(result, hash_tv[i].digest, + crypto_tfm_alg_digestsize(tfm)) ? + "fail" : "pass"); } - printk ("testing %s across pages\n", algo); + printk("testing %s across pages\n", algo); /* setup the dummy buffer first */ - memset(xbuf, 0, XBUFSIZE); + memset(xbuf, 0, XBUFSIZE); j = 0; for (i = 0; i < tcount; i++) { if (hash_tv[i].np) { j++; - printk ("test %u:\n", j); - memset (result, 0, 64); + printk("test %u:\n", j); + memset(result, 0, 64); temp = 0; for (k = 0; k < hash_tv[i].np; k++) { - memcpy (&xbuf[IDX[k]], hash_tv[i].plaintext + temp, - hash_tv[i].tap[k]); + memcpy(&xbuf[IDX[k]], + hash_tv[i].plaintext + temp, + hash_tv[i].tap[k]); temp += hash_tv[i].tap[k]; p = &xbuf[IDX[k]]; - sg[k].page = virt_to_page (p); - sg[k].offset = offset_in_page (p); + sg[k].page = virt_to_page(p); + sg[k].offset = offset_in_page(p); sg[k].length = hash_tv[i].tap[k]; } - crypto_digest_digest (tfm, sg, hash_tv[i].np, result); - - hexdump (result, crypto_tfm_alg_digestsize (tfm)); + crypto_digest_digest(tfm, sg, hash_tv[i].np, result); + + hexdump(result, crypto_tfm_alg_digestsize(tfm)); printk("%s\n", - memcmp(result, hash_tv[i].digest, - crypto_tfm_alg_digestsize(tfm)) ? "fail" : - "pass"); + memcmp(result, hash_tv[i].digest, + crypto_tfm_alg_digestsize(tfm)) ? + "fail" : "pass"); } } - - crypto_free_tfm (tfm); + + crypto_free_tfm(tfm); } #ifdef CONFIG_CRYPTO_HMAC -static void -test_hmac(char *algo, struct hmac_testvec * template, unsigned int tcount) +static void test_hmac(char *algo, struct hmac_testvec *template, + unsigned int tcount) { char *p; unsigned int i, j, k, temp; @@ -185,8 +194,8 @@ test_hmac(char *algo, struct hmac_testvec * template, unsigned int tcount) } printk("\ntesting hmac_%s\n", algo); - - tsize = sizeof (struct hmac_testvec); + + tsize = sizeof(struct hmac_testvec); tsize *= tcount; if (tsize > TVMEMSIZE) { printk("template (%u) too big for tvmem (%u)\n", tsize, @@ -195,7 +204,7 @@ test_hmac(char *algo, struct hmac_testvec * template, unsigned int tcount) } memcpy(tvmem, template, tsize); - hmac_tv = (void *) tvmem; + hmac_tv = (void *)tvmem; for (i = 0; i < tcount; i++) { printk("test %u:\n", i + 1); @@ -219,34 +228,35 @@ test_hmac(char *algo, struct hmac_testvec * template, unsigned int tcount) printk("\ntesting hmac_%s across pages\n", algo); memset(xbuf, 0, XBUFSIZE); - + j = 0; for (i = 0; i < tcount; i++) { if (hmac_tv[i].np) { j++; - printk ("test %u:\n",j); - memset (result, 0, 64); + printk("test %u:\n",j); + memset(result, 0, 64); temp = 0; klen = hmac_tv[i].ksize; for (k = 0; k < hmac_tv[i].np; k++) { - memcpy (&xbuf[IDX[k]], hmac_tv[i].plaintext + temp, - hmac_tv[i].tap[k]); + memcpy(&xbuf[IDX[k]], + hmac_tv[i].plaintext + temp, + hmac_tv[i].tap[k]); temp += hmac_tv[i].tap[k]; p = &xbuf[IDX[k]]; - sg[k].page = virt_to_page (p); - sg[k].offset = offset_in_page (p); + sg[k].page = virt_to_page(p); + sg[k].offset = offset_in_page(p); sg[k].length = hmac_tv[i].tap[k]; } - crypto_hmac(tfm, hmac_tv[i].key, &klen, sg, hmac_tv[i].np, - result); + crypto_hmac(tfm, hmac_tv[i].key, &klen, sg, + hmac_tv[i].np, result); hexdump(result, crypto_tfm_alg_digestsize(tfm)); - + printk("%s\n", - memcmp(result, hmac_tv[i].digest, - crypto_tfm_alg_digestsize(tfm)) ? "fail" : - "pass"); + memcmp(result, hmac_tv[i].digest, + crypto_tfm_alg_digestsize(tfm)) ? + "fail" : "pass"); } } out: @@ -255,8 +265,8 @@ out: #endif /* CONFIG_CRYPTO_HMAC */ -static void -test_cipher(char * algo, int mode, int enc, struct cipher_testvec * template, unsigned int tcount) +static void test_cipher(char *algo, int mode, int enc, + struct cipher_testvec *template, unsigned int tcount) { unsigned int ret, i, j, k, temp; unsigned int tsize; @@ -265,22 +275,22 @@ test_cipher(char * algo, int mode, int enc, struct cipher_testvec * template, un char *key; struct cipher_testvec *cipher_tv; struct scatterlist sg[8]; - char e[11], m[4]; + const char *e, *m; if (enc == ENCRYPT) - strncpy(e, "encryption", 11); + e = "encryption"; else - strncpy(e, "decryption", 11); + e = "decryption"; if (mode == MODE_ECB) - strncpy(m, "ECB", 4); + m = "ECB"; else - strncpy(m, "CBC", 4); + m = "CBC"; - printk("\ntesting %s %s %s \n", algo, m, e); + printk("\ntesting %s %s %s\n", algo, m, e); - tsize = sizeof (struct cipher_testvec); + tsize = sizeof (struct cipher_testvec); tsize *= tcount; - + if (tsize > TVMEMSIZE) { printk("template (%u) too big for tvmem (%u)\n", tsize, TVMEMSIZE); @@ -288,112 +298,113 @@ test_cipher(char * algo, int mode, int enc, struct cipher_testvec * template, un } memcpy(tvmem, template, tsize); - cipher_tv = (void *) tvmem; + cipher_tv = (void *)tvmem; + + if (mode) + tfm = crypto_alloc_tfm(algo, 0); + else + tfm = crypto_alloc_tfm(algo, CRYPTO_TFM_MODE_CBC); - if (mode) - tfm = crypto_alloc_tfm (algo, 0); - else - tfm = crypto_alloc_tfm (algo, CRYPTO_TFM_MODE_CBC); - if (tfm == NULL) { printk("failed to load transform for %s %s\n", algo, m); return; } - + j = 0; for (i = 0; i < tcount; i++) { if (!(cipher_tv[i].np)) { - j++; + j++; printk("test %u (%d bit key):\n", j, cipher_tv[i].klen * 8); tfm->crt_flags = 0; - if (cipher_tv[i].wk) + if (cipher_tv[i].wk) tfm->crt_flags |= CRYPTO_TFM_REQ_WEAK_KEY; key = cipher_tv[i].key; - + ret = crypto_cipher_setkey(tfm, key, cipher_tv[i].klen); if (ret) { printk("setkey() failed flags=%x\n", tfm->crt_flags); - + if (!cipher_tv[i].fail) goto out; - } + } p = cipher_tv[i].input; sg[0].page = virt_to_page(p); sg[0].offset = offset_in_page(p); sg[0].length = cipher_tv[i].ilen; - + if (!mode) { crypto_cipher_set_iv(tfm, cipher_tv[i].iv, - crypto_tfm_alg_ivsize (tfm)); + crypto_tfm_alg_ivsize(tfm)); } - + if (enc) ret = crypto_cipher_encrypt(tfm, sg, sg, cipher_tv[i].ilen); else ret = crypto_cipher_decrypt(tfm, sg, sg, cipher_tv[i].ilen); - - + + if (ret) { printk("%s () failed flags=%x\n", e, tfm->crt_flags); goto out; - } - + } + q = kmap(sg[0].page) + sg[0].offset; hexdump(q, cipher_tv[i].rlen); - - printk("%s\n", - memcmp(q, cipher_tv[i].result, cipher_tv[i].rlen) ? "fail" : - "pass"); + + printk("%s\n", + memcmp(q, cipher_tv[i].result, + cipher_tv[i].rlen) ? "fail" : "pass"); } } - - printk("\ntesting %s %s %s across pages (chunking) \n", algo, m, e); + + printk("\ntesting %s %s %s across pages (chunking)\n", algo, m, e); memset(xbuf, 0, XBUFSIZE); - + j = 0; for (i = 0; i < tcount; i++) { if (cipher_tv[i].np) { - j++; + j++; printk("test %u (%d bit key):\n", j, cipher_tv[i].klen * 8); - tfm->crt_flags = 0; - if (cipher_tv[i].wk) + tfm->crt_flags = 0; + if (cipher_tv[i].wk) tfm->crt_flags |= CRYPTO_TFM_REQ_WEAK_KEY; key = cipher_tv[i].key; - - ret = crypto_cipher_setkey(tfm, key, cipher_tv[i].klen); + + ret = crypto_cipher_setkey(tfm, key, cipher_tv[i].klen); if (ret) { printk("setkey() failed flags=%x\n", tfm->crt_flags); - + if (!cipher_tv[i].fail) goto out; } temp = 0; for (k = 0; k < cipher_tv[i].np; k++) { - memcpy (&xbuf[IDX[k]], cipher_tv[i].input + temp, - cipher_tv[i].tap[k]); + memcpy(&xbuf[IDX[k]], + cipher_tv[i].input + temp, + cipher_tv[i].tap[k]); temp += cipher_tv[i].tap[k]; p = &xbuf[IDX[k]]; - sg[k].page = virt_to_page (p); - sg[k].offset = offset_in_page (p); + sg[k].page = virt_to_page(p); + sg[k].offset = offset_in_page(p); sg[k].length = cipher_tv[i].tap[k]; } - + if (!mode) { crypto_cipher_set_iv(tfm, cipher_tv[i].iv, - crypto_tfm_alg_ivsize (tfm)); + crypto_tfm_alg_ivsize(tfm)); } - + if (enc) ret = crypto_cipher_encrypt(tfm, sg, sg, cipher_tv[i].ilen); else ret = crypto_cipher_decrypt(tfm, sg, sg, cipher_tv[i].ilen); - + if (ret) { printk("%s () failed flags=%x\n", e, tfm->crt_flags); goto out; @@ -404,9 +415,9 @@ test_cipher(char * algo, int mode, int enc, struct cipher_testvec * template, un printk("page %u\n", k); q = kmap(sg[k].page) + sg[k].offset; hexdump(q, cipher_tv[i].tap[k]); - printk("%s\n", - memcmp(q, cipher_tv[i].result + temp, - cipher_tv[i].tap[k]) ? "fail" : + printk("%s\n", + memcmp(q, cipher_tv[i].result + temp, + cipher_tv[i].tap[k]) ? "fail" : "pass"); temp += cipher_tv[i].tap[k]; } @@ -417,8 +428,169 @@ out: crypto_free_tfm(tfm); } -static void -test_deflate(void) +static int test_cipher_jiffies(struct crypto_tfm *tfm, int enc, char *p, + int blen, int sec) +{ + struct scatterlist sg[8]; + unsigned long start, end; + int bcount; + int ret; + + sg[0].page = virt_to_page(p); + sg[0].offset = offset_in_page(p); + sg[0].length = blen; + + for (start = jiffies, end = start + sec * HZ, bcount = 0; + time_before(jiffies, end); bcount++) { + if (enc) + ret = crypto_cipher_encrypt(tfm, sg, sg, blen); + else + ret = crypto_cipher_decrypt(tfm, sg, sg, blen); + + if (ret) + return ret; + } + + printk("%d operations in %d seconds (%ld bytes)\n", + bcount, sec, (long)bcount * blen); + return 0; +} + +static int test_cipher_cycles(struct crypto_tfm *tfm, int enc, char *p, + int blen) +{ + struct scatterlist sg[8]; + unsigned long cycles = 0; + int ret = 0; + int i; + + sg[0].page = virt_to_page(p); + sg[0].offset = offset_in_page(p); + sg[0].length = blen; + + local_bh_disable(); + local_irq_disable(); + + /* Warm-up run. */ + for (i = 0; i < 4; i++) { + if (enc) + ret = crypto_cipher_encrypt(tfm, sg, sg, blen); + else + ret = crypto_cipher_decrypt(tfm, sg, sg, blen); + + if (ret) + goto out; + } + + /* The real thing. */ + for (i = 0; i < 8; i++) { + cycles_t start, end; + + start = get_cycles(); + if (enc) + ret = crypto_cipher_encrypt(tfm, sg, sg, blen); + else + ret = crypto_cipher_decrypt(tfm, sg, sg, blen); + end = get_cycles(); + + if (ret) + goto out; + + cycles += end - start; + } + +out: + local_irq_enable(); + local_bh_enable(); + + if (ret == 0) + printk("1 operation in %lu cycles (%d bytes)\n", + (cycles + 4) / 8, blen); + + return ret; +} + +static void test_cipher_speed(char *algo, int mode, int enc, unsigned int sec, + struct cipher_testvec *template, + unsigned int tcount, struct cipher_speed *speed) +{ + unsigned int ret, i, j, iv_len; + unsigned char *key, *p, iv[128]; + struct crypto_tfm *tfm; + const char *e, *m; + + if (enc == ENCRYPT) + e = "encryption"; + else + e = "decryption"; + if (mode == MODE_ECB) + m = "ECB"; + else + m = "CBC"; + + printk("\ntesting speed of %s %s %s\n", algo, m, e); + + if (mode) + tfm = crypto_alloc_tfm(algo, 0); + else + tfm = crypto_alloc_tfm(algo, CRYPTO_TFM_MODE_CBC); + + if (tfm == NULL) { + printk("failed to load transform for %s %s\n", algo, m); + return; + } + + for (i = 0; speed[i].klen != 0; i++) { + if ((speed[i].blen + speed[i].klen) > TVMEMSIZE) { + printk("template (%u) too big for tvmem (%u)\n", + speed[i].blen + speed[i].klen, TVMEMSIZE); + goto out; + } + + printk("test %u (%d bit key, %d byte blocks): ", i, + speed[i].klen * 8, speed[i].blen); + + memset(tvmem, 0xff, speed[i].klen + speed[i].blen); + + /* set key, plain text and IV */ + key = (unsigned char *)tvmem; + for (j = 0; j < tcount; j++) { + if (template[j].klen == speed[i].klen) { + key = template[j].key; + break; + } + } + p = (unsigned char *)tvmem + speed[i].klen; + + ret = crypto_cipher_setkey(tfm, key, speed[i].klen); + if (ret) { + printk("setkey() failed flags=%x\n", tfm->crt_flags); + goto out; + } + + if (!mode) { + iv_len = crypto_tfm_alg_ivsize(tfm); + memset(&iv, 0xff, iv_len); + crypto_cipher_set_iv(tfm, iv, iv_len); + } + + if (sec) + ret = test_cipher_jiffies(tfm, enc, p, speed[i].blen, + sec); + else + ret = test_cipher_cycles(tfm, enc, p, speed[i].blen); + + if (ret) { + printk("%s() failed flags=%x\n", e, tfm->crt_flags); + break; + } + } + +out: + crypto_free_tfm(tfm); +} + +static void test_deflate(void) { unsigned int i; char result[COMP_BUF_SIZE]; @@ -436,7 +608,7 @@ test_deflate(void) } memcpy(tvmem, deflate_comp_tv_template, tsize); - tv = (void *) tvmem; + tv = (void *)tvmem; tfm = crypto_alloc_tfm("deflate", 0); if (tfm == NULL) { @@ -446,7 +618,7 @@ test_deflate(void) for (i = 0; i < DEFLATE_COMP_TEST_VECTORS; i++) { int ilen, ret, dlen = COMP_BUF_SIZE; - + printk("test %u:\n", i + 1); memset(result, 0, sizeof (result)); @@ -473,11 +645,11 @@ test_deflate(void) } memcpy(tvmem, deflate_decomp_tv_template, tsize); - tv = (void *) tvmem; + tv = (void *)tvmem; for (i = 0; i < DEFLATE_DECOMP_TEST_VECTORS; i++) { int ilen, ret, dlen = COMP_BUF_SIZE; - + printk("test %u:\n", i + 1); memset(result, 0, sizeof (result)); @@ -497,8 +669,7 @@ out: crypto_free_tfm(tfm); } -static void -test_crc32c(void) +static void test_crc32c(void) { #define NUMVEC 6 #define VECSIZE 40 @@ -511,7 +682,7 @@ test_crc32c(void) 0xd579c862, 0xba979ad0, 0x2b29d913 }; static u32 tot_vec_results = 0x24c5d375; - + struct scatterlist sg[NUMVEC]; struct crypto_tfm *tfm; char *fmtdata = "testing crc32c initialized to %08x: %s\n"; @@ -525,18 +696,18 @@ test_crc32c(void) printk("failed to load transform for crc32c\n"); return; } - + crypto_digest_init(tfm); crypto_digest_final(tfm, (u8*)&crc); printk(fmtdata, crc, (crc == 0) ? "pass" : "ERROR"); - + /* * stuff test_vec with known values, simple incrementing * byte values. */ b = 0; for (i = 0; i < NUMVEC; i++) { - for (j = 0; j < VECSIZE; j++) + for (j = 0; j < VECSIZE; j++) test_vec[i][j] = ++b; sg[i].page = virt_to_page(test_vec[i]); sg[i].offset = offset_in_page(test_vec[i]); @@ -548,11 +719,11 @@ test_crc32c(void) crypto_digest_final(tfm, (u8*)&crc); printk("testing crc32c setkey returns %08x : %s\n", crc, (crc == (SEEDTESTVAL ^ ~(u32)0)) ? "pass" : "ERROR"); - + printk("testing crc32c using update/final:\n"); pass = 1; /* assume all is well */ - + for (i = 0; i < NUMVEC; i++) { seed = ~(u32)0; (void)crypto_digest_setkey(tfm, (const u8*)&seed, sizeof(u32)); @@ -591,66 +762,64 @@ test_crc32c(void) printk(" %08x:BAD, wanted %08x\n", crc, tot_vec_results); pass = 0; } - + printk("\n%s\n", pass ? "pass" : "ERROR"); crypto_free_tfm(tfm); printk("crc32c test complete\n"); } -static void -test_available(void) +static void test_available(void) { char **name = check; - + while (*name) { printk("alg %s ", *name); printk((crypto_alg_available(*name, 0)) ? "found\n" : "not found\n"); name++; - } + } } -static void -do_test(void) +static void do_test(void) { switch (mode) { case 0: test_hash("md5", md5_tv_template, MD5_TEST_VECTORS); - + test_hash("sha1", sha1_tv_template, SHA1_TEST_VECTORS); - + //DES test_cipher ("des", MODE_ECB, ENCRYPT, des_enc_tv_template, DES_ENC_TEST_VECTORS); - test_cipher ("des", MODE_ECB, DECRYPT, des_dec_tv_template, DES_DEC_TEST_VECTORS); - test_cipher ("des", MODE_CBC, ENCRYPT, des_cbc_enc_tv_template, DES_CBC_ENC_TEST_VECTORS); - test_cipher ("des", MODE_CBC, DECRYPT, des_cbc_dec_tv_template, DES_CBC_DEC_TEST_VECTORS); - + test_cipher ("des", MODE_ECB, DECRYPT, des_dec_tv_template, DES_DEC_TEST_VECTORS); + test_cipher ("des", MODE_CBC, ENCRYPT, des_cbc_enc_tv_template, DES_CBC_ENC_TEST_VECTORS); + test_cipher ("des", MODE_CBC, DECRYPT, des_cbc_dec_tv_template, DES_CBC_DEC_TEST_VECTORS); + //DES3_EDE test_cipher ("des3_ede", MODE_ECB, ENCRYPT, des3_ede_enc_tv_template, DES3_EDE_ENC_TEST_VECTORS); - test_cipher ("des3_ede", MODE_ECB, DECRYPT, des3_ede_dec_tv_template, DES3_EDE_DEC_TEST_VECTORS); - + test_cipher ("des3_ede", MODE_ECB, DECRYPT, des3_ede_dec_tv_template, DES3_EDE_DEC_TEST_VECTORS); + test_hash("md4", md4_tv_template, MD4_TEST_VECTORS); - + test_hash("sha256", sha256_tv_template, SHA256_TEST_VECTORS); - + //BLOWFISH test_cipher ("blowfish", MODE_ECB, ENCRYPT, bf_enc_tv_template, BF_ENC_TEST_VECTORS); test_cipher ("blowfish", MODE_ECB, DECRYPT, bf_dec_tv_template, BF_DEC_TEST_VECTORS); test_cipher ("blowfish", MODE_CBC, ENCRYPT, bf_cbc_enc_tv_template, BF_CBC_ENC_TEST_VECTORS); test_cipher ("blowfish", MODE_CBC, DECRYPT, bf_cbc_dec_tv_template, BF_CBC_DEC_TEST_VECTORS); - + //TWOFISH test_cipher ("twofish", MODE_ECB, ENCRYPT, tf_enc_tv_template, TF_ENC_TEST_VECTORS); test_cipher ("twofish", MODE_ECB, DECRYPT, tf_dec_tv_template, TF_DEC_TEST_VECTORS); test_cipher ("twofish", MODE_CBC, ENCRYPT, tf_cbc_enc_tv_template, TF_CBC_ENC_TEST_VECTORS); test_cipher ("twofish", MODE_CBC, DECRYPT, tf_cbc_dec_tv_template, TF_CBC_DEC_TEST_VECTORS); - + //SERPENT test_cipher ("serpent", MODE_ECB, ENCRYPT, serpent_enc_tv_template, SERPENT_ENC_TEST_VECTORS); test_cipher ("serpent", MODE_ECB, DECRYPT, serpent_dec_tv_template, SERPENT_DEC_TEST_VECTORS); - + //TNEPRES test_cipher ("tnepres", MODE_ECB, ENCRYPT, tnepres_enc_tv_template, TNEPRES_ENC_TEST_VECTORS); test_cipher ("tnepres", MODE_ECB, DECRYPT, tnepres_dec_tv_template, TNEPRES_DEC_TEST_VECTORS); @@ -662,7 +831,7 @@ do_test(void) //CAST5 test_cipher ("cast5", MODE_ECB, ENCRYPT, cast5_enc_tv_template, CAST5_ENC_TEST_VECTORS); test_cipher ("cast5", MODE_ECB, DECRYPT, cast5_dec_tv_template, CAST5_DEC_TEST_VECTORS); - + //CAST6 test_cipher ("cast6", MODE_ECB, ENCRYPT, cast6_enc_tv_template, CAST6_ENC_TEST_VECTORS); test_cipher ("cast6", MODE_ECB, DECRYPT, cast6_dec_tv_template, CAST6_DEC_TEST_VECTORS); @@ -702,9 +871,9 @@ do_test(void) test_crc32c(); #ifdef CONFIG_CRYPTO_HMAC test_hmac("md5", hmac_md5_tv_template, HMAC_MD5_TEST_VECTORS); - test_hmac("sha1", hmac_sha1_tv_template, HMAC_SHA1_TEST_VECTORS); + test_hmac("sha1", hmac_sha1_tv_template, HMAC_SHA1_TEST_VECTORS); test_hmac("sha256", hmac_sha256_tv_template, HMAC_SHA256_TEST_VECTORS); -#endif +#endif test_hash("michael_mic", michael_mic_tv_template, MICHAEL_MIC_TEST_VECTORS); break; @@ -726,17 +895,17 @@ do_test(void) case 4: test_cipher ("des3_ede", MODE_ECB, ENCRYPT, des3_ede_enc_tv_template, DES3_EDE_ENC_TEST_VECTORS); - test_cipher ("des3_ede", MODE_ECB, DECRYPT, des3_ede_dec_tv_template, DES3_EDE_DEC_TEST_VECTORS); + test_cipher ("des3_ede", MODE_ECB, DECRYPT, des3_ede_dec_tv_template, DES3_EDE_DEC_TEST_VECTORS); break; case 5: test_hash("md4", md4_tv_template, MD4_TEST_VECTORS); break; - + case 6: test_hash("sha256", sha256_tv_template, SHA256_TEST_VECTORS); break; - + case 7: test_cipher ("blowfish", MODE_ECB, ENCRYPT, bf_enc_tv_template, BF_ENC_TEST_VECTORS); test_cipher ("blowfish", MODE_ECB, DECRYPT, bf_dec_tv_template, BF_DEC_TEST_VECTORS); @@ -750,7 +919,7 @@ do_test(void) test_cipher ("twofish", MODE_CBC, ENCRYPT, tf_cbc_enc_tv_template, TF_CBC_ENC_TEST_VECTORS); test_cipher ("twofish", MODE_CBC, DECRYPT, tf_cbc_dec_tv_template, TF_CBC_DEC_TEST_VECTORS); break; - + case 9: test_cipher ("serpent", MODE_ECB, ENCRYPT, serpent_enc_tv_template, SERPENT_ENC_TEST_VECTORS); test_cipher ("serpent", MODE_ECB, DECRYPT, serpent_dec_tv_template, SERPENT_DEC_TEST_VECTORS); @@ -758,13 +927,13 @@ do_test(void) case 10: test_cipher ("aes", MODE_ECB, ENCRYPT, aes_enc_tv_template, AES_ENC_TEST_VECTORS); - test_cipher ("aes", MODE_ECB, DECRYPT, aes_dec_tv_template, AES_DEC_TEST_VECTORS); + test_cipher ("aes", MODE_ECB, DECRYPT, aes_dec_tv_template, AES_DEC_TEST_VECTORS); break; case 11: test_hash("sha384", sha384_tv_template, SHA384_TEST_VECTORS); break; - + case 12: test_hash("sha512", sha512_tv_template, SHA512_TEST_VECTORS); break; @@ -852,21 +1021,84 @@ do_test(void) case 100: test_hmac("md5", hmac_md5_tv_template, HMAC_MD5_TEST_VECTORS); break; - + case 101: - test_hmac("sha1", hmac_sha1_tv_template, HMAC_SHA1_TEST_VECTORS); + test_hmac("sha1", hmac_sha1_tv_template, HMAC_SHA1_TEST_VECTORS); break; - + case 102: test_hmac("sha256", hmac_sha256_tv_template, HMAC_SHA256_TEST_VECTORS); break; #endif + case 200: + test_cipher_speed("aes", MODE_ECB, ENCRYPT, sec, NULL, 0, + aes_speed_template); + test_cipher_speed("aes", MODE_ECB, DECRYPT, sec, NULL, 0, + aes_speed_template); + test_cipher_speed("aes", MODE_CBC, ENCRYPT, sec, NULL, 0, + aes_speed_template); + test_cipher_speed("aes", MODE_CBC, DECRYPT, sec, NULL, 0, + aes_speed_template); + break; + + case 201: + test_cipher_speed("des3_ede", MODE_ECB, ENCRYPT, sec, + des3_ede_enc_tv_template, + DES3_EDE_ENC_TEST_VECTORS, + des3_ede_speed_template); + test_cipher_speed("des3_ede", MODE_ECB, DECRYPT, sec, + des3_ede_dec_tv_template, + DES3_EDE_DEC_TEST_VECTORS, + des3_ede_speed_template); + test_cipher_speed("des3_ede", MODE_CBC, ENCRYPT, sec, + des3_ede_enc_tv_template, + DES3_EDE_ENC_TEST_VECTORS, + des3_ede_speed_template); + test_cipher_speed("des3_ede", MODE_CBC, DECRYPT, sec, + des3_ede_dec_tv_template, + DES3_EDE_DEC_TEST_VECTORS, + des3_ede_speed_template); + break; + + case 202: + test_cipher_speed("twofish", MODE_ECB, ENCRYPT, sec, NULL, 0, + twofish_speed_template); + test_cipher_speed("twofish", MODE_ECB, DECRYPT, sec, NULL, 0, + twofish_speed_template); + test_cipher_speed("twofish", MODE_CBC, ENCRYPT, sec, NULL, 0, + twofish_speed_template); + test_cipher_speed("twofish", MODE_CBC, DECRYPT, sec, NULL, 0, + twofish_speed_template); + break; + + case 203: + test_cipher_speed("blowfish", MODE_ECB, ENCRYPT, sec, NULL, 0, + blowfish_speed_template); + test_cipher_speed("blowfish", MODE_ECB, DECRYPT, sec, NULL, 0, + blowfish_speed_template); + test_cipher_speed("blowfish", MODE_CBC, ENCRYPT, sec, NULL, 0, + blowfish_speed_template); + test_cipher_speed("blowfish", MODE_CBC, DECRYPT, sec, NULL, 0, + blowfish_speed_template); + break; + + case 204: + test_cipher_speed("des", MODE_ECB, ENCRYPT, sec, NULL, 0, + des_speed_template); + test_cipher_speed("des", MODE_ECB, DECRYPT, sec, NULL, 0, + des_speed_template); + test_cipher_speed("des", MODE_CBC, ENCRYPT, sec, NULL, 0, + des_speed_template); + test_cipher_speed("des", MODE_CBC, DECRYPT, sec, NULL, 0, + des_speed_template); + break; + case 1000: test_available(); break; - + default: /* useful for debugging */ printk("not testing anything\n"); @@ -874,8 +1106,7 @@ do_test(void) } } -static int __init -init(void) +static int __init init(void) { tvmem = kmalloc(TVMEMSIZE, GFP_KERNEL); if (tvmem == NULL) @@ -904,6 +1135,9 @@ module_init(init); module_exit(fini); module_param(mode, int, 0); +module_param(sec, uint, 0); +MODULE_PARM_DESC(sec, "Length in seconds of speed tests " + "(defaults to zero which uses CPU cycles instead)"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Quick & dirty crypto testing module"); diff --git a/crypto/tcrypt.h b/crypto/tcrypt.h index a3097afae593..c01a0ce9b40a 100644 --- a/crypto/tcrypt.h +++ b/crypto/tcrypt.h @@ -9,10 +9,11 @@ * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) + * Software Foundation; either version 2 of the License, or (at your option) * any later version. * - * 14 - 09 - 2003 Changes by Kartikey Mahendra Bhatt + * 2004-08-09 Cipher speed tests by Reyk Floeter <reyk@vantronix.net> + * 2003-09-14 Changes by Kartikey Mahendra Bhatt * */ #ifndef _CRYPTO_TCRYPT_H @@ -29,19 +30,19 @@ struct hash_testvec { unsigned char psize; char digest[MAX_DIGEST_SIZE]; unsigned char np; - unsigned char tap[MAX_TAP]; + unsigned char tap[MAX_TAP]; char key[128]; /* only used with keyed hash algorithms */ unsigned char ksize; }; -struct hmac_testvec { +struct hmac_testvec { char key[128]; unsigned char ksize; char plaintext[128]; unsigned char psize; char digest[MAX_DIGEST_SIZE]; unsigned char np; - unsigned char tap[MAX_TAP]; + unsigned char tap[MAX_TAP]; }; struct cipher_testvec { @@ -55,7 +56,12 @@ struct cipher_testvec { char result[48]; unsigned char rlen; int np; - unsigned char tap[MAX_TAP]; + unsigned char tap[MAX_TAP]; +}; + +struct cipher_speed { + unsigned char klen; + unsigned int blen; }; /* @@ -155,7 +161,7 @@ static struct hash_testvec md5_tv_template[] = { #define SHA1_TEST_VECTORS 2 static struct hash_testvec sha1_tv_template[] = { - { + { .plaintext = "abc", .psize = 3, .digest = { 0xa9, 0x99, 0x3e, 0x36, 0x47, 0x06, 0x81, 0x6a, 0xba, 0x3e, @@ -175,8 +181,8 @@ static struct hash_testvec sha1_tv_template[] = { */ #define SHA256_TEST_VECTORS 2 -static struct hash_testvec sha256_tv_template[] = { - { +static struct hash_testvec sha256_tv_template[] = { + { .plaintext = "abc", .psize = 3, .digest = { 0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, @@ -201,7 +207,7 @@ static struct hash_testvec sha256_tv_template[] = { #define SHA384_TEST_VECTORS 4 static struct hash_testvec sha384_tv_template[] = { - { + { .plaintext= "abc", .psize = 3, .digest = { 0xcb, 0x00, 0x75, 0x3f, 0x45, 0xa3, 0x5e, 0x8b, @@ -221,7 +227,7 @@ static struct hash_testvec sha384_tv_template[] = { 0x5f, 0xe9, 0x5b, 0x1f, 0xe3, 0xc8, 0x45, 0x2b}, }, { .plaintext = "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn" - "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", + "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", .psize = 112, .digest = { 0x09, 0x33, 0x0c, 0x33, 0xf7, 0x11, 0x47, 0xe8, 0x3d, 0x19, 0x2f, 0xc7, 0x82, 0xcd, 0x1b, 0x47, @@ -250,7 +256,7 @@ static struct hash_testvec sha384_tv_template[] = { #define SHA512_TEST_VECTORS 4 static struct hash_testvec sha512_tv_template[] = { - { + { .plaintext = "abc", .psize = 3, .digest = { 0xdd, 0xaf, 0x35, 0xa1, 0x93, 0x61, 0x7a, 0xba, @@ -303,14 +309,14 @@ static struct hash_testvec sha512_tv_template[] = { /* - * WHIRLPOOL test vectors from Whirlpool package + * WHIRLPOOL test vectors from Whirlpool package * by Vincent Rijmen and Paulo S. L. M. Barreto as part of the NESSIE * submission */ #define WP512_TEST_VECTORS 8 static struct hash_testvec wp512_tv_template[] = { - { + { .plaintext = "", .psize = 0, .digest = { 0x19, 0xFA, 0x61, 0xD7, 0x55, 0x22, 0xA4, 0x66, @@ -348,13 +354,13 @@ static struct hash_testvec wp512_tv_template[] = { }, { .plaintext = "message digest", .psize = 14, - .digest = { 0x37, 0x8C, 0x84, 0xA4, 0x12, 0x6E, 0x2D, 0xC6, - 0xE5, 0x6D, 0xCC, 0x74, 0x58, 0x37, 0x7A, 0xAC, - 0x83, 0x8D, 0x00, 0x03, 0x22, 0x30, 0xF5, 0x3C, - 0xE1, 0xF5, 0x70, 0x0C, 0x0F, 0xFB, 0x4D, 0x3B, - 0x84, 0x21, 0x55, 0x76, 0x59, 0xEF, 0x55, 0xC1, - 0x06, 0xB4, 0xB5, 0x2A, 0xC5, 0xA4, 0xAA, 0xA6, - 0x92, 0xED, 0x92, 0x00, 0x52, 0x83, 0x8F, 0x33, + .digest = { 0x37, 0x8C, 0x84, 0xA4, 0x12, 0x6E, 0x2D, 0xC6, + 0xE5, 0x6D, 0xCC, 0x74, 0x58, 0x37, 0x7A, 0xAC, + 0x83, 0x8D, 0x00, 0x03, 0x22, 0x30, 0xF5, 0x3C, + 0xE1, 0xF5, 0x70, 0x0C, 0x0F, 0xFB, 0x4D, 0x3B, + 0x84, 0x21, 0x55, 0x76, 0x59, 0xEF, 0x55, 0xC1, + 0x06, 0xB4, 0xB5, 0x2A, 0xC5, 0xA4, 0xAA, 0xA6, + 0x92, 0xED, 0x92, 0x00, 0x52, 0x83, 0x8F, 0x33, 0x62, 0xE8, 0x6D, 0xBD, 0x37, 0xA8, 0x90, 0x3E }, }, { .plaintext = "abcdefghijklmnopqrstuvwxyz", @@ -394,7 +400,7 @@ static struct hash_testvec wp512_tv_template[] = { }, { .plaintext = "abcdbcdecdefdefgefghfghighijhijk", .psize = 32, - .digest = { 0x2A, 0x98, 0x7E, 0xA4, 0x0F, 0x91, 0x70, 0x61, + .digest = { 0x2A, 0x98, 0x7E, 0xA4, 0x0F, 0x91, 0x70, 0x61, 0xF5, 0xD6, 0xF0, 0xA0, 0xE4, 0x64, 0x4F, 0x48, 0x8A, 0x7A, 0x5A, 0x52, 0xDE, 0xEE, 0x65, 0x62, 0x07, 0xC5, 0x62, 0xF9, 0x88, 0xE9, 0x5C, 0x69, @@ -408,7 +414,7 @@ static struct hash_testvec wp512_tv_template[] = { #define WP384_TEST_VECTORS 8 static struct hash_testvec wp384_tv_template[] = { - { + { .plaintext = "", .psize = 0, .digest = { 0x19, 0xFA, 0x61, 0xD7, 0x55, 0x22, 0xA4, 0x66, @@ -440,11 +446,11 @@ static struct hash_testvec wp384_tv_template[] = { }, { .plaintext = "message digest", .psize = 14, - .digest = { 0x37, 0x8C, 0x84, 0xA4, 0x12, 0x6E, 0x2D, 0xC6, - 0xE5, 0x6D, 0xCC, 0x74, 0x58, 0x37, 0x7A, 0xAC, - 0x83, 0x8D, 0x00, 0x03, 0x22, 0x30, 0xF5, 0x3C, - 0xE1, 0xF5, 0x70, 0x0C, 0x0F, 0xFB, 0x4D, 0x3B, - 0x84, 0x21, 0x55, 0x76, 0x59, 0xEF, 0x55, 0xC1, + .digest = { 0x37, 0x8C, 0x84, 0xA4, 0x12, 0x6E, 0x2D, 0xC6, + 0xE5, 0x6D, 0xCC, 0x74, 0x58, 0x37, 0x7A, 0xAC, + 0x83, 0x8D, 0x00, 0x03, 0x22, 0x30, 0xF5, 0x3C, + 0xE1, 0xF5, 0x70, 0x0C, 0x0F, 0xFB, 0x4D, 0x3B, + 0x84, 0x21, 0x55, 0x76, 0x59, 0xEF, 0x55, 0xC1, 0x06, 0xB4, 0xB5, 0x2A, 0xC5, 0xA4, 0xAA, 0xA6 }, }, { .plaintext = "abcdefghijklmnopqrstuvwxyz", @@ -478,7 +484,7 @@ static struct hash_testvec wp384_tv_template[] = { }, { .plaintext = "abcdbcdecdefdefgefghfghighijhijk", .psize = 32, - .digest = { 0x2A, 0x98, 0x7E, 0xA4, 0x0F, 0x91, 0x70, 0x61, + .digest = { 0x2A, 0x98, 0x7E, 0xA4, 0x0F, 0x91, 0x70, 0x61, 0xF5, 0xD6, 0xF0, 0xA0, 0xE4, 0x64, 0x4F, 0x48, 0x8A, 0x7A, 0x5A, 0x52, 0xDE, 0xEE, 0x65, 0x62, 0x07, 0xC5, 0x62, 0xF9, 0x88, 0xE9, 0x5C, 0x69, @@ -490,7 +496,7 @@ static struct hash_testvec wp384_tv_template[] = { #define WP256_TEST_VECTORS 8 static struct hash_testvec wp256_tv_template[] = { - { + { .plaintext = "", .psize = 0, .digest = { 0x19, 0xFA, 0x61, 0xD7, 0x55, 0x22, 0xA4, 0x66, @@ -516,9 +522,9 @@ static struct hash_testvec wp256_tv_template[] = { }, { .plaintext = "message digest", .psize = 14, - .digest = { 0x37, 0x8C, 0x84, 0xA4, 0x12, 0x6E, 0x2D, 0xC6, - 0xE5, 0x6D, 0xCC, 0x74, 0x58, 0x37, 0x7A, 0xAC, - 0x83, 0x8D, 0x00, 0x03, 0x22, 0x30, 0xF5, 0x3C, + .digest = { 0x37, 0x8C, 0x84, 0xA4, 0x12, 0x6E, 0x2D, 0xC6, + 0xE5, 0x6D, 0xCC, 0x74, 0x58, 0x37, 0x7A, 0xAC, + 0x83, 0x8D, 0x00, 0x03, 0x22, 0x30, 0xF5, 0x3C, 0xE1, 0xF5, 0x70, 0x0C, 0x0F, 0xFB, 0x4D, 0x3B }, }, { .plaintext = "abcdefghijklmnopqrstuvwxyz", @@ -546,7 +552,7 @@ static struct hash_testvec wp256_tv_template[] = { }, { .plaintext = "abcdbcdecdefdefgefghfghighijhijk", .psize = 32, - .digest = { 0x2A, 0x98, 0x7E, 0xA4, 0x0F, 0x91, 0x70, 0x61, + .digest = { 0x2A, 0x98, 0x7E, 0xA4, 0x0F, 0x91, 0x70, 0x61, 0xF5, 0xD6, 0xF0, 0xA0, 0xE4, 0x64, 0x4F, 0x48, 0x8A, 0x7A, 0x5A, 0x52, 0xDE, 0xEE, 0x65, 0x62, 0x07, 0xC5, 0x62, 0xF9, 0x88, 0xE9, 0x5C, 0x69 }, @@ -554,7 +560,7 @@ static struct hash_testvec wp256_tv_template[] = { }; /* - * TIGER test vectors from Tiger website + * TIGER test vectors from Tiger website */ #define TGR192_TEST_VECTORS 6 @@ -693,7 +699,7 @@ static struct hash_testvec tgr128_tv_template[] = { #define HMAC_MD5_TEST_VECTORS 7 static struct hmac_testvec hmac_md5_tv_template[] = -{ +{ { .key = { [0 ... 15] = 0x0b }, .ksize = 16, @@ -756,7 +762,7 @@ static struct hmac_testvec hmac_md5_tv_template[] = */ #define HMAC_SHA1_TEST_VECTORS 7 -static struct hmac_testvec hmac_sha1_tv_template[] = { +static struct hmac_testvec hmac_sha1_tv_template[] = { { .key = { [0 ... 19] = 0x0b }, .ksize = 20, @@ -766,11 +772,11 @@ static struct hmac_testvec hmac_sha1_tv_template[] = { 0xe2, 0x8b, 0xc0, 0xb6, 0xfb, 0x37, 0x8c, 0x8e, 0xf1, 0x46, 0xbe }, }, { - .key = { 'J', 'e', 'f', 'e' }, + .key = { 'J', 'e', 'f', 'e' }, .ksize = 4, .plaintext = "what do ya want for nothing?", .psize = 28, - .digest = { 0xef, 0xfc, 0xdf, 0x6a, 0xe5, 0xeb, 0x2f, 0xa2, 0xd2, 0x74, + .digest = { 0xef, 0xfc, 0xdf, 0x6a, 0xe5, 0xeb, 0x2f, 0xa2, 0xd2, 0x74, 0x16, 0xd5, 0xf1, 0x84, 0xdf, 0x9c, 0x25, 0x9a, 0x7c, 0x79 }, .np = 2, .tap = { 14, 14 } @@ -779,30 +785,30 @@ static struct hmac_testvec hmac_sha1_tv_template[] = { .ksize = 20, .plaintext = { [0 ... 49] = 0xdd }, .psize = 50, - .digest = { 0x12, 0x5d, 0x73, 0x42, 0xb9, 0xac, 0x11, 0xcd, 0x91, 0xa3, + .digest = { 0x12, 0x5d, 0x73, 0x42, 0xb9, 0xac, 0x11, 0xcd, 0x91, 0xa3, 0x9a, 0xf4, 0x8a, 0xa1, 0x7b, 0x4f, 0x63, 0xf1, 0x75, 0xd3 }, }, { .key = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, - 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, + 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19 }, .ksize = 25, .plaintext = { [0 ... 49] = 0xcd }, .psize = 50, - .digest = { 0x4c, 0x90, 0x07, 0xf4, 0x02, 0x62, 0x50, 0xc6, 0xbc, 0x84, + .digest = { 0x4c, 0x90, 0x07, 0xf4, 0x02, 0x62, 0x50, 0xc6, 0xbc, 0x84, 0x14, 0xf9, 0xbf, 0x50, 0xc8, 0x6c, 0x2d, 0x72, 0x35, 0xda }, }, { .key = { [0 ... 19] = 0x0c }, .ksize = 20, .plaintext = "Test With Truncation", .psize = 20, - .digest = { 0x4c, 0x1a, 0x03, 0x42, 0x4b, 0x55, 0xe0, 0x7f, 0xe7, 0xf2, + .digest = { 0x4c, 0x1a, 0x03, 0x42, 0x4b, 0x55, 0xe0, 0x7f, 0xe7, 0xf2, 0x7b, 0xe1, 0xd5, 0x8b, 0xb9, 0x32, 0x4a, 0x9a, 0x5a, 0x04 }, }, { .key = { [0 ... 79] = 0xaa }, .ksize = 80, .plaintext = "Test Using Larger Than Block-Size Key - Hash Key First", .psize = 54, - .digest = { 0xaa, 0x4a, 0xe5, 0xe1, 0x52, 0x72, 0xd0, 0x0e, 0x95, 0x70, + .digest = { 0xaa, 0x4a, 0xe5, 0xe1, 0x52, 0x72, 0xd0, 0x0e, 0x95, 0x70, 0x56, 0x37, 0xce, 0x8a, 0x3b, 0x55, 0xed, 0x40, 0x21, 0x12 }, }, { .key = { [0 ... 79] = 0xaa }, @@ -810,7 +816,7 @@ static struct hmac_testvec hmac_sha1_tv_template[] = { .plaintext = "Test Using Larger Than Block-Size Key and Larger Than One " "Block-Size Data", .psize = 73, - .digest = { 0xe8, 0xe9, 0x9d, 0x0f, 0x45, 0x23, 0x7d, 0x78, 0x6d, 0x6b, + .digest = { 0xe8, 0xe9, 0x9d, 0x0f, 0x45, 0x23, 0x7d, 0x78, 0x6d, 0x6b, 0xba, 0xa7, 0x96, 0x5c, 0x78, 0x08, 0xbb, 0xff, 0x1a, 0x91 }, }, }; @@ -1011,7 +1017,7 @@ static struct cipher_testvec des_enc_tv_template[] = { 0xf7, 0x9c, 0x89, 0x2a, 0x33, 0x8f, 0x4a, 0x8b }, .rlen = 32, .np = 3, - .tap = { 14, 10, 8 } + .tap = { 14, 10, 8 } }, { .key = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef }, .klen = 8, @@ -1024,7 +1030,7 @@ static struct cipher_testvec des_enc_tv_template[] = { 0xb4, 0x99, 0x26, 0xf7, 0x1f, 0xe1, 0xd4, 0x90 }, .rlen = 24, .np = 4, - .tap = { 2, 1, 3, 18 } + .tap = { 2, 1, 3, 18 } }, { .key = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef }, .klen = 8, @@ -1035,7 +1041,7 @@ static struct cipher_testvec des_enc_tv_template[] = { 0xf7, 0x9c, 0x89, 0x2a, 0x33, 0x8f, 0x4a, 0x8b }, .rlen = 16, .np = 5, - .tap = { 2, 2, 2, 2, 8 } + .tap = { 2, 2, 2, 2, 8 } }, { .key = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef }, .klen = 8, @@ -1044,7 +1050,7 @@ static struct cipher_testvec des_enc_tv_template[] = { .result = { 0xc9, 0x57, 0x44, 0x25, 0x6a, 0x5e, 0xd3, 0x1d }, .rlen = 8, .np = 8, - .tap = { 1, 1, 1, 1, 1, 1, 1, 1 } + .tap = { 1, 1, 1, 1, 1, 1, 1, 1 } }, }; @@ -1057,7 +1063,7 @@ static struct cipher_testvec des_dec_tv_template[] = { .result = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xe7 }, .rlen = 8, }, { /* Sbox test from NBS */ - .key = { 0x7c, 0xa1, 0x10, 0x45, 0x4a, 0x1a, 0x6e, 0x57 }, + .key = { 0x7c, 0xa1, 0x10, 0x45, 0x4a, 0x1a, 0x6e, 0x57 }, .klen = 8, .input = { 0x69, 0x0f, 0x5b, 0x0d, 0x9a, 0x26, 0x93, 0x9b }, .ilen = 8, @@ -1092,19 +1098,19 @@ static struct cipher_testvec des_cbc_enc_tv_template[] = { { /* From OpenSSL */ .key = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef}, .klen = 8, - .iv = { 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10}, - .input = { 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x20, - 0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, - 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20 }, + .iv = { 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10}, + .input = { 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x20, + 0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, + 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20 }, .ilen = 24, - .result = { 0xcc, 0xd1, 0x73, 0xff, 0xab, 0x20, 0x39, 0xf4, - 0xac, 0xd8, 0xae, 0xfd, 0xdf, 0xd8, 0xa1, 0xeb, - 0x46, 0x8e, 0x91, 0x15, 0x78, 0x88, 0xba, 0x68 }, + .result = { 0xcc, 0xd1, 0x73, 0xff, 0xab, 0x20, 0x39, 0xf4, + 0xac, 0xd8, 0xae, 0xfd, 0xdf, 0xd8, 0xa1, 0xeb, + 0x46, 0x8e, 0x91, 0x15, 0x78, 0x88, 0xba, 0x68 }, .rlen = 24, }, { /* FIPS Pub 81 */ .key = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef }, .klen = 8, - .iv = { 0x12, 0x34, 0x56, 0x78, 0x90, 0xab, 0xcd, 0xef }, + .iv = { 0x12, 0x34, 0x56, 0x78, 0x90, 0xab, 0xcd, 0xef }, .input = { 0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74 }, .ilen = 8, .result = { 0xe5, 0xc7, 0xcd, 0xde, 0x87, 0x2b, 0xf2, 0x7c }, @@ -1117,7 +1123,7 @@ static struct cipher_testvec des_cbc_enc_tv_template[] = { .ilen = 8, .result = { 0x43, 0xe9, 0x34, 0x00, 0x8c, 0x38, 0x9c, 0x0f }, .rlen = 8, - }, { + }, { .key = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef }, .klen = 8, .iv = { 0x43, 0xe9, 0x34, 0x00, 0x8c, 0x38, 0x9c, 0x0f }, @@ -1125,18 +1131,18 @@ static struct cipher_testvec des_cbc_enc_tv_template[] = { .ilen = 8, .result = { 0x68, 0x37, 0x88, 0x49, 0x9a, 0x7c, 0x05, 0xf6 }, .rlen = 8, - }, { /* Copy of openssl vector for chunk testing */ + }, { /* Copy of openssl vector for chunk testing */ /* From OpenSSL */ .key = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef}, .klen = 8, - .iv = { 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10}, - .input = { 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x20, - 0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, - 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20 }, + .iv = { 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10}, + .input = { 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x20, + 0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, + 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20 }, .ilen = 24, - .result = { 0xcc, 0xd1, 0x73, 0xff, 0xab, 0x20, 0x39, 0xf4, - 0xac, 0xd8, 0xae, 0xfd, 0xdf, 0xd8, 0xa1, 0xeb, - 0x46, 0x8e, 0x91, 0x15, 0x78, 0x88, 0xba, 0x68 }, + .result = { 0xcc, 0xd1, 0x73, 0xff, 0xab, 0x20, 0x39, 0xf4, + 0xac, 0xd8, 0xae, 0xfd, 0xdf, 0xd8, 0xa1, 0xeb, + 0x46, 0x8e, 0x91, 0x15, 0x78, 0x88, 0xba, 0x68 }, .rlen = 24, .np = 2, .tap = { 13, 11 } @@ -1155,24 +1161,24 @@ static struct cipher_testvec des_cbc_dec_tv_template[] = { }, { .key = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef }, .klen = 8, - .iv = { 0xe5, 0xc7, 0xcd, 0xde, 0x87, 0x2b, 0xf2, 0x7c }, + .iv = { 0xe5, 0xc7, 0xcd, 0xde, 0x87, 0x2b, 0xf2, 0x7c }, .input = { 0x43, 0xe9, 0x34, 0x00, 0x8c, 0x38, 0x9c, 0x0f }, .ilen = 8, - .result = { 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20 }, + .result = { 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20 }, .rlen = 8, }, { .key = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef }, .klen = 8, .iv = { 0x43, 0xe9, 0x34, 0x00, 0x8c, 0x38, 0x9c, 0x0f }, - .input = { 0x68, 0x37, 0x88, 0x49, 0x9a, 0x7c, 0x05, 0xf6 }, + .input = { 0x68, 0x37, 0x88, 0x49, 0x9a, 0x7c, 0x05, 0xf6 }, .ilen = 8, .result = { 0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20 }, .rlen = 8, - }, { /* Copy of above, for chunk testing */ + }, { /* Copy of above, for chunk testing */ .key = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef }, .klen = 8, .iv = { 0x43, 0xe9, 0x34, 0x00, 0x8c, 0x38, 0x9c, 0x0f }, - .input = { 0x68, 0x37, 0x88, 0x49, 0x9a, 0x7c, 0x05, 0xf6 }, + .input = { 0x68, 0x37, 0x88, 0x49, 0x9a, 0x7c, 0x05, 0xf6 }, .ilen = 8, .result = { 0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20 }, .rlen = 8, @@ -1276,7 +1282,7 @@ static struct cipher_testvec bf_enc_tv_template[] = { .ilen = 8, .result = { 0xe8, 0x7a, 0x24, 0x4e, 0x2c, 0xc8, 0x5e, 0x82 }, .rlen = 8, - }, { /* Vary the keylength... */ + }, { /* Vary the keylength... */ .key = { 0xf0, 0xe1, 0xd2, 0xc3, 0xb4, 0xa5, 0x96, 0x87, 0x78, 0x69, 0x5a, 0x4b, 0x3c, 0x2d, 0x1e, 0x0f }, .klen = 16, @@ -1297,9 +1303,9 @@ static struct cipher_testvec bf_enc_tv_template[] = { .key = { 0xf0, 0xe1, 0xd2, 0xc3, 0xb4, 0xa5, 0x96, 0x87, 0x78, 0x69, 0x5a, 0x4b, 0x3c, 0x2d, 0x1e, 0x0f, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, - 0x04, 0x68, 0x91, 0x04, 0xc2, 0xfd, 0x3b, 0x2f, - 0x58, 0x40, 0x23, 0x64, 0x1a, 0xba, 0x61, 0x76, - 0x1f, 0x1f, 0x1f, 0x1f, 0x0e, 0x0e, 0x0e, 0x0e, + 0x04, 0x68, 0x91, 0x04, 0xc2, 0xfd, 0x3b, 0x2f, + 0x58, 0x40, 0x23, 0x64, 0x1a, 0xba, 0x61, 0x76, + 0x1f, 0x1f, 0x1f, 0x1f, 0x0e, 0x0e, 0x0e, 0x0e, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, .klen = 56, .input = { 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10 }, @@ -1331,7 +1337,7 @@ static struct cipher_testvec bf_dec_tv_template[] = { .ilen = 8, .result = { 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10 }, .rlen = 8, - }, { /* Vary the keylength... */ + }, { /* Vary the keylength... */ .key = { 0xf0, 0xe1, 0xd2, 0xc3, 0xb4, 0xa5, 0x96, 0x87, 0x78, 0x69, 0x5a, 0x4b, 0x3c, 0x2d, 0x1e, 0x0f }, .klen = 16, @@ -1352,9 +1358,9 @@ static struct cipher_testvec bf_dec_tv_template[] = { .key = { 0xf0, 0xe1, 0xd2, 0xc3, 0xb4, 0xa5, 0x96, 0x87, 0x78, 0x69, 0x5a, 0x4b, 0x3c, 0x2d, 0x1e, 0x0f, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, - 0x04, 0x68, 0x91, 0x04, 0xc2, 0xfd, 0x3b, 0x2f, - 0x58, 0x40, 0x23, 0x64, 0x1a, 0xba, 0x61, 0x76, - 0x1f, 0x1f, 0x1f, 0x1f, 0x0e, 0x0e, 0x0e, 0x0e, + 0x04, 0x68, 0x91, 0x04, 0xc2, 0xfd, 0x3b, 0x2f, + 0x58, 0x40, 0x23, 0x64, 0x1a, 0xba, 0x61, 0x76, + 0x1f, 0x1f, 0x1f, 0x1f, 0x0e, 0x0e, 0x0e, 0x0e, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, .klen = 56, .input = { 0xc0, 0x45, 0x04, 0x01, 0x2e, 0x4e, 0x1f, 0x53 }, @@ -1369,7 +1375,7 @@ static struct cipher_testvec bf_cbc_enc_tv_template[] = { .key = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xf0, 0xe1, 0xd2, 0xc3, 0xb4, 0xa5, 0x96, 0x87 }, .klen = 16, - .iv = { 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10 }, + .iv = { 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10 }, .input = { 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x20, 0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20, @@ -1388,7 +1394,7 @@ static struct cipher_testvec bf_cbc_dec_tv_template[] = { .key = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xf0, 0xe1, 0xd2, 0xc3, 0xb4, 0xa5, 0x96, 0x87 }, .klen = 16, - .iv = { 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10 }, + .iv = { 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10 }, .input = { 0x6b, 0x77, 0xb4, 0xd6, 0x30, 0x06, 0xde, 0xe6, 0x05, 0xb1, 0x56, 0xe2, 0x74, 0x03, 0x97, 0x93, 0x58, 0xde, 0xb9, 0xe7, 0x15, 0x46, 0x16, 0xd9, @@ -1490,7 +1496,7 @@ static struct cipher_testvec tf_cbc_enc_tv_template[] = { .key = { [0 ... 15] = 0x00 }, .klen = 16, .iv = { 0x9f, 0x58, 0x9f, 0x5c, 0xf6, 0x12, 0x2c, 0x32, - 0xb6, 0xbf, 0xec, 0x2f, 0x2a, 0xe8, 0xc3, 0x5a }, + 0xb6, 0xbf, 0xec, 0x2f, 0x2a, 0xe8, 0xc3, 0x5a }, .input = { [0 ... 15] = 0x00 }, .ilen = 16, .result = { 0xd4, 0x91, 0xdb, 0x16, 0xe7, 0xb1, 0xc3, 0x9e, @@ -1528,7 +1534,7 @@ static struct cipher_testvec tf_cbc_dec_tv_template[] = { .klen = 16, .iv = { [0 ... 15] = 0x00 }, .input = { 0x9f, 0x58, 0x9f, 0x5c, 0xf6, 0x12, 0x2c, 0x32, - 0xb6, 0xbf, 0xec, 0x2f, 0x2a, 0xe8, 0xc3, 0x5a }, + 0xb6, 0xbf, 0xec, 0x2f, 0x2a, 0xe8, 0xc3, 0x5a }, .ilen = 16, .result = { [0 ... 15] = 0x00 }, .rlen = 16, @@ -1578,8 +1584,7 @@ static struct cipher_testvec tf_cbc_dec_tv_template[] = { #define TNEPRES_ENC_TEST_VECTORS 4 #define TNEPRES_DEC_TEST_VECTORS 4 -static struct cipher_testvec serpent_enc_tv_template[] = -{ +static struct cipher_testvec serpent_enc_tv_template[] = { { .input = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, @@ -1620,8 +1625,7 @@ static struct cipher_testvec serpent_enc_tv_template[] = }, }; -static struct cipher_testvec tnepres_enc_tv_template[] = -{ +static struct cipher_testvec tnepres_enc_tv_template[] = { { /* KeySize=128, PT=0, I=1 */ .input = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, @@ -1629,7 +1633,7 @@ static struct cipher_testvec tnepres_enc_tv_template[] = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, .klen = 16, .ilen = 16, - .result = { 0x49, 0xaf, 0xbf, 0xad, 0x9d, 0x5a, 0x34, 0x05, + .result = { 0x49, 0xaf, 0xbf, 0xad, 0x9d, 0x5a, 0x34, 0x05, 0x2c, 0xd8, 0xff, 0xa5, 0x98, 0x6b, 0xd2, 0xdd }, .rlen = 16, }, { /* KeySize=192, PT=0, I=1 */ @@ -1640,7 +1644,7 @@ static struct cipher_testvec tnepres_enc_tv_template[] = .input = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, .ilen = 16, - .result = { 0xe7, 0x8e, 0x54, 0x02, 0xc7, 0x19, 0x55, 0x68, + .result = { 0xe7, 0x8e, 0x54, 0x02, 0xc7, 0x19, 0x55, 0x68, 0xac, 0x36, 0x78, 0xf7, 0xa3, 0xf6, 0x0c, 0x66 }, .rlen = 16, }, { /* KeySize=256, PT=0, I=1 */ @@ -1652,7 +1656,7 @@ static struct cipher_testvec tnepres_enc_tv_template[] = .input = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, .ilen = 16, - .result = { 0xab, 0xed, 0x96, 0xe7, 0x66, 0xbf, 0x28, 0xcb, + .result = { 0xab, 0xed, 0x96, 0xe7, 0x66, 0xbf, 0x28, 0xcb, 0xc0, 0xeb, 0xd2, 0x1a, 0x82, 0xef, 0x08, 0x19 }, .rlen = 16, }, { /* KeySize=256, I=257 */ @@ -1664,15 +1668,14 @@ static struct cipher_testvec tnepres_enc_tv_template[] = .input = { 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 }, .ilen = 16, - .result = { 0x5c, 0xe7, 0x1c, 0x70, 0xd2, 0x88, 0x2e, 0x5b, + .result = { 0x5c, 0xe7, 0x1c, 0x70, 0xd2, 0x88, 0x2e, 0x5b, 0xb8, 0x32, 0xe4, 0x33, 0xf8, 0x9f, 0x26, 0xde }, .rlen = 16, }, }; -static struct cipher_testvec serpent_dec_tv_template[] = -{ +static struct cipher_testvec serpent_dec_tv_template[] = { { .input = { 0x12, 0x07, 0xfc, 0xce, 0x9b, 0xd0, 0xd6, 0x47, 0x6a, 0xe9, 0x8f, 0xbe, 0xd1, 0x43, 0xa0, 0xe2 }, @@ -1713,8 +1716,7 @@ static struct cipher_testvec serpent_dec_tv_template[] = }, }; -static struct cipher_testvec tnepres_dec_tv_template[] = -{ +static struct cipher_testvec tnepres_dec_tv_template[] = { { .input = { 0x41, 0xcc, 0x6b, 0x31, 0x59, 0x31, 0x45, 0x97, 0x6d, 0x6f, 0xbb, 0x38, 0x4b, 0x37, 0x21, 0x28 }, @@ -1726,7 +1728,7 @@ static struct cipher_testvec tnepres_dec_tv_template[] = .key = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, .klen = 16, - .input = { 0xea, 0xf4, 0xd7, 0xfc, 0xd8, 0x01, 0x34, 0x47, + .input = { 0xea, 0xf4, 0xd7, 0xfc, 0xd8, 0x01, 0x34, 0x47, 0x81, 0x45, 0x0b, 0xfa, 0x0c, 0xd6, 0xad, 0x6e }, .ilen = 16, .result = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, @@ -1738,7 +1740,7 @@ static struct cipher_testvec tnepres_dec_tv_template[] = 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }, .klen = 32, - .input = { 0x64, 0xa9, 0x1a, 0x37, 0xed, 0x9f, 0xe7, 0x49, + .input = { 0x64, 0xa9, 0x1a, 0x37, 0xed, 0x9f, 0xe7, 0x49, 0xa8, 0x4e, 0x76, 0xd6, 0xf5, 0x0d, 0x78, 0xee }, .ilen = 16, .result = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, @@ -1747,7 +1749,7 @@ static struct cipher_testvec tnepres_dec_tv_template[] = }, { /* KeySize=128, I=121 */ .key = { [15] = 0x80 }, .klen = 16, - .input = { 0x3d, 0xda, 0xbf, 0xc0, 0x06, 0xda, 0xab, 0x06, + .input = { 0x3d, 0xda, 0xbf, 0xc0, 0x06, 0xda, 0xab, 0x06, 0x46, 0x2a, 0xf4, 0xef, 0x81, 0x54, 0x4e, 0x26 }, .ilen = 16, .result = { [0 ... 15] = 0x00 }, @@ -1760,58 +1762,56 @@ static struct cipher_testvec tnepres_dec_tv_template[] = #define CAST6_ENC_TEST_VECTORS 3 #define CAST6_DEC_TEST_VECTORS 3 -static struct cipher_testvec cast6_enc_tv_template[] = -{ +static struct cipher_testvec cast6_enc_tv_template[] = { { - .key = { 0x23, 0x42, 0xbb, 0x9e, 0xfa, 0x38, 0x54, 0x2c, + .key = { 0x23, 0x42, 0xbb, 0x9e, 0xfa, 0x38, 0x54, 0x2c, 0x0a, 0xf7, 0x56, 0x47, 0xf2, 0x9f, 0x61, 0x5d }, .klen = 16, .input = { [0 ... 15] = 0x00 }, .ilen = 16, - .result = { 0xc8, 0x42, 0xa0, 0x89, 0x72, 0xb4, 0x3d, 0x20, + .result = { 0xc8, 0x42, 0xa0, 0x89, 0x72, 0xb4, 0x3d, 0x20, 0x83, 0x6c, 0x91, 0xd1, 0xb7, 0x53, 0x0f, 0x6b }, .rlen = 16, }, { - .key = { 0x23, 0x42, 0xbb, 0x9e, 0xfa, 0x38, 0x54, 0x2c, - 0xbe, 0xd0, 0xac, 0x83, 0x94, 0x0a, 0xc2, 0x98, + .key = { 0x23, 0x42, 0xbb, 0x9e, 0xfa, 0x38, 0x54, 0x2c, + 0xbe, 0xd0, 0xac, 0x83, 0x94, 0x0a, 0xc2, 0x98, 0xba, 0xc7, 0x7a, 0x77, 0x17, 0x94, 0x28, 0x63 }, .klen = 24, .input = { [0 ... 15] = 0x00 }, .ilen = 16, - .result = { 0x1b, 0x38, 0x6c, 0x02, 0x10, 0xdc, 0xad, 0xcb, + .result = { 0x1b, 0x38, 0x6c, 0x02, 0x10, 0xdc, 0xad, 0xcb, 0xdd, 0x0e, 0x41, 0xaa, 0x08, 0xa7, 0xa7, 0xe8 }, .rlen = 16, }, { .key = { 0x23, 0x42, 0xbb, 0x9e, 0xfa, 0x38, 0x54, 0x2c, 0xbe, 0xd0, 0xac, 0x83, 0x94, 0x0a, 0xc2, 0x98, 0x8d, 0x7c, 0x47, 0xce, 0x26, 0x49, 0x08, 0x46, - 0x1c, 0xc1, 0xb5, 0x13, 0x7a, 0xe6, 0xb6, 0x04 }, + 0x1c, 0xc1, 0xb5, 0x13, 0x7a, 0xe6, 0xb6, 0x04 }, .klen = 32, .input = { [0 ... 15] = 0x00 }, .ilen = 16, - .result = { 0x4f, 0x6a, 0x20, 0x38, 0x28, 0x68, 0x97, 0xb9, + .result = { 0x4f, 0x6a, 0x20, 0x38, 0x28, 0x68, 0x97, 0xb9, 0xc9, 0x87, 0x01, 0x36, 0x55, 0x33, 0x17, 0xfa }, .rlen = 16, }, }; -static struct cipher_testvec cast6_dec_tv_template[] = -{ +static struct cipher_testvec cast6_dec_tv_template[] = { { - .key = { 0x23, 0x42, 0xbb, 0x9e, 0xfa, 0x38, 0x54, 0x2c, + .key = { 0x23, 0x42, 0xbb, 0x9e, 0xfa, 0x38, 0x54, 0x2c, 0x0a, 0xf7, 0x56, 0x47, 0xf2, 0x9f, 0x61, 0x5d }, .klen = 16, - .input = { 0xc8, 0x42, 0xa0, 0x89, 0x72, 0xb4, 0x3d, 0x20, + .input = { 0xc8, 0x42, 0xa0, 0x89, 0x72, 0xb4, 0x3d, 0x20, 0x83, 0x6c, 0x91, 0xd1, 0xb7, 0x53, 0x0f, 0x6b }, .ilen = 16, .result = { [0 ... 15] = 0x00 }, .rlen = 16, }, { - .key = { 0x23, 0x42, 0xbb, 0x9e, 0xfa, 0x38, 0x54, 0x2c, - 0xbe, 0xd0, 0xac, 0x83, 0x94, 0x0a, 0xc2, 0x98, + .key = { 0x23, 0x42, 0xbb, 0x9e, 0xfa, 0x38, 0x54, 0x2c, + 0xbe, 0xd0, 0xac, 0x83, 0x94, 0x0a, 0xc2, 0x98, 0xba, 0xc7, 0x7a, 0x77, 0x17, 0x94, 0x28, 0x63 }, .klen = 24, - .input = { 0x1b, 0x38, 0x6c, 0x02, 0x10, 0xdc, 0xad, 0xcb, + .input = { 0x1b, 0x38, 0x6c, 0x02, 0x10, 0xdc, 0xad, 0xcb, 0xdd, 0x0e, 0x41, 0xaa, 0x08, 0xa7, 0xa7, 0xe8 }, .ilen = 16, .result = { [0 ... 15] = 0x00 }, @@ -1820,9 +1820,9 @@ static struct cipher_testvec cast6_dec_tv_template[] = .key = { 0x23, 0x42, 0xbb, 0x9e, 0xfa, 0x38, 0x54, 0x2c, 0xbe, 0xd0, 0xac, 0x83, 0x94, 0x0a, 0xc2, 0x98, 0x8d, 0x7c, 0x47, 0xce, 0x26, 0x49, 0x08, 0x46, - 0x1c, 0xc1, 0xb5, 0x13, 0x7a, 0xe6, 0xb6, 0x04 }, + 0x1c, 0xc1, 0xb5, 0x13, 0x7a, 0xe6, 0xb6, 0x04 }, .klen = 32, - .input = { 0x4f, 0x6a, 0x20, 0x38, 0x28, 0x68, 0x97, 0xb9, + .input = { 0x4f, 0x6a, 0x20, 0x38, 0x28, 0x68, 0x97, 0xb9, 0xc9, 0x87, 0x01, 0x36, 0x55, 0x33, 0x17, 0xfa }, .ilen = 16, .result = { [0 ... 15] = 0x00 }, @@ -1837,9 +1837,9 @@ static struct cipher_testvec cast6_dec_tv_template[] = #define AES_ENC_TEST_VECTORS 3 #define AES_DEC_TEST_VECTORS 3 -static struct cipher_testvec aes_enc_tv_template[] = { +static struct cipher_testvec aes_enc_tv_template[] = { { /* From FIPS-197 */ - .key = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + .key = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, .klen = 16, .input = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, @@ -1853,7 +1853,7 @@ static struct cipher_testvec aes_enc_tv_template[] = { 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 }, .klen = 24, - .input = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + .input = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff }, .ilen = 16, .result = { 0xdd, 0xa9, 0x7c, 0xa4, 0x86, 0x4c, 0xdf, 0xe0, @@ -1865,7 +1865,7 @@ static struct cipher_testvec aes_enc_tv_template[] = { 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f }, .klen = 32, - .input = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + .input = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff }, .ilen = 16, .result = { 0x8e, 0xa2, 0xb7, 0xca, 0x51, 0x67, 0x45, 0xbf, @@ -1874,9 +1874,9 @@ static struct cipher_testvec aes_enc_tv_template[] = { }, }; -static struct cipher_testvec aes_dec_tv_template[] = { +static struct cipher_testvec aes_dec_tv_template[] = { { /* From FIPS-197 */ - .key = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + .key = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, .klen = 16, .input = { 0x69, 0xc4, 0xe0, 0xd8, 0x6a, 0x7b, 0x04, 0x30, @@ -1893,8 +1893,8 @@ static struct cipher_testvec aes_dec_tv_template[] = { .input = { 0xdd, 0xa9, 0x7c, 0xa4, 0x86, 0x4c, 0xdf, 0xe0, 0x6e, 0xaf, 0x70, 0xa0, 0xec, 0x0d, 0x71, 0x91 }, .ilen = 16, - .result = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, - 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff }, + .result = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff }, .rlen = 16, }, { .key = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, @@ -1905,7 +1905,7 @@ static struct cipher_testvec aes_dec_tv_template[] = { .input = { 0x8e, 0xa2, 0xb7, 0xca, 0x51, 0x67, 0x45, 0xbf, 0xea, 0xfc, 0x49, 0x90, 0x4b, 0x49, 0x60, 0x89 }, .ilen = 16, - .result = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + .result = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff }, .rlen = 16, }, @@ -1915,8 +1915,7 @@ static struct cipher_testvec aes_dec_tv_template[] = { #define CAST5_ENC_TEST_VECTORS 3 #define CAST5_DEC_TEST_VECTORS 3 -static struct cipher_testvec cast5_enc_tv_template[] = -{ +static struct cipher_testvec cast5_enc_tv_template[] = { { .key = { 0x01, 0x23, 0x45, 0x67, 0x12, 0x34, 0x56, 0x78, 0x23, 0x45, 0x67, 0x89, 0x34, 0x56, 0x78, 0x9a }, @@ -1943,8 +1942,7 @@ static struct cipher_testvec cast5_enc_tv_template[] = }, }; -static struct cipher_testvec cast5_dec_tv_template[] = -{ +static struct cipher_testvec cast5_dec_tv_template[] = { { .key = { 0x01, 0x23, 0x45, 0x67, 0x12, 0x34, 0x56, 0x78, 0x23, 0x45, 0x67, 0x89, 0x34, 0x56, 0x78, 0x9a }, @@ -1971,14 +1969,13 @@ static struct cipher_testvec cast5_dec_tv_template[] = }, }; -/* - * ARC4 test vectors from OpenSSL +/* + * ARC4 test vectors from OpenSSL */ #define ARC4_ENC_TEST_VECTORS 7 #define ARC4_DEC_TEST_VECTORS 7 -static struct cipher_testvec arc4_enc_tv_template[] = -{ +static struct cipher_testvec arc4_enc_tv_template[] = { { .key = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef }, .klen = 8, @@ -2044,8 +2041,7 @@ static struct cipher_testvec arc4_enc_tv_template[] = }, }; -static struct cipher_testvec arc4_dec_tv_template[] = -{ +static struct cipher_testvec arc4_dec_tv_template[] = { { .key = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef }, .klen = 8, @@ -2111,14 +2107,13 @@ static struct cipher_testvec arc4_dec_tv_template[] = }, }; -/* +/* * TEA test vectors */ #define TEA_ENC_TEST_VECTORS 4 #define TEA_DEC_TEST_VECTORS 4 -static struct cipher_testvec tea_enc_tv_template[] = -{ +static struct cipher_testvec tea_enc_tv_template[] = { { .key = { [0 ... 15] = 0x00 }, .klen = 16, @@ -2138,31 +2133,30 @@ static struct cipher_testvec tea_enc_tv_template[] = .key = { 0x09, 0x65, 0x43, 0x11, 0x66, 0x44, 0x39, 0x25, 0x51, 0x3a, 0x16, 0x10, 0x0a, 0x08, 0x12, 0x6e }, .klen = 16, - .input = { 0x6c, 0x6f, 0x6e, 0x67, 0x65, 0x72, 0x5f, 0x74, + .input = { 0x6c, 0x6f, 0x6e, 0x67, 0x65, 0x72, 0x5f, 0x74, 0x65, 0x73, 0x74, 0x5f, 0x76, 0x65, 0x63, 0x74 }, .ilen = 16, - .result = { 0xbe, 0x7a, 0xbb, 0x81, 0x95, 0x2d, 0x1f, 0x1e, + .result = { 0xbe, 0x7a, 0xbb, 0x81, 0x95, 0x2d, 0x1f, 0x1e, 0xdd, 0x89, 0xa1, 0x25, 0x04, 0x21, 0xdf, 0x95 }, .rlen = 16, }, { .key = { 0x4d, 0x76, 0x32, 0x17, 0x05, 0x3f, 0x75, 0x2c, 0x5d, 0x04, 0x16, 0x36, 0x15, 0x72, 0x63, 0x2f }, .klen = 16, - .input = { 0x54, 0x65, 0x61, 0x20, 0x69, 0x73, 0x20, 0x67, - 0x6f, 0x6f, 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, - 0x79, 0x6f, 0x75, 0x21, 0x21, 0x21, 0x20, 0x72, + .input = { 0x54, 0x65, 0x61, 0x20, 0x69, 0x73, 0x20, 0x67, + 0x6f, 0x6f, 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, + 0x79, 0x6f, 0x75, 0x21, 0x21, 0x21, 0x20, 0x72, 0x65, 0x61, 0x6c, 0x6c, 0x79, 0x21, 0x21, 0x21 }, .ilen = 32, - .result = { 0xe0, 0x4d, 0x5d, 0x3c, 0xb7, 0x8c, 0x36, 0x47, - 0x94, 0x18, 0x95, 0x91, 0xa9, 0xfc, 0x49, 0xf8, - 0x44, 0xd1, 0x2d, 0xc2, 0x99, 0xb8, 0x08, 0x2a, + .result = { 0xe0, 0x4d, 0x5d, 0x3c, 0xb7, 0x8c, 0x36, 0x47, + 0x94, 0x18, 0x95, 0x91, 0xa9, 0xfc, 0x49, 0xf8, + 0x44, 0xd1, 0x2d, 0xc2, 0x99, 0xb8, 0x08, 0x2a, 0x07, 0x89, 0x73, 0xc2, 0x45, 0x92, 0xc6, 0x90 }, .rlen = 32, } }; -static struct cipher_testvec tea_dec_tv_template[] = -{ +static struct cipher_testvec tea_dec_tv_template[] = { { .key = { [0 ... 15] = 0x00 }, .klen = 16, @@ -2183,9 +2177,9 @@ static struct cipher_testvec tea_dec_tv_template[] = 0x51, 0x3a, 0x16, 0x10, 0x0a, 0x08, 0x12, 0x6e }, .klen = 16, .input = { 0xbe, 0x7a, 0xbb, 0x81, 0x95, 0x2d, 0x1f, 0x1e, - 0xdd, 0x89, 0xa1, 0x25, 0x04, 0x21, 0xdf, 0x95 }, - .ilen = 16, - .result = { 0x6c, 0x6f, 0x6e, 0x67, 0x65, 0x72, 0x5f, 0x74, + 0xdd, 0x89, 0xa1, 0x25, 0x04, 0x21, 0xdf, 0x95 }, + .ilen = 16, + .result = { 0x6c, 0x6f, 0x6e, 0x67, 0x65, 0x72, 0x5f, 0x74, 0x65, 0x73, 0x74, 0x5f, 0x76, 0x65, 0x63, 0x74 }, .rlen = 16, }, { @@ -2193,26 +2187,25 @@ static struct cipher_testvec tea_dec_tv_template[] = 0x5d, 0x04, 0x16, 0x36, 0x15, 0x72, 0x63, 0x2f }, .klen = 16, .input = { 0xe0, 0x4d, 0x5d, 0x3c, 0xb7, 0x8c, 0x36, 0x47, - 0x94, 0x18, 0x95, 0x91, 0xa9, 0xfc, 0x49, 0xf8, - 0x44, 0xd1, 0x2d, 0xc2, 0x99, 0xb8, 0x08, 0x2a, - 0x07, 0x89, 0x73, 0xc2, 0x45, 0x92, 0xc6, 0x90 }, + 0x94, 0x18, 0x95, 0x91, 0xa9, 0xfc, 0x49, 0xf8, + 0x44, 0xd1, 0x2d, 0xc2, 0x99, 0xb8, 0x08, 0x2a, + 0x07, 0x89, 0x73, 0xc2, 0x45, 0x92, 0xc6, 0x90 }, .ilen = 32, - .result = { 0x54, 0x65, 0x61, 0x20, 0x69, 0x73, 0x20, 0x67, - 0x6f, 0x6f, 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, - 0x79, 0x6f, 0x75, 0x21, 0x21, 0x21, 0x20, 0x72, + .result = { 0x54, 0x65, 0x61, 0x20, 0x69, 0x73, 0x20, 0x67, + 0x6f, 0x6f, 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, + 0x79, 0x6f, 0x75, 0x21, 0x21, 0x21, 0x20, 0x72, 0x65, 0x61, 0x6c, 0x6c, 0x79, 0x21, 0x21, 0x21 }, .rlen = 32, } }; -/* - * XTEA test vectors +/* + * XTEA test vectors */ #define XTEA_ENC_TEST_VECTORS 4 #define XTEA_DEC_TEST_VECTORS 4 -static struct cipher_testvec xtea_enc_tv_template[] = -{ +static struct cipher_testvec xtea_enc_tv_template[] = { { .key = { [0 ... 15] = 0x00 }, .klen = 16, @@ -2232,31 +2225,30 @@ static struct cipher_testvec xtea_enc_tv_template[] = .key = { 0x09, 0x65, 0x43, 0x11, 0x66, 0x44, 0x39, 0x25, 0x51, 0x3a, 0x16, 0x10, 0x0a, 0x08, 0x12, 0x6e }, .klen = 16, - .input = { 0x6c, 0x6f, 0x6e, 0x67, 0x65, 0x72, 0x5f, 0x74, + .input = { 0x6c, 0x6f, 0x6e, 0x67, 0x65, 0x72, 0x5f, 0x74, 0x65, 0x73, 0x74, 0x5f, 0x76, 0x65, 0x63, 0x74 }, .ilen = 16, - .result = { 0xe2, 0x04, 0xdb, 0xf2, 0x89, 0x85, 0x9e, 0xea, + .result = { 0xe2, 0x04, 0xdb, 0xf2, 0x89, 0x85, 0x9e, 0xea, 0x61, 0x35, 0xaa, 0xed, 0xb5, 0xcb, 0x71, 0x2c }, .rlen = 16, }, { .key = { 0x4d, 0x76, 0x32, 0x17, 0x05, 0x3f, 0x75, 0x2c, 0x5d, 0x04, 0x16, 0x36, 0x15, 0x72, 0x63, 0x2f }, .klen = 16, - .input = { 0x54, 0x65, 0x61, 0x20, 0x69, 0x73, 0x20, 0x67, - 0x6f, 0x6f, 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, - 0x79, 0x6f, 0x75, 0x21, 0x21, 0x21, 0x20, 0x72, + .input = { 0x54, 0x65, 0x61, 0x20, 0x69, 0x73, 0x20, 0x67, + 0x6f, 0x6f, 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, + 0x79, 0x6f, 0x75, 0x21, 0x21, 0x21, 0x20, 0x72, 0x65, 0x61, 0x6c, 0x6c, 0x79, 0x21, 0x21, 0x21 }, .ilen = 32, - .result = { 0x0b, 0x03, 0xcd, 0x8a, 0xbe, 0x95, 0xfd, 0xb1, - 0xc1, 0x44, 0x91, 0x0b, 0xa5, 0xc9, 0x1b, 0xb4, - 0xa9, 0xda, 0x1e, 0x9e, 0xb1, 0x3e, 0x2a, 0x8f, + .result = { 0x0b, 0x03, 0xcd, 0x8a, 0xbe, 0x95, 0xfd, 0xb1, + 0xc1, 0x44, 0x91, 0x0b, 0xa5, 0xc9, 0x1b, 0xb4, + 0xa9, 0xda, 0x1e, 0x9e, 0xb1, 0x3e, 0x2a, 0x8f, 0xea, 0xa5, 0x6a, 0x85, 0xd1, 0xf4, 0xa8, 0xa5 }, .rlen = 32, } }; -static struct cipher_testvec xtea_dec_tv_template[] = -{ +static struct cipher_testvec xtea_dec_tv_template[] = { { .key = { [0 ... 15] = 0x00 }, .klen = 16, @@ -2276,24 +2268,24 @@ static struct cipher_testvec xtea_dec_tv_template[] = .key = { 0x09, 0x65, 0x43, 0x11, 0x66, 0x44, 0x39, 0x25, 0x51, 0x3a, 0x16, 0x10, 0x0a, 0x08, 0x12, 0x6e }, .klen = 16, - .input = { 0xe2, 0x04, 0xdb, 0xf2, 0x89, 0x85, 0x9e, 0xea, + .input = { 0xe2, 0x04, 0xdb, 0xf2, 0x89, 0x85, 0x9e, 0xea, 0x61, 0x35, 0xaa, 0xed, 0xb5, 0xcb, 0x71, 0x2c }, .ilen = 16, - .result = { 0x6c, 0x6f, 0x6e, 0x67, 0x65, 0x72, 0x5f, 0x74, + .result = { 0x6c, 0x6f, 0x6e, 0x67, 0x65, 0x72, 0x5f, 0x74, 0x65, 0x73, 0x74, 0x5f, 0x76, 0x65, 0x63, 0x74 }, .rlen = 16, }, { .key = { 0x4d, 0x76, 0x32, 0x17, 0x05, 0x3f, 0x75, 0x2c, 0x5d, 0x04, 0x16, 0x36, 0x15, 0x72, 0x63, 0x2f }, .klen = 16, - .input = { 0x0b, 0x03, 0xcd, 0x8a, 0xbe, 0x95, 0xfd, 0xb1, - 0xc1, 0x44, 0x91, 0x0b, 0xa5, 0xc9, 0x1b, 0xb4, - 0xa9, 0xda, 0x1e, 0x9e, 0xb1, 0x3e, 0x2a, 0x8f, + .input = { 0x0b, 0x03, 0xcd, 0x8a, 0xbe, 0x95, 0xfd, 0xb1, + 0xc1, 0x44, 0x91, 0x0b, 0xa5, 0xc9, 0x1b, 0xb4, + 0xa9, 0xda, 0x1e, 0x9e, 0xb1, 0x3e, 0x2a, 0x8f, 0xea, 0xa5, 0x6a, 0x85, 0xd1, 0xf4, 0xa8, 0xa5 }, .ilen = 32, - .result = { 0x54, 0x65, 0x61, 0x20, 0x69, 0x73, 0x20, 0x67, - 0x6f, 0x6f, 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, - 0x79, 0x6f, 0x75, 0x21, 0x21, 0x21, 0x20, 0x72, + .result = { 0x54, 0x65, 0x61, 0x20, 0x69, 0x73, 0x20, 0x67, + 0x6f, 0x6f, 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, + 0x79, 0x6f, 0x75, 0x21, 0x21, 0x21, 0x20, 0x72, 0x65, 0x61, 0x6c, 0x6c, 0x79, 0x21, 0x21, 0x21 }, .rlen = 32, } @@ -2305,9 +2297,9 @@ static struct cipher_testvec xtea_dec_tv_template[] = #define KHAZAD_ENC_TEST_VECTORS 5 #define KHAZAD_DEC_TEST_VECTORS 5 -static struct cipher_testvec khazad_enc_tv_template[] = { - { - .key = { 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +static struct cipher_testvec khazad_enc_tv_template[] = { + { + .key = { 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, .klen = 16, .input = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, @@ -2351,9 +2343,9 @@ static struct cipher_testvec khazad_enc_tv_template[] = { }, }; -static struct cipher_testvec khazad_dec_tv_template[] = { +static struct cipher_testvec khazad_dec_tv_template[] = { { - .key = { 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + .key = { 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, .klen = 16, .input = { 0X49, 0Xa4, 0Xce, 0X32, 0Xac, 0X19, 0X0e, 0X3f }, @@ -2697,8 +2689,7 @@ static struct comp_testvec deflate_decomp_tv_template[] = { */ #define MICHAEL_MIC_TEST_VECTORS 6 -static struct hash_testvec michael_mic_tv_template[] = -{ +static struct hash_testvec michael_mic_tv_template[] = { { .key = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, .ksize = 8, @@ -2743,4 +2734,88 @@ static struct hash_testvec michael_mic_tv_template[] = } }; +/* + * Cipher speed tests + */ +static struct cipher_speed aes_speed_template[] = { + { .klen = 16, .blen = 16, }, + { .klen = 16, .blen = 64, }, + { .klen = 16, .blen = 256, }, + { .klen = 16, .blen = 1024, }, + { .klen = 16, .blen = 8192, }, + { .klen = 24, .blen = 16, }, + { .klen = 24, .blen = 64, }, + { .klen = 24, .blen = 256, }, + { .klen = 24, .blen = 1024, }, + { .klen = 24, .blen = 8192, }, + { .klen = 32, .blen = 16, }, + { .klen = 32, .blen = 64, }, + { .klen = 32, .blen = 256, }, + { .klen = 32, .blen = 1024, }, + { .klen = 32, .blen = 8192, }, + + /* End marker */ + { .klen = 0, .blen = 0, } +}; + +static struct cipher_speed des3_ede_speed_template[] = { + { .klen = 24, .blen = 16, }, + { .klen = 24, .blen = 64, }, + { .klen = 24, .blen = 256, }, + { .klen = 24, .blen = 1024, }, + { .klen = 24, .blen = 8192, }, + + /* End marker */ + { .klen = 0, .blen = 0, } +}; + +static struct cipher_speed twofish_speed_template[] = { + { .klen = 16, .blen = 16, }, + { .klen = 16, .blen = 64, }, + { .klen = 16, .blen = 256, }, + { .klen = 16, .blen = 1024, }, + { .klen = 16, .blen = 8192, }, + { .klen = 24, .blen = 16, }, + { .klen = 24, .blen = 64, }, + { .klen = 24, .blen = 256, }, + { .klen = 24, .blen = 1024, }, + { .klen = 24, .blen = 8192, }, + { .klen = 32, .blen = 16, }, + { .klen = 32, .blen = 64, }, + { .klen = 32, .blen = 256, }, + { .klen = 32, .blen = 1024, }, + { .klen = 32, .blen = 8192, }, + + /* End marker */ + { .klen = 0, .blen = 0, } +}; + +static struct cipher_speed blowfish_speed_template[] = { + /* Don't support blowfish keys > 256 bit in this test */ + { .klen = 8, .blen = 16, }, + { .klen = 8, .blen = 64, }, + { .klen = 8, .blen = 256, }, + { .klen = 8, .blen = 1024, }, + { .klen = 8, .blen = 8192, }, + { .klen = 32, .blen = 16, }, + { .klen = 32, .blen = 64, }, + { .klen = 32, .blen = 256, }, + { .klen = 32, .blen = 1024, }, + { .klen = 32, .blen = 8192, }, + + /* End marker */ + { .klen = 0, .blen = 0, } +}; + +static struct cipher_speed des_speed_template[] = { + { .klen = 8, .blen = 16, }, + { .klen = 8, .blen = 64, }, + { .klen = 8, .blen = 256, }, + { .klen = 8, .blen = 1024, }, + { .klen = 8, .blen = 8192, }, + + /* End marker */ + { .klen = 0, .blen = 0, } +}; + #endif /* _CRYPTO_TCRYPT_H */ diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 43722af90bdd..c3fac7fd555e 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -270,10 +270,9 @@ int bus_add_device(struct device * dev) if (bus) { pr_debug("bus %s: add device %s\n", bus->name, dev->bus_id); - error = device_attach(dev); + device_attach(dev); klist_add_tail(&bus->klist_devices, &dev->knode_bus); - if (error >= 0) - error = device_add_attrs(bus, dev); + error = device_add_attrs(bus, dev); if (!error) { sysfs_create_link(&bus->devices.kobj, &dev->kobj, dev->bus_id); sysfs_create_link(&dev->kobj, &dev->bus->subsys.kset.kobj, "bus"); diff --git a/drivers/block/as-iosched.c b/drivers/block/as-iosched.c index 638db06de2be..3410b4d294b9 100644 --- a/drivers/block/as-iosched.c +++ b/drivers/block/as-iosched.c @@ -1871,20 +1871,22 @@ static int as_init_queue(request_queue_t *q, elevator_t *e) if (!arq_pool) return -ENOMEM; - ad = kmalloc(sizeof(*ad), GFP_KERNEL); + ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node); if (!ad) return -ENOMEM; memset(ad, 0, sizeof(*ad)); ad->q = q; /* Identify what queue the data belongs to */ - ad->hash = kmalloc(sizeof(struct list_head)*AS_HASH_ENTRIES,GFP_KERNEL); + ad->hash = kmalloc_node(sizeof(struct list_head)*AS_HASH_ENTRIES, + GFP_KERNEL, q->node); if (!ad->hash) { kfree(ad); return -ENOMEM; } - ad->arq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, arq_pool); + ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, + mempool_free_slab, arq_pool, q->node); if (!ad->arq_pool) { kfree(ad->hash); kfree(ad); diff --git a/drivers/block/deadline-iosched.c b/drivers/block/deadline-iosched.c index 7f79f3dd0165..4bc2fea73273 100644 --- a/drivers/block/deadline-iosched.c +++ b/drivers/block/deadline-iosched.c @@ -711,18 +711,20 @@ static int deadline_init_queue(request_queue_t *q, elevator_t *e) if (!drq_pool) return -ENOMEM; - dd = kmalloc(sizeof(*dd), GFP_KERNEL); + dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node); if (!dd) return -ENOMEM; memset(dd, 0, sizeof(*dd)); - dd->hash = kmalloc(sizeof(struct list_head)*DL_HASH_ENTRIES,GFP_KERNEL); + dd->hash = kmalloc_node(sizeof(struct list_head)*DL_HASH_ENTRIES, + GFP_KERNEL, q->node); if (!dd->hash) { kfree(dd); return -ENOMEM; } - dd->drq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, drq_pool); + dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, + mempool_free_slab, drq_pool, q->node); if (!dd->drq_pool) { kfree(dd->hash); kfree(dd); diff --git a/drivers/block/genhd.c b/drivers/block/genhd.c index 53f7d846b747..47fd3659a061 100644 --- a/drivers/block/genhd.c +++ b/drivers/block/genhd.c @@ -40,7 +40,7 @@ static inline int major_to_index(int major) #ifdef CONFIG_PROC_FS /* get block device names in somewhat random order */ -int get_blkdev_list(char *p) +int get_blkdev_list(char *p, int used) { struct blk_major_name *n; int i, len; @@ -49,10 +49,18 @@ int get_blkdev_list(char *p) down(&block_subsys_sem); for (i = 0; i < ARRAY_SIZE(major_names); i++) { - for (n = major_names[i]; n; n = n->next) + for (n = major_names[i]; n; n = n->next) { + /* + * If the curent string plus the 5 extra characters + * in the line would run us off the page, then we're done + */ + if ((len + used + strlen(n->name) + 5) >= PAGE_SIZE) + goto page_full; len += sprintf(p+len, "%3d %s\n", n->major, n->name); + } } +page_full: up(&block_subsys_sem); return len; @@ -582,10 +590,16 @@ struct seq_operations diskstats_op = { .show = diskstats_show }; - struct gendisk *alloc_disk(int minors) { - struct gendisk *disk = kmalloc(sizeof(struct gendisk), GFP_KERNEL); + return alloc_disk_node(minors, -1); +} + +struct gendisk *alloc_disk_node(int minors, int node_id) +{ + struct gendisk *disk; + + disk = kmalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id); if (disk) { memset(disk, 0, sizeof(struct gendisk)); if (!init_disk_stats(disk)) { @@ -594,7 +608,7 @@ struct gendisk *alloc_disk(int minors) } if (minors > 1) { int size = (minors - 1) * sizeof(struct hd_struct *); - disk->part = kmalloc(size, GFP_KERNEL); + disk->part = kmalloc_node(size, GFP_KERNEL, node_id); if (!disk->part) { kfree(disk); return NULL; @@ -610,6 +624,7 @@ struct gendisk *alloc_disk(int minors) } EXPORT_SYMBOL(alloc_disk); +EXPORT_SYMBOL(alloc_disk_node); struct kobject *get_disk(struct gendisk *disk) { diff --git a/drivers/block/ioctl.c b/drivers/block/ioctl.c index 6d7bcc9da9e7..6e278474f9a8 100644 --- a/drivers/block/ioctl.c +++ b/drivers/block/ioctl.c @@ -133,11 +133,9 @@ static int put_u64(unsigned long arg, u64 val) return put_user(val, (u64 __user *)arg); } -int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, - unsigned long arg) +static int blkdev_locked_ioctl(struct file *file, struct block_device *bdev, + unsigned cmd, unsigned long arg) { - struct block_device *bdev = inode->i_bdev; - struct gendisk *disk = bdev->bd_disk; struct backing_dev_info *bdi; int ret, n; @@ -190,36 +188,72 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, return put_ulong(arg, bdev->bd_inode->i_size >> 9); case BLKGETSIZE64: return put_u64(arg, bdev->bd_inode->i_size); + } + return -ENOIOCTLCMD; +} + +static int blkdev_driver_ioctl(struct inode *inode, struct file *file, + struct gendisk *disk, unsigned cmd, unsigned long arg) +{ + int ret; + if (disk->fops->unlocked_ioctl) + return disk->fops->unlocked_ioctl(file, cmd, arg); + + if (disk->fops->ioctl) { + lock_kernel(); + ret = disk->fops->ioctl(inode, file, cmd, arg); + unlock_kernel(); + return ret; + } + + return -ENOTTY; +} + +int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, + unsigned long arg) +{ + struct block_device *bdev = inode->i_bdev; + struct gendisk *disk = bdev->bd_disk; + int ret, n; + + switch(cmd) { case BLKFLSBUF: if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (disk->fops->ioctl) { - ret = disk->fops->ioctl(inode, file, cmd, arg); - /* -EINVAL to handle old uncorrected drivers */ - if (ret != -EINVAL && ret != -ENOTTY) - return ret; - } + + ret = blkdev_driver_ioctl(inode, file, disk, cmd, arg); + /* -EINVAL to handle old uncorrected drivers */ + if (ret != -EINVAL && ret != -ENOTTY) + return ret; + + lock_kernel(); fsync_bdev(bdev); invalidate_bdev(bdev, 0); + unlock_kernel(); return 0; + case BLKROSET: - if (disk->fops->ioctl) { - ret = disk->fops->ioctl(inode, file, cmd, arg); - /* -EINVAL to handle old uncorrected drivers */ - if (ret != -EINVAL && ret != -ENOTTY) - return ret; - } + ret = blkdev_driver_ioctl(inode, file, disk, cmd, arg); + /* -EINVAL to handle old uncorrected drivers */ + if (ret != -EINVAL && ret != -ENOTTY) + return ret; if (!capable(CAP_SYS_ADMIN)) return -EACCES; if (get_user(n, (int __user *)(arg))) return -EFAULT; + lock_kernel(); set_device_ro(bdev, n); + unlock_kernel(); return 0; - default: - if (disk->fops->ioctl) - return disk->fops->ioctl(inode, file, cmd, arg); } - return -ENOTTY; + + lock_kernel(); + ret = blkdev_locked_ioctl(file, bdev, cmd, arg); + unlock_kernel(); + if (ret != -ENOIOCTLCMD) + return ret; + + return blkdev_driver_ioctl(inode, file, disk, cmd, arg); } /* Most of the generic ioctls are handled in the normal fallback path. diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 81fe3a0c1fe7..fd94ea27d594 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -28,6 +28,7 @@ #include <linux/slab.h> #include <linux/swap.h> #include <linux/writeback.h> +#include <linux/blkdev.h> /* * for max sense size @@ -716,7 +717,7 @@ struct request *blk_queue_find_tag(request_queue_t *q, int tag) { struct blk_queue_tag *bqt = q->queue_tags; - if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) + if (unlikely(bqt == NULL || tag >= bqt->max_depth)) return NULL; return bqt->tag_index[tag]; @@ -774,9 +775,9 @@ EXPORT_SYMBOL(blk_queue_free_tags); static int init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth) { - int bits, i; struct request **tag_index; unsigned long *tag_map; + int nr_ulongs; if (depth > q->nr_requests * 2) { depth = q->nr_requests * 2; @@ -788,24 +789,17 @@ init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth) if (!tag_index) goto fail; - bits = (depth / BLK_TAGS_PER_LONG) + 1; - tag_map = kmalloc(bits * sizeof(unsigned long), GFP_ATOMIC); + nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG; + tag_map = kmalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC); if (!tag_map) goto fail; memset(tag_index, 0, depth * sizeof(struct request *)); - memset(tag_map, 0, bits * sizeof(unsigned long)); + memset(tag_map, 0, nr_ulongs * sizeof(unsigned long)); tags->max_depth = depth; - tags->real_max_depth = bits * BITS_PER_LONG; tags->tag_index = tag_index; tags->tag_map = tag_map; - /* - * set the upper bits if the depth isn't a multiple of the word size - */ - for (i = depth; i < bits * BLK_TAGS_PER_LONG; i++) - __set_bit(i, tag_map); - return 0; fail: kfree(tag_index); @@ -870,32 +864,24 @@ int blk_queue_resize_tags(request_queue_t *q, int new_depth) struct blk_queue_tag *bqt = q->queue_tags; struct request **tag_index; unsigned long *tag_map; - int bits, max_depth; + int max_depth, nr_ulongs; if (!bqt) return -ENXIO; /* - * don't bother sizing down - */ - if (new_depth <= bqt->real_max_depth) { - bqt->max_depth = new_depth; - return 0; - } - - /* * save the old state info, so we can copy it back */ tag_index = bqt->tag_index; tag_map = bqt->tag_map; - max_depth = bqt->real_max_depth; + max_depth = bqt->max_depth; if (init_tag_map(q, bqt, new_depth)) return -ENOMEM; memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *)); - bits = max_depth / BLK_TAGS_PER_LONG; - memcpy(bqt->tag_map, tag_map, bits * sizeof(unsigned long)); + nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG; + memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long)); kfree(tag_index); kfree(tag_map); @@ -925,11 +911,16 @@ void blk_queue_end_tag(request_queue_t *q, struct request *rq) BUG_ON(tag == -1); - if (unlikely(tag >= bqt->real_max_depth)) + if (unlikely(tag >= bqt->max_depth)) + /* + * This can happen after tag depth has been reduced. + * FIXME: how about a warning or info message here? + */ return; if (unlikely(!__test_and_clear_bit(tag, bqt->tag_map))) { - printk("attempt to clear non-busy tag (%d)\n", tag); + printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n", + __FUNCTION__, tag); return; } @@ -938,7 +929,8 @@ void blk_queue_end_tag(request_queue_t *q, struct request *rq) rq->tag = -1; if (unlikely(bqt->tag_index[tag] == NULL)) - printk("tag %d is missing\n", tag); + printk(KERN_ERR "%s: tag %d is missing\n", + __FUNCTION__, tag); bqt->tag_index[tag] = NULL; bqt->busy--; @@ -967,24 +959,20 @@ EXPORT_SYMBOL(blk_queue_end_tag); int blk_queue_start_tag(request_queue_t *q, struct request *rq) { struct blk_queue_tag *bqt = q->queue_tags; - unsigned long *map = bqt->tag_map; - int tag = 0; + int tag; if (unlikely((rq->flags & REQ_QUEUED))) { printk(KERN_ERR - "request %p for device [%s] already tagged %d", - rq, rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag); + "%s: request %p for device [%s] already tagged %d", + __FUNCTION__, rq, + rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag); BUG(); } - for (map = bqt->tag_map; *map == -1UL; map++) { - tag += BLK_TAGS_PER_LONG; - - if (tag >= bqt->max_depth) - return 1; - } + tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); + if (tag >= bqt->max_depth) + return 1; - tag += ffz(*map); __set_bit(tag, bqt->tag_map); rq->flags |= REQ_QUEUED; @@ -1020,7 +1008,8 @@ void blk_queue_invalidate_tags(request_queue_t *q) rq = list_entry_rq(tmp); if (rq->tag == -1) { - printk("bad tag found on list\n"); + printk(KERN_ERR + "%s: bad tag found on list\n", __FUNCTION__); list_del_init(&rq->queuelist); rq->flags &= ~REQ_QUEUED; } else @@ -1450,7 +1439,7 @@ EXPORT_SYMBOL(blk_remove_plug); */ void __generic_unplug_device(request_queue_t *q) { - if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)) + if (unlikely(test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))) return; if (!blk_remove_plug(q)) @@ -1645,7 +1634,8 @@ static int blk_init_free_list(request_queue_t *q) init_waitqueue_head(&rl->wait[WRITE]); init_waitqueue_head(&rl->drain); - rl->rq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, request_cachep); + rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, + mempool_free_slab, request_cachep, q->node); if (!rl->rq_pool) return -ENOMEM; @@ -1657,8 +1647,15 @@ static int __make_request(request_queue_t *, struct bio *); request_queue_t *blk_alloc_queue(int gfp_mask) { - request_queue_t *q = kmem_cache_alloc(requestq_cachep, gfp_mask); + return blk_alloc_queue_node(gfp_mask, -1); +} +EXPORT_SYMBOL(blk_alloc_queue); + +request_queue_t *blk_alloc_queue_node(int gfp_mask, int node_id) +{ + request_queue_t *q; + q = kmem_cache_alloc_node(requestq_cachep, gfp_mask, node_id); if (!q) return NULL; @@ -1671,8 +1668,7 @@ request_queue_t *blk_alloc_queue(int gfp_mask) return q; } - -EXPORT_SYMBOL(blk_alloc_queue); +EXPORT_SYMBOL(blk_alloc_queue_node); /** * blk_init_queue - prepare a request queue for use with a block device @@ -1705,13 +1701,22 @@ EXPORT_SYMBOL(blk_alloc_queue); * blk_init_queue() must be paired with a blk_cleanup_queue() call * when the block device is deactivated (such as at module unload). **/ + request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock) { - request_queue_t *q = blk_alloc_queue(GFP_KERNEL); + return blk_init_queue_node(rfn, lock, -1); +} +EXPORT_SYMBOL(blk_init_queue); + +request_queue_t * +blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) +{ + request_queue_t *q = blk_alloc_queue_node(GFP_KERNEL, node_id); if (!q) return NULL; + q->node = node_id; if (blk_init_free_list(q)) goto out_init; @@ -1754,12 +1759,11 @@ out_init: kmem_cache_free(requestq_cachep, q); return NULL; } - -EXPORT_SYMBOL(blk_init_queue); +EXPORT_SYMBOL(blk_init_queue_node); int blk_get_queue(request_queue_t *q) { - if (!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) { + if (likely(!test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { atomic_inc(&q->refcnt); return 0; } @@ -1838,7 +1842,6 @@ static void __freed_request(request_queue_t *q, int rw) clear_queue_congested(q, rw); if (rl->count[rw] + 1 <= q->nr_requests) { - smp_mb(); if (waitqueue_active(&rl->wait[rw])) wake_up(&rl->wait[rw]); @@ -1966,7 +1969,6 @@ static struct request *get_request_wait(request_queue_t *q, int rw) DEFINE_WAIT(wait); struct request *rq; - generic_unplug_device(q); do { struct request_list *rl = &q->rq; @@ -1978,6 +1980,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw) if (!rq) { struct io_context *ioc; + generic_unplug_device(q); io_schedule(); /* @@ -2581,7 +2584,7 @@ static int __make_request(request_queue_t *q, struct bio *bio) spin_lock_prefetch(q->queue_lock); barrier = bio_barrier(bio); - if (barrier && (q->ordered == QUEUE_ORDERED_NONE)) { + if (unlikely(barrier) && (q->ordered == QUEUE_ORDERED_NONE)) { err = -EOPNOTSUPP; goto end_io; } @@ -2682,7 +2685,7 @@ get_rq: /* * REQ_BARRIER implies no merging, but lets make it explicit */ - if (barrier) + if (unlikely(barrier)) req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE); req->errors = 0; @@ -2806,7 +2809,7 @@ static inline void block_wait_queue_running(request_queue_t *q) { DEFINE_WAIT(wait); - while (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) { + while (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) { struct request_list *rl = &q->rq; prepare_to_wait_exclusive(&rl->drain, &wait, @@ -2915,7 +2918,7 @@ end_io: goto end_io; } - if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)) + if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) goto end_io; block_wait_queue_running(q); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 6f011d0d8e97..b35e08876dd4 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -472,17 +472,11 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) */ static void loop_add_bio(struct loop_device *lo, struct bio *bio) { - unsigned long flags; - - spin_lock_irqsave(&lo->lo_lock, flags); if (lo->lo_biotail) { lo->lo_biotail->bi_next = bio; lo->lo_biotail = bio; } else lo->lo_bio = lo->lo_biotail = bio; - spin_unlock_irqrestore(&lo->lo_lock, flags); - - up(&lo->lo_bh_mutex); } /* @@ -492,14 +486,12 @@ static struct bio *loop_get_bio(struct loop_device *lo) { struct bio *bio; - spin_lock_irq(&lo->lo_lock); if ((bio = lo->lo_bio)) { if (bio == lo->lo_biotail) lo->lo_biotail = NULL; lo->lo_bio = bio->bi_next; bio->bi_next = NULL; } - spin_unlock_irq(&lo->lo_lock); return bio; } @@ -509,35 +501,28 @@ static int loop_make_request(request_queue_t *q, struct bio *old_bio) struct loop_device *lo = q->queuedata; int rw = bio_rw(old_bio); - if (!lo) - goto out; + if (rw == READA) + rw = READ; + + BUG_ON(!lo || (rw != READ && rw != WRITE)); spin_lock_irq(&lo->lo_lock); if (lo->lo_state != Lo_bound) - goto inactive; - atomic_inc(&lo->lo_pending); - spin_unlock_irq(&lo->lo_lock); - - if (rw == WRITE) { - if (lo->lo_flags & LO_FLAGS_READ_ONLY) - goto err; - } else if (rw == READA) { - rw = READ; - } else if (rw != READ) { - printk(KERN_ERR "loop: unknown command (%x)\n", rw); - goto err; - } + goto out; + if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY))) + goto out; + lo->lo_pending++; loop_add_bio(lo, old_bio); + spin_unlock_irq(&lo->lo_lock); + up(&lo->lo_bh_mutex); return 0; -err: - if (atomic_dec_and_test(&lo->lo_pending)) - up(&lo->lo_bh_mutex); + out: + if (lo->lo_pending == 0) + up(&lo->lo_bh_mutex); + spin_unlock_irq(&lo->lo_lock); bio_io_error(old_bio, old_bio->bi_size); return 0; -inactive: - spin_unlock_irq(&lo->lo_lock); - goto out; } /* @@ -560,13 +545,11 @@ static void do_loop_switch(struct loop_device *, struct switch_request *); static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio) { - int ret; - if (unlikely(!bio->bi_bdev)) { do_loop_switch(lo, bio->bi_private); bio_put(bio); } else { - ret = do_bio_filebacked(lo, bio); + int ret = do_bio_filebacked(lo, bio); bio_endio(bio, bio->bi_size, ret); } } @@ -594,7 +577,7 @@ static int loop_thread(void *data) set_user_nice(current, -20); lo->lo_state = Lo_bound; - atomic_inc(&lo->lo_pending); + lo->lo_pending = 1; /* * up sem, we are running @@ -602,26 +585,37 @@ static int loop_thread(void *data) up(&lo->lo_sem); for (;;) { - down_interruptible(&lo->lo_bh_mutex); + int pending; + /* - * could be upped because of tear-down, not because of - * pending work + * interruptible just to not contribute to load avg */ - if (!atomic_read(&lo->lo_pending)) + if (down_interruptible(&lo->lo_bh_mutex)) + continue; + + spin_lock_irq(&lo->lo_lock); + + /* + * could be upped because of tear-down, not pending work + */ + if (unlikely(!lo->lo_pending)) { + spin_unlock_irq(&lo->lo_lock); break; + } bio = loop_get_bio(lo); - if (!bio) { - printk("loop: missing bio\n"); - continue; - } + lo->lo_pending--; + pending = lo->lo_pending; + spin_unlock_irq(&lo->lo_lock); + + BUG_ON(!bio); loop_handle_bio(lo, bio); /* * upped both for pending work and tear-down, lo_pending * will hit zero then */ - if (atomic_dec_and_test(&lo->lo_pending)) + if (unlikely(!pending)) break; } @@ -900,7 +894,8 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) spin_lock_irq(&lo->lo_lock); lo->lo_state = Lo_rundown; - if (atomic_dec_and_test(&lo->lo_pending)) + lo->lo_pending--; + if (!lo->lo_pending) up(&lo->lo_bh_mutex); spin_unlock_irq(&lo->lo_lock); diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index bc56770bcc90..7f3d78de265c 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -467,14 +467,12 @@ static int pkt_set_speed(struct pktcdvd_device *pd, unsigned write_speed, unsign * Queue a bio for processing by the low-level CD device. Must be called * from process context. */ -static void pkt_queue_bio(struct pktcdvd_device *pd, struct bio *bio, int high_prio_read) +static void pkt_queue_bio(struct pktcdvd_device *pd, struct bio *bio) { spin_lock(&pd->iosched.lock); if (bio_data_dir(bio) == READ) { pkt_add_list_last(bio, &pd->iosched.read_queue, &pd->iosched.read_queue_tail); - if (high_prio_read) - pd->iosched.high_prio_read = 1; } else { pkt_add_list_last(bio, &pd->iosched.write_queue, &pd->iosched.write_queue_tail); @@ -490,15 +488,16 @@ static void pkt_queue_bio(struct pktcdvd_device *pd, struct bio *bio, int high_p * requirements for CDRW drives: * - A cache flush command must be inserted before a read request if the * previous request was a write. - * - Switching between reading and writing is slow, so don't it more often + * - Switching between reading and writing is slow, so don't do it more often * than necessary. + * - Optimize for throughput at the expense of latency. This means that streaming + * writes will never be interrupted by a read, but if the drive has to seek + * before the next write, switch to reading instead if there are any pending + * read requests. * - Set the read speed according to current usage pattern. When only reading * from the device, it's best to use the highest possible read speed, but * when switching often between reading and writing, it's better to have the * same read and write speeds. - * - Reads originating from user space should have higher priority than reads - * originating from pkt_gather_data, because some process is usually waiting - * on reads of the first kind. */ static void pkt_iosched_process_queue(struct pktcdvd_device *pd) { @@ -512,21 +511,24 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd) for (;;) { struct bio *bio; - int reads_queued, writes_queued, high_prio_read; + int reads_queued, writes_queued; spin_lock(&pd->iosched.lock); reads_queued = (pd->iosched.read_queue != NULL); writes_queued = (pd->iosched.write_queue != NULL); - if (!reads_queued) - pd->iosched.high_prio_read = 0; - high_prio_read = pd->iosched.high_prio_read; spin_unlock(&pd->iosched.lock); if (!reads_queued && !writes_queued) break; if (pd->iosched.writing) { - if (high_prio_read || (!writes_queued && reads_queued)) { + int need_write_seek = 1; + spin_lock(&pd->iosched.lock); + bio = pd->iosched.write_queue; + spin_unlock(&pd->iosched.lock); + if (bio && (bio->bi_sector == pd->iosched.last_write)) + need_write_seek = 0; + if (need_write_seek && reads_queued) { if (atomic_read(&pd->cdrw.pending_bios) > 0) { VPRINTK("pktcdvd: write, waiting\n"); break; @@ -559,8 +561,10 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd) if (bio_data_dir(bio) == READ) pd->iosched.successive_reads += bio->bi_size >> 10; - else + else { pd->iosched.successive_reads = 0; + pd->iosched.last_write = bio->bi_sector + bio_sectors(bio); + } if (pd->iosched.successive_reads >= HI_SPEED_SWITCH) { if (pd->read_speed == pd->write_speed) { pd->read_speed = MAX_SPEED; @@ -765,7 +769,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt) atomic_inc(&pkt->io_wait); bio->bi_rw = READ; - pkt_queue_bio(pd, bio, 0); + pkt_queue_bio(pd, bio); frames_read++; } @@ -1062,7 +1066,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) atomic_set(&pkt->io_wait, 1); pkt->w_bio->bi_rw = WRITE; - pkt_queue_bio(pd, pkt->w_bio, 0); + pkt_queue_bio(pd, pkt->w_bio); } static void pkt_finish_packet(struct packet_data *pkt, int uptodate) @@ -2120,7 +2124,7 @@ static int pkt_make_request(request_queue_t *q, struct bio *bio) cloned_bio->bi_private = psd; cloned_bio->bi_end_io = pkt_end_io_read_cloned; pd->stats.secs_r += bio->bi_size >> 9; - pkt_queue_bio(pd, cloned_bio, 1); + pkt_queue_bio(pd, cloned_bio); return 0; } diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index 9075bbb56ad4..f766bc22c6bb 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -576,7 +576,7 @@ static void __exit hci_uart_exit(void) #endif /* Release tty registration of line discipline */ - if ((err = tty_register_ldisc(N_HCI, NULL))) + if ((err = tty_unregister_ldisc(N_HCI))) BT_ERR("Can't unregister HCI line discipline (%d)", err); } diff --git a/drivers/char/amiserial.c b/drivers/char/amiserial.c index 1dc4259213a6..777bc499bbbd 100644 --- a/drivers/char/amiserial.c +++ b/drivers/char/amiserial.c @@ -861,13 +861,18 @@ static void change_speed(struct async_struct *info, static void rs_put_char(struct tty_struct *tty, unsigned char ch) { - struct async_struct *info = (struct async_struct *)tty->driver_data; + struct async_struct *info; unsigned long flags; + if (!tty) + return; + + info = tty->driver_data; + if (serial_paranoia_check(info, tty->name, "rs_put_char")) return; - if (!tty || !info->xmit.buf) + if (!info->xmit.buf) return; local_irq_save(flags); @@ -910,13 +915,18 @@ static void rs_flush_chars(struct tty_struct *tty) static int rs_write(struct tty_struct * tty, const unsigned char *buf, int count) { int c, ret = 0; - struct async_struct *info = (struct async_struct *)tty->driver_data; + struct async_struct *info; unsigned long flags; + if (!tty) + return 0; + + info = tty->driver_data; + if (serial_paranoia_check(info, tty->name, "rs_write")) return 0; - if (!tty || !info->xmit.buf || !tmp_buf) + if (!info->xmit.buf || !tmp_buf) return 0; local_save_flags(flags); diff --git a/drivers/char/n_hdlc.c b/drivers/char/n_hdlc.c index b3dbff1cf967..5079beda69b5 100644 --- a/drivers/char/n_hdlc.c +++ b/drivers/char/n_hdlc.c @@ -960,7 +960,7 @@ static char hdlc_unregister_fail[] __exitdata = static void __exit n_hdlc_exit(void) { /* Release tty registration of line discipline */ - int status = tty_register_ldisc(N_HDLC, NULL); + int status = tty_unregister_ldisc(N_HDLC); if (status) printk(hdlc_unregister_fail, status); diff --git a/drivers/char/n_r3964.c b/drivers/char/n_r3964.c index 3883073ab48f..2291a87e8ada 100644 --- a/drivers/char/n_r3964.c +++ b/drivers/char/n_r3964.c @@ -200,7 +200,7 @@ static void __exit r3964_exit(void) TRACE_M ("cleanup_module()"); - status=tty_register_ldisc(N_R3964, NULL); + status=tty_unregister_ldisc(N_R3964); if(status!=0) { diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 31831030f73f..cc4b43bad703 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -251,7 +251,7 @@ static void tty_set_termios_ldisc(struct tty_struct *tty, int num) static DEFINE_SPINLOCK(tty_ldisc_lock); static DECLARE_WAIT_QUEUE_HEAD(tty_ldisc_wait); -static struct tty_ldisc tty_ldiscs[NR_LDISCS]; /* line disc dispatch table */ +static struct tty_ldisc tty_ldiscs[NR_LDISCS]; /* line disc dispatch table */ int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc) { @@ -262,24 +262,35 @@ int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc) return -EINVAL; spin_lock_irqsave(&tty_ldisc_lock, flags); - if (new_ldisc) { - tty_ldiscs[disc] = *new_ldisc; - tty_ldiscs[disc].num = disc; - tty_ldiscs[disc].flags |= LDISC_FLAG_DEFINED; - tty_ldiscs[disc].refcount = 0; - } else { - if(tty_ldiscs[disc].refcount) - ret = -EBUSY; - else - tty_ldiscs[disc].flags &= ~LDISC_FLAG_DEFINED; - } + tty_ldiscs[disc] = *new_ldisc; + tty_ldiscs[disc].num = disc; + tty_ldiscs[disc].flags |= LDISC_FLAG_DEFINED; + tty_ldiscs[disc].refcount = 0; spin_unlock_irqrestore(&tty_ldisc_lock, flags); return ret; } - EXPORT_SYMBOL(tty_register_ldisc); +int tty_unregister_ldisc(int disc) +{ + unsigned long flags; + int ret = 0; + + if (disc < N_TTY || disc >= NR_LDISCS) + return -EINVAL; + + spin_lock_irqsave(&tty_ldisc_lock, flags); + if (tty_ldiscs[disc].refcount) + ret = -EBUSY; + else + tty_ldiscs[disc].flags &= ~LDISC_FLAG_DEFINED; + spin_unlock_irqrestore(&tty_ldisc_lock, flags); + + return ret; +} +EXPORT_SYMBOL(tty_unregister_ldisc); + struct tty_ldisc *tty_ldisc_get(int disc) { unsigned long flags; diff --git a/drivers/firmware/pcdp.c b/drivers/firmware/pcdp.c index df1b721154d2..839b44a7e08b 100644 --- a/drivers/firmware/pcdp.c +++ b/drivers/firmware/pcdp.c @@ -23,12 +23,15 @@ setup_serial_console(struct pcdp_uart *uart) { #ifdef CONFIG_SERIAL_8250_CONSOLE int mmio; - static char options[64]; + static char options[64], *p = options; mmio = (uart->addr.address_space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY); - snprintf(options, sizeof(options), "console=uart,%s,0x%lx,%lun%d", - mmio ? "mmio" : "io", uart->addr.address, uart->baud, - uart->bits ? uart->bits : 8); + p += sprintf(p, "console=uart,%s,0x%lx", + mmio ? "mmio" : "io", uart->addr.address); + if (uart->baud) + p += sprintf(p, ",%lu", uart->baud); + if (uart->bits) + p += sprintf(p, "n%d", uart->bits); return early_serial_console_init(options); #else diff --git a/drivers/firmware/pcdp.h b/drivers/firmware/pcdp.h index 863bb6f768c3..1dc7c88b7b4d 100644 --- a/drivers/firmware/pcdp.h +++ b/drivers/firmware/pcdp.h @@ -2,7 +2,7 @@ * Definitions for PCDP-defined console devices * * v1.0a: http://www.dig64.org/specifications/DIG64_HCDPv10a_01.pdf - * v2.0: http://www.dig64.org/specifications/DIG64_HCDPv20_042804.pdf + * v2.0: http://www.dig64.org/specifications/DIG64_PCDPv20.pdf * * (c) Copyright 2002, 2004 Hewlett-Packard Development Company, L.P. * Khalid Aziz <khalid.aziz@hp.com> diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 39f3e9101ed4..0a31cfda08a0 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -2657,16 +2657,63 @@ int ide_cdrom_lock_door (struct cdrom_device_info *cdi, int lock) } static +int ide_cdrom_get_capabilities(ide_drive_t *drive, struct atapi_capabilities_page *cap) +{ + struct cdrom_info *info = drive->driver_data; + struct cdrom_device_info *cdi = &info->devinfo; + struct packet_command cgc; + int stat, attempts = 3, size = sizeof(*cap); + + /* + * ACER50 (and others?) require the full spec length mode sense + * page capabilities size, but older drives break. + */ + if (!(!strcmp(drive->id->model, "ATAPI CD ROM DRIVE 50X MAX") || + !strcmp(drive->id->model, "WPI CDS-32X"))) + size -= sizeof(cap->pad); + + init_cdrom_command(&cgc, cap, size, CGC_DATA_UNKNOWN); + do { /* we seem to get stat=0x01,err=0x00 the first time (??) */ + stat = cdrom_mode_sense(cdi, &cgc, GPMODE_CAPABILITIES_PAGE, 0); + if (!stat) + break; + } while (--attempts); + return stat; +} + +static +void ide_cdrom_update_speed (ide_drive_t *drive, struct atapi_capabilities_page *cap) +{ + /* The ACER/AOpen 24X cdrom has the speed fields byte-swapped */ + if (!drive->id->model[0] && + !strncmp(drive->id->fw_rev, "241N", 4)) { + CDROM_STATE_FLAGS(drive)->current_speed = + (((unsigned int)cap->curspeed) + (176/2)) / 176; + CDROM_CONFIG_FLAGS(drive)->max_speed = + (((unsigned int)cap->maxspeed) + (176/2)) / 176; + } else { + CDROM_STATE_FLAGS(drive)->current_speed = + (ntohs(cap->curspeed) + (176/2)) / 176; + CDROM_CONFIG_FLAGS(drive)->max_speed = + (ntohs(cap->maxspeed) + (176/2)) / 176; + } +} + +static int ide_cdrom_select_speed (struct cdrom_device_info *cdi, int speed) { ide_drive_t *drive = (ide_drive_t*) cdi->handle; struct request_sense sense; + struct atapi_capabilities_page cap; int stat; if ((stat = cdrom_select_speed(drive, speed, &sense)) < 0) return stat; - cdi->speed = CDROM_STATE_FLAGS(drive)->current_speed; + if (!ide_cdrom_get_capabilities(drive, &cap)) { + ide_cdrom_update_speed(drive, &cap); + cdi->speed = CDROM_STATE_FLAGS(drive)->current_speed; + } return 0; } @@ -2869,31 +2916,6 @@ static int ide_cdrom_register (ide_drive_t *drive, int nslots) } static -int ide_cdrom_get_capabilities(ide_drive_t *drive, struct atapi_capabilities_page *cap) -{ - struct cdrom_info *info = drive->driver_data; - struct cdrom_device_info *cdi = &info->devinfo; - struct packet_command cgc; - int stat, attempts = 3, size = sizeof(*cap); - - /* - * ACER50 (and others?) require the full spec length mode sense - * page capabilities size, but older drives break. - */ - if (!(!strcmp(drive->id->model, "ATAPI CD ROM DRIVE 50X MAX") || - !strcmp(drive->id->model, "WPI CDS-32X"))) - size -= sizeof(cap->pad); - - init_cdrom_command(&cgc, cap, size, CGC_DATA_UNKNOWN); - do { /* we seem to get stat=0x01,err=0x00 the first time (??) */ - stat = cdrom_mode_sense(cdi, &cgc, GPMODE_CAPABILITIES_PAGE, 0); - if (!stat) - break; - } while (--attempts); - return stat; -} - -static int ide_cdrom_probe_capabilities (ide_drive_t *drive) { struct cdrom_info *info = drive->driver_data; @@ -2978,20 +3000,7 @@ int ide_cdrom_probe_capabilities (ide_drive_t *drive) } } - /* The ACER/AOpen 24X cdrom has the speed fields byte-swapped */ - if (!drive->id->model[0] && - !strncmp(drive->id->fw_rev, "241N", 4)) { - CDROM_STATE_FLAGS(drive)->current_speed = - (((unsigned int)cap.curspeed) + (176/2)) / 176; - CDROM_CONFIG_FLAGS(drive)->max_speed = - (((unsigned int)cap.maxspeed) + (176/2)) / 176; - } else { - CDROM_STATE_FLAGS(drive)->current_speed = - (ntohs(cap.curspeed) + (176/2)) / 176; - CDROM_CONFIG_FLAGS(drive)->max_speed = - (ntohs(cap.maxspeed) + (176/2)) / 176; - } - + ide_cdrom_update_speed(drive, &cap); /* don't print speed if the drive reported 0. */ printk(KERN_INFO "%s: ATAPI", drive->name); diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 3302cd8eab4c..d6f934886b04 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -1215,7 +1215,8 @@ static int ide_disk_probe(struct device *dev) if (!idkp) goto failed; - g = alloc_disk(1 << PARTN_BITS); + g = alloc_disk_node(1 << PARTN_BITS, + pcibus_to_node(drive->hwif->pci_dev->bus)); if (!g) goto out_free_idkp; diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index c949e98df4b6..9eab6426148e 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -661,10 +661,12 @@ static void idefloppy_output_buffers (ide_drive_t *drive, idefloppy_pc_t *pc, un idefloppy_do_end_request(drive, 1, done >> 9); +#if IDEFLOPPY_DEBUG_BUGS if (bcount) { printk(KERN_ERR "%s: leftover data in idefloppy_output_buffers, bcount == %d\n", drive->name, bcount); idefloppy_write_zeros(drive, bcount); } +#endif } static void idefloppy_update_buffers (ide_drive_t *drive, idefloppy_pc_t *pc) @@ -1048,6 +1050,9 @@ static ide_startstop_t idefloppy_issue_pc (ide_drive_t *drive, idefloppy_pc_t *p atapi_bcount_t bcount; ide_handler_t *pkt_xfer_routine; +#if 0 /* Accessing floppy->pc is not valid here, the previous pc may be gone + and have lived on another thread's stack; that stack may have become + unmapped meanwhile (CONFIG_DEBUG_PAGEALLOC). */ #if IDEFLOPPY_DEBUG_BUGS if (floppy->pc->c[0] == IDEFLOPPY_REQUEST_SENSE_CMD && pc->c[0] == IDEFLOPPY_REQUEST_SENSE_CMD) { @@ -1055,6 +1060,7 @@ static ide_startstop_t idefloppy_issue_pc (ide_drive_t *drive, idefloppy_pc_t *p "Two request sense in serial were issued\n"); } #endif /* IDEFLOPPY_DEBUG_BUGS */ +#endif if (floppy->failed_pc == NULL && pc->c[0] != IDEFLOPPY_REQUEST_SENSE_CMD) diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 5d876f53c697..7df85af75371 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -977,8 +977,9 @@ static int ide_init_queue(ide_drive_t *drive) * limits and LBA48 we could raise it but as yet * do not. */ - - q = blk_init_queue(do_ide_request, &ide_lock); + + q = blk_init_queue_node(do_ide_request, &ide_lock, + pcibus_to_node(drive->hwif->pci_dev->bus)); if (!q) return 1; @@ -1095,7 +1096,8 @@ static int init_irq (ide_hwif_t *hwif) hwgroup->hwif->next = hwif; spin_unlock_irq(&ide_lock); } else { - hwgroup = kmalloc(sizeof(ide_hwgroup_t),GFP_KERNEL); + hwgroup = kmalloc_node(sizeof(ide_hwgroup_t), GFP_KERNEL, + pcibus_to_node(hwif->drives[0].hwif->pci_dev->bus)); if (!hwgroup) goto out_up; diff --git a/drivers/input/serio/serport.c b/drivers/input/serio/serport.c index f6b85222ba3d..79ca38469159 100644 --- a/drivers/input/serio/serport.c +++ b/drivers/input/serio/serport.c @@ -257,7 +257,7 @@ static int __init serport_init(void) static void __exit serport_exit(void) { - tty_register_ldisc(N_MOUSE, NULL); + tty_unregister_ldisc(N_MOUSE); } module_init(serport_init); diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index ee3c869d9701..200a0688f717 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -122,14 +122,6 @@ static struct hash_cell *__get_uuid_cell(const char *str) /*----------------------------------------------------------------- * Inserting, removing and renaming a device. *---------------------------------------------------------------*/ -static inline char *kstrdup(const char *str) -{ - char *r = kmalloc(strlen(str) + 1, GFP_KERNEL); - if (r) - strcpy(r, str); - return r; -} - static struct hash_cell *alloc_cell(const char *name, const char *uuid, struct mapped_device *md) { @@ -139,7 +131,7 @@ static struct hash_cell *alloc_cell(const char *name, const char *uuid, if (!hc) return NULL; - hc->name = kstrdup(name); + hc->name = kstrdup(name, GFP_KERNEL); if (!hc->name) { kfree(hc); return NULL; @@ -149,7 +141,7 @@ static struct hash_cell *alloc_cell(const char *name, const char *uuid, hc->uuid = NULL; else { - hc->uuid = kstrdup(uuid); + hc->uuid = kstrdup(uuid, GFP_KERNEL); if (!hc->uuid) { kfree(hc->name); kfree(hc); @@ -273,7 +265,7 @@ static int dm_hash_rename(const char *old, const char *new) /* * duplicate new. */ - new_name = kstrdup(new); + new_name = kstrdup(new, GFP_KERNEL); if (!new_name) return -ENOMEM; diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c index db4f369637b6..d5666c37cb0d 100644 --- a/drivers/net/appletalk/ltpc.c +++ b/drivers/net/appletalk/ltpc.c @@ -1109,8 +1109,7 @@ struct net_device * __init ltpc_probe(void) inb_p(io+1); inb_p(io+3); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(2*HZ/100); + msleep(20); inb_p(io+0); inb_p(io+2); @@ -1120,8 +1119,7 @@ struct net_device * __init ltpc_probe(void) inb_p(io+5); /* enable dma */ inb_p(io+6); /* tri-state interrupt line */ - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(HZ); + ssleep(1); /* now, figure out which dma channel we're using, unless it's already been specified */ diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 325495b8b60c..137226d98d47 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -2307,6 +2307,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) tso = e1000_tso(adapter, skb); if (tso < 0) { dev_kfree_skb_any(skb); + spin_unlock_irqrestore(&adapter->tx_lock, flags); return NETDEV_TX_OK; } diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 89454915b857..e44f8e9055ef 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -848,7 +848,7 @@ static void __exit sixpack_exit_driver(void) { int ret; - if ((ret = tty_register_ldisc(N_6PACK, NULL))) + if ((ret = tty_unregister_ldisc(N_6PACK))) printk(msg_unregfail, ret); } diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 62790511098f..3035422f5ad8 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -934,7 +934,7 @@ static void __exit mkiss_exit_driver(void) kfree(ax25_ctrls); ax25_ctrls = NULL; - if ((i = tty_register_ldisc(N_AX25, NULL))) + if ((i = tty_unregister_ldisc(N_AX25))) printk(KERN_ERR "mkiss: can't unregister line discipline (err = %d)\n", i); } diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c index 7d23aa375908..b8d112348ba4 100644 --- a/drivers/net/irda/irtty-sir.c +++ b/drivers/net/irda/irtty-sir.c @@ -626,7 +626,7 @@ static void __exit irtty_sir_cleanup(void) { int err; - if ((err = tty_register_ldisc(N_IRDA, NULL))) { + if ((err = tty_unregister_ldisc(N_IRDA))) { IRDA_ERROR("%s(), can't unregister line discipline (err = %d)\n", __FUNCTION__, err); } diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c index 33b9d79b1aad..5e48b9ab3045 100644 --- a/drivers/net/ppp_async.c +++ b/drivers/net/ppp_async.c @@ -1025,7 +1025,7 @@ static void async_lcp_peek(struct asyncppp *ap, unsigned char *data, static void __exit ppp_async_cleanup(void) { - if (tty_register_ldisc(N_PPP, NULL) != 0) + if (tty_unregister_ldisc(N_PPP) != 0) printk(KERN_ERR "failed to unregister PPP line discipline\n"); } diff --git a/drivers/net/ppp_synctty.c b/drivers/net/ppp_synctty.c index 7d0150b4c629..fd9f50180355 100644 --- a/drivers/net/ppp_synctty.c +++ b/drivers/net/ppp_synctty.c @@ -793,7 +793,7 @@ err: static void __exit ppp_sync_cleanup(void) { - if (tty_register_ldisc(N_SYNC_PPP, NULL) != 0) + if (tty_unregister_ldisc(N_SYNC_PPP) != 0) printk(KERN_ERR "failed to unregister Sync PPP line discipline\n"); } diff --git a/drivers/net/slip.c b/drivers/net/slip.c index 8f7841c0374d..19112712daf0 100644 --- a/drivers/net/slip.c +++ b/drivers/net/slip.c @@ -1430,7 +1430,7 @@ static void __exit slip_exit(void) kfree(slip_devs); slip_devs = NULL; - if ((i = tty_register_ldisc(N_SLIP, NULL))) + if ((i = tty_unregister_ldisc(N_SLIP))) { printk(KERN_ERR "SLIP: can't unregister line discipline (err = %d)\n", i); } diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 1c540d825551..bdf672c48182 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -829,7 +829,7 @@ static void __exit exit_x25_asy(void) } kfree(x25_asy_devs); - tty_register_ldisc(N_X25, NULL); + tty_unregister_ldisc(N_X25); } module_init(init_x25_asy); diff --git a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c index ec8cf29ffced..6c42b573a95a 100644 --- a/drivers/net/wireless/strip.c +++ b/drivers/net/wireless/strip.c @@ -2828,7 +2828,7 @@ static void __exit strip_exit_driver(void) /* Unregister with the /proc/net file here. */ proc_net_remove("strip"); - if ((i = tty_register_ldisc(N_STRIP, NULL))) + if ((i = tty_unregister_ldisc(N_STRIP))) printk(KERN_ERR "STRIP: can't unregister line discipline (err = %d)\n", i); printk(signoff); diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index e7f3bcb79000..80edfa3abd29 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -2751,7 +2751,6 @@ enum parport_pc_pci_cards { netmos_9755, netmos_9805, netmos_9815, - netmos_9855, }; @@ -2826,7 +2825,6 @@ static struct parport_pc_pci { /* netmos_9755 */ { 2, { { 0, 1 }, { 2, 3 },} }, /* untested */ /* netmos_9805 */ { 1, { { 0, -1 }, } }, /* untested */ /* netmos_9815 */ { 2, { { 0, -1 }, { 2, -1 }, } }, /* untested */ - /* netmos_9855 */ { 2, { { 0, -1 }, { 2, -1 }, } }, /* untested */ }; static struct pci_device_id parport_pc_pci_tbl[] = { @@ -2907,8 +2905,6 @@ static struct pci_device_id parport_pc_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, netmos_9805 }, { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9815, PCI_ANY_ID, PCI_ANY_ID, 0, 0, netmos_9815 }, - { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9855, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, netmos_9855 }, { 0, } /* terminate list */ }; MODULE_DEVICE_TABLE(pci,parport_pc_pci_tbl); diff --git a/drivers/parport/parport_serial.c b/drivers/parport/parport_serial.c index 6715a17b5d0f..00498e2f1205 100644 --- a/drivers/parport/parport_serial.c +++ b/drivers/parport/parport_serial.c @@ -34,6 +34,7 @@ enum parport_pc_pci_cards { titan_110l = 0, titan_210l, netmos_9xx5_combo, + netmos_9855, avlab_1s1p, avlab_1s1p_650, avlab_1s1p_850, @@ -87,6 +88,7 @@ static struct parport_pc_pci cards[] __devinitdata = { /* titan_110l */ { 1, { { 3, -1 }, } }, /* titan_210l */ { 1, { { 3, -1 }, } }, /* netmos_9xx5_combo */ { 1, { { 2, -1 }, }, netmos_parallel_init }, + /* netmos_9855 */ { 1, { { 0, -1 }, }, netmos_parallel_init }, /* avlab_1s1p */ { 1, { { 1, 2}, } }, /* avlab_1s1p_650 */ { 1, { { 1, 2}, } }, /* avlab_1s1p_850 */ { 1, { { 1, 2}, } }, @@ -120,7 +122,7 @@ static struct pci_device_id parport_serial_pci_tbl[] = { { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9845, PCI_ANY_ID, PCI_ANY_ID, 0, 0, netmos_9xx5_combo }, { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9855, - PCI_ANY_ID, PCI_ANY_ID, 0, 0, netmos_9xx5_combo }, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, netmos_9855 }, /* PCI_VENDOR_ID_AVLAB/Intek21 has another bunch of cards ...*/ { 0x14db, 0x2110, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_1s1p}, { 0x14db, 0x2111, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_1s1p_650}, @@ -207,6 +209,7 @@ static struct pci_board_no_ids pci_boards[] __devinitdata = { /* titan_110l */ { SPCI_FL_BASE1 | SPCI_FL_BASE_TABLE, 1, 921600 }, /* titan_210l */ { SPCI_FL_BASE1 | SPCI_FL_BASE_TABLE, 2, 921600 }, /* netmos_9xx5_combo */ { SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200, 0, 0, netmos_serial_init }, +/* netmos_9855 */ { SPCI_FL_BASE2 | SPCI_FL_BASE_TABLE, 1, 115200, 0, 0, netmos_serial_init }, /* avlab_1s1p (n/t) */ { SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200 }, /* avlab_1s1p_650 (nt)*/{ SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200 }, /* avlab_1s1p_850 (nt)*/{ SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200 }, diff --git a/drivers/parport/probe.c b/drivers/parport/probe.c index c94963145e17..6e6f42d01e64 100644 --- a/drivers/parport/probe.c +++ b/drivers/parport/probe.c @@ -48,14 +48,6 @@ static void pretty_print(struct parport *port, int device) printk("\n"); } -static char *strdup(char *str) -{ - int n = strlen(str)+1; - char *s = kmalloc(n, GFP_KERNEL); - if (!s) return NULL; - return strcpy(s, str); -} - static void parse_data(struct parport *port, int device, char *str) { char *txt = kmalloc(strlen(str)+1, GFP_KERNEL); @@ -88,16 +80,16 @@ static void parse_data(struct parport *port, int device, char *str) if (!strcmp(p, "MFG") || !strcmp(p, "MANUFACTURER")) { if (info->mfr) kfree (info->mfr); - info->mfr = strdup(sep); + info->mfr = kstrdup(sep, GFP_KERNEL); } else if (!strcmp(p, "MDL") || !strcmp(p, "MODEL")) { if (info->model) kfree (info->model); - info->model = strdup(sep); + info->model = kstrdup(sep, GFP_KERNEL); } else if (!strcmp(p, "CLS") || !strcmp(p, "CLASS")) { int i; if (info->class_name) kfree (info->class_name); - info->class_name = strdup(sep); + info->class_name = kstrdup(sep, GFP_KERNEL); for (u = sep; *u; u++) *u = toupper(*u); for (i = 0; classes[i].token; i++) { @@ -112,7 +104,7 @@ static void parse_data(struct parport *port, int device, char *str) !strcmp(p, "COMMAND SET")) { if (info->cmdset) kfree (info->cmdset); - info->cmdset = strdup(sep); + info->cmdset = kstrdup(sep, GFP_KERNEL); /* if it speaks printer language, it's probably a printer */ if (strstr(sep, "PJL") || strstr(sep, "PCL")) @@ -120,7 +112,7 @@ static void parse_data(struct parport *port, int device, char *str) } else if (!strcmp(p, "DES") || !strcmp(p, "DESCRIPTION")) { if (info->description) kfree (info->description); - info->description = strdup(sep); + info->description = kstrdup(sep, GFP_KERNEL); } } rock_on: diff --git a/drivers/pcmcia/cs.c b/drivers/pcmcia/cs.c index 03fc885db1c5..d136b3c8fac9 100644 --- a/drivers/pcmcia/cs.c +++ b/drivers/pcmcia/cs.c @@ -508,6 +508,10 @@ static int socket_setup(struct pcmcia_socket *skt, int initial_delay) cs_err(skt, "unsupported voltage key.\n"); return CS_BAD_TYPE; } + + if (skt->power_hook) + skt->power_hook(skt, HOOK_POWER_PRE); + skt->socket.flags = 0; skt->ops->set_socket(skt, &skt->socket); @@ -522,7 +526,12 @@ static int socket_setup(struct pcmcia_socket *skt, int initial_delay) return CS_BAD_TYPE; } - return socket_reset(skt); + status = socket_reset(skt); + + if (skt->power_hook) + skt->power_hook(skt, HOOK_POWER_POST); + + return status; } /* diff --git a/drivers/pcmcia/ti113x.h b/drivers/pcmcia/ti113x.h index a8a1d104524a..c7ba99871aca 100644 --- a/drivers/pcmcia/ti113x.h +++ b/drivers/pcmcia/ti113x.h @@ -611,6 +611,170 @@ out: } } + +/* Returns true value if the second slot of a two-slot controller is empty */ +static int ti12xx_2nd_slot_empty(struct yenta_socket *socket) +{ + struct pci_dev *func; + struct yenta_socket *slot2; + int devfn; + unsigned int state; + int ret = 1; + + /* catch the two-slot controllers */ + switch (socket->dev->device) { + case PCI_DEVICE_ID_TI_1220: + case PCI_DEVICE_ID_TI_1221: + case PCI_DEVICE_ID_TI_1225: + case PCI_DEVICE_ID_TI_1251A: + case PCI_DEVICE_ID_TI_1251B: + case PCI_DEVICE_ID_TI_1420: + case PCI_DEVICE_ID_TI_1450: + case PCI_DEVICE_ID_TI_1451A: + case PCI_DEVICE_ID_TI_1520: + case PCI_DEVICE_ID_TI_1620: + case PCI_DEVICE_ID_TI_4520: + case PCI_DEVICE_ID_TI_4450: + case PCI_DEVICE_ID_TI_4451: + /* + * there are way more, but they need to be added in yenta_socket.c + * and pci_ids.h first anyway. + */ + break; + + /* single-slot controllers have the 2nd slot empty always :) */ + default: + return 1; + } + + /* get other slot */ + devfn = socket->dev->devfn & ~0x07; + func = pci_get_slot(socket->dev->bus, + (socket->dev->devfn & 0x07) ? devfn : devfn | 0x01); + if (!func) + return 1; + + slot2 = pci_get_drvdata(func); + if (!slot2) + goto out; + + /* check state */ + yenta_get_status(&socket->socket, &state); + if (state & SS_DETECT) { + ret = 0; + goto out; + } + +out: + pci_dev_put(func); + return ret; +} + +/* + * TI specifiy parts for the power hook. + * + * some TI's with some CB's produces interrupt storm on power on. it has been + * seen with atheros wlan cards on TI1225 and TI1410. solution is simply to + * disable any CB interrupts during this time. + */ +static int ti12xx_power_hook(struct pcmcia_socket *sock, int operation) +{ + struct yenta_socket *socket = container_of(sock, struct yenta_socket, socket); + u32 mfunc, devctl, sysctl; + u8 gpio3; + + /* only POWER_PRE and POWER_POST are interesting */ + if ((operation != HOOK_POWER_PRE) && (operation != HOOK_POWER_POST)) + return 0; + + devctl = config_readb(socket, TI113X_DEVICE_CONTROL); + sysctl = config_readl(socket, TI113X_SYSTEM_CONTROL); + mfunc = config_readl(socket, TI122X_MFUNC); + + /* + * all serial/tied: only disable when modparm set. always doing it + * would mean a regression for working setups 'cos it disables the + * interrupts for both both slots on 2-slot controllers + * (and users of single slot controllers where it's save have to + * live with setting the modparm, most don't have to anyway) + */ + if (((devctl & TI113X_DCR_IMODE_MASK) == TI12XX_DCR_IMODE_ALL_SERIAL) && + (pwr_irqs_off || ti12xx_2nd_slot_empty(socket))) { + switch (socket->dev->device) { + case PCI_DEVICE_ID_TI_1250: + case PCI_DEVICE_ID_TI_1251A: + case PCI_DEVICE_ID_TI_1251B: + case PCI_DEVICE_ID_TI_1450: + case PCI_DEVICE_ID_TI_1451A: + case PCI_DEVICE_ID_TI_4450: + case PCI_DEVICE_ID_TI_4451: + /* these chips have no IRQSER setting in MFUNC3 */ + break; + + default: + if (operation == HOOK_POWER_PRE) + mfunc = (mfunc & ~TI122X_MFUNC3_MASK); + else + mfunc = (mfunc & ~TI122X_MFUNC3_MASK) | TI122X_MFUNC3_IRQSER; + } + + return 0; + } + + /* do the job differently for func0/1 */ + if ((PCI_FUNC(socket->dev->devfn) == 0) || + ((sysctl & TI122X_SCR_INTRTIE) && + (pwr_irqs_off || ti12xx_2nd_slot_empty(socket)))) { + /* some bridges are different */ + switch (socket->dev->device) { + case PCI_DEVICE_ID_TI_1250: + case PCI_DEVICE_ID_TI_1251A: + case PCI_DEVICE_ID_TI_1251B: + case PCI_DEVICE_ID_TI_1450: + /* those oldies use gpio3 for INTA */ + gpio3 = config_readb(socket, TI1250_GPIO3_CONTROL); + if (operation == HOOK_POWER_PRE) + gpio3 = (gpio3 & ~TI1250_GPIO_MODE_MASK) | 0x40; + else + gpio3 &= ~TI1250_GPIO_MODE_MASK; + config_writeb(socket, TI1250_GPIO3_CONTROL, gpio3); + break; + + default: + /* all new bridges are the same */ + if (operation == HOOK_POWER_PRE) + mfunc &= ~TI122X_MFUNC0_MASK; + else + mfunc |= TI122X_MFUNC0_INTA; + config_writel(socket, TI122X_MFUNC, mfunc); + } + } else { + switch (socket->dev->device) { + case PCI_DEVICE_ID_TI_1251A: + case PCI_DEVICE_ID_TI_1251B: + case PCI_DEVICE_ID_TI_1450: + /* those have INTA elsewhere and INTB in MFUNC0 */ + if (operation == HOOK_POWER_PRE) + mfunc &= ~TI122X_MFUNC0_MASK; + else + mfunc |= TI125X_MFUNC0_INTB; + config_writel(socket, TI122X_MFUNC, mfunc); + + break; + + default: + /* all new bridges are the same */ + if (operation == HOOK_POWER_PRE) + mfunc &= ~TI122X_MFUNC1_MASK; + else + mfunc |= TI122X_MFUNC1_INTB; + config_writel(socket, TI122X_MFUNC, mfunc); + } + } + + return 0; +} + static int ti12xx_override(struct yenta_socket *socket) { u32 val, val_orig; @@ -654,6 +818,9 @@ static int ti12xx_override(struct yenta_socket *socket) else ti12xx_irqroute_func1(socket); + /* install power hook */ + socket->socket.power_hook = ti12xx_power_hook; + return ti_override(socket); } diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c index 6404d97a12eb..bee05362fd24 100644 --- a/drivers/pcmcia/yenta_socket.c +++ b/drivers/pcmcia/yenta_socket.c @@ -32,6 +32,14 @@ static int disable_clkrun; module_param(disable_clkrun, bool, 0444); MODULE_PARM_DESC(disable_clkrun, "If PC card doesn't function properly, please try this option"); +static int isa_probe = 1; +module_param(isa_probe, bool, 0444); +MODULE_PARM_DESC(isa_probe, "If set ISA interrupts are probed (default). Set to N to disable probing"); + +static int pwr_irqs_off; +module_param(pwr_irqs_off, bool, 0644); +MODULE_PARM_DESC(pwr_irqs_off, "Force IRQs off during power-on of slot. Use only when seeing IRQ storms!"); + #if 0 #define debug(x,args...) printk(KERN_DEBUG "%s: " x, __func__ , ##args) #else @@ -150,15 +158,16 @@ static int yenta_get_status(struct pcmcia_socket *sock, unsigned int *value) val = (state & CB_3VCARD) ? SS_3VCARD : 0; val |= (state & CB_XVCARD) ? SS_XVCARD : 0; - val |= (state & (CB_CDETECT1 | CB_CDETECT2 | CB_5VCARD | CB_3VCARD - | CB_XVCARD | CB_YVCARD)) ? 0 : SS_PENDING; + val |= (state & (CB_5VCARD | CB_3VCARD | CB_XVCARD | CB_YVCARD)) ? 0 : SS_PENDING; + val |= (state & (CB_CDETECT1 | CB_CDETECT2)) ? SS_PENDING : 0; + if (state & CB_CBCARD) { val |= SS_CARDBUS; val |= (state & CB_CARDSTS) ? SS_STSCHG : 0; val |= (state & (CB_CDETECT1 | CB_CDETECT2)) ? 0 : SS_DETECT; val |= (state & CB_PWRCYCLE) ? SS_POWERON | SS_READY : 0; - } else { + } else if (state & CB_16BITCARD) { u8 status = exca_readb(socket, I365_STATUS); val |= ((status & I365_CS_DETECT) == I365_CS_DETECT) ? SS_DETECT : 0; if (exca_readb(socket, I365_INTCTL) & I365_PC_IOCARD) { @@ -405,11 +414,13 @@ static int yenta_set_mem_map(struct pcmcia_socket *sock, struct pccard_mem_map * } -static unsigned int yenta_events(struct yenta_socket *socket) + +static irqreturn_t yenta_interrupt(int irq, void *dev_id, struct pt_regs *regs) { + unsigned int events; + struct yenta_socket *socket = (struct yenta_socket *) dev_id; u8 csc; u32 cb_event; - unsigned int events; /* Clear interrupt status for the event */ cb_event = cb_readl(socket, CB_SOCKET_EVENT); @@ -426,20 +437,13 @@ static unsigned int yenta_events(struct yenta_socket *socket) events |= (csc & I365_CSC_BVD2) ? SS_BATWARN : 0; events |= (csc & I365_CSC_READY) ? SS_READY : 0; } - return events; -} - - -static irqreturn_t yenta_interrupt(int irq, void *dev_id, struct pt_regs *regs) -{ - unsigned int events; - struct yenta_socket *socket = (struct yenta_socket *) dev_id; - events = yenta_events(socket); - if (events) { + if (events) pcmcia_parse_events(&socket->socket, events); + + if (cb_event || csc) return IRQ_HANDLED; - } + return IRQ_NONE; } @@ -470,11 +474,22 @@ static void yenta_clear_maps(struct yenta_socket *socket) } } +/* redoes voltage interrogation if required */ +static void yenta_interrogate(struct yenta_socket *socket) +{ + u32 state; + + state = cb_readl(socket, CB_SOCKET_STATE); + if (!(state & (CB_5VCARD | CB_3VCARD | CB_XVCARD | CB_YVCARD)) || + (state & (CB_CDETECT1 | CB_CDETECT2 | CB_NOTACARD | CB_BADVCCREQ)) || + ((state & (CB_16BITCARD | CB_CBCARD)) == (CB_16BITCARD | CB_CBCARD))) + cb_writel(socket, CB_SOCKET_FORCE, CB_CVSTEST); +} + /* Called at resume and initialization events */ static int yenta_sock_init(struct pcmcia_socket *sock) { struct yenta_socket *socket = container_of(sock, struct yenta_socket, socket); - u32 state; u16 bridge; bridge = config_readw(socket, CB_BRIDGE_CONTROL) & ~CB_BRIDGE_INTR; @@ -486,10 +501,7 @@ static int yenta_sock_init(struct pcmcia_socket *sock) exca_writeb(socket, I365_GENCTL, 0x00); /* Redo card voltage interrogation */ - state = cb_readl(socket, CB_SOCKET_STATE); - if (!(state & (CB_CDETECT1 | CB_CDETECT2 | CB_5VCARD | - CB_3VCARD | CB_XVCARD | CB_YVCARD))) - cb_writel(socket, CB_SOCKET_FORCE, CB_CVSTEST); + yenta_interrogate(socket); yenta_clear_maps(socket); @@ -856,7 +868,10 @@ static void yenta_get_socket_capabilities(struct yenta_socket *socket, u32 isa_i socket->socket.features |= SS_CAP_PAGE_REGS | SS_CAP_PCCARD | SS_CAP_CARDBUS; socket->socket.map_size = 0x1000; socket->socket.pci_irq = socket->cb_irq; - socket->socket.irq_mask = yenta_probe_irq(socket, isa_irq_mask); + if (isa_probe) + socket->socket.irq_mask = yenta_probe_irq(socket, isa_irq_mask); + else + socket->socket.irq_mask = 0; socket->socket.cb_dev = socket->dev; printk(KERN_INFO "Yenta: ISA IRQ mask 0x%04x, PCI irq %d\n", @@ -996,6 +1011,7 @@ static int __devinit yenta_probe (struct pci_dev *dev, const struct pci_device_i } /* Figure out what the dang thing can do for the PCMCIA layer... */ + yenta_interrogate(socket); yenta_get_socket_capabilities(socket, isa_interrupts); printk(KERN_INFO "Socket status: %08x\n", cb_readl(socket, CB_SOCKET_STATE)); diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c index 9cc0015b717d..a699c30b2662 100644 --- a/drivers/scsi/dpt_i2o.c +++ b/drivers/scsi/dpt_i2o.c @@ -1126,11 +1126,11 @@ static int adpt_i2o_post_wait(adpt_hba* pHba, u32* msg, int len, int timeout) struct adpt_i2o_post_wait_data *p1, *p2; struct adpt_i2o_post_wait_data *wait_data = kmalloc(sizeof(struct adpt_i2o_post_wait_data),GFP_KERNEL); - adpt_wait_queue_t wait; + DECLARE_WAITQUEUE(wait, current); - if(!wait_data){ + if (!wait_data) return -ENOMEM; - } + /* * The spin locking is needed to keep anyone from playing * with the queue pointers and id while we do the same @@ -1148,12 +1148,7 @@ static int adpt_i2o_post_wait(adpt_hba* pHba, u32* msg, int len, int timeout) wait_data->wq = &adpt_wq_i2o_post; wait_data->status = -ETIMEDOUT; - // this code is taken from kernel/sched.c:interruptible_sleep_on_timeout - wait.task = current; - init_waitqueue_entry(&wait, current); - spin_lock_irqsave(&adpt_wq_i2o_post.lock, flags); - __add_wait_queue(&adpt_wq_i2o_post, &wait); - spin_unlock(&adpt_wq_i2o_post.lock); + add_wait_queue(&adpt_wq_i2o_post, &wait); msg[2] |= 0x80000000 | ((u32)wait_data->id); timeout *= HZ; @@ -1175,9 +1170,7 @@ static int adpt_i2o_post_wait(adpt_hba* pHba, u32* msg, int len, int timeout) if(pHba->host) spin_lock_irq(pHba->host->host_lock); } - spin_lock_irq(&adpt_wq_i2o_post.lock); - __remove_wait_queue(&adpt_wq_i2o_post, &wait); - spin_unlock_irqrestore(&adpt_wq_i2o_post.lock, flags); + remove_wait_queue(&adpt_wq_i2o_post, &wait); if(status == -ETIMEDOUT){ printk(KERN_INFO"dpti%d: POST WAIT TIMEOUT\n",pHba->unit); diff --git a/drivers/serial/sn_console.c b/drivers/serial/sn_console.c index fee6418e84c4..840815fde49b 100644 --- a/drivers/serial/sn_console.c +++ b/drivers/serial/sn_console.c @@ -572,6 +572,7 @@ static void sn_transmit_chars(struct sn_cons_port *port, int raw) if (uart_circ_empty(xmit) || uart_tx_stopped(&port->sc_port)) { /* Nothing to do. */ + ia64_sn_console_intr_disable(SAL_CONSOLE_INTR_XMIT); return; } diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index f9f9561c6bad..c3e3a95d3804 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -453,17 +453,6 @@ static int usbfs_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static struct dentry * get_dentry(struct dentry *parent, const char *name) -{ - struct qstr qstr; - - qstr.name = name; - qstr.len = strlen(name); - qstr.hash = full_name_hash(name,qstr.len); - return lookup_hash(&qstr,parent); -} - - /* * fs_create_by_name - create a file, given a name * @name: name of file @@ -496,7 +485,7 @@ static int fs_create_by_name (const char *name, mode_t mode, *dentry = NULL; down(&parent->d_inode->i_sem); - *dentry = get_dentry (parent, name); + *dentry = lookup_one_len(name, parent, strlen(name)); if (!IS_ERR(dentry)) { if ((mode & S_IFMT) == S_IFDIR) error = usbfs_mkdir (parent->d_inode, *dentry, mode); diff --git a/drivers/usb/input/hid-core.c b/drivers/usb/input/hid-core.c index 2d8bd9dcc6ed..740dec1f521d 100644 --- a/drivers/usb/input/hid-core.c +++ b/drivers/usb/input/hid-core.c @@ -1762,7 +1762,7 @@ static int hid_probe(struct usb_interface *intf, const struct usb_device_id *id) intf->altsetting->desc.bInterfaceNumber); if (!(hid = usb_hid_configure(intf))) - return -EIO; + return -ENODEV; hid_init_reports(hid); hid_dump_device(hid); @@ -1777,7 +1777,7 @@ static int hid_probe(struct usb_interface *intf, const struct usb_device_id *id) if (!hid->claimed) { printk ("HID device not claimed by input or hiddev\n"); hid_disconnect(intf); - return -EIO; + return -ENODEV; } printk(KERN_INFO); diff --git a/drivers/usb/media/pwc/pwc-uncompress.c b/drivers/usb/media/pwc/pwc-uncompress.c index bc3b1635eab0..ef4204eab6c4 100644 --- a/drivers/usb/media/pwc/pwc-uncompress.c +++ b/drivers/usb/media/pwc/pwc-uncompress.c @@ -118,9 +118,9 @@ int pwc_decompress(struct pwc_device *pdev) return -ENXIO; /* No such device or address: missing decompressor */ } +#if 0 switch (pdev->type) { -#if 0 case 675: case 680: case 690: @@ -128,18 +128,17 @@ int pwc_decompress(struct pwc_device *pdev) case 730: case 740: case 750: - pwc_dec23_decompress(&pdev->image, &pdev->view, &pdev->offset, - yuv, image, - flags, + pwc_dec23_decompress(&pdev->image, &pdev->view, + &pdev->offset, yuv, image, flags, pdev->decompress_data, pdev->vbandlength); break; case 645: case 646: /* TODO & FIXME */ -#endif - return -ENXIO; /* No such device or address: missing decompressor */ + return -ENXIO; /* Missing decompressor */ break; } +#endif } return 0; } diff --git a/fs/block_dev.c b/fs/block_dev.c index c0cbd1bc1a02..e0df94c37b7e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -777,8 +777,7 @@ static ssize_t blkdev_file_aio_write(struct kiocb *iocb, const char __user *buf, return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); } -static int block_ioctl(struct inode *inode, struct file *file, unsigned cmd, - unsigned long arg) +static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) { return blkdev_ioctl(file->f_mapping->host, file, cmd, arg); } @@ -803,7 +802,7 @@ struct file_operations def_blk_fops = { .aio_write = blkdev_file_aio_write, .mmap = generic_file_mmap, .fsync = block_fsync, - .ioctl = block_ioctl, + .unlocked_ioctl = block_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = compat_blkdev_ioctl, #endif diff --git a/fs/buffer.c b/fs/buffer.c index 0befa724ab98..13e5938a64f6 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -331,7 +331,7 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync) return ret; } -asmlinkage long sys_fsync(unsigned int fd) +static long do_fsync(unsigned int fd, int datasync) { struct file * file; struct address_space *mapping; @@ -342,14 +342,14 @@ asmlinkage long sys_fsync(unsigned int fd) if (!file) goto out; - mapping = file->f_mapping; - ret = -EINVAL; if (!file->f_op || !file->f_op->fsync) { /* Why? We can still call filemap_fdatawrite */ goto out_putf; } + mapping = file->f_mapping; + current->flags |= PF_SYNCWRITE; ret = filemap_fdatawrite(mapping); @@ -358,7 +358,7 @@ asmlinkage long sys_fsync(unsigned int fd) * which could cause livelocks in fsync_buffers_list */ down(&mapping->host->i_sem); - err = file->f_op->fsync(file, file->f_dentry, 0); + err = file->f_op->fsync(file, file->f_dentry, datasync); if (!ret) ret = err; up(&mapping->host->i_sem); @@ -373,39 +373,14 @@ out: return ret; } -asmlinkage long sys_fdatasync(unsigned int fd) +asmlinkage long sys_fsync(unsigned int fd) { - struct file * file; - struct address_space *mapping; - int ret, err; - - ret = -EBADF; - file = fget(fd); - if (!file) - goto out; - - ret = -EINVAL; - if (!file->f_op || !file->f_op->fsync) - goto out_putf; - - mapping = file->f_mapping; - - current->flags |= PF_SYNCWRITE; - ret = filemap_fdatawrite(mapping); - down(&mapping->host->i_sem); - err = file->f_op->fsync(file, file->f_dentry, 1); - if (!ret) - ret = err; - up(&mapping->host->i_sem); - err = filemap_fdatawait(mapping); - if (!ret) - ret = err; - current->flags &= ~PF_SYNCWRITE; + return do_fsync(fd, 0); +} -out_putf: - fput(file); -out: - return ret; +asmlinkage long sys_fdatasync(unsigned int fd) +{ + return do_fsync(fd, 1); } /* @@ -1951,7 +1926,6 @@ static int __block_prepare_write(struct inode *inode, struct page *page, if (err) break; if (buffer_new(bh)) { - clear_buffer_new(bh); unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); if (PageUptodate(page)) { @@ -1993,9 +1967,14 @@ static int __block_prepare_write(struct inode *inode, struct page *page, if (!buffer_uptodate(*wait_bh)) err = -EIO; } - if (!err) - return err; - + if (!err) { + bh = head; + do { + if (buffer_new(bh)) + clear_buffer_new(bh); + } while ((bh = bh->b_this_page) != head); + return 0; + } /* Error case: */ /* * Zero out any newly allocated blocks to avoid exposing stale diff --git a/fs/char_dev.c b/fs/char_dev.c index c1e3537909fc..e82aac9cc2f5 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -56,10 +56,21 @@ int get_chrdev_list(char *page) down(&chrdevs_lock); for (i = 0; i < ARRAY_SIZE(chrdevs) ; i++) { - for (cd = chrdevs[i]; cd; cd = cd->next) + for (cd = chrdevs[i]; cd; cd = cd->next) { + /* + * if the current name, plus the 5 extra characters + * in the device line for this entry + * would run us off the page, we're done + */ + if ((len+strlen(cd->name) + 5) >= PAGE_SIZE) + goto page_full; + + len += sprintf(page+len, "%3d %s\n", cd->major, cd->name); + } } +page_full: up(&chrdevs_lock); return len; diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index b529786699e7..a86ac4aeaedb 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -110,16 +110,6 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent) return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); } -static struct dentry * get_dentry(struct dentry *parent, const char *name) -{ - struct qstr qstr; - - qstr.name = name; - qstr.len = strlen(name); - qstr.hash = full_name_hash(name,qstr.len); - return lookup_hash(&qstr,parent); -} - static struct super_block *debug_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) @@ -157,7 +147,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode, *dentry = NULL; down(&parent->d_inode->i_sem); - *dentry = get_dentry (parent, name); + *dentry = lookup_one_len(name, parent, strlen(name)); if (!IS_ERR(dentry)) { if ((mode & S_IFMT) == S_IFDIR) error = debugfs_mkdir(parent->d_inode, *dentry, mode); diff --git a/fs/dquot.c b/fs/dquot.c index 3995ce7907cc..37212b039a4a 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -1519,14 +1519,22 @@ out_path: * This function is used when filesystem needs to initialize quotas * during mount time. */ -int vfs_quota_on_mount(int type, int format_id, struct dentry *dentry) +int vfs_quota_on_mount(struct super_block *sb, char *qf_name, + int format_id, int type) { + struct dentry *dentry; int error; + dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name)); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + error = security_quota_on(dentry); - if (error) - return error; - return vfs_quota_on_inode(dentry->d_inode, type, format_id); + if (!error) + error = vfs_quota_on_inode(dentry->d_inode, type, format_id); + + dput(dentry); + return error; } /* Generic routine for getting common part of quota structure */ diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 9900e333655a..6ab1dd0ca904 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -101,57 +101,6 @@ /* Maximum number of poll wake up nests we are allowing */ #define EP_MAX_POLLWAKE_NESTS 4 -/* Macro to allocate a "struct epitem" from the slab cache */ -#define EPI_MEM_ALLOC() (struct epitem *) kmem_cache_alloc(epi_cache, SLAB_KERNEL) - -/* Macro to free a "struct epitem" to the slab cache */ -#define EPI_MEM_FREE(p) kmem_cache_free(epi_cache, p) - -/* Macro to allocate a "struct eppoll_entry" from the slab cache */ -#define PWQ_MEM_ALLOC() (struct eppoll_entry *) kmem_cache_alloc(pwq_cache, SLAB_KERNEL) - -/* Macro to free a "struct eppoll_entry" to the slab cache */ -#define PWQ_MEM_FREE(p) kmem_cache_free(pwq_cache, p) - -/* Fast test to see if the file is an evenpoll file */ -#define IS_FILE_EPOLL(f) ((f)->f_op == &eventpoll_fops) - -/* Setup the structure that is used as key for the rb-tree */ -#define EP_SET_FFD(p, f, d) do { (p)->file = (f); (p)->fd = (d); } while (0) - -/* Compare rb-tree keys */ -#define EP_CMP_FFD(p1, p2) ((p1)->file > (p2)->file ? +1: \ - ((p1)->file < (p2)->file ? -1: (p1)->fd - (p2)->fd)) - -/* Special initialization for the rb-tree node to detect linkage */ -#define EP_RB_INITNODE(n) (n)->rb_parent = (n) - -/* Removes a node from the rb-tree and marks it for a fast is-linked check */ -#define EP_RB_ERASE(n, r) do { rb_erase(n, r); (n)->rb_parent = (n); } while (0) - -/* Fast check to verify that the item is linked to the main rb-tree */ -#define EP_RB_LINKED(n) ((n)->rb_parent != (n)) - -/* - * Remove the item from the list and perform its initialization. - * This is useful for us because we can test if the item is linked - * using "EP_IS_LINKED(p)". - */ -#define EP_LIST_DEL(p) do { list_del(p); INIT_LIST_HEAD(p); } while (0) - -/* Tells us if the item is currently linked */ -#define EP_IS_LINKED(p) (!list_empty(p)) - -/* Get the "struct epitem" from a wait queue pointer */ -#define EP_ITEM_FROM_WAIT(p) ((struct epitem *) container_of(p, struct eppoll_entry, wait)->base) - -/* Get the "struct epitem" from an epoll queue wrapper */ -#define EP_ITEM_FROM_EPQUEUE(p) (container_of(p, struct ep_pqueue, pt)->epi) - -/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */ -#define EP_OP_HASH_EVENT(op) ((op) != EPOLL_CTL_DEL) - - struct epoll_filefd { struct file *file; int fd; @@ -357,6 +306,82 @@ static struct dentry_operations eventpollfs_dentry_operations = { +/* Fast test to see if the file is an evenpoll file */ +static inline int is_file_epoll(struct file *f) +{ + return f->f_op == &eventpoll_fops; +} + +/* Setup the structure that is used as key for the rb-tree */ +static inline void ep_set_ffd(struct epoll_filefd *ffd, + struct file *file, int fd) +{ + ffd->file = file; + ffd->fd = fd; +} + +/* Compare rb-tree keys */ +static inline int ep_cmp_ffd(struct epoll_filefd *p1, + struct epoll_filefd *p2) +{ + return (p1->file > p2->file ? +1: + (p1->file < p2->file ? -1 : p1->fd - p2->fd)); +} + +/* Special initialization for the rb-tree node to detect linkage */ +static inline void ep_rb_initnode(struct rb_node *n) +{ + n->rb_parent = n; +} + +/* Removes a node from the rb-tree and marks it for a fast is-linked check */ +static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r) +{ + rb_erase(n, r); + n->rb_parent = n; +} + +/* Fast check to verify that the item is linked to the main rb-tree */ +static inline int ep_rb_linked(struct rb_node *n) +{ + return n->rb_parent != n; +} + +/* + * Remove the item from the list and perform its initialization. + * This is useful for us because we can test if the item is linked + * using "ep_is_linked(p)". + */ +static inline void ep_list_del(struct list_head *p) +{ + list_del(p); + INIT_LIST_HEAD(p); +} + +/* Tells us if the item is currently linked */ +static inline int ep_is_linked(struct list_head *p) +{ + return !list_empty(p); +} + +/* Get the "struct epitem" from a wait queue pointer */ +static inline struct epitem * ep_item_from_wait(wait_queue_t *p) +{ + return container_of(p, struct eppoll_entry, wait)->base; +} + +/* Get the "struct epitem" from an epoll queue wrapper */ +static inline struct epitem * ep_item_from_epqueue(poll_table *p) +{ + return container_of(p, struct ep_pqueue, pt)->epi; +} + +/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */ +static inline int ep_op_hash_event(int op) +{ + return op != EPOLL_CTL_DEL; +} + /* Initialize the poll safe wake up structure */ static void ep_poll_safewake_init(struct poll_safewake *psw) { @@ -456,7 +481,7 @@ void eventpoll_release_file(struct file *file) epi = list_entry(lsthead->next, struct epitem, fllink); ep = epi->ep; - EP_LIST_DEL(&epi->fllink); + ep_list_del(&epi->fllink); down_write(&ep->sem); ep_remove(ep, epi); up_write(&ep->sem); @@ -534,7 +559,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event) current, epfd, op, fd, event)); error = -EFAULT; - if (EP_OP_HASH_EVENT(op) && + if (ep_op_hash_event(op) && copy_from_user(&epds, event, sizeof(struct epoll_event))) goto eexit_1; @@ -560,7 +585,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event) * adding an epoll file descriptor inside itself. */ error = -EINVAL; - if (file == tfile || !IS_FILE_EPOLL(file)) + if (file == tfile || !is_file_epoll(file)) goto eexit_3; /* @@ -656,7 +681,7 @@ asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events, * the user passed to us _is_ an eventpoll file. */ error = -EINVAL; - if (!IS_FILE_EPOLL(file)) + if (!is_file_epoll(file)) goto eexit_2; /* @@ -831,11 +856,11 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) struct epitem *epi, *epir = NULL; struct epoll_filefd ffd; - EP_SET_FFD(&ffd, file, fd); + ep_set_ffd(&ffd, file, fd); read_lock_irqsave(&ep->lock, flags); for (rbp = ep->rbr.rb_node; rbp; ) { epi = rb_entry(rbp, struct epitem, rbn); - kcmp = EP_CMP_FFD(&ffd, &epi->ffd); + kcmp = ep_cmp_ffd(&ffd, &epi->ffd); if (kcmp > 0) rbp = rbp->rb_right; else if (kcmp < 0) @@ -875,7 +900,7 @@ static void ep_release_epitem(struct epitem *epi) { if (atomic_dec_and_test(&epi->usecnt)) - EPI_MEM_FREE(epi); + kmem_cache_free(epi_cache, epi); } @@ -886,10 +911,10 @@ static void ep_release_epitem(struct epitem *epi) static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, poll_table *pt) { - struct epitem *epi = EP_ITEM_FROM_EPQUEUE(pt); + struct epitem *epi = ep_item_from_epqueue(pt); struct eppoll_entry *pwq; - if (epi->nwait >= 0 && (pwq = PWQ_MEM_ALLOC())) { + if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, SLAB_KERNEL))) { init_waitqueue_func_entry(&pwq->wait, ep_poll_callback); pwq->whead = whead; pwq->base = epi; @@ -912,7 +937,7 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) while (*p) { parent = *p; epic = rb_entry(parent, struct epitem, rbn); - kcmp = EP_CMP_FFD(&epi->ffd, &epic->ffd); + kcmp = ep_cmp_ffd(&epi->ffd, &epic->ffd); if (kcmp > 0) p = &parent->rb_right; else @@ -932,17 +957,17 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, struct ep_pqueue epq; error = -ENOMEM; - if (!(epi = EPI_MEM_ALLOC())) + if (!(epi = kmem_cache_alloc(epi_cache, SLAB_KERNEL))) goto eexit_1; /* Item initialization follow here ... */ - EP_RB_INITNODE(&epi->rbn); + ep_rb_initnode(&epi->rbn); INIT_LIST_HEAD(&epi->rdllink); INIT_LIST_HEAD(&epi->fllink); INIT_LIST_HEAD(&epi->txlink); INIT_LIST_HEAD(&epi->pwqlist); epi->ep = ep; - EP_SET_FFD(&epi->ffd, tfile, fd); + ep_set_ffd(&epi->ffd, tfile, fd); epi->event = *event; atomic_set(&epi->usecnt, 1); epi->nwait = 0; @@ -978,7 +1003,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, ep_rbtree_insert(ep, epi); /* If the file is already "ready" we drop it inside the ready list */ - if ((revents & event->events) && !EP_IS_LINKED(&epi->rdllink)) { + if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); /* Notify waiting tasks that events are available */ @@ -1007,11 +1032,11 @@ eexit_2: * allocated wait queue. */ write_lock_irqsave(&ep->lock, flags); - if (EP_IS_LINKED(&epi->rdllink)) - EP_LIST_DEL(&epi->rdllink); + if (ep_is_linked(&epi->rdllink)) + ep_list_del(&epi->rdllink); write_unlock_irqrestore(&ep->lock, flags); - EPI_MEM_FREE(epi); + kmem_cache_free(epi_cache, epi); eexit_1: return error; } @@ -1050,14 +1075,14 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even * If the item is not linked to the hash it means that it's on its * way toward the removal. Do nothing in this case. */ - if (EP_RB_LINKED(&epi->rbn)) { + if (ep_rb_linked(&epi->rbn)) { /* * If the item is "hot" and it is not registered inside the ready * list, push it inside. If the item is not "hot" and it is currently * registered inside the ready list, unlink it. */ if (revents & event->events) { - if (!EP_IS_LINKED(&epi->rdllink)) { + if (!ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); /* Notify waiting tasks that events are available */ @@ -1097,9 +1122,9 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) while (!list_empty(lsthead)) { pwq = list_entry(lsthead->next, struct eppoll_entry, llink); - EP_LIST_DEL(&pwq->llink); + ep_list_del(&pwq->llink); remove_wait_queue(pwq->whead, &pwq->wait); - PWQ_MEM_FREE(pwq); + kmem_cache_free(pwq_cache, pwq); } } } @@ -1118,7 +1143,7 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi) * The check protect us from doing a double unlink ( crash ). */ error = -ENOENT; - if (!EP_RB_LINKED(&epi->rbn)) + if (!ep_rb_linked(&epi->rbn)) goto eexit_1; /* @@ -1133,14 +1158,14 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi) * This operation togheter with the above check closes the door to * double unlinks. */ - EP_RB_ERASE(&epi->rbn, &ep->rbr); + ep_rb_erase(&epi->rbn, &ep->rbr); /* * If the item we are going to remove is inside the ready file descriptors * we want to remove it from this list to avoid stale events. */ - if (EP_IS_LINKED(&epi->rdllink)) - EP_LIST_DEL(&epi->rdllink); + if (ep_is_linked(&epi->rdllink)) + ep_list_del(&epi->rdllink); error = 0; eexit_1: @@ -1174,8 +1199,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) /* Remove the current item from the list of epoll hooks */ spin_lock(&file->f_ep_lock); - if (EP_IS_LINKED(&epi->fllink)) - EP_LIST_DEL(&epi->fllink); + if (ep_is_linked(&epi->fllink)) + ep_list_del(&epi->fllink); spin_unlock(&file->f_ep_lock); /* We need to acquire the write IRQ lock before calling ep_unlink() */ @@ -1210,7 +1235,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k { int pwake = 0; unsigned long flags; - struct epitem *epi = EP_ITEM_FROM_WAIT(wait); + struct epitem *epi = ep_item_from_wait(wait); struct eventpoll *ep = epi->ep; DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", @@ -1228,7 +1253,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k goto is_disabled; /* If this file is already in the ready list we exit soon */ - if (EP_IS_LINKED(&epi->rdllink)) + if (ep_is_linked(&epi->rdllink)) goto is_linked; list_add_tail(&epi->rdllink, &ep->rdllist); @@ -1307,7 +1332,7 @@ static int ep_collect_ready_items(struct eventpoll *ep, struct list_head *txlist lnk = lnk->next; /* If this file is already in the ready list we exit soon */ - if (!EP_IS_LINKED(&epi->txlink)) { + if (!ep_is_linked(&epi->txlink)) { /* * This is initialized in this way so that the default * behaviour of the reinjecting code will be to push back @@ -1322,7 +1347,7 @@ static int ep_collect_ready_items(struct eventpoll *ep, struct list_head *txlist /* * Unlink the item from the ready list. */ - EP_LIST_DEL(&epi->rdllink); + ep_list_del(&epi->rdllink); } } @@ -1401,7 +1426,7 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist) epi = list_entry(txlist->next, struct epitem, txlink); /* Unlink the current item from the transfer list */ - EP_LIST_DEL(&epi->txlink); + ep_list_del(&epi->txlink); /* * If the item is no more linked to the interest set, we don't @@ -1410,8 +1435,8 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist) * item is set to have an Edge Triggered behaviour, we don't have * to push it back either. */ - if (EP_RB_LINKED(&epi->rbn) && !(epi->event.events & EPOLLET) && - (epi->revents & epi->event.events) && !EP_IS_LINKED(&epi->rdllink)) { + if (ep_rb_linked(&epi->rbn) && !(epi->event.events & EPOLLET) && + (epi->revents & epi->event.events) && !ep_is_linked(&epi->rdllink)) { list_add_tail(&epi->rdllink, &ep->rdllist); ricnt++; } diff --git a/fs/exec.c b/fs/exec.c index 3a4b35a14c0d..48871917d363 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -58,6 +58,9 @@ int core_uses_pid; char core_pattern[65] = "core"; +int suid_dumpable = 0; + +EXPORT_SYMBOL(suid_dumpable); /* The maximal length of core_pattern is also specified in sysctl.c */ static struct linux_binfmt *formats; @@ -864,6 +867,9 @@ int flush_old_exec(struct linux_binprm * bprm) if (current->euid == current->uid && current->egid == current->gid) current->mm->dumpable = 1; + else + current->mm->dumpable = suid_dumpable; + name = bprm->filename; /* Copies the binary name from after last slash */ @@ -884,7 +890,7 @@ int flush_old_exec(struct linux_binprm * bprm) permission(bprm->file->f_dentry->d_inode,MAY_READ, NULL) || (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) { suid_keys(current); - current->mm->dumpable = 0; + current->mm->dumpable = suid_dumpable; } /* An exec changes our domain. We are no longer part of the thread @@ -1432,6 +1438,8 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) struct inode * inode; struct file * file; int retval = 0; + int fsuid = current->fsuid; + int flag = 0; binfmt = current->binfmt; if (!binfmt || !binfmt->core_dump) @@ -1441,6 +1449,16 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) up_write(&mm->mmap_sem); goto fail; } + + /* + * We cannot trust fsuid as being the "true" uid of the + * process nor do we know its entire history. We only know it + * was tainted so we dump it as root in mode 2. + */ + if (mm->dumpable == 2) { /* Setuid core dump mode */ + flag = O_EXCL; /* Stop rewrite attacks */ + current->fsuid = 0; /* Dump root private */ + } mm->dumpable = 0; init_completion(&mm->core_done); spin_lock_irq(¤t->sighand->siglock); @@ -1466,7 +1484,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) lock_kernel(); format_corename(corename, core_pattern, signr); unlock_kernel(); - file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE, 0600); + file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 0600); if (IS_ERR(file)) goto fail_unlock; inode = file->f_dentry->d_inode; @@ -1491,6 +1509,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) close_fail: filp_close(file, NULL); fail_unlock: + current->fsuid = fsuid; complete_all(&mm->core_done); fail: return retval; diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 25f4a64fd6bc..213148c36ebe 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -396,12 +396,12 @@ static size_t ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size, const char *name, size_t name_len) { - const size_t size = sizeof(XATTR_NAME_ACL_ACCESS); + const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); if (!test_opt(inode->i_sb, POSIX_ACL)) return 0; if (list && size <= list_size) - memcpy(list, XATTR_NAME_ACL_ACCESS, size); + memcpy(list, POSIX_ACL_XATTR_ACCESS, size); return size; } @@ -409,12 +409,12 @@ static size_t ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size, const char *name, size_t name_len) { - const size_t size = sizeof(XATTR_NAME_ACL_DEFAULT); + const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); if (!test_opt(inode->i_sb, POSIX_ACL)) return 0; if (list && size <= list_size) - memcpy(list, XATTR_NAME_ACL_DEFAULT, size); + memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); return size; } @@ -506,14 +506,14 @@ ext2_xattr_set_acl_default(struct inode *inode, const char *name, } struct xattr_handler ext2_xattr_acl_access_handler = { - .prefix = XATTR_NAME_ACL_ACCESS, + .prefix = POSIX_ACL_XATTR_ACCESS, .list = ext2_xattr_list_acl_access, .get = ext2_xattr_get_acl_access, .set = ext2_xattr_set_acl_access, }; struct xattr_handler ext2_xattr_acl_default_handler = { - .prefix = XATTR_NAME_ACL_DEFAULT, + .prefix = POSIX_ACL_XATTR_DEFAULT, .list = ext2_xattr_list_acl_default, .get = ext2_xattr_get_acl_default, .set = ext2_xattr_set_acl_default, diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h index fed96ae81a7d..0bde85bafe38 100644 --- a/fs/ext2/acl.h +++ b/fs/ext2/acl.h @@ -4,7 +4,7 @@ (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> */ -#include <linux/xattr_acl.h> +#include <linux/posix_acl_xattr.h> #define EXT2_ACL_VERSION 0x0001 diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index 638c13a26c03..133f5aa581bb 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -417,12 +417,12 @@ static size_t ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, const char *name, size_t name_len) { - const size_t size = sizeof(XATTR_NAME_ACL_ACCESS); + const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); if (!test_opt(inode->i_sb, POSIX_ACL)) return 0; if (list && size <= list_len) - memcpy(list, XATTR_NAME_ACL_ACCESS, size); + memcpy(list, POSIX_ACL_XATTR_ACCESS, size); return size; } @@ -430,12 +430,12 @@ static size_t ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, const char *name, size_t name_len) { - const size_t size = sizeof(XATTR_NAME_ACL_DEFAULT); + const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); if (!test_opt(inode->i_sb, POSIX_ACL)) return 0; if (list && size <= list_len) - memcpy(list, XATTR_NAME_ACL_DEFAULT, size); + memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); return size; } @@ -535,14 +535,14 @@ ext3_xattr_set_acl_default(struct inode *inode, const char *name, } struct xattr_handler ext3_xattr_acl_access_handler = { - .prefix = XATTR_NAME_ACL_ACCESS, + .prefix = POSIX_ACL_XATTR_ACCESS, .list = ext3_xattr_list_acl_access, .get = ext3_xattr_get_acl_access, .set = ext3_xattr_set_acl_access, }; struct xattr_handler ext3_xattr_acl_default_handler = { - .prefix = XATTR_NAME_ACL_DEFAULT, + .prefix = POSIX_ACL_XATTR_DEFAULT, .list = ext3_xattr_list_acl_default, .get = ext3_xattr_get_acl_default, .set = ext3_xattr_set_acl_default, diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h index 98af0c0d0ba9..92d50b53a933 100644 --- a/fs/ext3/acl.h +++ b/fs/ext3/acl.h @@ -4,7 +4,7 @@ (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> */ -#include <linux/xattr_acl.h> +#include <linux/posix_acl_xattr.h> #define EXT3_ACL_VERSION 0x0001 diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 79742d824a0a..60e44e6dd7a6 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -932,8 +932,16 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, struct inode *dir = dentry->d_parent->d_inode; sb = dir->i_sb; - if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) - return NULL; + /* NFS may look up ".." - look at dx_root directory block */ + if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ + if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) + return NULL; + } else { + frame = frames; + frame->bh = NULL; /* for dx_release() */ + frame->at = (struct dx_entry *)frames; /* hack for zero entry*/ + dx_set_block(frame->at, 0); /* dx_root block is 0 */ + } hash = hinfo.hash; do { block = dx_get_block(frame->at); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 981ccb233ef5..9630fbfdc24a 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2348,22 +2348,8 @@ static int ext3_write_info(struct super_block *sb, int type) */ static int ext3_quota_on_mount(struct super_block *sb, int type) { - int err; - struct dentry *dentry; - struct qstr name = { .name = EXT3_SB(sb)->s_qf_names[type], - .hash = 0, - .len = strlen(EXT3_SB(sb)->s_qf_names[type])}; - - dentry = lookup_hash(&name, sb->s_root); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - err = vfs_quota_on_mount(type, EXT3_SB(sb)->s_jquota_fmt, dentry); - /* Now invalidate and put the dentry - quota got its own reference - * to inode and dentry has at least wrong hash so we had better - * throw it away */ - d_invalidate(dentry); - dput(dentry); - return err; + return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type], + EXT3_SB(sb)->s_jquota_fmt, type); } /* diff --git a/fs/file_table.c b/fs/file_table.c index 03d83cb686b1..fa7849fae134 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -63,42 +63,45 @@ static inline void file_free(struct file *f) */ struct file *get_empty_filp(void) { -static int old_max; + static int old_max; struct file * f; /* * Privileged users can go above max_files */ - if (files_stat.nr_files < files_stat.max_files || - capable(CAP_SYS_ADMIN)) { - f = kmem_cache_alloc(filp_cachep, GFP_KERNEL); - if (f) { - memset(f, 0, sizeof(*f)); - if (security_file_alloc(f)) { - file_free(f); - goto fail; - } - eventpoll_init_file(f); - atomic_set(&f->f_count, 1); - f->f_uid = current->fsuid; - f->f_gid = current->fsgid; - rwlock_init(&f->f_owner.lock); - /* f->f_version: 0 */ - INIT_LIST_HEAD(&f->f_list); - f->f_maxcount = INT_MAX; - return f; - } - } - + if (files_stat.nr_files >= files_stat.max_files && + !capable(CAP_SYS_ADMIN)) + goto over; + + f = kmem_cache_alloc(filp_cachep, GFP_KERNEL); + if (f == NULL) + goto fail; + + memset(f, 0, sizeof(*f)); + if (security_file_alloc(f)) + goto fail_sec; + + eventpoll_init_file(f); + atomic_set(&f->f_count, 1); + f->f_uid = current->fsuid; + f->f_gid = current->fsgid; + rwlock_init(&f->f_owner.lock); + /* f->f_version: 0 */ + INIT_LIST_HEAD(&f->f_list); + f->f_maxcount = INT_MAX; + return f; + +over: /* Ran out of filps - report that */ - if (files_stat.max_files >= old_max) { + if (files_stat.nr_files > old_max) { printk(KERN_INFO "VFS: file-max limit %d reached\n", files_stat.max_files); - old_max = files_stat.max_files; - } else { - /* Big problems... */ - printk(KERN_WARNING "VFS: filp allocation failed\n"); + old_max = files_stat.nr_files; } + goto fail; + +fail_sec: + file_free(f); fail: return NULL; } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 8e050fa58218..e94ab398b717 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -485,32 +485,6 @@ static void set_sb_syncing(int val) spin_unlock(&sb_lock); } -/* - * Find a superblock with inodes that need to be synced - */ -static struct super_block *get_super_to_sync(void) -{ - struct super_block *sb; -restart: - spin_lock(&sb_lock); - sb = sb_entry(super_blocks.prev); - for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) { - if (sb->s_syncing) - continue; - sb->s_syncing = 1; - sb->s_count++; - spin_unlock(&sb_lock); - down_read(&sb->s_umount); - if (!sb->s_root) { - drop_super(sb); - goto restart; - } - return sb; - } - spin_unlock(&sb_lock); - return NULL; -} - /** * sync_inodes - writes all inodes to disk * @wait: wait for completion @@ -530,23 +504,39 @@ restart: * outstanding dirty inodes, the writeback goes block-at-a-time within the * filesystem's write_inode(). This is extremely slow. */ -void sync_inodes(int wait) +static void __sync_inodes(int wait) { struct super_block *sb; - set_sb_syncing(0); - while ((sb = get_super_to_sync()) != NULL) { - sync_inodes_sb(sb, 0); - sync_blockdev(sb->s_bdev); - drop_super(sb); + spin_lock(&sb_lock); +restart: + list_for_each_entry(sb, &super_blocks, s_list) { + if (sb->s_syncing) + continue; + sb->s_syncing = 1; + sb->s_count++; + spin_unlock(&sb_lock); + down_read(&sb->s_umount); + if (sb->s_root) { + sync_inodes_sb(sb, wait); + sync_blockdev(sb->s_bdev); + } + up_read(&sb->s_umount); + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; } + spin_unlock(&sb_lock); +} + +void sync_inodes(int wait) +{ + set_sb_syncing(0); + __sync_inodes(0); + if (wait) { set_sb_syncing(0); - while ((sb = get_super_to_sync()) != NULL) { - sync_inodes_sb(sb, 1); - sync_blockdev(sb->s_bdev); - drop_super(sb); - } + __sync_inodes(1); } } diff --git a/fs/inode.c b/fs/inode.c index 801fe7f36280..1f9a3a2b89bc 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -500,7 +500,7 @@ repeat: continue; if (!test(inode, data)) continue; - if (inode->i_state & (I_FREEING|I_CLEAR)) { + if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { __wait_on_freeing_inode(inode); goto repeat; } @@ -525,7 +525,7 @@ repeat: continue; if (inode->i_sb != sb) continue; - if (inode->i_state & (I_FREEING|I_CLEAR)) { + if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { __wait_on_freeing_inode(inode); goto repeat; } @@ -727,7 +727,7 @@ EXPORT_SYMBOL(iunique); struct inode *igrab(struct inode *inode) { spin_lock(&inode_lock); - if (!(inode->i_state & I_FREEING)) + if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) __iget(inode); else /* @@ -1024,17 +1024,21 @@ static void generic_forget_inode(struct inode *inode) if (!(inode->i_state & (I_DIRTY|I_LOCK))) list_move(&inode->i_list, &inode_unused); inodes_stat.nr_unused++; - spin_unlock(&inode_lock); - if (!sb || (sb->s_flags & MS_ACTIVE)) + if (!sb || (sb->s_flags & MS_ACTIVE)) { + spin_unlock(&inode_lock); return; + } + inode->i_state |= I_WILL_FREE; + spin_unlock(&inode_lock); write_inode_now(inode, 1); spin_lock(&inode_lock); + inode->i_state &= ~I_WILL_FREE; inodes_stat.nr_unused--; hlist_del_init(&inode->i_hash); } list_del_init(&inode->i_list); list_del_init(&inode->i_sb_list); - inode->i_state|=I_FREEING; + inode->i_state |= I_FREEING; inodes_stat.nr_inodes--; spin_unlock(&inode_lock); if (inode->i_data.nrpages) diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 30a2bf9eeda5..e892dab40c26 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -21,6 +21,7 @@ #include <linux/sched.h> #include <linux/fs.h> #include <linux/quotaops.h> +#include <linux/posix_acl_xattr.h> #include "jfs_incore.h" #include "jfs_xattr.h" #include "jfs_acl.h" @@ -36,11 +37,11 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type) switch(type) { case ACL_TYPE_ACCESS: - ea_name = XATTR_NAME_ACL_ACCESS; + ea_name = POSIX_ACL_XATTR_ACCESS; p_acl = &ji->i_acl; break; case ACL_TYPE_DEFAULT: - ea_name = XATTR_NAME_ACL_DEFAULT; + ea_name = POSIX_ACL_XATTR_DEFAULT; p_acl = &ji->i_default_acl; break; default: @@ -88,11 +89,11 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) switch(type) { case ACL_TYPE_ACCESS: - ea_name = XATTR_NAME_ACL_ACCESS; + ea_name = POSIX_ACL_XATTR_ACCESS; p_acl = &ji->i_acl; break; case ACL_TYPE_DEFAULT: - ea_name = XATTR_NAME_ACL_DEFAULT; + ea_name = POSIX_ACL_XATTR_DEFAULT; p_acl = &ji->i_default_acl; if (!S_ISDIR(inode->i_mode)) return acl ? -EACCES : 0; @@ -101,7 +102,7 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) return -EINVAL; } if (acl) { - size = xattr_acl_size(acl->a_count); + size = posix_acl_xattr_size(acl->a_count); value = kmalloc(size, GFP_KERNEL); if (!value) return -ENOMEM; diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h index d2ae430adecf..a3acd3eec059 100644 --- a/fs/jfs/jfs_acl.h +++ b/fs/jfs/jfs_acl.h @@ -20,8 +20,6 @@ #ifdef CONFIG_JFS_POSIX_ACL -#include <linux/xattr_acl.h> - int jfs_permission(struct inode *, int, struct nameidata *); int jfs_init_acl(struct inode *, struct inode *); int jfs_setattr(struct dentry *, struct iattr *); diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 810a3653d8b3..ee32211288ce 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -24,6 +24,7 @@ #include <linux/completion.h> #include <linux/vfs.h> #include <linux/moduleparam.h> +#include <linux/posix_acl.h> #include <asm/uaccess.h> #include "jfs_incore.h" diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 6016373701a3..ee438d429d45 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -19,6 +19,7 @@ #include <linux/fs.h> #include <linux/xattr.h> +#include <linux/posix_acl_xattr.h> #include <linux/quotaops.h> #include "jfs_incore.h" #include "jfs_superblock.h" @@ -718,9 +719,9 @@ static int can_set_system_xattr(struct inode *inode, const char *name, return -EPERM; /* - * XATTR_NAME_ACL_ACCESS is tied to i_mode + * POSIX_ACL_XATTR_ACCESS is tied to i_mode */ - if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) { + if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) { acl = posix_acl_from_xattr(value, value_len); if (IS_ERR(acl)) { rc = PTR_ERR(acl); @@ -750,7 +751,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name, JFS_IP(inode)->i_acl = JFS_ACL_NOT_CACHED; return 0; - } else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) { + } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) { acl = posix_acl_from_xattr(value, value_len); if (IS_ERR(acl)) { rc = PTR_ERR(acl); diff --git a/fs/namei.c b/fs/namei.c index a7f7f44119b3..fa8df81ce8ca 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1577,19 +1577,35 @@ do_link: * * Simple function to lookup and return a dentry and create it * if it doesn't exist. Is SMP-safe. + * + * Returns with nd->dentry->d_inode->i_sem locked. */ struct dentry *lookup_create(struct nameidata *nd, int is_dir) { - struct dentry *dentry; + struct dentry *dentry = ERR_PTR(-EEXIST); down(&nd->dentry->d_inode->i_sem); - dentry = ERR_PTR(-EEXIST); + /* + * Yucky last component or no last component at all? + * (foo/., foo/.., /////) + */ if (nd->last_type != LAST_NORM) goto fail; nd->flags &= ~LOOKUP_PARENT; + + /* + * Do the final lookup. + */ dentry = lookup_hash(&nd->last, nd->dentry); if (IS_ERR(dentry)) goto fail; + + /* + * Special case - lookup gave negative, but... we had foo/bar/ + * From the vfs_mknod() POV we just have a negative dentry - + * all is fine. Let's be bastards - you had / on the end, you've + * been asking for (non-existent) directory. -ENOENT for you. + */ if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) goto enoent; return dentry; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index d6a30c844de3..6537f2c4ae44 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -751,11 +751,6 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, retval = -EFAULT; if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len)) goto out; - if (file->f_error) { - retval = file->f_error; - file->f_error = 0; - goto out; - } retval = -EFBIG; if (limit != RLIM_INFINITY) { if (pos >= limit) { diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ae3940dc85cc..de340ffd33c3 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -50,7 +50,6 @@ #include <linux/posix_acl.h> #ifdef CONFIG_NFSD_V4 #include <linux/posix_acl_xattr.h> -#include <linux/xattr_acl.h> #include <linux/xattr.h> #include <linux/nfs4.h> #include <linux/nfs4_acl.h> @@ -425,13 +424,13 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out_nfserr; if (pacl) { - error = set_nfsv4_acl_one(dentry, pacl, XATTR_NAME_ACL_ACCESS); + error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS); if (error < 0) goto out_nfserr; } if (dpacl) { - error = set_nfsv4_acl_one(dentry, dpacl, XATTR_NAME_ACL_DEFAULT); + error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT); if (error < 0) goto out_nfserr; } @@ -498,7 +497,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac struct posix_acl *pacl = NULL, *dpacl = NULL; unsigned int flags = 0; - pacl = _get_posix_acl(dentry, XATTR_NAME_ACL_ACCESS); + pacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_ACCESS); if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA) pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); if (IS_ERR(pacl)) { @@ -508,7 +507,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac } if (S_ISDIR(inode->i_mode)) { - dpacl = _get_posix_acl(dentry, XATTR_NAME_ACL_DEFAULT); + dpacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_DEFAULT); if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA) dpacl = NULL; else if (IS_ERR(dpacl)) { diff --git a/fs/open.c b/fs/open.c index 963bd81a44c8..8ec63f735918 100644 --- a/fs/open.c +++ b/fs/open.c @@ -21,6 +21,7 @@ #include <linux/vfs.h> #include <asm/uaccess.h> #include <linux/fs.h> +#include <linux/personality.h> #include <linux/pagemap.h> #include <linux/syscalls.h> @@ -933,31 +934,27 @@ EXPORT_SYMBOL(fd_install); asmlinkage long sys_open(const char __user * filename, int flags, int mode) { char * tmp; - int fd, error; + int fd; + + if (force_o_largefile()) + flags |= O_LARGEFILE; -#if BITS_PER_LONG != 32 - flags |= O_LARGEFILE; -#endif tmp = getname(filename); fd = PTR_ERR(tmp); if (!IS_ERR(tmp)) { fd = get_unused_fd(); if (fd >= 0) { struct file *f = filp_open(tmp, flags, mode); - error = PTR_ERR(f); - if (IS_ERR(f)) - goto out_error; - fd_install(fd, f); + if (IS_ERR(f)) { + put_unused_fd(fd); + fd = PTR_ERR(f); + } else { + fd_install(fd, f); + } } -out: putname(tmp); } return fd; - -out_error: - put_unused_fd(fd); - fd = error; - goto out; } EXPORT_SYMBOL_GPL(sys_open); @@ -980,23 +977,15 @@ asmlinkage long sys_creat(const char __user * pathname, int mode) */ int filp_close(struct file *filp, fl_owner_t id) { - int retval; - - /* Report and clear outstanding errors */ - retval = filp->f_error; - if (retval) - filp->f_error = 0; + int retval = 0; if (!file_count(filp)) { printk(KERN_ERR "VFS: Close: file count is 0\n"); - return retval; + return 0; } - if (filp->f_op && filp->f_op->flush) { - int err = filp->f_op->flush(filp); - if (!retval) - retval = err; - } + if (filp->f_op && filp->f_op->flush) + retval = filp->f_op->flush(filp); dnotify_flush(filp, id); locks_remove_posix(filp, id); diff --git a/fs/proc/base.c b/fs/proc/base.c index e31903aadd96..ace151fa4878 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -314,7 +314,7 @@ static int may_ptrace_attach(struct task_struct *task) (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) goto out; rmb(); - if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) + if (task->mm->dumpable != 1 && !capable(CAP_SYS_PTRACE)) goto out; if (security_ptrace(current, task)) goto out; @@ -1113,7 +1113,9 @@ static int task_dumpable(struct task_struct *task) if (mm) dumpable = mm->dumpable; task_unlock(task); - return dumpable; + if(dumpable == 1) + return 1; + return 0; } diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 63a9fbf1ac51..94b570ad037d 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -451,7 +451,7 @@ static int devices_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { int len = get_chrdev_list(page); - len += get_blkdev_list(page+len); + len += get_blkdev_list(page+len, len); return proc_calc_metrics(page, start, off, count, eof, len); } diff --git a/fs/quota.c b/fs/quota.c index 3f0333a51a23..f5d1cff55196 100644 --- a/fs/quota.c +++ b/fs/quota.c @@ -149,36 +149,6 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t return error; } -static struct super_block *get_super_to_sync(int type) -{ - struct list_head *head; - int cnt, dirty; - -restart: - spin_lock(&sb_lock); - list_for_each(head, &super_blocks) { - struct super_block *sb = list_entry(head, struct super_block, s_list); - - /* This test just improves performance so it needn't be reliable... */ - for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++) - if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt) - && info_any_dirty(&sb_dqopt(sb)->info[cnt])) - dirty = 1; - if (!dirty) - continue; - sb->s_count++; - spin_unlock(&sb_lock); - down_read(&sb->s_umount); - if (!sb->s_root) { - drop_super(sb); - goto restart; - } - return sb; - } - spin_unlock(&sb_lock); - return NULL; -} - static void quota_sync_sb(struct super_block *sb, int type) { int cnt; @@ -219,17 +189,35 @@ static void quota_sync_sb(struct super_block *sb, int type) void sync_dquots(struct super_block *sb, int type) { + int cnt, dirty; + if (sb) { if (sb->s_qcop->quota_sync) quota_sync_sb(sb, type); + return; } - else { - while ((sb = get_super_to_sync(type)) != NULL) { - if (sb->s_qcop->quota_sync) - quota_sync_sb(sb, type); - drop_super(sb); - } + + spin_lock(&sb_lock); +restart: + list_for_each_entry(sb, &super_blocks, s_list) { + /* This test just improves performance so it needn't be reliable... */ + for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++) + if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt) + && info_any_dirty(&sb_dqopt(sb)->info[cnt])) + dirty = 1; + if (!dirty) + continue; + sb->s_count++; + spin_unlock(&sb_lock); + down_read(&sb->s_umount); + if (sb->s_root && sb->s_qcop->quota_sync) + quota_sync_sb(sb, type); + up_read(&sb->s_umount); + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; } + spin_unlock(&sb_lock); } /* Copy parameters and call proper function */ diff --git a/fs/read_write.c b/fs/read_write.c index c4c2bee373ed..9292f5fa4d62 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -203,6 +203,16 @@ Einval: return -EINVAL; } +static void wait_on_retry_sync_kiocb(struct kiocb *iocb) +{ + set_current_state(TASK_UNINTERRUPTIBLE); + if (!kiocbIsKicked(iocb)) + schedule(); + else + kiocbClearKicked(iocb); + __set_current_state(TASK_RUNNING); +} + ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) { struct kiocb kiocb; @@ -210,7 +220,10 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos); + while (-EIOCBRETRY == + (ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos))) + wait_on_retry_sync_kiocb(&kiocb); + if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); *ppos = kiocb.ki_pos; @@ -258,7 +271,10 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos); + while (-EIOCBRETRY == + (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos))) + wait_on_retry_sync_kiocb(&kiocb); + if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); *ppos = kiocb.ki_pos; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index b35b87744983..aae0779ed5b4 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1932,27 +1932,12 @@ static int reiserfs_write_info(struct super_block *sb, int type) } /* - * Turn on quotas during mount time - we need to find - * the quota file and such... + * Turn on quotas during mount time - we need to find the quota file and such... */ static int reiserfs_quota_on_mount(struct super_block *sb, int type) { - int err; - struct dentry *dentry; - struct qstr name = { .name = REISERFS_SB(sb)->s_qf_names[type], - .hash = 0, - .len = strlen(REISERFS_SB(sb)->s_qf_names[type])}; - - dentry = lookup_hash(&name, sb->s_root); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - err = vfs_quota_on_mount(type, REISERFS_SB(sb)->s_jquota_fmt, dentry); - /* Now invalidate and put the dentry - quota got its own reference - * to inode and dentry has at least wrong hash so we had better - * throw it away */ - d_invalidate(dentry); - dput(dentry); - return err; + return vfs_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type], + REISERFS_SB(sb)->s_jquota_fmt, type); } /* diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index e302071903a1..c312881c5f53 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -4,7 +4,7 @@ #include <linux/errno.h> #include <linux/pagemap.h> #include <linux/xattr.h> -#include <linux/xattr_acl.h> +#include <linux/posix_acl_xattr.h> #include <linux/reiserfs_xattr.h> #include <linux/reiserfs_acl.h> #include <asm/uaccess.h> @@ -192,11 +192,11 @@ reiserfs_get_acl(struct inode *inode, int type) switch (type) { case ACL_TYPE_ACCESS: - name = XATTR_NAME_ACL_ACCESS; + name = POSIX_ACL_XATTR_ACCESS; p_acl = &reiserfs_i->i_acl_access; break; case ACL_TYPE_DEFAULT: - name = XATTR_NAME_ACL_DEFAULT; + name = POSIX_ACL_XATTR_DEFAULT; p_acl = &reiserfs_i->i_acl_default; break; default: @@ -260,7 +260,7 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) switch (type) { case ACL_TYPE_ACCESS: - name = XATTR_NAME_ACL_ACCESS; + name = POSIX_ACL_XATTR_ACCESS; p_acl = &reiserfs_i->i_acl_access; if (acl) { mode_t mode = inode->i_mode; @@ -275,7 +275,7 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) } break; case ACL_TYPE_DEFAULT: - name = XATTR_NAME_ACL_DEFAULT; + name = POSIX_ACL_XATTR_DEFAULT; p_acl = &reiserfs_i->i_acl_default; if (!S_ISDIR (inode->i_mode)) return acl ? -EACCES : 0; @@ -468,7 +468,7 @@ static int posix_acl_access_get(struct inode *inode, const char *name, void *buffer, size_t size) { - if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1) + if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1) return -EINVAL; return xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); } @@ -477,7 +477,7 @@ static int posix_acl_access_set(struct inode *inode, const char *name, const void *value, size_t size, int flags) { - if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1) + if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1) return -EINVAL; return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); } @@ -487,7 +487,7 @@ posix_acl_access_del (struct inode *inode, const char *name) { struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); struct posix_acl **acl = &reiserfs_i->i_acl_access; - if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1) + if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1) return -EINVAL; if (!IS_ERR (*acl) && *acl) { posix_acl_release (*acl); @@ -510,7 +510,7 @@ posix_acl_access_list (struct inode *inode, const char *name, int namelen, char } struct reiserfs_xattr_handler posix_acl_access_handler = { - .prefix = XATTR_NAME_ACL_ACCESS, + .prefix = POSIX_ACL_XATTR_ACCESS, .get = posix_acl_access_get, .set = posix_acl_access_set, .del = posix_acl_access_del, @@ -521,7 +521,7 @@ static int posix_acl_default_get (struct inode *inode, const char *name, void *buffer, size_t size) { - if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1) + if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1) return -EINVAL; return xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); } @@ -530,7 +530,7 @@ static int posix_acl_default_set(struct inode *inode, const char *name, const void *value, size_t size, int flags) { - if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1) + if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1) return -EINVAL; return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); } @@ -540,7 +540,7 @@ posix_acl_default_del (struct inode *inode, const char *name) { struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); struct posix_acl **acl = &reiserfs_i->i_acl_default; - if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1) + if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1) return -EINVAL; if (!IS_ERR (*acl) && *acl) { posix_acl_release (*acl); @@ -563,7 +563,7 @@ posix_acl_default_list (struct inode *inode, const char *name, int namelen, char } struct reiserfs_xattr_handler posix_acl_default_handler = { - .prefix = XATTR_NAME_ACL_DEFAULT, + .prefix = POSIX_ACL_XATTR_DEFAULT, .get = posix_acl_default_get, .set = posix_acl_default_set, .del = posix_acl_default_del, diff --git a/fs/super.c b/fs/super.c index 573bcc81bb82..25bc1ec6bc5d 100644 --- a/fs/super.c +++ b/fs/super.c @@ -341,20 +341,22 @@ static inline void write_super(struct super_block *sb) */ void sync_supers(void) { - struct super_block * sb; -restart: + struct super_block *sb; + spin_lock(&sb_lock); - sb = sb_entry(super_blocks.next); - while (sb != sb_entry(&super_blocks)) +restart: + list_for_each_entry(sb, &super_blocks, s_list) { if (sb->s_dirt) { sb->s_count++; spin_unlock(&sb_lock); down_read(&sb->s_umount); write_super(sb); - drop_super(sb); - goto restart; - } else - sb = sb_entry(sb->s_list.next); + up_read(&sb->s_umount); + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; + } + } spin_unlock(&sb_lock); } @@ -381,20 +383,16 @@ void sync_filesystems(int wait) down(&mutex); /* Could be down_interruptible */ spin_lock(&sb_lock); - for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks); - sb = sb_entry(sb->s_list.next)) { + list_for_each_entry(sb, &super_blocks, s_list) { if (!sb->s_op->sync_fs) continue; if (sb->s_flags & MS_RDONLY) continue; sb->s_need_sync_fs = 1; } - spin_unlock(&sb_lock); restart: - spin_lock(&sb_lock); - for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks); - sb = sb_entry(sb->s_list.next)) { + list_for_each_entry(sb, &super_blocks, s_list) { if (!sb->s_need_sync_fs) continue; sb->s_need_sync_fs = 0; @@ -405,8 +403,11 @@ restart: down_read(&sb->s_umount); if (sb->s_root && (wait || sb->s_dirt)) sb->s_op->sync_fs(sb, wait); - drop_super(sb); - goto restart; + up_read(&sb->s_umount); + /* restart only when sb is no longer on the list */ + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto restart; } spin_unlock(&sb_lock); up(&mutex); @@ -422,21 +423,25 @@ restart: struct super_block * get_super(struct block_device *bdev) { - struct list_head *p; + struct super_block *sb; + if (!bdev) return NULL; -rescan: + spin_lock(&sb_lock); - list_for_each(p, &super_blocks) { - struct super_block *s = sb_entry(p); - if (s->s_bdev == bdev) { - s->s_count++; +rescan: + list_for_each_entry(sb, &super_blocks, s_list) { + if (sb->s_bdev == bdev) { + sb->s_count++; spin_unlock(&sb_lock); - down_read(&s->s_umount); - if (s->s_root) - return s; - drop_super(s); - goto rescan; + down_read(&sb->s_umount); + if (sb->s_root) + return sb; + up_read(&sb->s_umount); + /* restart only when sb is no longer on the list */ + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto rescan; } } spin_unlock(&sb_lock); @@ -447,20 +452,22 @@ EXPORT_SYMBOL(get_super); struct super_block * user_get_super(dev_t dev) { - struct list_head *p; + struct super_block *sb; -rescan: spin_lock(&sb_lock); - list_for_each(p, &super_blocks) { - struct super_block *s = sb_entry(p); - if (s->s_dev == dev) { - s->s_count++; +rescan: + list_for_each_entry(sb, &super_blocks, s_list) { + if (sb->s_dev == dev) { + sb->s_count++; spin_unlock(&sb_lock); - down_read(&s->s_umount); - if (s->s_root) - return s; - drop_super(s); - goto rescan; + down_read(&sb->s_umount); + if (sb->s_root) + return sb; + up_read(&sb->s_umount); + /* restart only when sb is no longer on the list */ + spin_lock(&sb_lock); + if (__put_super_and_need_restart(sb)) + goto rescan; } } spin_unlock(&sb_lock); diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 37d7a6875d86..59734ba1ee60 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -8,6 +8,7 @@ #include <linux/mount.h> #include <linux/module.h> #include <linux/kobject.h> +#include <linux/namei.h> #include "sysfs.h" DECLARE_RWSEM(sysfs_rename_sem); @@ -99,7 +100,7 @@ static int create_dir(struct kobject * k, struct dentry * p, umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; down(&p->d_inode->i_sem); - *d = sysfs_get_dentry(p,n); + *d = lookup_one_len(n, p, strlen(n)); if (!IS_ERR(*d)) { error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, SYSFS_DIR); if (!error) { @@ -315,7 +316,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name) down(&parent->d_inode->i_sem); - new_dentry = sysfs_get_dentry(parent, new_name); + new_dentry = lookup_one_len(new_name, parent, strlen(new_name)); if (!IS_ERR(new_dentry)) { if (!new_dentry->d_inode) { error = kobject_set_name(kobj, "%s", new_name); diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 849aac115460..e9cfa39f4099 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -5,6 +5,7 @@ #include <linux/module.h> #include <linux/dnotify.h> #include <linux/kobject.h> +#include <linux/namei.h> #include <asm/uaccess.h> #include <asm/semaphore.h> @@ -400,7 +401,7 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr) int res = -ENOENT; down(&dir->d_inode->i_sem); - victim = sysfs_get_dentry(dir, attr->name); + victim = lookup_one_len(attr->name, dir, strlen(attr->name)); if (!IS_ERR(victim)) { /* make sure dentry is really there */ if (victim->d_inode && @@ -443,7 +444,7 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode) int res = -ENOENT; down(&dir->d_inode->i_sem); - victim = sysfs_get_dentry(dir, attr->name); + victim = lookup_one_len(attr->name, dir, strlen(attr->name)); if (!IS_ERR(victim)) { if (victim->d_inode && (victim->d_parent->d_inode == dir->d_inode)) { diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index f11ac5ea7021..122145b0895c 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -11,6 +11,7 @@ #include <linux/kobject.h> #include <linux/module.h> #include <linux/dcache.h> +#include <linux/namei.h> #include <linux/err.h> #include "sysfs.h" @@ -68,7 +69,8 @@ void sysfs_remove_group(struct kobject * kobj, struct dentry * dir; if (grp->name) - dir = sysfs_get_dentry(kobj->dentry,grp->name); + dir = lookup_one_len(grp->name, kobj->dentry, + strlen(grp->name)); else dir = dget(kobj->dentry); diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 565cac1d4200..8de13bafaa76 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -166,16 +166,6 @@ int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *)) return error; } -struct dentry * sysfs_get_dentry(struct dentry * parent, const char * name) -{ - struct qstr qstr; - - qstr.name = name; - qstr.len = strlen(name); - qstr.hash = full_name_hash(name,qstr.len); - return lookup_hash(&qstr,parent); -} - /* * Get the name for corresponding element represented by the given sysfs_dirent */ diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 29da6f5f07c8..3f8953e0e5d0 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -7,7 +7,6 @@ extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *)); extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *, umode_t, int); -extern struct dentry * sysfs_get_dentry(struct dentry *, const char *); extern int sysfs_add_file(struct dentry *, const struct attribute *, int); extern void sysfs_hash_and_remove(struct dentry * dir, const char * name); diff --git a/include/asm-alpha/mmzone.h b/include/asm-alpha/mmzone.h index 726c150dcbe4..a011ef4cf3d3 100644 --- a/include/asm-alpha/mmzone.h +++ b/include/asm-alpha/mmzone.h @@ -57,7 +57,6 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n) * Given a kernel address, find the home node of the underlying memory. */ #define kvaddr_to_nid(kaddr) pa_to_nid(__pa(kaddr)) -#define node_mem_map(nid) (NODE_DATA(nid)->node_mem_map) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define local_mapnr(kvaddr) \ @@ -108,7 +107,7 @@ PLAT_NODE_DATA_LOCALNR(unsigned long p, int n) #define pfn_to_page(pfn) \ ({ \ unsigned long kaddr = (unsigned long)__va((pfn) << PAGE_SHIFT); \ - (node_mem_map(kvaddr_to_nid(kaddr)) + local_mapnr(kaddr)); \ + (NODE_DATA(kvaddr_to_nid(kaddr))->node_mem_map + local_mapnr(kaddr)); \ }) #define page_to_pfn(page) \ diff --git a/include/asm-arm/thread_info.h b/include/asm-arm/thread_info.h index 66c585c50cf9..8252a4cd860f 100644 --- a/include/asm-arm/thread_info.h +++ b/include/asm-arm/thread_info.h @@ -49,7 +49,7 @@ struct cpu_context_save { */ struct thread_info { unsigned long flags; /* low level flags */ - __s32 preempt_count; /* 0 => preemptable, <0 => bug */ + int preempt_count; /* 0 => preemptable, <0 => bug */ mm_segment_t addr_limit; /* address limit */ struct task_struct *task; /* main task structure */ struct exec_domain *exec_domain; /* execution domain */ diff --git a/include/asm-arm26/thread_info.h b/include/asm-arm26/thread_info.h index 50f41b50268a..aff3e5699c64 100644 --- a/include/asm-arm26/thread_info.h +++ b/include/asm-arm26/thread_info.h @@ -44,7 +44,7 @@ struct cpu_context_save { */ struct thread_info { unsigned long flags; /* low level flags */ - __s32 preempt_count; /* 0 => preemptable, <0 => bug */ + int preempt_count; /* 0 => preemptable, <0 => bug */ mm_segment_t addr_limit; /* address limit */ struct task_struct *task; /* main task structure */ struct exec_domain *exec_domain; /* execution domain */ diff --git a/include/asm-cris/thread_info.h b/include/asm-cris/thread_info.h index 53193feb0826..5ba4b7865cc5 100644 --- a/include/asm-cris/thread_info.h +++ b/include/asm-cris/thread_info.h @@ -31,7 +31,7 @@ struct thread_info { struct exec_domain *exec_domain; /* execution domain */ unsigned long flags; /* low level flags */ __u32 cpu; /* current CPU */ - __s32 preempt_count; /* 0 => preemptable, <0 => BUG */ + int preempt_count; /* 0 => preemptable, <0 => BUG */ mm_segment_t addr_limit; /* thread address space: 0-0xBFFFFFFF for user-thead diff --git a/include/asm-frv/thread_info.h b/include/asm-frv/thread_info.h index b80a97f50af6..c8cba7836f0d 100644 --- a/include/asm-frv/thread_info.h +++ b/include/asm-frv/thread_info.h @@ -33,7 +33,7 @@ struct thread_info { unsigned long flags; /* low level flags */ unsigned long status; /* thread-synchronous flags */ __u32 cpu; /* current CPU */ - __s32 preempt_count; /* 0 => preemptable, <0 => BUG */ + int preempt_count; /* 0 => preemptable, <0 => BUG */ mm_segment_t addr_limit; /* thread address space: 0-0xBFFFFFFF for user-thead diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 3b709b84934f..9044aeb37828 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -29,7 +29,7 @@ do { \ #define DEFINE_PER_CPU(type, name) \ __typeof__(type) per_cpu__##name -#define per_cpu(var, cpu) (*((void)cpu, &per_cpu__##var)) +#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var)) #define __get_cpu_var(var) per_cpu__##var #endif /* SMP */ diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h index ec96e8b0f190..5d9d70cd17fc 100644 --- a/include/asm-generic/topology.h +++ b/include/asm-generic/topology.h @@ -41,8 +41,15 @@ #ifndef node_to_first_cpu #define node_to_first_cpu(node) (0) #endif +#ifndef pcibus_to_node +#define pcibus_to_node(node) (-1) +#endif + #ifndef pcibus_to_cpumask -#define pcibus_to_cpumask(bus) (cpu_online_map) +#define pcibus_to_cpumask(bus) (pcibus_to_node(bus) == -1 ? \ + CPU_MASK_ALL : \ + node_to_cpumask(pcibus_to_node(bus)) \ + ) #endif #endif /* _ASM_GENERIC_TOPOLOGY_H */ diff --git a/include/asm-h8300/thread_info.h b/include/asm-h8300/thread_info.h index b07c9344776f..bfcc755c3bb1 100644 --- a/include/asm-h8300/thread_info.h +++ b/include/asm-h8300/thread_info.h @@ -23,7 +23,7 @@ struct thread_info { struct exec_domain *exec_domain; /* execution domain */ unsigned long flags; /* low level flags */ int cpu; /* cpu we're on */ - int preempt_count; /* 0 => preemptable, <0 => BUG*/ + int preempt_count; /* 0 => preemptable, <0 => BUG */ struct restart_block restart_block; }; diff --git a/include/asm-i386/checksum.h b/include/asm-i386/checksum.h index 641342002bcd..f949e44c2a35 100644 --- a/include/asm-i386/checksum.h +++ b/include/asm-i386/checksum.h @@ -3,6 +3,8 @@ #include <linux/in6.h> +#include <asm/uaccess.h> + /* * computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit) diff --git a/include/asm-i386/genapic.h b/include/asm-i386/genapic.h index fc813b2e8274..b3783a32abee 100644 --- a/include/asm-i386/genapic.h +++ b/include/asm-i386/genapic.h @@ -78,7 +78,6 @@ struct genapic { .int_delivery_mode = INT_DELIVERY_MODE, \ .int_dest_mode = INT_DEST_MODE, \ .no_balance_irq = NO_BALANCE_IRQ, \ - .no_ioapic_check = NO_IOAPIC_CHECK, \ .ESR_DISABLE = esr_disable, \ .apic_destination_logical = APIC_DEST_LOGICAL, \ APICFUNC(apic_id_registered), \ diff --git a/include/asm-i386/kprobes.h b/include/asm-i386/kprobes.h index 4092f68d123a..8b6d3a90cd78 100644 --- a/include/asm-i386/kprobes.h +++ b/include/asm-i386/kprobes.h @@ -39,6 +39,9 @@ typedef u8 kprobe_opcode_t; : (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) #define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)pentry +#define ARCH_SUPPORTS_KRETPROBES + +void kretprobe_trampoline(void); /* Architecture specific copy of original instruction*/ struct arch_specific_insn { diff --git a/include/asm-i386/mach-bigsmp/mach_apic.h b/include/asm-i386/mach-bigsmp/mach_apic.h index 2339868270ef..ba936d4daedb 100644 --- a/include/asm-i386/mach-bigsmp/mach_apic.h +++ b/include/asm-i386/mach-bigsmp/mach_apic.h @@ -14,8 +14,6 @@ #define NO_BALANCE_IRQ (1) #define esr_disable (1) -#define NO_IOAPIC_CHECK (0) - static inline int apic_id_registered(void) { return (1); diff --git a/include/asm-i386/mach-default/mach_apic.h b/include/asm-i386/mach-default/mach_apic.h index 627f1cd084ba..3ef6292db780 100644 --- a/include/asm-i386/mach-default/mach_apic.h +++ b/include/asm-i386/mach-default/mach_apic.h @@ -19,8 +19,6 @@ static inline cpumask_t target_cpus(void) #define NO_BALANCE_IRQ (0) #define esr_disable (0) -#define NO_IOAPIC_CHECK (0) - #define INT_DELIVERY_MODE dest_LowestPrio #define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ diff --git a/include/asm-i386/mach-es7000/mach_apic.h b/include/asm-i386/mach-es7000/mach_apic.h index ceab2c464b13..b5f3f0d0b2bc 100644 --- a/include/asm-i386/mach-es7000/mach_apic.h +++ b/include/asm-i386/mach-es7000/mach_apic.h @@ -38,8 +38,6 @@ static inline cpumask_t target_cpus(void) #define WAKE_SECONDARY_VIA_INIT #endif -#define NO_IOAPIC_CHECK (1) - static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { return 0; diff --git a/include/asm-i386/mach-generic/mach_apic.h b/include/asm-i386/mach-generic/mach_apic.h index ab36d02ebede..b13767a4e934 100644 --- a/include/asm-i386/mach-generic/mach_apic.h +++ b/include/asm-i386/mach-generic/mach_apic.h @@ -5,7 +5,6 @@ #define esr_disable (genapic->ESR_DISABLE) #define NO_BALANCE_IRQ (genapic->no_balance_irq) -#define NO_IOAPIC_CHECK (genapic->no_ioapic_check) #define INT_DELIVERY_MODE (genapic->int_delivery_mode) #define INT_DEST_MODE (genapic->int_dest_mode) #undef APIC_DEST_LOGICAL diff --git a/include/asm-i386/mach-numaq/mach_apic.h b/include/asm-i386/mach-numaq/mach_apic.h index e1a04494764a..9d158095da82 100644 --- a/include/asm-i386/mach-numaq/mach_apic.h +++ b/include/asm-i386/mach-numaq/mach_apic.h @@ -17,8 +17,6 @@ static inline cpumask_t target_cpus(void) #define NO_BALANCE_IRQ (1) #define esr_disable (1) -#define NO_IOAPIC_CHECK (0) - #define INT_DELIVERY_MODE dest_LowestPrio #define INT_DEST_MODE 0 /* physical delivery on LOCAL quad */ diff --git a/include/asm-i386/mach-summit/mach_apic.h b/include/asm-i386/mach-summit/mach_apic.h index 74e9cbc8c01b..3d6d12937e1f 100644 --- a/include/asm-i386/mach-summit/mach_apic.h +++ b/include/asm-i386/mach-summit/mach_apic.h @@ -7,8 +7,6 @@ #define esr_disable (1) #define NO_BALANCE_IRQ (0) -#define NO_IOAPIC_CHECK (1) /* Don't check I/O APIC ID for xAPIC */ - /* In clustered mode, the high nibble of APIC ID is a cluster number. * The low nibble is a 4-bit bitmap. */ #define XAPIC_DEST_CPUS_SHIFT 4 diff --git a/include/asm-i386/mach-visws/mach_apic.h b/include/asm-i386/mach-visws/mach_apic.h index 4e6cdfb8b091..de438c7147a8 100644 --- a/include/asm-i386/mach-visws/mach_apic.h +++ b/include/asm-i386/mach-visws/mach_apic.h @@ -9,8 +9,6 @@ #define no_balance_irq (0) #define esr_disable (0) -#define NO_IOAPIC_CHECK (0) - #define INT_DELIVERY_MODE dest_LowestPrio #define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h index 13830ae67cac..33ce5d37e894 100644 --- a/include/asm-i386/mmzone.h +++ b/include/asm-i386/mmzone.h @@ -8,7 +8,9 @@ #include <asm/smp.h> -#ifdef CONFIG_DISCONTIGMEM +#if CONFIG_NUMA +extern struct pglist_data *node_data[]; +#define NODE_DATA(nid) (node_data[nid]) #ifdef CONFIG_NUMA #ifdef CONFIG_X86_NUMAQ @@ -21,8 +23,28 @@ #define get_zholes_size(n) (0) #endif /* CONFIG_NUMA */ -extern struct pglist_data *node_data[]; -#define NODE_DATA(nid) (node_data[nid]) +extern int get_memcfg_numa_flat(void ); +/* + * This allows any one NUMA architecture to be compiled + * for, and still fall back to the flat function if it + * fails. + */ +static inline void get_memcfg_numa(void) +{ +#ifdef CONFIG_X86_NUMAQ + if (get_memcfg_numaq()) + return; +#elif CONFIG_ACPI_SRAT + if (get_memcfg_from_srat()) + return; +#endif + + get_memcfg_numa_flat(); +} + +#endif /* CONFIG_NUMA */ + +#ifdef CONFIG_DISCONTIGMEM /* * generic node memory support, the following assumptions apply: @@ -48,26 +70,6 @@ static inline int pfn_to_nid(unsigned long pfn) #endif } -/* - * Following are macros that are specific to this numa platform. - */ -#define reserve_bootmem(addr, size) \ - reserve_bootmem_node(NODE_DATA(0), (addr), (size)) -#define alloc_bootmem(x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) -#define alloc_bootmem_low(x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, 0) -#define alloc_bootmem_pages(x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) -#define alloc_bootmem_low_pages(x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) -#define alloc_bootmem_node(ignore, x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) -#define alloc_bootmem_pages_node(ignore, x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) -#define alloc_bootmem_low_pages_node(ignore, x) \ - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) - #define node_localnr(pfn, nid) ((pfn) - node_data[nid]->node_start_pfn) /* @@ -79,7 +81,6 @@ static inline int pfn_to_nid(unsigned long pfn) */ #define kvaddr_to_nid(kaddr) pfn_to_nid(__pa(kaddr) >> PAGE_SHIFT) -#define node_mem_map(nid) (NODE_DATA(nid)->node_mem_map) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_end_pfn(nid) \ ({ \ @@ -100,7 +101,7 @@ static inline int pfn_to_nid(unsigned long pfn) ({ \ unsigned long __pfn = pfn; \ int __node = pfn_to_nid(__pfn); \ - &node_mem_map(__node)[node_localnr(__pfn,__node)]; \ + &NODE_DATA(__node)->node_mem_map[node_localnr(__pfn,__node)]; \ }) #define page_to_pfn(pg) \ @@ -122,26 +123,34 @@ static inline int pfn_valid(int pfn) return (pfn < node_end_pfn(nid)); return 0; } -#endif +#endif /* CONFIG_X86_NUMAQ */ + +#endif /* CONFIG_DISCONTIGMEM */ + +#ifdef CONFIG_NEED_MULTIPLE_NODES -extern int get_memcfg_numa_flat(void ); /* - * This allows any one NUMA architecture to be compiled - * for, and still fall back to the flat function if it - * fails. + * Following are macros that are specific to this numa platform. */ -static inline void get_memcfg_numa(void) -{ -#ifdef CONFIG_X86_NUMAQ - if (get_memcfg_numaq()) - return; -#elif CONFIG_ACPI_SRAT - if (get_memcfg_from_srat()) - return; -#endif +#define reserve_bootmem(addr, size) \ + reserve_bootmem_node(NODE_DATA(0), (addr), (size)) +#define alloc_bootmem(x) \ + __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_low(x) \ + __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, 0) +#define alloc_bootmem_pages(x) \ + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_low_pages(x) \ + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) +#define alloc_bootmem_node(ignore, x) \ + __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_pages_node(ignore, x) \ + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_low_pages_node(ignore, x) \ + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) - get_memcfg_numa_flat(); -} +#endif /* CONFIG_NEED_MULTIPLE_NODES */ + +extern int early_pfn_to_nid(unsigned long pfn); -#endif /* CONFIG_DISCONTIGMEM */ #endif /* _ASM_MMZONE_H_ */ diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h index 41400d342d44..dea8f8e6d86e 100644 --- a/include/asm-i386/page.h +++ b/include/asm-i386/page.h @@ -120,6 +120,8 @@ static __inline__ int get_order(unsigned long size) extern int sysctl_legacy_va_layout; +extern int page_is_ram(unsigned long pagenr); + #endif /* __ASSEMBLY__ */ #ifdef __ASSEMBLY__ @@ -135,11 +137,11 @@ extern int sysctl_legacy_va_layout; #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) -#ifndef CONFIG_DISCONTIGMEM +#ifdef CONFIG_FLATMEM #define pfn_to_page(pfn) (mem_map + (pfn)) #define page_to_pfn(page) ((unsigned long)((page) - mem_map)) #define pfn_valid(pfn) ((pfn) < max_mapnr) -#endif /* !CONFIG_DISCONTIGMEM */ +#endif /* CONFIG_FLATMEM */ #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) diff --git a/include/asm-i386/param.h b/include/asm-i386/param.h index b6440526e42a..fa02e67ea86b 100644 --- a/include/asm-i386/param.h +++ b/include/asm-i386/param.h @@ -1,8 +1,10 @@ +#include <linux/config.h> + #ifndef _ASMi386_PARAM_H #define _ASMi386_PARAM_H #ifdef __KERNEL__ -# define HZ 1000 /* Internal kernel timer frequency */ +# define HZ CONFIG_HZ /* Internal kernel timer frequency */ # define USER_HZ 100 /* .. some user interfaces are in "ticks" */ # define CLOCKS_PER_SEC (USER_HZ) /* like times() */ #endif diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index e9efe148fdf7..77c6497f416e 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -398,9 +398,9 @@ extern void noexec_setup(const char *str); #endif /* !__ASSEMBLY__ */ -#ifndef CONFIG_DISCONTIGMEM +#ifdef CONFIG_FLATMEM #define kern_addr_valid(addr) (1) -#endif /* !CONFIG_DISCONTIGMEM */ +#endif /* CONFIG_FLATMEM */ #define io_remap_page_range(vma, vaddr, paddr, size, prot) \ remap_pfn_range(vma, vaddr, (paddr) >> PAGE_SHIFT, size, prot) diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 359bb0151742..c76c50e96225 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -501,12 +501,16 @@ static inline void load_esp0(struct tss_struct *tss, struct thread_struct *threa } while (0) /* - * This special macro can be used to load a debugging register + * These special macros can be used to get or set a debugging register */ -#define loaddebug(thread,register) \ - __asm__("movl %0,%%db" #register \ - : /* no output */ \ - :"r" ((thread)->debugreg[register])) +#define get_debugreg(var, register) \ + __asm__("movl %%db" #register ", %0" \ + :"=r" (var)) +#define set_debugreg(value, register) \ + __asm__("movl %0,%%db" #register \ + : /* no output */ \ + :"r" (value)) + /* Forward declaration, a strange C thing */ struct task_struct; diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h index 8618914b3521..eef9f93870d4 100644 --- a/include/asm-i386/ptrace.h +++ b/include/asm-i386/ptrace.h @@ -57,7 +57,8 @@ struct pt_regs { #ifdef __KERNEL__ struct task_struct; extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code); -#define user_mode(regs) ((VM_MASK & (regs)->eflags) || (3 & (regs)->xcs)) +#define user_mode(regs) (3 & (regs)->xcs) +#define user_mode_vm(regs) ((VM_MASK & (regs)->eflags) || user_mode(regs)) #define instruction_pointer(regs) ((regs)->eip) #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) extern unsigned long profile_pc(struct pt_regs *regs); diff --git a/include/asm-i386/sparsemem.h b/include/asm-i386/sparsemem.h new file mode 100644 index 000000000000..cfeed990585f --- /dev/null +++ b/include/asm-i386/sparsemem.h @@ -0,0 +1,31 @@ +#ifndef _I386_SPARSEMEM_H +#define _I386_SPARSEMEM_H +#ifdef CONFIG_SPARSEMEM + +/* + * generic non-linear memory support: + * + * 1) we will not split memory into more chunks than will fit into the + * flags field of the struct page + */ + +/* + * SECTION_SIZE_BITS 2^N: how big each section will be + * MAX_PHYSADDR_BITS 2^N: how much physical address space we have + * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space + */ +#ifdef CONFIG_X86_PAE +#define SECTION_SIZE_BITS 30 +#define MAX_PHYSADDR_BITS 36 +#define MAX_PHYSMEM_BITS 36 +#else +#define SECTION_SIZE_BITS 26 +#define MAX_PHYSADDR_BITS 32 +#define MAX_PHYSMEM_BITS 32 +#endif + +/* XXX: FIXME -- wli */ +#define kern_addr_valid(kaddr) (0) + +#endif /* CONFIG_SPARSEMEM */ +#endif /* _I386_SPARSEMEM_H */ diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h index 2cd57271801d..95add81237ea 100644 --- a/include/asm-i386/thread_info.h +++ b/include/asm-i386/thread_info.h @@ -31,7 +31,7 @@ struct thread_info { unsigned long flags; /* low level flags */ unsigned long status; /* thread-synchronous flags */ __u32 cpu; /* current CPU */ - __s32 preempt_count; /* 0 => preemptable, <0 => BUG */ + int preempt_count; /* 0 => preemptable, <0 => BUG */ mm_segment_t addr_limit; /* thread address space: diff --git a/include/asm-i386/timer.h b/include/asm-i386/timer.h index c34709849839..dcf1e07db08a 100644 --- a/include/asm-i386/timer.h +++ b/include/asm-i386/timer.h @@ -22,6 +22,7 @@ struct timer_opts { unsigned long (*get_offset)(void); unsigned long long (*monotonic_clock)(void); void (*delay)(unsigned long); + unsigned long (*read_timer)(void); }; struct init_timer_opts { @@ -52,6 +53,7 @@ extern struct init_timer_opts timer_cyclone_init; #endif extern unsigned long calibrate_tsc(void); +extern unsigned long read_timer_tsc(void); extern void init_cpu_khz(void); extern int recalibrate_cpu_khz(void); #ifdef CONFIG_HPET_TIMER diff --git a/include/asm-i386/timex.h b/include/asm-i386/timex.h index b41e484c3445..292b5a68f627 100644 --- a/include/asm-i386/timex.h +++ b/include/asm-i386/timex.h @@ -47,6 +47,9 @@ static inline cycles_t get_cycles (void) return ret; } -extern unsigned long cpu_khz; +extern unsigned int cpu_khz; + +extern int read_current_timer(unsigned long *timer_value); +#define ARCH_HAS_READ_CURRENT_TIMER 1 #endif diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h index 98f9e6850cba..6d0f67507b21 100644 --- a/include/asm-i386/topology.h +++ b/include/asm-i386/topology.h @@ -60,12 +60,8 @@ static inline int node_to_first_cpu(int node) return first_cpu(mask); } -/* Returns the number of the node containing PCI bus number 'busnr' */ -static inline cpumask_t __pcibus_to_cpumask(int busnr) -{ - return node_to_cpumask(mp_bus_id_to_node[busnr]); -} -#define pcibus_to_cpumask(bus) __pcibus_to_cpumask(bus->number) +#define pcibus_to_node(bus) mp_bus_id_to_node[(bus)->number] +#define pcibus_to_cpumask(bus) node_to_cpumask(pcibus_to_node(bus)) /* sched_domains SD_NODE_INIT for NUMAQ machines */ #define SD_NODE_INIT (struct sched_domain) { \ diff --git a/include/asm-ia64/break.h b/include/asm-ia64/break.h index 97c7b2d79600..8167828edc4b 100644 --- a/include/asm-ia64/break.h +++ b/include/asm-ia64/break.h @@ -12,6 +12,8 @@ * OS-specific debug break numbers: */ #define __IA64_BREAK_KDB 0x80100 +#define __IA64_BREAK_KPROBE 0x80200 +#define __IA64_BREAK_JPROBE 0x80300 /* * OS-specific break numbers: diff --git a/include/asm-ia64/compat.h b/include/asm-ia64/compat.h index cc0ff0a4bdd0..0c05e5bad8a0 100644 --- a/include/asm-ia64/compat.h +++ b/include/asm-ia64/compat.h @@ -27,6 +27,7 @@ typedef u16 compat_ipc_pid_t; typedef s32 compat_daddr_t; typedef u32 compat_caddr_t; typedef __kernel_fsid_t compat_fsid_t; +typedef s32 compat_timer_t; typedef s32 compat_int_t; typedef s32 compat_long_t; diff --git a/include/asm-ia64/fcntl.h b/include/asm-ia64/fcntl.h index d193981bb1d8..c9f8d835d0cc 100644 --- a/include/asm-ia64/fcntl.h +++ b/include/asm-ia64/fcntl.h @@ -81,4 +81,6 @@ struct flock { #define F_LINUX_SPECIFIC_BASE 1024 +#define force_o_largefile() ( ! (current->personality & PER_LINUX32) ) + #endif /* _ASM_IA64_FCNTL_H */ diff --git a/include/asm-ia64/kdebug.h b/include/asm-ia64/kdebug.h new file mode 100644 index 000000000000..4d376e1663f7 --- /dev/null +++ b/include/asm-ia64/kdebug.h @@ -0,0 +1,61 @@ +#ifndef _IA64_KDEBUG_H +#define _IA64_KDEBUG_H 1 +/* + * include/asm-ia64/kdebug.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) Intel Corporation, 2005 + * + * 2005-Apr Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy + * <anil.s.keshavamurthy@intel.com> adopted from + * include/asm-x86_64/kdebug.h + */ +#include <linux/notifier.h> + +struct pt_regs; + +struct die_args { + struct pt_regs *regs; + const char *str; + long err; + int trapnr; + int signr; +}; + +int register_die_notifier(struct notifier_block *nb); +extern struct notifier_block *ia64die_chain; + +enum die_val { + DIE_BREAK = 1, + DIE_SS, + DIE_PAGE_FAULT, +}; + +static inline int notify_die(enum die_val val, char *str, struct pt_regs *regs, + long err, int trap, int sig) +{ + struct die_args args = { + .regs = regs, + .str = str, + .err = err, + .trapnr = trap, + .signr = sig + }; + + return notifier_call_chain(&ia64die_chain, val, &args); +} + +#endif diff --git a/include/asm-ia64/kprobes.h b/include/asm-ia64/kprobes.h new file mode 100644 index 000000000000..7b700035e36d --- /dev/null +++ b/include/asm-ia64/kprobes.h @@ -0,0 +1,116 @@ +#ifndef _ASM_KPROBES_H +#define _ASM_KPROBES_H +/* + * Kernel Probes (KProbes) + * include/asm-ia64/kprobes.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004 + * Copyright (C) Intel Corporation, 2005 + * + * 2005-Apr Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy + * <anil.s.keshavamurthy@intel.com> adapted from i386 + */ +#include <linux/types.h> +#include <linux/ptrace.h> +#include <asm/break.h> + +#define BREAK_INST (long)(__IA64_BREAK_KPROBE << 6) + +typedef union cmp_inst { + struct { + unsigned long long qp : 6; + unsigned long long p1 : 6; + unsigned long long c : 1; + unsigned long long r2 : 7; + unsigned long long r3 : 7; + unsigned long long p2 : 6; + unsigned long long ta : 1; + unsigned long long x2 : 2; + unsigned long long tb : 1; + unsigned long long opcode : 4; + unsigned long long reserved : 23; + }f; + unsigned long long l; +} cmp_inst_t; + +struct kprobe; + +typedef struct _bundle { + struct { + unsigned long long template : 5; + unsigned long long slot0 : 41; + unsigned long long slot1_p0 : 64-46; + } quad0; + struct { + unsigned long long slot1_p1 : 41 - (64-46); + unsigned long long slot2 : 41; + } quad1; +} __attribute__((__aligned__(16))) bundle_t; + +#define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)pentry + +#define SLOT0_OPCODE_SHIFT (37) +#define SLOT1_p1_OPCODE_SHIFT (37 - (64-46)) +#define SLOT2_OPCODE_SHIFT (37) + +#define INDIRECT_CALL_OPCODE (1) +#define IP_RELATIVE_CALL_OPCODE (5) +#define IP_RELATIVE_BRANCH_OPCODE (4) +#define IP_RELATIVE_PREDICT_OPCODE (7) +#define LONG_BRANCH_OPCODE (0xC) +#define LONG_CALL_OPCODE (0xD) + +typedef struct kprobe_opcode { + bundle_t bundle; +} kprobe_opcode_t; + +struct fnptr { + unsigned long ip; + unsigned long gp; +}; + +/* Architecture specific copy of original instruction*/ +struct arch_specific_insn { + /* copy of the instruction to be emulated */ + kprobe_opcode_t insn; + #define INST_FLAG_FIX_RELATIVE_IP_ADDR 1 + #define INST_FLAG_FIX_BRANCH_REG 2 + unsigned long inst_flag; + unsigned short target_br_reg; +}; + +/* ia64 does not need this */ +static inline void jprobe_return(void) +{ +} + +/* ia64 does not need this */ +static inline void arch_copy_kprobe(struct kprobe *p) +{ +} + +#ifdef CONFIG_KPROBES +extern int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); +#else /* !CONFIG_KPROBES */ +static inline int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return 0; +} +#endif +#endif /* _ASM_KPROBES_H */ diff --git a/include/asm-ia64/param.h b/include/asm-ia64/param.h index 6c6b679b7a9e..5e1e0d2d7baf 100644 --- a/include/asm-ia64/param.h +++ b/include/asm-ia64/param.h @@ -27,7 +27,7 @@ */ # define HZ 32 # else -# define HZ 1024 +# define HZ CONFIG_HZ # endif # define USER_HZ HZ # define CLOCKS_PER_SEC HZ /* frequency at which times() counts */ diff --git a/include/asm-ia64/percpu.h b/include/asm-ia64/percpu.h index 1e87f19dad56..2b14dee29ce7 100644 --- a/include/asm-ia64/percpu.h +++ b/include/asm-ia64/percpu.h @@ -50,7 +50,7 @@ extern void *per_cpu_init(void); #else /* ! SMP */ -#define per_cpu(var, cpu) (*((void)cpu, &per_cpu__##var)) +#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var)) #define __get_cpu_var(var) per_cpu__##var #define per_cpu_init() (__phys_per_cpu_start) diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h index 8d5b7e77028c..7dc8951708a3 100644 --- a/include/asm-ia64/thread_info.h +++ b/include/asm-ia64/thread_info.h @@ -25,7 +25,7 @@ struct thread_info { __u32 flags; /* thread_info flags (see TIF_*) */ __u32 cpu; /* current CPU */ mm_segment_t addr_limit; /* user-level address space limit */ - __s32 preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ + int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ struct restart_block restart_block; struct { int signo; diff --git a/include/asm-m32r/mmzone.h b/include/asm-m32r/mmzone.h index ebf0228fec42..d58878ec899e 100644 --- a/include/asm-m32r/mmzone.h +++ b/include/asm-m32r/mmzone.h @@ -14,7 +14,6 @@ extern struct pglist_data *node_data[]; #define NODE_DATA(nid) (node_data[nid]) #define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn) -#define node_mem_map(nid) (NODE_DATA(nid)->node_mem_map) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_end_pfn(nid) \ ({ \ @@ -32,7 +31,7 @@ extern struct pglist_data *node_data[]; ({ \ unsigned long __pfn = pfn; \ int __node = pfn_to_nid(__pfn); \ - &node_mem_map(__node)[node_localnr(__pfn,__node)]; \ + &NODE_DATA(__node)->node_mem_map[node_localnr(__pfn,__node)]; \ }) #define page_to_pfn(pg) \ diff --git a/include/asm-m32r/thread_info.h b/include/asm-m32r/thread_info.h index 9f3a0fcf6e2b..7a6be7727a92 100644 --- a/include/asm-m32r/thread_info.h +++ b/include/asm-m32r/thread_info.h @@ -28,7 +28,7 @@ struct thread_info { unsigned long flags; /* low level flags */ unsigned long status; /* thread-synchronous flags */ __u32 cpu; /* current CPU */ - __s32 preempt_count; /* 0 => preemptable, <0 => BUG */ + int preempt_count; /* 0 => preemptable, <0 => BUG */ mm_segment_t addr_limit; /* thread address space: 0-0xBFFFFFFF for user-thread diff --git a/include/asm-m32r/topology.h b/include/asm-m32r/topology.h index 299a89d91bde..d607eb32bd7e 100644 --- a/include/asm-m32r/topology.h +++ b/include/asm-m32r/topology.h @@ -1,48 +1,6 @@ -/* - * linux/include/asm-generic/topology.h - * - * Written by: Matthew Dobson, IBM Corporation - * - * Copyright (C) 2002, IBM Corp. - * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Send feedback to <colpatch@us.ibm.com> - */ #ifndef _ASM_M32R_TOPOLOGY_H #define _ASM_M32R_TOPOLOGY_H -/* Other architectures wishing to use this simple topology API should fill - in the below functions as appropriate in their own <asm/topology.h> file. */ - -#define cpu_to_node(cpu) (0) - -#ifndef parent_node -#define parent_node(node) (0) -#endif -#ifndef node_to_cpumask -#define node_to_cpumask(node) (cpu_online_map) -#endif -#ifndef node_to_first_cpu -#define node_to_first_cpu(node) (0) -#endif -#ifndef pcibus_to_cpumask -#define pcibus_to_cpumask(bus) (cpu_online_map) -#endif +#include <asm-generic/topology.h> #endif /* _ASM_M32R_TOPOLOGY_H */ diff --git a/include/asm-m68k/thread_info.h b/include/asm-m68k/thread_info.h index 5f58939c59db..2aed24f6fd2e 100644 --- a/include/asm-m68k/thread_info.h +++ b/include/asm-m68k/thread_info.h @@ -8,7 +8,7 @@ struct thread_info { struct task_struct *task; /* main task structure */ struct exec_domain *exec_domain; /* execution domain */ - __s32 preempt_count; /* 0 => preemptable, <0 => BUG */ + int preempt_count; /* 0 => preemptable, <0 => BUG */ __u32 cpu; /* should always be 0 on m68k */ struct restart_block restart_block; diff --git a/include/asm-m68knommu/thread_info.h b/include/asm-m68knommu/thread_info.h index c8153b7c1f5c..7b9a3fa3af5d 100644 --- a/include/asm-m68knommu/thread_info.h +++ b/include/asm-m68knommu/thread_info.h @@ -36,7 +36,7 @@ struct thread_info { struct exec_domain *exec_domain; /* execution domain */ unsigned long flags; /* low level flags */ int cpu; /* cpu we're on */ - int preempt_count; /* 0 => preemptable, <0 => BUG*/ + int preempt_count; /* 0 => preemptable, <0 => BUG */ struct restart_block restart_block; }; diff --git a/include/asm-mips/compat.h b/include/asm-mips/compat.h index dce92079e7fc..d78002afb1e1 100644 --- a/include/asm-mips/compat.h +++ b/include/asm-mips/compat.h @@ -29,6 +29,7 @@ typedef s32 compat_caddr_t; typedef struct { s32 val[2]; } compat_fsid_t; +typedef s32 compat_timer_t; typedef s32 compat_int_t; typedef s32 compat_long_t; diff --git a/include/asm-mips/thread_info.h b/include/asm-mips/thread_info.h index 768900305e2f..42fcd6f2c206 100644 --- a/include/asm-mips/thread_info.h +++ b/include/asm-mips/thread_info.h @@ -27,7 +27,7 @@ struct thread_info { struct exec_domain *exec_domain; /* execution domain */ unsigned long flags; /* low level flags */ __u32 cpu; /* current CPU */ - __s32 preempt_count; /* 0 => preemptable, <0 => BUG */ + int preempt_count; /* 0 => preemptable, <0 => BUG */ mm_segment_t addr_limit; /* thread address space: 0-0xBFFFFFFF for user-thead diff --git a/include/asm-parisc/compat.h b/include/asm-parisc/compat.h index ca0eac647a05..7630d1ad2391 100644 --- a/include/asm-parisc/compat.h +++ b/include/asm-parisc/compat.h @@ -24,7 +24,7 @@ typedef u16 compat_nlink_t; typedef u16 compat_ipc_pid_t; typedef s32 compat_daddr_t; typedef u32 compat_caddr_t; -typedef u32 compat_timer_t; +typedef s32 compat_timer_t; typedef s32 compat_int_t; typedef s32 compat_long_t; diff --git a/include/asm-parisc/mmzone.h b/include/asm-parisc/mmzone.h index 928bf50c4693..595d3dce120a 100644 --- a/include/asm-parisc/mmzone.h +++ b/include/asm-parisc/mmzone.h @@ -19,7 +19,6 @@ extern struct node_map_data node_data[]; */ #define kvaddr_to_nid(kaddr) pfn_to_nid(__pa(kaddr) >> PAGE_SHIFT) -#define node_mem_map(nid) (NODE_DATA(nid)->node_mem_map) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_end_pfn(nid) \ ({ \ @@ -38,7 +37,7 @@ extern struct node_map_data node_data[]; ({ \ unsigned long __pfn = (pfn); \ int __node = pfn_to_nid(__pfn); \ - &node_mem_map(__node)[node_localnr(__pfn,__node)]; \ + &NODE_DATA(__node)->node_mem_map[node_localnr(__pfn,__node)]; \ }) #define page_to_pfn(pg) \ diff --git a/include/asm-parisc/thread_info.h b/include/asm-parisc/thread_info.h index fe9b7f8ae4c6..57bbb76cb6c1 100644 --- a/include/asm-parisc/thread_info.h +++ b/include/asm-parisc/thread_info.h @@ -12,7 +12,7 @@ struct thread_info { unsigned long flags; /* thread_info flags (see TIF_*) */ mm_segment_t addr_limit; /* user-level address space limit */ __u32 cpu; /* current CPU */ - __s32 preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ + int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ struct restart_block restart_block; }; diff --git a/include/asm-ppc/thread_info.h b/include/asm-ppc/thread_info.h index e3b5284a6f91..27903db42efc 100644 --- a/include/asm-ppc/thread_info.h +++ b/include/asm-ppc/thread_info.h @@ -20,7 +20,8 @@ struct thread_info { unsigned long flags; /* low level flags */ unsigned long local_flags; /* non-racy flags */ int cpu; /* cpu we're on */ - int preempt_count; + int preempt_count; /* 0 => preemptable, + <0 => BUG */ struct restart_block restart_block; }; diff --git a/include/asm-ppc64/compat.h b/include/asm-ppc64/compat.h index 09c28d28ce6c..12414f5fc666 100644 --- a/include/asm-ppc64/compat.h +++ b/include/asm-ppc64/compat.h @@ -26,6 +26,7 @@ typedef s32 compat_daddr_t; typedef u32 compat_caddr_t; typedef __kernel_fsid_t compat_fsid_t; typedef s32 compat_key_t; +typedef s32 compat_timer_t; typedef s32 compat_int_t; typedef s32 compat_long_t; diff --git a/include/asm-ppc64/mmzone.h b/include/asm-ppc64/mmzone.h index 0619a41a3c9d..ed473f4b0152 100644 --- a/include/asm-ppc64/mmzone.h +++ b/include/asm-ppc64/mmzone.h @@ -10,9 +10,20 @@ #include <linux/config.h> #include <asm/smp.h> -#ifdef CONFIG_DISCONTIGMEM +/* generic non-linear memory support: + * + * 1) we will not split memory into more chunks than will fit into the + * flags field of the struct page + */ + + +#ifdef CONFIG_NEED_MULTIPLE_NODES extern struct pglist_data *node_data[]; +/* + * Return a pointer to the node data for node n. + */ +#define NODE_DATA(nid) (node_data[nid]) /* * Following are specific to this numa platform. @@ -47,36 +58,32 @@ static inline int pa_to_nid(unsigned long pa) return nid; } -#define pfn_to_nid(pfn) pa_to_nid((pfn) << PAGE_SHIFT) - -/* - * Return a pointer to the node data for node n. - */ -#define NODE_DATA(nid) (node_data[nid]) - #define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn) /* * Following are macros that each numa implmentation must define. */ -/* - * Given a kernel address, find the home node of the underlying memory. - */ -#define kvaddr_to_nid(kaddr) pa_to_nid(__pa(kaddr)) - -#define node_mem_map(nid) (NODE_DATA(nid)->node_mem_map) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_end_pfn(nid) (NODE_DATA(nid)->node_end_pfn) #define local_mapnr(kvaddr) \ ( (__pa(kvaddr) >> PAGE_SHIFT) - node_start_pfn(kvaddr_to_nid(kvaddr)) +#ifdef CONFIG_DISCONTIGMEM + +/* + * Given a kernel address, find the home node of the underlying memory. + */ +#define kvaddr_to_nid(kaddr) pa_to_nid(__pa(kaddr)) + +#define pfn_to_nid(pfn) pa_to_nid((unsigned long)(pfn) << PAGE_SHIFT) + /* Written this way to avoid evaluating arguments twice */ #define discontigmem_pfn_to_page(pfn) \ ({ \ unsigned long __tmp = pfn; \ - (node_mem_map(pfn_to_nid(__tmp)) + \ + (NODE_DATA(pfn_to_nid(__tmp))->node_mem_map + \ node_localnr(__tmp, pfn_to_nid(__tmp))); \ }) @@ -91,4 +98,11 @@ static inline int pa_to_nid(unsigned long pa) #define discontigmem_pfn_valid(pfn) ((pfn) < num_physpages) #endif /* CONFIG_DISCONTIGMEM */ + +#endif /* CONFIG_NEED_MULTIPLE_NODES */ + +#ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID +#define early_pfn_to_nid(pfn) pa_to_nid(((unsigned long)pfn) << PAGE_SHIFT) +#endif + #endif /* _ASM_MMZONE_H_ */ diff --git a/include/asm-ppc64/page.h b/include/asm-ppc64/page.h index 257d87eb7c34..a5893a305a09 100644 --- a/include/asm-ppc64/page.h +++ b/include/asm-ppc64/page.h @@ -217,7 +217,8 @@ extern u64 ppc64_pft_size; /* Log 2 of page table size */ #define page_to_pfn(page) discontigmem_page_to_pfn(page) #define pfn_to_page(pfn) discontigmem_pfn_to_page(pfn) #define pfn_valid(pfn) discontigmem_pfn_valid(pfn) -#else +#endif +#ifdef CONFIG_FLATMEM #define pfn_to_page(pfn) (mem_map + (pfn)) #define page_to_pfn(page) ((unsigned long)((page) - mem_map)) #define pfn_valid(pfn) ((pfn) < max_mapnr) diff --git a/include/asm-ppc64/ppc32.h b/include/asm-ppc64/ppc32.h index 1d0404897550..6b44a8caf395 100644 --- a/include/asm-ppc64/ppc32.h +++ b/include/asm-ppc64/ppc32.h @@ -32,7 +32,7 @@ typedef struct compat_siginfo { /* POSIX.1b timers */ struct { - timer_t _tid; /* timer id */ + compat_timer_t _tid; /* timer id */ int _overrun; /* overrun count */ compat_sigval_t _sigval; /* same as below */ int _sys_private; /* not to be passed to user */ diff --git a/include/asm-ppc64/sparsemem.h b/include/asm-ppc64/sparsemem.h new file mode 100644 index 000000000000..c5bd47e57f17 --- /dev/null +++ b/include/asm-ppc64/sparsemem.h @@ -0,0 +1,16 @@ +#ifndef _ASM_PPC64_SPARSEMEM_H +#define _ASM_PPC64_SPARSEMEM_H 1 + +#ifdef CONFIG_SPARSEMEM +/* + * SECTION_SIZE_BITS 2^N: how big each section will be + * MAX_PHYSADDR_BITS 2^N: how much physical address space we have + * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space + */ +#define SECTION_SIZE_BITS 24 +#define MAX_PHYSADDR_BITS 38 +#define MAX_PHYSMEM_BITS 36 + +#endif /* CONFIG_SPARSEMEM */ + +#endif /* _ASM_PPC64_SPARSEMEM_H */ diff --git a/include/asm-ppc64/thread_info.h b/include/asm-ppc64/thread_info.h index 48b7900e90ec..0494df6fca74 100644 --- a/include/asm-ppc64/thread_info.h +++ b/include/asm-ppc64/thread_info.h @@ -24,7 +24,7 @@ struct thread_info { struct task_struct *task; /* main task structure */ struct exec_domain *exec_domain; /* execution domain */ int cpu; /* cpu we're on */ - int preempt_count; + int preempt_count; /* 0 => preemptable, <0 => BUG */ struct restart_block restart_block; /* set by force_successful_syscall_return */ unsigned char syscall_noerror; diff --git a/include/asm-ppc64/topology.h b/include/asm-ppc64/topology.h index d58d9dd79998..fcdcfd26a26b 100644 --- a/include/asm-ppc64/topology.h +++ b/include/asm-ppc64/topology.h @@ -59,10 +59,8 @@ static inline int node_to_first_cpu(int node) .nr_balance_failed = 0, \ } -#else /* !CONFIG_NUMA */ +#endif /* CONFIG_NUMA */ #include <asm-generic/topology.h> -#endif /* CONFIG_NUMA */ - #endif /* _ASM_PPC64_TOPOLOGY_H */ diff --git a/include/asm-s390/thread_info.h b/include/asm-s390/thread_info.h index aade85c53a63..fe101d41e849 100644 --- a/include/asm-s390/thread_info.h +++ b/include/asm-s390/thread_info.h @@ -50,7 +50,7 @@ struct thread_info { struct exec_domain *exec_domain; /* execution domain */ unsigned long flags; /* low level flags */ unsigned int cpu; /* current CPU */ - unsigned int preempt_count; /* 0 => preemptable */ + int preempt_count; /* 0 => preemptable, <0 => BUG */ struct restart_block restart_block; }; diff --git a/include/asm-sh/thread_info.h b/include/asm-sh/thread_info.h index 4bbbd9f3c37e..46080cefaff8 100644 --- a/include/asm-sh/thread_info.h +++ b/include/asm-sh/thread_info.h @@ -20,7 +20,7 @@ struct thread_info { struct exec_domain *exec_domain; /* execution domain */ __u32 flags; /* low level flags */ __u32 cpu; - __s32 preempt_count; /* 0 => preemptable, <0 => BUG */ + int preempt_count; /* 0 => preemptable, <0 => BUG */ struct restart_block restart_block; __u8 supervisor_stack[0]; }; diff --git a/include/asm-sh64/thread_info.h b/include/asm-sh64/thread_info.h index 8a32d6bd0b79..10f024c6a2e3 100644 --- a/include/asm-sh64/thread_info.h +++ b/include/asm-sh64/thread_info.h @@ -22,7 +22,7 @@ struct thread_info { struct exec_domain *exec_domain; /* execution domain */ unsigned long flags; /* low level flags */ /* Put the 4 32-bit fields together to make asm offsetting easier. */ - __s32 preempt_count; /* 0 => preemptable, <0 => BUG */ + int preempt_count; /* 0 => preemptable, <0 => BUG */ __u16 cpu; mm_segment_t addr_limit; diff --git a/include/asm-sparc/thread_info.h b/include/asm-sparc/thread_info.h index 104f03c55416..ff6ccb3d24c6 100644 --- a/include/asm-sparc/thread_info.h +++ b/include/asm-sparc/thread_info.h @@ -30,9 +30,9 @@ struct thread_info { struct task_struct *task; /* main task structure */ struct exec_domain *exec_domain; /* execution domain */ unsigned long flags; /* low level flags */ - int cpu; /* cpu we're on */ - int preempt_count; + int preempt_count; /* 0 => preemptable, + <0 => BUG */ int softirq_count; int hardirq_count; diff --git a/include/asm-sparc64/compat.h b/include/asm-sparc64/compat.h index 22f58055b8ab..b59122dd176d 100644 --- a/include/asm-sparc64/compat.h +++ b/include/asm-sparc64/compat.h @@ -25,6 +25,7 @@ typedef s32 compat_daddr_t; typedef u32 compat_caddr_t; typedef __kernel_fsid_t compat_fsid_t; typedef s32 compat_key_t; +typedef s32 compat_timer_t; typedef s32 compat_int_t; typedef s32 compat_long_t; diff --git a/include/asm-sparc64/thread_info.h b/include/asm-sparc64/thread_info.h index 517caaba1c87..0cd652956929 100644 --- a/include/asm-sparc64/thread_info.h +++ b/include/asm-sparc64/thread_info.h @@ -46,7 +46,7 @@ struct thread_info { unsigned long fault_address; struct pt_regs *kregs; struct exec_domain *exec_domain; - int preempt_count; + int preempt_count; /* 0 => preemptable, <0 => BUG */ int __pad; unsigned long *utraps; diff --git a/include/asm-um/thread_info.h b/include/asm-um/thread_info.h index 1feaaf148ef1..97267f059ef5 100644 --- a/include/asm-um/thread_info.h +++ b/include/asm-um/thread_info.h @@ -17,7 +17,7 @@ struct thread_info { struct exec_domain *exec_domain; /* execution domain */ unsigned long flags; /* low level flags */ __u32 cpu; /* current CPU */ - __s32 preempt_count; /* 0 => preemptable, + int preempt_count; /* 0 => preemptable, <0 => BUG */ mm_segment_t addr_limit; /* thread address space: 0-0xBFFFFFFF for user diff --git a/include/asm-v850/thread_info.h b/include/asm-v850/thread_info.h index e2ef44593752..e4cfad94a553 100644 --- a/include/asm-v850/thread_info.h +++ b/include/asm-v850/thread_info.h @@ -30,7 +30,8 @@ struct thread_info { struct exec_domain *exec_domain; /* execution domain */ unsigned long flags; /* low level flags */ int cpu; /* cpu we're on */ - int preempt_count; + int preempt_count; /* 0 => preemptable, + <0 => BUG */ struct restart_block restart_block; }; diff --git a/include/asm-x86_64/bitops.h b/include/asm-x86_64/bitops.h index 5dd7727c756b..a31bb99be53f 100644 --- a/include/asm-x86_64/bitops.h +++ b/include/asm-x86_64/bitops.h @@ -411,8 +411,6 @@ static __inline__ int ffs(int x) /* find last set bit */ #define fls(x) generic_fls(x) -#define ARCH_HAS_ATOMIC_UNSIGNED 1 - #endif /* __KERNEL__ */ #endif /* _X86_64_BITOPS_H */ diff --git a/include/asm-x86_64/ia32.h b/include/asm-x86_64/ia32.h index c0a7717923ed..6efa00fe4e7b 100644 --- a/include/asm-x86_64/ia32.h +++ b/include/asm-x86_64/ia32.h @@ -94,7 +94,7 @@ typedef struct compat_siginfo{ /* POSIX.1b timers */ struct { - int _tid; /* timer id */ + compat_timer_t _tid; /* timer id */ int _overrun; /* overrun count */ compat_sigval_t _sigval; /* same as below */ int _sys_private; /* not to be passed to user */ diff --git a/include/asm-x86_64/io.h b/include/asm-x86_64/io.h index 94202703fae2..37fc3f149a5a 100644 --- a/include/asm-x86_64/io.h +++ b/include/asm-x86_64/io.h @@ -124,12 +124,7 @@ extern inline void * phys_to_virt(unsigned long address) /* * Change "struct page" to physical address. */ -#ifdef CONFIG_DISCONTIGMEM -#include <asm/mmzone.h> #define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) -#else -#define page_to_phys(page) ((page - mem_map) << PAGE_SHIFT) -#endif #include <asm-generic/iomap.h> diff --git a/include/asm-x86_64/kprobes.h b/include/asm-x86_64/kprobes.h index bfea52d516f8..6d6d883fdf6d 100644 --- a/include/asm-x86_64/kprobes.h +++ b/include/asm-x86_64/kprobes.h @@ -38,6 +38,9 @@ typedef u8 kprobe_opcode_t; : (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) #define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)pentry +#define ARCH_SUPPORTS_KRETPROBES + +void kretprobe_trampoline(void); /* Architecture specific copy of original instruction*/ struct arch_specific_insn { diff --git a/include/asm-x86_64/mmzone.h b/include/asm-x86_64/mmzone.h index d95b7c240831..768413751b34 100644 --- a/include/asm-x86_64/mmzone.h +++ b/include/asm-x86_64/mmzone.h @@ -6,7 +6,7 @@ #include <linux/config.h> -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NUMA #define VIRTUAL_BUG_ON(x) @@ -30,27 +30,23 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) return nid; } -#define pfn_to_nid(pfn) phys_to_nid((unsigned long)(pfn) << PAGE_SHIFT) - -#define kvaddr_to_nid(kaddr) phys_to_nid(__pa(kaddr)) #define NODE_DATA(nid) (node_data[nid]) -#define node_mem_map(nid) (NODE_DATA(nid)->node_mem_map) - -#define node_mem_map(nid) (NODE_DATA(nid)->node_mem_map) #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) #define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \ NODE_DATA(nid)->node_spanned_pages) -#define local_mapnr(kvaddr) \ - ( (__pa(kvaddr) >> PAGE_SHIFT) - node_start_pfn(kvaddr_to_nid(kvaddr)) ) +#ifdef CONFIG_DISCONTIGMEM + +#define pfn_to_nid(pfn) phys_to_nid((unsigned long)(pfn) << PAGE_SHIFT) +#define kvaddr_to_nid(kaddr) phys_to_nid(__pa(kaddr)) /* AK: this currently doesn't deal with invalid addresses. We'll see if the 2.5 kernel doesn't pass them (2.4 used to). */ #define pfn_to_page(pfn) ({ \ int nid = phys_to_nid(((unsigned long)(pfn)) << PAGE_SHIFT); \ - ((pfn) - node_start_pfn(nid)) + node_mem_map(nid); \ + ((pfn) - node_start_pfn(nid)) + NODE_DATA(nid)->node_mem_map; \ }) #define page_to_pfn(page) \ @@ -60,4 +56,8 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) ({ u8 nid__ = pfn_to_nid(pfn); \ nid__ != 0xff && (pfn) >= node_start_pfn(nid__) && (pfn) <= node_end_pfn(nid__); })) #endif + +#define local_mapnr(kvaddr) \ + ( (__pa(kvaddr) >> PAGE_SHIFT) - node_start_pfn(kvaddr_to_nid(kvaddr)) ) +#endif #endif diff --git a/include/asm-x86_64/msr.h b/include/asm-x86_64/msr.h index 513e52c71821..bc700232728d 100644 --- a/include/asm-x86_64/msr.h +++ b/include/asm-x86_64/msr.h @@ -57,11 +57,6 @@ (val) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \ } while(0) -#define rdpmc(counter,low,high) \ - __asm__ __volatile__("rdpmc" \ - : "=a" (low), "=d" (high) \ - : "c" (counter)) - #define write_tsc(val1,val2) wrmsr(0x10, val1, val2) #define rdpmc(counter,low,high) \ diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h index 9ce338c3a71e..60130f4ca986 100644 --- a/include/asm-x86_64/page.h +++ b/include/asm-x86_64/page.h @@ -119,7 +119,9 @@ extern __inline__ int get_order(unsigned long size) __pa(v); }) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) -#ifndef CONFIG_DISCONTIGMEM +#define __boot_va(x) __va(x) +#define __boot_pa(x) __pa(x) +#ifdef CONFIG_FLATMEM #define pfn_to_page(pfn) (mem_map + (pfn)) #define page_to_pfn(page) ((unsigned long)((page) - mem_map)) #define pfn_valid(pfn) ((pfn) < max_mapnr) diff --git a/include/asm-x86_64/param.h b/include/asm-x86_64/param.h index b707f0568c9e..40b11937180d 100644 --- a/include/asm-x86_64/param.h +++ b/include/asm-x86_64/param.h @@ -1,9 +1,11 @@ +#include <linux/config.h> + #ifndef _ASMx86_64_PARAM_H #define _ASMx86_64_PARAM_H #ifdef __KERNEL__ -# define HZ 1000 /* Internal kernel timer frequency */ -# define USER_HZ 100 /* .. some user interfaces are in "ticks */ +# define HZ CONFIG_HZ /* Internal kernel timer frequency */ +# define USER_HZ 100 /* .. some user interfaces are in "ticks */ #define CLOCKS_PER_SEC (USER_HZ) /* like times() */ #endif diff --git a/include/asm-x86_64/percpu.h b/include/asm-x86_64/percpu.h index 415d73f3c8ef..9c71855736fb 100644 --- a/include/asm-x86_64/percpu.h +++ b/include/asm-x86_64/percpu.h @@ -39,7 +39,7 @@ extern void setup_per_cpu_areas(void); #define DEFINE_PER_CPU(type, name) \ __typeof__(type) per_cpu__##name -#define per_cpu(var, cpu) (*((void)cpu, &per_cpu__##var)) +#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu__##var)) #define __get_cpu_var(var) per_cpu__##var #endif /* SMP */ diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h index 8b55f139968f..106f666517bb 100644 --- a/include/asm-x86_64/processor.h +++ b/include/asm-x86_64/processor.h @@ -280,6 +280,14 @@ struct thread_struct { set_fs(USER_DS); \ } while(0) +#define get_debugreg(var, register) \ + __asm__("movq %%db" #register ", %0" \ + :"=r" (var)) +#define set_debugreg(value, register) \ + __asm__("movq %0,%%db" #register \ + : /* no output */ \ + :"r" (value)) + struct task_struct; struct mm_struct; diff --git a/include/asm-x86_64/ptrace.h b/include/asm-x86_64/ptrace.h index 5bbc8d3141c8..ca6f15ff61d4 100644 --- a/include/asm-x86_64/ptrace.h +++ b/include/asm-x86_64/ptrace.h @@ -82,6 +82,7 @@ struct pt_regs { #if defined(__KERNEL__) && !defined(__ASSEMBLY__) #define user_mode(regs) (!!((regs)->cs & 3)) +#define user_mode_vm(regs) user_mode(regs) #define instruction_pointer(regs) ((regs)->rip) extern unsigned long profile_pc(struct pt_regs *regs); void signal_fault(struct pt_regs *regs, void __user *frame, char *where); diff --git a/include/asm-x86_64/sparsemem.h b/include/asm-x86_64/sparsemem.h new file mode 100644 index 000000000000..dabb16714a71 --- /dev/null +++ b/include/asm-x86_64/sparsemem.h @@ -0,0 +1,26 @@ +#ifndef _ASM_X86_64_SPARSEMEM_H +#define _ASM_X86_64_SPARSEMEM_H 1 + +#ifdef CONFIG_SPARSEMEM + +/* + * generic non-linear memory support: + * + * 1) we will not split memory into more chunks than will fit into the flags + * field of the struct page + * + * SECTION_SIZE_BITS 2^n: size of each section + * MAX_PHYSADDR_BITS 2^n: max size of physical address space + * MAX_PHYSMEM_BITS 2^n: how much memory we can have in that space + * + */ + +#define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ +#define MAX_PHYSADDR_BITS 40 +#define MAX_PHYSMEM_BITS 40 + +extern int early_pfn_to_nid(unsigned long pfn); + +#endif /* CONFIG_SPARSEMEM */ + +#endif /* _ASM_X86_64_SPARSEMEM_H */ diff --git a/include/asm-x86_64/thread_info.h b/include/asm-x86_64/thread_info.h index f4b3b249639c..08eb6e4f3737 100644 --- a/include/asm-x86_64/thread_info.h +++ b/include/asm-x86_64/thread_info.h @@ -29,7 +29,7 @@ struct thread_info { __u32 flags; /* low level flags */ __u32 status; /* thread synchronous flags */ __u32 cpu; /* current CPU */ - int preempt_count; + int preempt_count; /* 0 => preemptable, <0 => BUG */ mm_segment_t addr_limit; struct restart_block restart_block; diff --git a/include/asm-x86_64/timex.h b/include/asm-x86_64/timex.h index 34f31a18f90b..24ecf6a637cb 100644 --- a/include/asm-x86_64/timex.h +++ b/include/asm-x86_64/timex.h @@ -26,6 +26,9 @@ static inline cycles_t get_cycles (void) extern unsigned int cpu_khz; +extern int read_current_timer(unsigned long *timer_value); +#define ARCH_HAS_READ_CURRENT_TIMER 1 + extern struct vxtime_data vxtime; #endif diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h index 67f24e0ea819..8f77e9f6bc23 100644 --- a/include/asm-x86_64/topology.h +++ b/include/asm-x86_64/topology.h @@ -3,7 +3,7 @@ #include <linux/config.h> -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NUMA #include <asm/mpspec.h> #include <asm/bitops.h> @@ -13,8 +13,8 @@ extern cpumask_t cpu_online_map; extern unsigned char cpu_to_node[]; +extern unsigned char pci_bus_to_node[]; extern cpumask_t node_to_cpumask[]; -extern cpumask_t pci_bus_to_cpumask[]; #ifdef CONFIG_ACPI_NUMA extern int __node_distance(int, int); @@ -26,18 +26,9 @@ extern int __node_distance(int, int); #define parent_node(node) (node) #define node_to_first_cpu(node) (__ffs(node_to_cpumask[node])) #define node_to_cpumask(node) (node_to_cpumask[node]) +#define pcibus_to_node(bus) pci_bus_to_node[(bus)->number] +#define pcibus_to_cpumask(bus) node_to_cpumask(pcibus_to_node(bus)); -static inline cpumask_t __pcibus_to_cpumask(int bus) -{ - cpumask_t busmask = pci_bus_to_cpumask[bus]; - cpumask_t online = cpu_online_map; - cpumask_t res; - cpus_and(res, busmask, online); - return res; -} -#define pcibus_to_cpumask(bus) __pcibus_to_cpumask(bus->number) - -#ifdef CONFIG_NUMA /* sched_domains SD_NODE_INIT for x86_64 machines */ #define SD_NODE_INIT (struct sched_domain) { \ .span = CPU_MASK_NONE, \ @@ -59,7 +50,6 @@ static inline cpumask_t __pcibus_to_cpumask(int bus) .balance_interval = 1, \ .nr_balance_failed = 0, \ } -#endif #endif diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 7e736e201c46..c1e82c514443 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -69,6 +69,11 @@ extern void remove_arg_zero(struct linux_binprm *); extern int search_binary_handler(struct linux_binprm *,struct pt_regs *); extern int flush_old_exec(struct linux_binprm * bprm); +extern int suid_dumpable; +#define SUID_DUMP_DISABLE 0 /* No setuid dumping */ +#define SUID_DUMP_USER 1 /* Dump as user of process */ +#define SUID_DUMP_ROOT 2 /* Dump as root */ + /* Stack area protections */ #define EXSTACK_DEFAULT 0 /* Whatever the arch defaults to */ #define EXSTACK_DISABLE_X 1 /* Disable executable stacks */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4a99b76c5a33..60272141ff19 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -285,16 +285,12 @@ enum blk_queue_state { Queue_up, }; -#define BLK_TAGS_PER_LONG (sizeof(unsigned long) * 8) -#define BLK_TAGS_MASK (BLK_TAGS_PER_LONG - 1) - struct blk_queue_tag { struct request **tag_index; /* map of busy tags */ unsigned long *tag_map; /* bit map of free/busy tags */ struct list_head busy_list; /* fifo list of busy tags */ int busy; /* current depth */ int max_depth; /* what we will send to device */ - int real_max_depth; /* what the array can hold */ atomic_t refcnt; /* map can be shared */ }; @@ -396,6 +392,7 @@ struct request_queue */ unsigned int sg_timeout; unsigned int sg_reserved_size; + int node; struct list_head drain_list; @@ -615,6 +612,8 @@ static inline void blkdev_dequeue_request(struct request *req) /* * Access functions for manipulating queue properties */ +extern request_queue_t *blk_init_queue_node(request_fn_proc *rfn, + spinlock_t *lock, int node_id); extern request_queue_t *blk_init_queue(request_fn_proc *, spinlock_t *); extern void blk_cleanup_queue(request_queue_t *); extern void blk_queue_make_request(request_queue_t *, make_request_fn *); @@ -646,7 +645,8 @@ extern void blk_wait_queue_drained(request_queue_t *, int); extern void blk_finish_queue_drain(request_queue_t *); int blk_get_queue(request_queue_t *); -request_queue_t *blk_alloc_queue(int); +request_queue_t *blk_alloc_queue(int gfp_mask); +request_queue_t *blk_alloc_queue_node(int,int); #define blk_put_queue(q) blk_cleanup_queue((q)) /* diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 0dd8ca1a3d5a..500f451ce0c0 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -67,6 +67,15 @@ extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0) #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ +#ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP +extern void *alloc_remap(int nid, unsigned long size); +#else +static inline void *alloc_remap(int nid, unsigned long size) +{ + return NULL; +} +#endif + extern unsigned long __initdata nr_kernel_pages; extern unsigned long __initdata nr_all_pages; diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h index 704fb76b6334..8a7c82151de9 100644 --- a/include/linux/fcntl.h +++ b/include/linux/fcntl.h @@ -25,6 +25,10 @@ #ifdef __KERNEL__ +#ifndef force_o_largefile +#define force_o_largefile() (BITS_PER_LONG != 32) +#endif + #if BITS_PER_LONG == 32 #define IS_GETLK32(cmd) ((cmd) == F_GETLK) #define IS_SETLK32(cmd) ((cmd) == F_SETLK) diff --git a/include/linux/fs.h b/include/linux/fs.h index e5a8db00df29..517bf4966bf5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -581,7 +581,6 @@ struct file { atomic_t f_count; unsigned int f_flags; mode_t f_mode; - int f_error; loff_t f_pos; struct fown_struct f_owner; unsigned int f_uid, f_gid; @@ -884,6 +883,7 @@ struct block_device_operations { int (*open) (struct inode *, struct file *); int (*release) (struct inode *, struct file *); int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long); + long (*unlocked_ioctl) (struct file *, unsigned, unsigned long); long (*compat_ioctl) (struct file *, unsigned, unsigned long); int (*media_changed) (struct gendisk *); int (*revalidate_disk) (struct gendisk *); @@ -1025,6 +1025,7 @@ struct super_operations { #define I_FREEING 16 #define I_CLEAR 32 #define I_NEW 64 +#define I_WILL_FREE 128 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 47dedaf971d6..01796c41c951 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -224,7 +224,7 @@ static inline void free_disk_stats(struct gendisk *disk) extern void disk_round_stats(struct gendisk *disk); /* drivers/block/genhd.c */ -extern int get_blkdev_list(char *); +extern int get_blkdev_list(char *, int); extern void add_disk(struct gendisk *disk); extern void del_gendisk(struct gendisk *gp); extern void unlink_gendisk(struct gendisk *gp); @@ -403,6 +403,7 @@ extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); extern void add_partition(struct gendisk *, int, sector_t, sector_t); extern void delete_partition(struct gendisk *, int); +extern struct gendisk *alloc_disk_node(int minors, int node_id); extern struct gendisk *alloc_disk(int minors); extern struct kobject *get_disk(struct gendisk *disk); extern void put_disk(struct gendisk *disk); diff --git a/include/linux/ide.h b/include/linux/ide.h index 336d6e509f59..92129078d4f3 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -917,7 +917,7 @@ typedef struct hwif_s { unsigned dma; void (*led_act)(void *data, int rw); -} ide_hwif_t; +} ____cacheline_maxaligned_in_smp ide_hwif_t; /* * internal ide interrupt handler type diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 99ddba5a4e00..5e1a7b0d7b3f 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -25,27 +25,45 @@ * Rusty Russell). * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes * interface to access function arguments. + * 2005-May Hien Nguyen <hien@us.ibm.com> and Jim Keniston + * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi + * <prasanna@in.ibm.com> added function-return probes. */ #include <linux/config.h> #include <linux/list.h> #include <linux/notifier.h> #include <linux/smp.h> + #include <asm/kprobes.h> +/* kprobe_status settings */ +#define KPROBE_HIT_ACTIVE 0x00000001 +#define KPROBE_HIT_SS 0x00000002 +#define KPROBE_REENTER 0x00000004 +#define KPROBE_HIT_SSDONE 0x00000008 + struct kprobe; struct pt_regs; +struct kretprobe; +struct kretprobe_instance; typedef int (*kprobe_pre_handler_t) (struct kprobe *, struct pt_regs *); typedef int (*kprobe_break_handler_t) (struct kprobe *, struct pt_regs *); typedef void (*kprobe_post_handler_t) (struct kprobe *, struct pt_regs *, unsigned long flags); typedef int (*kprobe_fault_handler_t) (struct kprobe *, struct pt_regs *, int trapnr); +typedef int (*kretprobe_handler_t) (struct kretprobe_instance *, + struct pt_regs *); + struct kprobe { struct hlist_node hlist; /* list of kprobes for multi-handler support */ struct list_head list; + /*count the number of times this probe was temporarily disarmed */ + unsigned long nmissed; + /* location of the probe point */ kprobe_opcode_t *addr; @@ -85,6 +103,62 @@ struct jprobe { kprobe_opcode_t *entry; /* probe handling code to jump to */ }; +#ifdef ARCH_SUPPORTS_KRETPROBES +extern int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs); +extern void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs, + unsigned long flags); +extern struct task_struct *arch_get_kprobe_task(void *ptr); +extern void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs); +extern void arch_kprobe_flush_task(struct task_struct *tk); +#else /* ARCH_SUPPORTS_KRETPROBES */ +static inline void kretprobe_trampoline(void) +{ +} +static inline int trampoline_probe_handler(struct kprobe *p, + struct pt_regs *regs) +{ + return 0; +} +static inline void trampoline_post_handler(struct kprobe *p, + struct pt_regs *regs, unsigned long flags) +{ +} +static inline void arch_prepare_kretprobe(struct kretprobe *rp, + struct pt_regs *regs) +{ +} +static inline void arch_kprobe_flush_task(struct task_struct *tk) +{ +} +#define arch_get_kprobe_task(ptr) ((struct task_struct *)NULL) +#endif /* ARCH_SUPPORTS_KRETPROBES */ +/* + * Function-return probe - + * Note: + * User needs to provide a handler function, and initialize maxactive. + * maxactive - The maximum number of instances of the probed function that + * can be active concurrently. + * nmissed - tracks the number of times the probed function's return was + * ignored, due to maxactive being too low. + * + */ +struct kretprobe { + struct kprobe kp; + kretprobe_handler_t handler; + int maxactive; + int nmissed; + struct hlist_head free_instances; + struct hlist_head used_instances; +}; + +struct kretprobe_instance { + struct hlist_node uflist; /* either on free list or used list */ + struct hlist_node hlist; + struct kretprobe *rp; + void *ret_addr; + void *stack_addr; +}; + #ifdef CONFIG_KPROBES /* Locks kprobe: irq must be disabled */ void lock_kprobes(void); @@ -99,11 +173,14 @@ static inline int kprobe_running(void) extern int arch_prepare_kprobe(struct kprobe *p); extern void arch_copy_kprobe(struct kprobe *p); +extern void arch_arm_kprobe(struct kprobe *p); +extern void arch_disarm_kprobe(struct kprobe *p); extern void arch_remove_kprobe(struct kprobe *p); extern void show_registers(struct pt_regs *regs); /* Get the kprobe at this addr (if any). Must have called lock_kprobes */ struct kprobe *get_kprobe(void *addr); +struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk); int register_kprobe(struct kprobe *p); void unregister_kprobe(struct kprobe *p); @@ -113,7 +190,16 @@ int register_jprobe(struct jprobe *p); void unregister_jprobe(struct jprobe *p); void jprobe_return(void); -#else +int register_kretprobe(struct kretprobe *rp); +void unregister_kretprobe(struct kretprobe *rp); + +struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp); +struct kretprobe_instance *get_rp_inst(void *sara); +struct kretprobe_instance *get_rp_inst_tsk(struct task_struct *tk); +void add_rp_inst(struct kretprobe_instance *ri); +void kprobe_flush_task(struct task_struct *tk); +void recycle_rp_inst(struct kretprobe_instance *ri); +#else /* CONFIG_KPROBES */ static inline int kprobe_running(void) { return 0; @@ -135,5 +221,15 @@ static inline void unregister_jprobe(struct jprobe *p) static inline void jprobe_return(void) { } -#endif +static inline int register_kretprobe(struct kretprobe *rp) +{ + return -ENOSYS; +} +static inline void unregister_kretprobe(struct kretprobe *rp) +{ +} +static inline void kprobe_flush_task(struct task_struct *tk) +{ +} +#endif /* CONFIG_KPROBES */ #endif /* _LINUX_KPROBES_H */ diff --git a/include/linux/loop.h b/include/linux/loop.h index 8220d9c9da00..53fa51595443 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -61,7 +61,7 @@ struct loop_device { struct semaphore lo_sem; struct semaphore lo_ctl_mutex; struct semaphore lo_bh_mutex; - atomic_t lo_pending; + int lo_pending; request_queue_t *lo_queue; }; diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 4a36edf1c974..796220ce47cc 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -20,9 +20,14 @@ typedef struct mempool_s { mempool_free_t *free; wait_queue_head_t wait; } mempool_t; -extern mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn, - mempool_free_t *free_fn, void *pool_data); -extern int mempool_resize(mempool_t *pool, int new_min_nr, unsigned int __nocast gfp_mask); + +extern mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, + mempool_free_t *free_fn, void *pool_data); +extern mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, + mempool_free_t *free_fn, void *pool_data, int nid); + +extern int mempool_resize(mempool_t *pool, int new_min_nr, + unsigned int __nocast gfp_mask); extern void mempool_destroy(mempool_t *pool); extern void * mempool_alloc(mempool_t *pool, unsigned int __nocast gfp_mask); extern void mempool_free(void *element, mempool_t *pool); diff --git a/include/linux/mm.h b/include/linux/mm.h index 1813b162b0a8..6eb7f48317f8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -395,19 +395,81 @@ static inline void put_page(struct page *page) /* * The zone field is never updated after free_area_init_core() * sets it, so none of the operations on it need to be atomic. - * We'll have up to (MAX_NUMNODES * MAX_NR_ZONES) zones total, - * so we use (MAX_NODES_SHIFT + MAX_ZONES_SHIFT) here to get enough bits. */ -#define NODEZONE_SHIFT (sizeof(page_flags_t)*8 - MAX_NODES_SHIFT - MAX_ZONES_SHIFT) -#define NODEZONE(node, zone) ((node << ZONES_SHIFT) | zone) + + +/* + * page->flags layout: + * + * There are three possibilities for how page->flags get + * laid out. The first is for the normal case, without + * sparsemem. The second is for sparsemem when there is + * plenty of space for node and section. The last is when + * we have run out of space and have to fall back to an + * alternate (slower) way of determining the node. + * + * No sparsemem: | NODE | ZONE | ... | FLAGS | + * with space for node: | SECTION | NODE | ZONE | ... | FLAGS | + * no space for node: | SECTION | ZONE | ... | FLAGS | + */ +#ifdef CONFIG_SPARSEMEM +#define SECTIONS_WIDTH SECTIONS_SHIFT +#else +#define SECTIONS_WIDTH 0 +#endif + +#define ZONES_WIDTH ZONES_SHIFT + +#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT <= FLAGS_RESERVED +#define NODES_WIDTH NODES_SHIFT +#else +#define NODES_WIDTH 0 +#endif + +/* Page flags: | [SECTION] | [NODE] | ZONE | ... | FLAGS | */ +#define SECTIONS_PGOFF ((sizeof(page_flags_t)*8) - SECTIONS_WIDTH) +#define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) +#define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) + +/* + * We are going to use the flags for the page to node mapping if its in + * there. This includes the case where there is no node, so it is implicit. + */ +#define FLAGS_HAS_NODE (NODES_WIDTH > 0 || NODES_SHIFT == 0) + +#ifndef PFN_SECTION_SHIFT +#define PFN_SECTION_SHIFT 0 +#endif + +/* + * Define the bit shifts to access each section. For non-existant + * sections we define the shift as 0; that plus a 0 mask ensures + * the compiler will optimise away reference to them. + */ +#define SECTIONS_PGSHIFT (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0)) +#define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) +#define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) + +/* NODE:ZONE or SECTION:ZONE is used to lookup the zone from a page. */ +#if FLAGS_HAS_NODE +#define ZONETABLE_SHIFT (NODES_SHIFT + ZONES_SHIFT) +#else +#define ZONETABLE_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT) +#endif +#define ZONETABLE_PGSHIFT ZONES_PGSHIFT + +#if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED +#error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED +#endif + +#define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) +#define NODES_MASK ((1UL << NODES_WIDTH) - 1) +#define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) +#define ZONETABLE_MASK ((1UL << ZONETABLE_SHIFT) - 1) static inline unsigned long page_zonenum(struct page *page) { - return (page->flags >> NODEZONE_SHIFT) & (~(~0UL << ZONES_SHIFT)); -} -static inline unsigned long page_to_nid(struct page *page) -{ - return (page->flags >> (NODEZONE_SHIFT + ZONES_SHIFT)); + return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } struct zone; @@ -415,13 +477,44 @@ extern struct zone *zone_table[]; static inline struct zone *page_zone(struct page *page) { - return zone_table[page->flags >> NODEZONE_SHIFT]; + return zone_table[(page->flags >> ZONETABLE_PGSHIFT) & + ZONETABLE_MASK]; +} + +static inline unsigned long page_to_nid(struct page *page) +{ + if (FLAGS_HAS_NODE) + return (page->flags >> NODES_PGSHIFT) & NODES_MASK; + else + return page_zone(page)->zone_pgdat->node_id; +} +static inline unsigned long page_to_section(struct page *page) +{ + return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; +} + +static inline void set_page_zone(struct page *page, unsigned long zone) +{ + page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT); + page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT; +} +static inline void set_page_node(struct page *page, unsigned long node) +{ + page->flags &= ~(NODES_MASK << NODES_PGSHIFT); + page->flags |= (node & NODES_MASK) << NODES_PGSHIFT; +} +static inline void set_page_section(struct page *page, unsigned long section) +{ + page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT); + page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; } -static inline void set_page_zone(struct page *page, unsigned long nodezone_num) +static inline void set_page_links(struct page *page, unsigned long zone, + unsigned long node, unsigned long pfn) { - page->flags &= ~(~0UL << NODEZONE_SHIFT); - page->flags |= nodezone_num << NODEZONE_SHIFT; + set_page_zone(page, zone); + set_page_node(page, node); + set_page_section(page, pfn_to_section_nr(pfn)); } #ifndef CONFIG_DISCONTIGMEM diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4733d35d8223..6c90461ed99f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -269,7 +269,9 @@ typedef struct pglist_data { struct zone node_zones[MAX_NR_ZONES]; struct zonelist node_zonelists[GFP_ZONETYPES]; int nr_zones; +#ifdef CONFIG_FLAT_NODE_MEM_MAP struct page *node_mem_map; +#endif struct bootmem_data *bdata; unsigned long node_start_pfn; unsigned long node_present_pages; /* total number of physical pages */ @@ -284,6 +286,12 @@ typedef struct pglist_data { #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) #define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) +#ifdef CONFIG_FLAT_NODE_MEM_MAP +#define pgdat_page_nr(pgdat, pagenr) ((pgdat)->node_mem_map + (pagenr)) +#else +#define pgdat_page_nr(pgdat, pagenr) pfn_to_page((pgdat)->node_start_pfn + (pagenr)) +#endif +#define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr)) extern struct pglist_data *pgdat_list; @@ -400,7 +408,7 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *, /* Returns the number of the current Node. */ #define numa_node_id() (cpu_to_node(raw_smp_processor_id())) -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_NEED_MULTIPLE_NODES extern struct pglist_data contig_page_data; #define NODE_DATA(nid) (&contig_page_data) @@ -408,36 +416,177 @@ extern struct pglist_data contig_page_data; #define MAX_NODES_SHIFT 1 #define pfn_to_nid(pfn) (0) -#else /* CONFIG_DISCONTIGMEM */ +#else /* CONFIG_NEED_MULTIPLE_NODES */ #include <asm/mmzone.h> +#endif /* !CONFIG_NEED_MULTIPLE_NODES */ + +#ifdef CONFIG_SPARSEMEM +#include <asm/sparsemem.h> +#endif + #if BITS_PER_LONG == 32 || defined(ARCH_HAS_ATOMIC_UNSIGNED) /* * with 32 bit page->flags field, we reserve 8 bits for node/zone info. * there are 3 zones (2 bits) and this leaves 8-2=6 bits for nodes. */ -#define MAX_NODES_SHIFT 6 +#define FLAGS_RESERVED 8 + #elif BITS_PER_LONG == 64 /* * with 64 bit flags field, there's plenty of room. */ -#define MAX_NODES_SHIFT 10 +#define FLAGS_RESERVED 32 + +#else + +#error BITS_PER_LONG not defined + +#endif + +#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID +#define early_pfn_to_nid(nid) (0UL) #endif -#endif /* !CONFIG_DISCONTIGMEM */ +#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT) +#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT) + +#ifdef CONFIG_SPARSEMEM + +/* + * SECTION_SHIFT #bits space required to store a section # + * + * PA_SECTION_SHIFT physical address to/from section number + * PFN_SECTION_SHIFT pfn to/from section number + */ +#define SECTIONS_SHIFT (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS) + +#define PA_SECTION_SHIFT (SECTION_SIZE_BITS) +#define PFN_SECTION_SHIFT (SECTION_SIZE_BITS - PAGE_SHIFT) + +#define NR_MEM_SECTIONS (1UL << SECTIONS_SHIFT) -#if NODES_SHIFT > MAX_NODES_SHIFT -#error NODES_SHIFT > MAX_NODES_SHIFT +#define PAGES_PER_SECTION (1UL << PFN_SECTION_SHIFT) +#define PAGE_SECTION_MASK (~(PAGES_PER_SECTION-1)) + +#if (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS +#error Allocator MAX_ORDER exceeds SECTION_SIZE #endif -/* There are currently 3 zones: DMA, Normal & Highmem, thus we need 2 bits */ -#define MAX_ZONES_SHIFT 2 +struct page; +struct mem_section { + /* + * This is, logically, a pointer to an array of struct + * pages. However, it is stored with some other magic. + * (see sparse.c::sparse_init_one_section()) + * + * Making it a UL at least makes someone do a cast + * before using it wrong. + */ + unsigned long section_mem_map; +}; + +extern struct mem_section mem_section[NR_MEM_SECTIONS]; -#if ZONES_SHIFT > MAX_ZONES_SHIFT -#error ZONES_SHIFT > MAX_ZONES_SHIFT +static inline struct mem_section *__nr_to_section(unsigned long nr) +{ + return &mem_section[nr]; +} + +/* + * We use the lower bits of the mem_map pointer to store + * a little bit of information. There should be at least + * 3 bits here due to 32-bit alignment. + */ +#define SECTION_MARKED_PRESENT (1UL<<0) +#define SECTION_HAS_MEM_MAP (1UL<<1) +#define SECTION_MAP_LAST_BIT (1UL<<2) +#define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1)) + +static inline struct page *__section_mem_map_addr(struct mem_section *section) +{ + unsigned long map = section->section_mem_map; + map &= SECTION_MAP_MASK; + return (struct page *)map; +} + +static inline int valid_section(struct mem_section *section) +{ + return (section->section_mem_map & SECTION_MARKED_PRESENT); +} + +static inline int section_has_mem_map(struct mem_section *section) +{ + return (section->section_mem_map & SECTION_HAS_MEM_MAP); +} + +static inline int valid_section_nr(unsigned long nr) +{ + return valid_section(__nr_to_section(nr)); +} + +/* + * Given a kernel address, find the home node of the underlying memory. + */ +#define kvaddr_to_nid(kaddr) pfn_to_nid(__pa(kaddr) >> PAGE_SHIFT) + +static inline struct mem_section *__pfn_to_section(unsigned long pfn) +{ + return __nr_to_section(pfn_to_section_nr(pfn)); +} + +#define pfn_to_page(pfn) \ +({ \ + unsigned long __pfn = (pfn); \ + __section_mem_map_addr(__pfn_to_section(__pfn)) + __pfn; \ +}) +#define page_to_pfn(page) \ +({ \ + page - __section_mem_map_addr(__nr_to_section( \ + page_to_section(page))); \ +}) + +static inline int pfn_valid(unsigned long pfn) +{ + if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) + return 0; + return valid_section(__nr_to_section(pfn_to_section_nr(pfn))); +} + +/* + * These are _only_ used during initialisation, therefore they + * can use __initdata ... They could have names to indicate + * this restriction. + */ +#ifdef CONFIG_NUMA +#define pfn_to_nid early_pfn_to_nid +#endif + +#define pfn_to_pgdat(pfn) \ +({ \ + NODE_DATA(pfn_to_nid(pfn)); \ +}) + +#define early_pfn_valid(pfn) pfn_valid(pfn) +void sparse_init(void); +#else +#define sparse_init() do {} while (0) +#endif /* CONFIG_SPARSEMEM */ + +#ifdef CONFIG_NODES_SPAN_OTHER_NODES +#define early_pfn_in_nid(pfn, nid) (early_pfn_to_nid(pfn) == (nid)) +#else +#define early_pfn_in_nid(pfn, nid) (1) +#endif + +#ifndef early_pfn_valid +#define early_pfn_valid(pfn) (1) #endif +void memory_present(int nid, unsigned long start, unsigned long end); +unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); + #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _LINUX_MMZONE_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ba5d1236aa17..d89816ad642f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -41,7 +41,7 @@ struct divert_blk; struct vlan_group; struct ethtool_ops; -struct netpoll; +struct netpoll_info; /* source back-compat hooks */ #define SET_ETHTOOL_OPS(netdev,ops) \ ( (netdev)->ethtool_ops = (ops) ) @@ -468,7 +468,7 @@ struct net_device unsigned char *haddr); int (*neigh_setup)(struct net_device *dev, struct neigh_parms *); #ifdef CONFIG_NETPOLL - struct netpoll *np; + struct netpoll_info *npinfo; #endif #ifdef CONFIG_NET_POLL_CONTROLLER void (*poll_controller)(struct net_device *dev); @@ -925,10 +925,6 @@ extern int skb_checksum_help(struct sk_buff *skb, int inward); extern void net_enable_timestamp(void); extern void net_disable_timestamp(void); -#ifdef CONFIG_SYSCTL -extern char *net_sysctl_strdup(const char *s); -#endif - #endif /* __KERNEL__ */ #endif /* _LINUX_DEV_H */ diff --git a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h index baa83e757156..d9bceedfb3dc 100644 --- a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h +++ b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h @@ -18,7 +18,6 @@ struct clusterip_config; struct ipt_clusterip_tgt_info { u_int32_t flags; - struct clusterip_config *config; /* only relevant for new ones */ u_int8_t clustermac[6]; @@ -27,6 +26,8 @@ struct ipt_clusterip_tgt_info { u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; enum clusterip_hashmode hash_mode; u_int32_t hash_initval; + + struct clusterip_config *config; }; #endif /*_IPT_CLUSTERIP_H_target*/ diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index c0d8b90c5202..bcd0ac33f592 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -16,14 +16,19 @@ struct netpoll; struct netpoll { struct net_device *dev; char dev_name[16], *name; - int rx_flags; void (*rx_hook)(struct netpoll *, int, char *, int); void (*drop)(struct sk_buff *skb); u32 local_ip, remote_ip; u16 local_port, remote_port; unsigned char local_mac[6], remote_mac[6]; +}; + +struct netpoll_info { spinlock_t poll_lock; int poll_owner; + int rx_flags; + spinlock_t rx_lock; + struct netpoll *rx_np; /* netpoll that registered an rx_hook */ }; void netpoll_poll(struct netpoll *np); @@ -39,22 +44,35 @@ void netpoll_queue(struct sk_buff *skb); #ifdef CONFIG_NETPOLL static inline int netpoll_rx(struct sk_buff *skb) { - return skb->dev->np && skb->dev->np->rx_flags && __netpoll_rx(skb); + struct netpoll_info *npinfo = skb->dev->npinfo; + unsigned long flags; + int ret = 0; + + if (!npinfo || (!npinfo->rx_np && !npinfo->rx_flags)) + return 0; + + spin_lock_irqsave(&npinfo->rx_lock, flags); + /* check rx_flags again with the lock held */ + if (npinfo->rx_flags && __netpoll_rx(skb)) + ret = 1; + spin_unlock_irqrestore(&npinfo->rx_lock, flags); + + return ret; } static inline void netpoll_poll_lock(struct net_device *dev) { - if (dev->np) { - spin_lock(&dev->np->poll_lock); - dev->np->poll_owner = smp_processor_id(); + if (dev->npinfo) { + spin_lock(&dev->npinfo->poll_lock); + dev->npinfo->poll_owner = smp_processor_id(); } } static inline void netpoll_poll_unlock(struct net_device *dev) { - if (dev->np) { - spin_unlock(&dev->np->poll_lock); - dev->np->poll_owner = -1; + if (dev->npinfo) { + dev->npinfo->poll_owner = -1; + spin_unlock(&dev->npinfo->poll_lock); } } diff --git a/include/linux/numa.h b/include/linux/numa.h index bd0c8c4e9a95..f0c539bd3cfc 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -3,7 +3,7 @@ #include <linux/config.h> -#ifdef CONFIG_DISCONTIGMEM +#ifndef CONFIG_FLATMEM #include <asm/numnodes.h> #endif diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h index 4e2d2a942ecb..4b32bce9a289 100644 --- a/include/linux/pktcdvd.h +++ b/include/linux/pktcdvd.h @@ -159,7 +159,7 @@ struct packet_iosched struct bio *read_queue_tail; struct bio *write_queue; struct bio *write_queue_tail; - int high_prio_read; /* An important read request has been queued */ + sector_t last_write; /* The sector where the last write ended */ int successive_reads; }; diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h index 5efd0a6dad94..6e53c34035cd 100644 --- a/include/linux/posix_acl_xattr.h +++ b/include/linux/posix_acl_xattr.h @@ -23,13 +23,13 @@ #define ACL_UNDEFINED_ID (-1) typedef struct { - __u16 e_tag; - __u16 e_perm; - __u32 e_id; + __le16 e_tag; + __le16 e_perm; + __le32 e_id; } posix_acl_xattr_entry; typedef struct { - __u32 a_version; + __le32 a_version; posix_acl_xattr_entry a_entries[0]; } posix_acl_xattr_header; @@ -52,4 +52,7 @@ posix_acl_xattr_count(size_t size) return size / sizeof(posix_acl_xattr_entry); } +struct posix_acl *posix_acl_from_xattr(const void *value, size_t size); +int posix_acl_to_xattr(const struct posix_acl *acl, void *buffer, size_t size); + #endif /* _POSIX_ACL_XATTR_H */ diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index e57baa85e744..d211507ab246 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -39,7 +39,8 @@ extern int dquot_commit_info(struct super_block *sb, int type); extern int dquot_mark_dquot_dirty(struct dquot *dquot); extern int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path); -extern int vfs_quota_on_mount(int type, int format_id, struct dentry *dentry); +extern int vfs_quota_on_mount(struct super_block *sb, char *qf_name, + int format_id, int type); extern int vfs_quota_off(struct super_block *sb, int type); #define vfs_quota_off_mount(sb, type) vfs_quota_off(sb, type) extern int vfs_quota_sync(struct super_block *sb, int type); diff --git a/include/linux/reiserfs_acl.h b/include/linux/reiserfs_acl.h index 2aef9c3f5ce8..0760507a545b 100644 --- a/include/linux/reiserfs_acl.h +++ b/include/linux/reiserfs_acl.h @@ -1,6 +1,5 @@ #include <linux/init.h> #include <linux/posix_acl.h> -#include <linux/xattr_acl.h> #define REISERFS_ACL_VERSION 0x0001 diff --git a/include/linux/sched.h b/include/linux/sched.h index b58afd97a180..901742f92389 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -246,7 +246,7 @@ struct mm_struct { unsigned long saved_auxv[42]; /* for /proc/PID/auxv */ - unsigned dumpable:1; + unsigned dumpable:2; cpumask_t cpu_vm_mask; /* Architecture-specific MM context */ diff --git a/include/linux/string.h b/include/linux/string.h index b9fc59469956..93994c613095 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -88,6 +88,8 @@ extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); #endif +extern char *kstrdup(const char *s, int gfp); + #ifdef __cplusplus } #endif diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index a17745c80a91..614e939c78a4 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -136,6 +136,7 @@ enum KERN_UNKNOWN_NMI_PANIC=66, /* int: unknown nmi panic flag */ KERN_BOOTLOADER_TYPE=67, /* int: boot loader type */ KERN_RANDOMIZE=68, /* int: randomize virtual address space */ + KERN_SETUID_DUMPABLE=69, /* int: behaviour of dumps for setuid core */ }; diff --git a/include/linux/timer.h b/include/linux/timer.h index 90db1cc62ddd..221f81ac2002 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -6,45 +6,33 @@ #include <linux/spinlock.h> #include <linux/stddef.h> -struct tvec_t_base_s; +struct timer_base_s; struct timer_list { struct list_head entry; unsigned long expires; - spinlock_t lock; unsigned long magic; void (*function)(unsigned long); unsigned long data; - struct tvec_t_base_s *base; + struct timer_base_s *base; }; #define TIMER_MAGIC 0x4b87ad6e +extern struct timer_base_s __init_timer_base; + #define TIMER_INITIALIZER(_function, _expires, _data) { \ .function = (_function), \ .expires = (_expires), \ .data = (_data), \ - .base = NULL, \ + .base = &__init_timer_base, \ .magic = TIMER_MAGIC, \ - .lock = SPIN_LOCK_UNLOCKED, \ } -/*** - * init_timer - initialize a timer. - * @timer: the timer to be initialized - * - * init_timer() must be done to a timer prior calling *any* of the - * other timer functions. - */ -static inline void init_timer(struct timer_list * timer) -{ - timer->base = NULL; - timer->magic = TIMER_MAGIC; - spin_lock_init(&timer->lock); -} +void fastcall init_timer(struct timer_list * timer); /*** * timer_pending - is a timer pending? @@ -58,7 +46,7 @@ static inline void init_timer(struct timer_list * timer) */ static inline int timer_pending(const struct timer_list * timer) { - return timer->base != NULL; + return timer->entry.next != NULL; } extern void add_timer_on(struct timer_list *timer, int cpu); @@ -88,13 +76,15 @@ static inline void add_timer(struct timer_list * timer) } #ifdef CONFIG_SMP + extern int try_to_del_timer_sync(struct timer_list *timer); extern int del_timer_sync(struct timer_list *timer); - extern int del_singleshot_timer_sync(struct timer_list *timer); #else -# define del_timer_sync(t) del_timer(t) -# define del_singleshot_timer_sync(t) del_timer(t) +# define try_to_del_timer_sync(t) del_timer(t) +# define del_timer_sync(t) del_timer(t) #endif +#define del_singleshot_timer_sync(t) del_timer_sync(t) + extern void init_timers(void); extern void run_local_timers(void); extern void it_real_fn(unsigned long); diff --git a/include/linux/tty.h b/include/linux/tty.h index 1b76106272d3..59ff42c629ec 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -345,6 +345,7 @@ extern int tty_check_change(struct tty_struct * tty); extern void stop_tty(struct tty_struct * tty); extern void start_tty(struct tty_struct * tty); extern int tty_register_ldisc(int disc, struct tty_ldisc *new_ldisc); +extern int tty_unregister_ldisc(int disc); extern int tty_register_driver(struct tty_driver *driver); extern int tty_unregister_driver(struct tty_driver *driver); extern void tty_register_device(struct tty_driver *driver, unsigned index, struct device *dev); diff --git a/include/linux/wait.h b/include/linux/wait.h index c9486c3efb4a..d38c9fecdc36 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -33,7 +33,7 @@ int default_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key struct __wait_queue { unsigned int flags; #define WQ_FLAG_EXCLUSIVE 0x01 - struct task_struct * task; + void *private; wait_queue_func_t func; struct list_head task_list; }; @@ -60,7 +60,7 @@ typedef struct __wait_queue_head wait_queue_head_t; */ #define __WAITQUEUE_INITIALIZER(name, tsk) { \ - .task = tsk, \ + .private = tsk, \ .func = default_wake_function, \ .task_list = { NULL, NULL } } @@ -86,7 +86,7 @@ static inline void init_waitqueue_head(wait_queue_head_t *q) static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p) { q->flags = 0; - q->task = p; + q->private = p; q->func = default_wake_function; } @@ -94,7 +94,7 @@ static inline void init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func) { q->flags = 0; - q->task = NULL; + q->private = NULL; q->func = func; } @@ -110,7 +110,7 @@ static inline int waitqueue_active(wait_queue_head_t *q) * aio specifies a wait queue entry with an async notification * callback routine, not associated with any task. */ -#define is_sync_wait(wait) (!(wait) || ((wait)->task)) +#define is_sync_wait(wait) (!(wait) || ((wait)->private)) extern void FASTCALL(add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)); extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)); @@ -384,7 +384,7 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); #define DEFINE_WAIT(name) \ wait_queue_t name = { \ - .task = current, \ + .private = current, \ .func = autoremove_wake_function, \ .task_list = LIST_HEAD_INIT((name).task_list), \ } @@ -393,7 +393,7 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); struct wait_bit_queue name = { \ .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \ .wait = { \ - .task = current, \ + .private = current, \ .func = wake_bit_function, \ .task_list = \ LIST_HEAD_INIT((name).wait.task_list), \ @@ -402,7 +402,7 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); #define init_wait(wait) \ do { \ - (wait)->task = current; \ + (wait)->private = current; \ (wait)->func = autoremove_wake_function; \ INIT_LIST_HEAD(&(wait)->task_list); \ } while (0) diff --git a/include/linux/x25.h b/include/linux/x25.h index 7531cfed5885..16d44931afa0 100644 --- a/include/linux/x25.h +++ b/include/linux/x25.h @@ -4,6 +4,8 @@ * History * mar/20/00 Daniela Squassoni Disabling/enabling of facilities * negotiation. + * apr/02/05 Shaun Pereira Selective sub address matching with + * call user data */ #ifndef X25_KERNEL_H @@ -16,6 +18,9 @@ #define SIOCX25GCALLUSERDATA (SIOCPROTOPRIVATE + 4) #define SIOCX25SCALLUSERDATA (SIOCPROTOPRIVATE + 5) #define SIOCX25GCAUSEDIAG (SIOCPROTOPRIVATE + 6) +#define SIOCX25SCUDMATCHLEN (SIOCPROTOPRIVATE + 7) +#define SIOCX25CALLACCPTAPPRV (SIOCPROTOPRIVATE + 8) +#define SIOCX25SENDCALLACCPT (SIOCPROTOPRIVATE + 9) /* * Values for {get,set}sockopt. @@ -109,4 +114,11 @@ struct x25_causediag { unsigned char diagnostic; }; +/* + * Further optional call user data match length selection + */ +struct x25_subaddr { + unsigned int cudmatchlength; +}; + #endif diff --git a/include/net/x25.h b/include/net/x25.h index 7a1ba5bbb868..8b39b98876e8 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -79,6 +79,8 @@ enum { #define X25_DEFAULT_PACKET_SIZE X25_PS128 /* Default Packet Size */ #define X25_DEFAULT_THROUGHPUT 0x0A /* Deafult Throughput */ #define X25_DEFAULT_REVERSE 0x00 /* Default Reverse Charging */ +#define X25_DENY_ACCPT_APPRV 0x01 /* Default value */ +#define X25_ALLOW_ACCPT_APPRV 0x00 /* Control enabled */ #define X25_SMODULUS 8 #define X25_EMODULUS 128 @@ -94,7 +96,7 @@ enum { #define X25_FAC_CLASS_C 0x80 #define X25_FAC_CLASS_D 0xC0 -#define X25_FAC_REVERSE 0x01 +#define X25_FAC_REVERSE 0x01 /* also fast select */ #define X25_FAC_THROUGHPUT 0x02 #define X25_FAC_PACKET_SIZE 0x42 #define X25_FAC_WINDOW_SIZE 0x43 @@ -134,8 +136,8 @@ struct x25_sock { struct sock sk; struct x25_address source_addr, dest_addr; struct x25_neigh *neighbour; - unsigned int lci; - unsigned char state, condition, qbitincl, intflag; + unsigned int lci, cudmatchlength; + unsigned char state, condition, qbitincl, intflag, accptapprv; unsigned short vs, vr, va, vl; unsigned long t2, t21, t22, t23; unsigned short fraglen; @@ -242,7 +244,6 @@ extern int x25_validate_nr(struct sock *, unsigned short); extern void x25_write_internal(struct sock *, int); extern int x25_decode(struct sock *, struct sk_buff *, int *, int *, int *, int *, int *); extern void x25_disconnect(struct sock *, int, unsigned char, unsigned char); -extern int x25_check_calluserdata(struct x25_calluserdata *,struct x25_calluserdata *); /* x25_timer.c */ extern void x25_start_heartbeat(struct sock *); diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h index 6d3413a56708..67b867f31fe4 100644 --- a/include/pcmcia/ss.h +++ b/include/pcmcia/ss.h @@ -77,6 +77,11 @@ extern socket_state_t dead_socket; /* Use this just for bridge windows */ #define MAP_IOSPACE 0x20 +/* power hook operations */ +#define HOOK_POWER_PRE 0x01 +#define HOOK_POWER_POST 0x02 + + typedef struct pccard_io_map { u_char map; u_char flags; @@ -222,6 +227,9 @@ struct pcmcia_socket { /* Zoom video behaviour is so chip specific its not worth adding this to _ops */ void (*zoom_video)(struct pcmcia_socket *, int); + + /* so is power hook */ + int (*power_hook)(struct pcmcia_socket *sock, int operation); /* state thread */ struct semaphore skt_sem; /* protects socket h/w state */ diff --git a/include/sound/core.h b/include/sound/core.h index 9117c23e3a01..f8c4ef0aa352 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -292,6 +292,7 @@ void *snd_hidden_kcalloc(size_t n, size_t size, int flags); void snd_hidden_kfree(const void *obj); void *snd_hidden_vmalloc(unsigned long size); void snd_hidden_vfree(void *obj); +char *snd_hidden_kstrdup(const char *s, int flags); #define kmalloc(size, flags) snd_hidden_kmalloc(size, flags) #define kcalloc(n, size, flags) snd_hidden_kcalloc(n, size, flags) #define kfree(obj) snd_hidden_kfree(obj) @@ -301,6 +302,7 @@ void snd_hidden_vfree(void *obj); #define vmalloc_nocheck(size) snd_wrapper_vmalloc(size) #define kfree_nocheck(obj) snd_wrapper_kfree(obj) #define vfree_nocheck(obj) snd_wrapper_vfree(obj) +#define kstrdup(s, flags) snd_hidden_kstrdup(s, flags) #else #define snd_memory_init() /*NOP*/ #define snd_memory_done() /*NOP*/ @@ -311,7 +313,6 @@ void snd_hidden_vfree(void *obj); #define kfree_nocheck(obj) kfree(obj) #define vfree_nocheck(obj) vfree(obj) #endif -char *snd_kmalloc_strdup(const char *string, int flags); int copy_to_user_fromio(void __user *dst, const volatile void __iomem *src, size_t count); int copy_from_user_toio(volatile void __iomem *dst, const void __user *src, size_t count); diff --git a/init/calibrate.c b/init/calibrate.c index c698e04a3dbe..d206c7548fe6 100644 --- a/init/calibrate.c +++ b/init/calibrate.c @@ -8,6 +8,8 @@ #include <linux/delay.h> #include <linux/init.h> +#include <asm/timex.h> + static unsigned long preset_lpj; static int __init lpj_setup(char *str) { @@ -17,6 +19,92 @@ static int __init lpj_setup(char *str) __setup("lpj=", lpj_setup); +#ifdef ARCH_HAS_READ_CURRENT_TIMER + +/* This routine uses the read_current_timer() routine and gets the + * loops per jiffy directly, instead of guessing it using delay(). + * Also, this code tries to handle non-maskable asynchronous events + * (like SMIs) + */ +#define DELAY_CALIBRATION_TICKS ((HZ < 100) ? 1 : (HZ/100)) +#define MAX_DIRECT_CALIBRATION_RETRIES 5 + +static unsigned long __devinit calibrate_delay_direct(void) +{ + unsigned long pre_start, start, post_start; + unsigned long pre_end, end, post_end; + unsigned long start_jiffies; + unsigned long tsc_rate_min, tsc_rate_max; + unsigned long good_tsc_sum = 0; + unsigned long good_tsc_count = 0; + int i; + + if (read_current_timer(&pre_start) < 0 ) + return 0; + + /* + * A simple loop like + * while ( jiffies < start_jiffies+1) + * start = read_current_timer(); + * will not do. As we don't really know whether jiffy switch + * happened first or timer_value was read first. And some asynchronous + * event can happen between these two events introducing errors in lpj. + * + * So, we do + * 1. pre_start <- When we are sure that jiffy switch hasn't happened + * 2. check jiffy switch + * 3. start <- timer value before or after jiffy switch + * 4. post_start <- When we are sure that jiffy switch has happened + * + * Note, we don't know anything about order of 2 and 3. + * Now, by looking at post_start and pre_start difference, we can + * check whether any asynchronous event happened or not + */ + + for (i = 0; i < MAX_DIRECT_CALIBRATION_RETRIES; i++) { + pre_start = 0; + read_current_timer(&start); + start_jiffies = jiffies; + while (jiffies <= (start_jiffies + 1)) { + pre_start = start; + read_current_timer(&start); + } + read_current_timer(&post_start); + + pre_end = 0; + end = post_start; + while (jiffies <= + (start_jiffies + 1 + DELAY_CALIBRATION_TICKS)) { + pre_end = end; + read_current_timer(&end); + } + read_current_timer(&post_end); + + tsc_rate_max = (post_end - pre_start) / DELAY_CALIBRATION_TICKS; + tsc_rate_min = (pre_end - post_start) / DELAY_CALIBRATION_TICKS; + + /* + * If the upper limit and lower limit of the tsc_rate is + * >= 12.5% apart, redo calibration. + */ + if (pre_start != 0 && pre_end != 0 && + (tsc_rate_max - tsc_rate_min) < (tsc_rate_max >> 3)) { + good_tsc_count++; + good_tsc_sum += tsc_rate_max; + } + } + + if (good_tsc_count) + return (good_tsc_sum/good_tsc_count); + + printk(KERN_WARNING "calibrate_delay_direct() failed to get a good " + "estimate for loops_per_jiffy.\nProbably due to long platform interrupts. Consider using \"lpj=\" boot option.\n"); + return 0; +} +#else +static unsigned long __devinit calibrate_delay_direct(void) {return 0;} +#endif + /* * This is the number of bits of precision for the loops_per_jiffy. Each * bit takes on average 1.5/HZ seconds. This (like the original) is a little @@ -35,6 +123,12 @@ void __devinit calibrate_delay(void) "%lu.%02lu BogoMIPS preset\n", loops_per_jiffy/(500000/HZ), (loops_per_jiffy/(5000/HZ)) % 100); + } else if ((loops_per_jiffy = calibrate_delay_direct()) != 0) { + printk("Calibrating delay using timer specific routine.. "); + printk("%lu.%02lu BogoMIPS (lpj=%lu)\n", + loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ)) % 100, + loops_per_jiffy); } else { loops_per_jiffy = (1<<12); diff --git a/ipc/sem.c b/ipc/sem.c index 5ad7ac0ed60d..7e8a25c82ef3 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -1054,7 +1054,7 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops, struct sembuf fast_sops[SEMOPM_FAST]; struct sembuf* sops = fast_sops, *sop; struct sem_undo *un; - int undos = 0, decrease = 0, alter = 0, max; + int undos = 0, alter = 0, max; struct sem_queue queue; unsigned long jiffies_left = 0; @@ -1089,13 +1089,10 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops, if (sop->sem_num >= max) max = sop->sem_num; if (sop->sem_flg & SEM_UNDO) - undos++; - if (sop->sem_op < 0) - decrease = 1; - if (sop->sem_op > 0) + undos = 1; + if (sop->sem_op != 0) alter = 1; } - alter |= decrease; retry_undos: if (undos) { diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz new file mode 100644 index 000000000000..248e1c396f8b --- /dev/null +++ b/kernel/Kconfig.hz @@ -0,0 +1,46 @@ +# +# Timer Interrupt Frequency Configuration +# + +choice + prompt "Timer frequency" + default HZ_250 + help + Allows the configuration of the timer frequency. It is customary + to have the timer interrupt run at 1000 HZ but 100 HZ may be more + beneficial for servers and NUMA systems that do not need to have + a fast response for user interaction and that may experience bus + contention and cacheline bounces as a result of timer interrupts. + Note that the timer interrupt occurs on each processor in an SMP + environment leading to NR_CPUS * HZ number of timer interrupts + per second. + + + config HZ_100 + bool "100 HZ" + help + 100 HZ is a typical choice for servers, SMP and NUMA systems + with lots of processors that may show reduced performance if + too many timer interrupts are occurring. + + config HZ_250 + bool "250 HZ" + help + 250 HZ is a good compromise choice allowing server performance + while also showing good interactive responsiveness even + on SMP and NUMA systems. + + config HZ_1000 + bool "1000 HZ" + help + 1000 HZ is the preferred choice for desktop systems and other + systems requiring fast interactive responses to events. + +endchoice + +config HZ + int + default 100 if HZ_100 + default 250 if HZ_250 + default 1000 if HZ_1000 + diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 00e8f2575512..79dd929f4084 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -228,13 +228,7 @@ static struct dentry_operations cpuset_dops = { static struct dentry *cpuset_get_dentry(struct dentry *parent, const char *name) { - struct qstr qstr; - struct dentry *d; - - qstr.name = name; - qstr.len = strlen(name); - qstr.hash = full_name_hash(name, qstr.len); - d = lookup_hash(&qstr, parent); + struct dentry *d = lookup_one_len(name, parent, strlen(name)); if (!IS_ERR(d)) d->d_op = &cpuset_dops; return d; diff --git a/kernel/exit.c b/kernel/exit.c index 2ef2ad540201..3ebcd60a19c6 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -72,6 +72,11 @@ repeat: BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); __exit_signal(p); __exit_sighand(p); + /* + * Note that the fastpath in sys_times depends on __exit_signal having + * updated the counters before a task is removed from the tasklist of + * the process by __unhash_process. + */ __unhash_process(p); /* @@ -793,6 +798,17 @@ fastcall NORET_TYPE void do_exit(long code) ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP); } + /* + * We're taking recursive faults here in do_exit. Safest is to just + * leave this task alone and wait for reboot. + */ + if (unlikely(tsk->flags & PF_EXITING)) { + printk(KERN_ALERT + "Fixing recursive fault but reboot is needed!\n"); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule(); + } + tsk->flags |= PF_EXITING; /* diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 037142b72a49..334f37472c56 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -27,6 +27,9 @@ * interface to access function arguments. * 2004-Sep Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes * exceptions notifier to be first on the priority list. + * 2005-May Hien Nguyen <hien@us.ibm.com>, Jim Keniston + * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi + * <prasanna@in.ibm.com> added function-return probes. */ #include <linux/kprobes.h> #include <linux/spinlock.h> @@ -41,6 +44,7 @@ #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS) static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; +static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; unsigned int kprobe_cpu = NR_CPUS; static DEFINE_SPINLOCK(kprobe_lock); @@ -78,22 +82,23 @@ struct kprobe *get_kprobe(void *addr) * Aggregate handlers for multiple kprobes support - these handlers * take care of invoking the individual kprobe handlers on p->list */ -int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) +static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe *kp; list_for_each_entry(kp, &p->list, list) { if (kp->pre_handler) { curr_kprobe = kp; - kp->pre_handler(kp, regs); - curr_kprobe = NULL; + if (kp->pre_handler(kp, regs)) + return 1; } + curr_kprobe = NULL; } return 0; } -void aggr_post_handler(struct kprobe *p, struct pt_regs *regs, - unsigned long flags) +static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) { struct kprobe *kp; @@ -107,7 +112,8 @@ void aggr_post_handler(struct kprobe *p, struct pt_regs *regs, return; } -int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr) +static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, + int trapnr) { /* * if we faulted "during" the execution of a user specified @@ -120,19 +126,191 @@ int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr) return 0; } +static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe *kp = curr_kprobe; + if (curr_kprobe && kp->break_handler) { + if (kp->break_handler(kp, regs)) { + curr_kprobe = NULL; + return 1; + } + } + curr_kprobe = NULL; + return 0; +} + +struct kprobe trampoline_p = { + .addr = (kprobe_opcode_t *) &kretprobe_trampoline, + .pre_handler = trampoline_probe_handler, + .post_handler = trampoline_post_handler +}; + +struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp) +{ + struct hlist_node *node; + struct kretprobe_instance *ri; + hlist_for_each_entry(ri, node, &rp->free_instances, uflist) + return ri; + return NULL; +} + +static struct kretprobe_instance *get_used_rp_inst(struct kretprobe *rp) +{ + struct hlist_node *node; + struct kretprobe_instance *ri; + hlist_for_each_entry(ri, node, &rp->used_instances, uflist) + return ri; + return NULL; +} + +struct kretprobe_instance *get_rp_inst(void *sara) +{ + struct hlist_head *head; + struct hlist_node *node; + struct task_struct *tsk; + struct kretprobe_instance *ri; + + tsk = arch_get_kprobe_task(sara); + head = &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)]; + hlist_for_each_entry(ri, node, head, hlist) { + if (ri->stack_addr == sara) + return ri; + } + return NULL; +} + +void add_rp_inst(struct kretprobe_instance *ri) +{ + struct task_struct *tsk; + /* + * Remove rp inst off the free list - + * Add it back when probed function returns + */ + hlist_del(&ri->uflist); + tsk = arch_get_kprobe_task(ri->stack_addr); + /* Add rp inst onto table */ + INIT_HLIST_NODE(&ri->hlist); + hlist_add_head(&ri->hlist, + &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)]); + + /* Also add this rp inst to the used list. */ + INIT_HLIST_NODE(&ri->uflist); + hlist_add_head(&ri->uflist, &ri->rp->used_instances); +} + +void recycle_rp_inst(struct kretprobe_instance *ri) +{ + /* remove rp inst off the rprobe_inst_table */ + hlist_del(&ri->hlist); + if (ri->rp) { + /* remove rp inst off the used list */ + hlist_del(&ri->uflist); + /* put rp inst back onto the free list */ + INIT_HLIST_NODE(&ri->uflist); + hlist_add_head(&ri->uflist, &ri->rp->free_instances); + } else + /* Unregistering */ + kfree(ri); +} + +struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk) +{ + return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)]; +} + +struct kretprobe_instance *get_rp_inst_tsk(struct task_struct *tk) +{ + struct task_struct *tsk; + struct hlist_head *head; + struct hlist_node *node; + struct kretprobe_instance *ri; + + head = &kretprobe_inst_table[hash_ptr(tk, KPROBE_HASH_BITS)]; + + hlist_for_each_entry(ri, node, head, hlist) { + tsk = arch_get_kprobe_task(ri->stack_addr); + if (tsk == tk) + return ri; + } + return NULL; +} + +/* + * This function is called from do_exit or do_execv when task tk's stack is + * about to be recycled. Recycle any function-return probe instances + * associated with this task. These represent probed functions that have + * been called but may never return. + */ +void kprobe_flush_task(struct task_struct *tk) +{ + unsigned long flags = 0; + spin_lock_irqsave(&kprobe_lock, flags); + arch_kprobe_flush_task(tk); + spin_unlock_irqrestore(&kprobe_lock, flags); +} + +/* + * This kprobe pre_handler is registered with every kretprobe. When probe + * hits it will set up the return probe. + */ +static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) +{ + struct kretprobe *rp = container_of(p, struct kretprobe, kp); + + /*TODO: consider to only swap the RA after the last pre_handler fired */ + arch_prepare_kretprobe(rp, regs); + return 0; +} + +static inline void free_rp_inst(struct kretprobe *rp) +{ + struct kretprobe_instance *ri; + while ((ri = get_free_rp_inst(rp)) != NULL) { + hlist_del(&ri->uflist); + kfree(ri); + } +} + +/* + * Keep all fields in the kprobe consistent + */ +static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p) +{ + memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t)); + memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn)); +} + +/* +* Add the new probe to old_p->list. Fail if this is the +* second jprobe at the address - two jprobes can't coexist +*/ +static int add_new_kprobe(struct kprobe *old_p, struct kprobe *p) +{ + struct kprobe *kp; + + if (p->break_handler) { + list_for_each_entry(kp, &old_p->list, list) { + if (kp->break_handler) + return -EEXIST; + } + list_add_tail(&p->list, &old_p->list); + } else + list_add(&p->list, &old_p->list); + return 0; +} + /* * Fill in the required fields of the "manager kprobe". Replace the * earlier kprobe in the hlist with the manager kprobe */ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) { + copy_kprobe(p, ap); ap->addr = p->addr; - ap->opcode = p->opcode; - memcpy(&ap->ainsn, &p->ainsn, sizeof(struct arch_specific_insn)); - ap->pre_handler = aggr_pre_handler; ap->post_handler = aggr_post_handler; ap->fault_handler = aggr_fault_handler; + ap->break_handler = aggr_break_handler; INIT_LIST_HEAD(&ap->list); list_add(&p->list, &ap->list); @@ -153,16 +331,16 @@ static int register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p) int ret = 0; struct kprobe *ap; - if (old_p->break_handler || p->break_handler) { - ret = -EEXIST; /* kprobe and jprobe can't (yet) coexist */ - } else if (old_p->pre_handler == aggr_pre_handler) { - list_add(&p->list, &old_p->list); + if (old_p->pre_handler == aggr_pre_handler) { + copy_kprobe(old_p, p); + ret = add_new_kprobe(old_p, p); } else { ap = kcalloc(1, sizeof(struct kprobe), GFP_ATOMIC); if (!ap) return -ENOMEM; add_aggr_kprobe(ap, old_p); - list_add(&p->list, &ap->list); + copy_kprobe(ap, p); + ret = add_new_kprobe(ap, p); } return ret; } @@ -170,10 +348,8 @@ static int register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p) /* kprobe removal house-keeping routines */ static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags) { - *p->addr = p->opcode; + arch_disarm_kprobe(p); hlist_del(&p->hlist); - flush_icache_range((unsigned long) p->addr, - (unsigned long) p->addr + sizeof(kprobe_opcode_t)); spin_unlock_irqrestore(&kprobe_lock, flags); arch_remove_kprobe(p); } @@ -200,6 +376,7 @@ int register_kprobe(struct kprobe *p) } spin_lock_irqsave(&kprobe_lock, flags); old_p = get_kprobe(p->addr); + p->nmissed = 0; if (old_p) { ret = register_aggr_kprobe(old_p, p); goto out; @@ -210,10 +387,8 @@ int register_kprobe(struct kprobe *p) hlist_add_head(&p->hlist, &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); - p->opcode = *p->addr; - *p->addr = BREAKPOINT_INSTRUCTION; - flush_icache_range((unsigned long) p->addr, - (unsigned long) p->addr + sizeof(kprobe_opcode_t)); + arch_arm_kprobe(p); + out: spin_unlock_irqrestore(&kprobe_lock, flags); rm_kprobe: @@ -257,16 +432,82 @@ void unregister_jprobe(struct jprobe *jp) unregister_kprobe(&jp->kp); } +#ifdef ARCH_SUPPORTS_KRETPROBES + +int register_kretprobe(struct kretprobe *rp) +{ + int ret = 0; + struct kretprobe_instance *inst; + int i; + + rp->kp.pre_handler = pre_handler_kretprobe; + + /* Pre-allocate memory for max kretprobe instances */ + if (rp->maxactive <= 0) { +#ifdef CONFIG_PREEMPT + rp->maxactive = max(10, 2 * NR_CPUS); +#else + rp->maxactive = NR_CPUS; +#endif + } + INIT_HLIST_HEAD(&rp->used_instances); + INIT_HLIST_HEAD(&rp->free_instances); + for (i = 0; i < rp->maxactive; i++) { + inst = kmalloc(sizeof(struct kretprobe_instance), GFP_KERNEL); + if (inst == NULL) { + free_rp_inst(rp); + return -ENOMEM; + } + INIT_HLIST_NODE(&inst->uflist); + hlist_add_head(&inst->uflist, &rp->free_instances); + } + + rp->nmissed = 0; + /* Establish function entry probe point */ + if ((ret = register_kprobe(&rp->kp)) != 0) + free_rp_inst(rp); + return ret; +} + +#else /* ARCH_SUPPORTS_KRETPROBES */ + +int register_kretprobe(struct kretprobe *rp) +{ + return -ENOSYS; +} + +#endif /* ARCH_SUPPORTS_KRETPROBES */ + +void unregister_kretprobe(struct kretprobe *rp) +{ + unsigned long flags; + struct kretprobe_instance *ri; + + unregister_kprobe(&rp->kp); + /* No race here */ + spin_lock_irqsave(&kprobe_lock, flags); + free_rp_inst(rp); + while ((ri = get_used_rp_inst(rp)) != NULL) { + ri->rp = NULL; + hlist_del(&ri->uflist); + } + spin_unlock_irqrestore(&kprobe_lock, flags); +} + static int __init init_kprobes(void) { int i, err = 0; /* FIXME allocate the probe table, currently defined statically */ /* initialize all list heads */ - for (i = 0; i < KPROBE_TABLE_SIZE; i++) + for (i = 0; i < KPROBE_TABLE_SIZE; i++) { INIT_HLIST_HEAD(&kprobe_table[i]); + INIT_HLIST_HEAD(&kretprobe_inst_table[i]); + } err = register_die_notifier(&kprobe_exceptions_nb); + /* Register the trampoline probe for return probe */ + register_kprobe(&trampoline_p); return err; } @@ -277,3 +518,6 @@ EXPORT_SYMBOL_GPL(unregister_kprobe); EXPORT_SYMBOL_GPL(register_jprobe); EXPORT_SYMBOL_GPL(unregister_jprobe); EXPORT_SYMBOL_GPL(jprobe_return); +EXPORT_SYMBOL_GPL(register_kretprobe); +EXPORT_SYMBOL_GPL(unregister_kretprobe); + diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index cabb63fc9e16..5b7b4736d82b 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -89,23 +89,6 @@ static struct idr posix_timers_id; static DEFINE_SPINLOCK(idr_lock); /* - * Just because the timer is not in the timer list does NOT mean it is - * inactive. It could be in the "fire" routine getting a new expire time. - */ -#define TIMER_INACTIVE 1 - -#ifdef CONFIG_SMP -# define timer_active(tmr) \ - ((tmr)->it.real.timer.entry.prev != (void *)TIMER_INACTIVE) -# define set_timer_inactive(tmr) \ - do { \ - (tmr)->it.real.timer.entry.prev = (void *)TIMER_INACTIVE; \ - } while (0) -#else -# define timer_active(tmr) BARFY // error to use outside of SMP -# define set_timer_inactive(tmr) do { } while (0) -#endif -/* * we assume that the new SIGEV_THREAD_ID shares no bits with the other * SIGEV values. Here we put out an error if this assumption fails. */ @@ -226,7 +209,6 @@ static inline int common_timer_create(struct k_itimer *new_timer) init_timer(&new_timer->it.real.timer); new_timer->it.real.timer.data = (unsigned long) new_timer; new_timer->it.real.timer.function = posix_timer_fn; - set_timer_inactive(new_timer); return 0; } @@ -480,7 +462,6 @@ static void posix_timer_fn(unsigned long __data) int do_notify = 1; spin_lock_irqsave(&timr->it_lock, flags); - set_timer_inactive(timr); if (!list_empty(&timr->it.real.abs_timer_entry)) { spin_lock(&abs_list.lock); do { @@ -983,8 +964,8 @@ common_timer_set(struct k_itimer *timr, int flags, * careful here. If smp we could be in the "fire" routine which will * be spinning as we hold the lock. But this is ONLY an SMP issue. */ + if (try_to_del_timer_sync(&timr->it.real.timer) < 0) { #ifdef CONFIG_SMP - if (timer_active(timr) && !del_timer(&timr->it.real.timer)) /* * It can only be active if on an other cpu. Since * we have cleared the interval stuff above, it should @@ -994,11 +975,9 @@ common_timer_set(struct k_itimer *timr, int flags, * a "retry" exit status. */ return TIMER_RETRY; - - set_timer_inactive(timr); -#else - del_timer(&timr->it.real.timer); #endif + } + remove_from_abslist(timr); timr->it_requeue_pending = (timr->it_requeue_pending + 2) & @@ -1083,8 +1062,9 @@ retry: static inline int common_timer_del(struct k_itimer *timer) { timer->it.real.incr = 0; + + if (try_to_del_timer_sync(&timer->it.real.timer) < 0) { #ifdef CONFIG_SMP - if (timer_active(timer) && !del_timer(&timer->it.real.timer)) /* * It can only be active if on an other cpu. Since * we have cleared the interval stuff above, it should @@ -1094,9 +1074,9 @@ static inline int common_timer_del(struct k_itimer *timer) * a "retry" exit status. */ return TIMER_RETRY; -#else - del_timer(&timer->it.real.timer); #endif + } + remove_from_abslist(timer); return 0; diff --git a/kernel/printk.c b/kernel/printk.c index 01b58d7d17ff..3a442bfb8bee 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -876,8 +876,10 @@ void register_console(struct console * console) break; console->flags |= CON_ENABLED; console->index = console_cmdline[i].index; - if (i == preferred_console) + if (i == selected_console) { console->flags |= CON_CONSDEV; + preferred_console = selected_console; + } break; } @@ -897,6 +899,8 @@ void register_console(struct console * console) if ((console->flags & CON_CONSDEV) || console_drivers == NULL) { console->next = console_drivers; console_drivers = console; + if (console->next) + console->next->flags &= ~CON_CONSDEV; } else { console->next = console_drivers->next; console_drivers->next = console; @@ -937,10 +941,14 @@ int unregister_console(struct console * console) /* If last console is removed, we re-enable picking the first * one that gets registered. Without that, pmac early boot console * would prevent fbcon from taking over. + * + * If this isn't the last console and it has CON_CONSDEV set, we + * need to set it on the next preferred console. */ if (console_drivers == NULL) preferred_console = selected_console; - + else if (console->flags & CON_CONSDEV) + console_drivers->flags |= CON_CONSDEV; release_console_sem(); return res; diff --git a/kernel/sched.c b/kernel/sched.c index deca041fc364..76080d142e3d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2576,7 +2576,7 @@ void fastcall add_preempt_count(int val) /* * Underflow? */ - BUG_ON(((int)preempt_count() < 0)); + BUG_ON((preempt_count() < 0)); preempt_count() += val; /* * Spinlock count overflowing soon? @@ -2869,7 +2869,7 @@ need_resched: int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, void *key) { - task_t *p = curr->task; + task_t *p = curr->private; return try_to_wake_up(p, mode, sync); } diff --git a/kernel/signal.c b/kernel/signal.c index c89821b69ae3..d1258729a5f9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -213,6 +213,7 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked) fastcall void recalc_sigpending_tsk(struct task_struct *t) { if (t->signal->group_stop_count > 0 || + (t->flags & PF_FREEZE) || PENDING(&t->pending, &t->blocked) || PENDING(&t->signal->shared_pending, &t->blocked)) set_tsk_thread_flag(t, TIF_SIGPENDING); diff --git a/kernel/sys.c b/kernel/sys.c index f006632c2ba7..5a9d6b075016 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -525,7 +525,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) } if (new_egid != old_egid) { - current->mm->dumpable = 0; + current->mm->dumpable = suid_dumpable; smp_wmb(); } if (rgid != (gid_t) -1 || @@ -556,7 +556,7 @@ asmlinkage long sys_setgid(gid_t gid) { if(old_egid != gid) { - current->mm->dumpable=0; + current->mm->dumpable = suid_dumpable; smp_wmb(); } current->gid = current->egid = current->sgid = current->fsgid = gid; @@ -565,7 +565,7 @@ asmlinkage long sys_setgid(gid_t gid) { if(old_egid != gid) { - current->mm->dumpable=0; + current->mm->dumpable = suid_dumpable; smp_wmb(); } current->egid = current->fsgid = gid; @@ -596,7 +596,7 @@ static int set_user(uid_t new_ruid, int dumpclear) if(dumpclear) { - current->mm->dumpable = 0; + current->mm->dumpable = suid_dumpable; smp_wmb(); } current->uid = new_ruid; @@ -653,7 +653,7 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) if (new_euid != old_euid) { - current->mm->dumpable=0; + current->mm->dumpable = suid_dumpable; smp_wmb(); } current->fsuid = current->euid = new_euid; @@ -703,7 +703,7 @@ asmlinkage long sys_setuid(uid_t uid) if (old_euid != uid) { - current->mm->dumpable = 0; + current->mm->dumpable = suid_dumpable; smp_wmb(); } current->fsuid = current->euid = uid; @@ -748,7 +748,7 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) if (euid != (uid_t) -1) { if (euid != current->euid) { - current->mm->dumpable = 0; + current->mm->dumpable = suid_dumpable; smp_wmb(); } current->euid = euid; @@ -798,7 +798,7 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) if (egid != (gid_t) -1) { if (egid != current->egid) { - current->mm->dumpable = 0; + current->mm->dumpable = suid_dumpable; smp_wmb(); } current->egid = egid; @@ -845,7 +845,7 @@ asmlinkage long sys_setfsuid(uid_t uid) { if (uid != old_fsuid) { - current->mm->dumpable = 0; + current->mm->dumpable = suid_dumpable; smp_wmb(); } current->fsuid = uid; @@ -875,7 +875,7 @@ asmlinkage long sys_setfsgid(gid_t gid) { if (gid != old_fsgid) { - current->mm->dumpable = 0; + current->mm->dumpable = suid_dumpable; smp_wmb(); } current->fsgid = gid; @@ -894,35 +894,69 @@ asmlinkage long sys_times(struct tms __user * tbuf) */ if (tbuf) { struct tms tmp; - struct task_struct *tsk = current; - struct task_struct *t; cputime_t utime, stime, cutime, cstime; - read_lock(&tasklist_lock); - utime = tsk->signal->utime; - stime = tsk->signal->stime; - t = tsk; - do { - utime = cputime_add(utime, t->utime); - stime = cputime_add(stime, t->stime); - t = next_thread(t); - } while (t != tsk); - - /* - * While we have tasklist_lock read-locked, no dying thread - * can be updating current->signal->[us]time. Instead, - * we got their counts included in the live thread loop. - * However, another thread can come in right now and - * do a wait call that updates current->signal->c[us]time. - * To make sure we always see that pair updated atomically, - * we take the siglock around fetching them. - */ - spin_lock_irq(&tsk->sighand->siglock); - cutime = tsk->signal->cutime; - cstime = tsk->signal->cstime; - spin_unlock_irq(&tsk->sighand->siglock); - read_unlock(&tasklist_lock); +#ifdef CONFIG_SMP + if (thread_group_empty(current)) { + /* + * Single thread case without the use of any locks. + * + * We may race with release_task if two threads are + * executing. However, release task first adds up the + * counters (__exit_signal) before removing the task + * from the process tasklist (__unhash_process). + * __exit_signal also acquires and releases the + * siglock which results in the proper memory ordering + * so that the list modifications are always visible + * after the counters have been updated. + * + * If the counters have been updated by the second thread + * but the thread has not yet been removed from the list + * then the other branch will be executing which will + * block on tasklist_lock until the exit handling of the + * other task is finished. + * + * This also implies that the sighand->siglock cannot + * be held by another processor. So we can also + * skip acquiring that lock. + */ + utime = cputime_add(current->signal->utime, current->utime); + stime = cputime_add(current->signal->utime, current->stime); + cutime = current->signal->cutime; + cstime = current->signal->cstime; + } else +#endif + { + + /* Process with multiple threads */ + struct task_struct *tsk = current; + struct task_struct *t; + read_lock(&tasklist_lock); + utime = tsk->signal->utime; + stime = tsk->signal->stime; + t = tsk; + do { + utime = cputime_add(utime, t->utime); + stime = cputime_add(stime, t->stime); + t = next_thread(t); + } while (t != tsk); + + /* + * While we have tasklist_lock read-locked, no dying thread + * can be updating current->signal->[us]time. Instead, + * we got their counts included in the live thread loop. + * However, another thread can come in right now and + * do a wait call that updates current->signal->c[us]time. + * To make sure we always see that pair updated atomically, + * we take the siglock around fetching them. + */ + spin_lock_irq(&tsk->sighand->siglock); + cutime = tsk->signal->cutime; + cstime = tsk->signal->cstime; + spin_unlock_irq(&tsk->sighand->siglock); + read_unlock(&tasklist_lock); + } tmp.tms_utime = cputime_to_clock_t(utime); tmp.tms_stime = cputime_to_clock_t(stime); tmp.tms_cutime = cputime_to_clock_t(cutime); @@ -1652,7 +1686,7 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, error = 1; break; case PR_SET_DUMPABLE: - if (arg2 != 0 && arg2 != 1) { + if (arg2 < 0 || arg2 > 2) { error = -EINVAL; break; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 701d12c63068..24a4d12d5aa9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -58,6 +58,7 @@ extern int sysctl_overcommit_ratio; extern int max_threads; extern int sysrq_enabled; extern int core_uses_pid; +extern int suid_dumpable; extern char core_pattern[]; extern int cad_pid; extern int pid_max; @@ -950,6 +951,14 @@ static ctl_table fs_table[] = { .proc_handler = &proc_dointvec, }, #endif + { + .ctl_name = KERN_SETUID_DUMPABLE, + .procname = "suid_dumpable", + .data = &suid_dumpable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = 0 } }; diff --git a/kernel/timer.c b/kernel/timer.c index 207aa4f0aa10..51ff917c9590 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -57,6 +57,11 @@ static void time_interpolator_update(long delta_nsec); #define TVN_MASK (TVN_SIZE - 1) #define TVR_MASK (TVR_SIZE - 1) +struct timer_base_s { + spinlock_t lock; + struct timer_list *running_timer; +}; + typedef struct tvec_s { struct list_head vec[TVN_SIZE]; } tvec_t; @@ -66,9 +71,8 @@ typedef struct tvec_root_s { } tvec_root_t; struct tvec_t_base_s { - spinlock_t lock; + struct timer_base_s t_base; unsigned long timer_jiffies; - struct timer_list *running_timer; tvec_root_t tv1; tvec_t tv2; tvec_t tv3; @@ -77,18 +81,16 @@ struct tvec_t_base_s { } ____cacheline_aligned_in_smp; typedef struct tvec_t_base_s tvec_base_t; +static DEFINE_PER_CPU(tvec_base_t, tvec_bases); static inline void set_running_timer(tvec_base_t *base, struct timer_list *timer) { #ifdef CONFIG_SMP - base->running_timer = timer; + base->t_base.running_timer = timer; #endif } -/* Fake initialization */ -static DEFINE_PER_CPU(tvec_base_t, tvec_bases) = { SPIN_LOCK_UNLOCKED }; - static void check_timer_failed(struct timer_list *timer) { static int whine_count; @@ -103,7 +105,6 @@ static void check_timer_failed(struct timer_list *timer) /* * Now fix it up */ - spin_lock_init(&timer->lock); timer->magic = TIMER_MAGIC; } @@ -156,65 +157,113 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer) list_add_tail(&timer->entry, vec); } +typedef struct timer_base_s timer_base_t; +/* + * Used by TIMER_INITIALIZER, we can't use per_cpu(tvec_bases) + * at compile time, and we need timer->base to lock the timer. + */ +timer_base_t __init_timer_base + ____cacheline_aligned_in_smp = { .lock = SPIN_LOCK_UNLOCKED }; +EXPORT_SYMBOL(__init_timer_base); + +/*** + * init_timer - initialize a timer. + * @timer: the timer to be initialized + * + * init_timer() must be done to a timer prior calling *any* of the + * other timer functions. + */ +void fastcall init_timer(struct timer_list *timer) +{ + timer->entry.next = NULL; + timer->base = &per_cpu(tvec_bases, raw_smp_processor_id()).t_base; + timer->magic = TIMER_MAGIC; +} +EXPORT_SYMBOL(init_timer); + +static inline void detach_timer(struct timer_list *timer, + int clear_pending) +{ + struct list_head *entry = &timer->entry; + + __list_del(entry->prev, entry->next); + if (clear_pending) + entry->next = NULL; + entry->prev = LIST_POISON2; +} + +/* + * We are using hashed locking: holding per_cpu(tvec_bases).t_base.lock + * means that all timers which are tied to this base via timer->base are + * locked, and the base itself is locked too. + * + * So __run_timers/migrate_timers can safely modify all timers which could + * be found on ->tvX lists. + * + * When the timer's base is locked, and the timer removed from list, it is + * possible to set timer->base = NULL and drop the lock: the timer remains + * locked. + */ +static timer_base_t *lock_timer_base(struct timer_list *timer, + unsigned long *flags) +{ + timer_base_t *base; + + for (;;) { + base = timer->base; + if (likely(base != NULL)) { + spin_lock_irqsave(&base->lock, *flags); + if (likely(base == timer->base)) + return base; + /* The timer has migrated to another CPU */ + spin_unlock_irqrestore(&base->lock, *flags); + } + cpu_relax(); + } +} + int __mod_timer(struct timer_list *timer, unsigned long expires) { - tvec_base_t *old_base, *new_base; + timer_base_t *base; + tvec_base_t *new_base; unsigned long flags; int ret = 0; BUG_ON(!timer->function); - check_timer(timer); - spin_lock_irqsave(&timer->lock, flags); + base = lock_timer_base(timer, &flags); + + if (timer_pending(timer)) { + detach_timer(timer, 0); + ret = 1; + } + new_base = &__get_cpu_var(tvec_bases); -repeat: - old_base = timer->base; - /* - * Prevent deadlocks via ordering by old_base < new_base. - */ - if (old_base && (new_base != old_base)) { - if (old_base < new_base) { - spin_lock(&new_base->lock); - spin_lock(&old_base->lock); - } else { - spin_lock(&old_base->lock); - spin_lock(&new_base->lock); - } + if (base != &new_base->t_base) { /* - * The timer base might have been cancelled while we were - * trying to take the lock(s): + * We are trying to schedule the timer on the local CPU. + * However we can't change timer's base while it is running, + * otherwise del_timer_sync() can't detect that the timer's + * handler yet has not finished. This also guarantees that + * the timer is serialized wrt itself. */ - if (timer->base != old_base) { - spin_unlock(&new_base->lock); - spin_unlock(&old_base->lock); - goto repeat; - } - } else { - spin_lock(&new_base->lock); - if (timer->base != old_base) { - spin_unlock(&new_base->lock); - goto repeat; + if (unlikely(base->running_timer == timer)) { + /* The timer remains on a former base */ + new_base = container_of(base, tvec_base_t, t_base); + } else { + /* See the comment in lock_timer_base() */ + timer->base = NULL; + spin_unlock(&base->lock); + spin_lock(&new_base->t_base.lock); + timer->base = &new_base->t_base; } } - /* - * Delete the previous timeout (if there was any), and install - * the new one: - */ - if (old_base) { - list_del(&timer->entry); - ret = 1; - } timer->expires = expires; internal_add_timer(new_base, timer); - timer->base = new_base; - - if (old_base && (new_base != old_base)) - spin_unlock(&old_base->lock); - spin_unlock(&new_base->lock); - spin_unlock_irqrestore(&timer->lock, flags); + spin_unlock_irqrestore(&new_base->t_base.lock, flags); return ret; } @@ -232,15 +281,15 @@ void add_timer_on(struct timer_list *timer, int cpu) { tvec_base_t *base = &per_cpu(tvec_bases, cpu); unsigned long flags; - + BUG_ON(timer_pending(timer) || !timer->function); check_timer(timer); - spin_lock_irqsave(&base->lock, flags); + spin_lock_irqsave(&base->t_base.lock, flags); + timer->base = &base->t_base; internal_add_timer(base, timer); - timer->base = base; - spin_unlock_irqrestore(&base->lock, flags); + spin_unlock_irqrestore(&base->t_base.lock, flags); } @@ -295,109 +344,84 @@ EXPORT_SYMBOL(mod_timer); */ int del_timer(struct timer_list *timer) { + timer_base_t *base; unsigned long flags; - tvec_base_t *base; + int ret = 0; check_timer(timer); -repeat: - base = timer->base; - if (!base) - return 0; - spin_lock_irqsave(&base->lock, flags); - if (base != timer->base) { + if (timer_pending(timer)) { + base = lock_timer_base(timer, &flags); + if (timer_pending(timer)) { + detach_timer(timer, 1); + ret = 1; + } spin_unlock_irqrestore(&base->lock, flags); - goto repeat; } - list_del(&timer->entry); - /* Need to make sure that anybody who sees a NULL base also sees the list ops */ - smp_wmb(); - timer->base = NULL; - spin_unlock_irqrestore(&base->lock, flags); - return 1; + return ret; } EXPORT_SYMBOL(del_timer); #ifdef CONFIG_SMP -/*** - * del_timer_sync - deactivate a timer and wait for the handler to finish. - * @timer: the timer to be deactivated - * - * This function only differs from del_timer() on SMP: besides deactivating - * the timer it also makes sure the handler has finished executing on other - * CPUs. - * - * Synchronization rules: callers must prevent restarting of the timer, - * otherwise this function is meaningless. It must not be called from - * interrupt contexts. The caller must not hold locks which would prevent - * completion of the timer's handler. Upon exit the timer is not queued and - * the handler is not running on any CPU. - * - * The function returns whether it has deactivated a pending timer or not. +/* + * This function tries to deactivate a timer. Upon successful (ret >= 0) + * exit the timer is not queued and the handler is not running on any CPU. * - * del_timer_sync() is slow and complicated because it copes with timer - * handlers which re-arm the timer (periodic timers). If the timer handler - * is known to not do this (a single shot timer) then use - * del_singleshot_timer_sync() instead. + * It must not be called from interrupt contexts. */ -int del_timer_sync(struct timer_list *timer) +int try_to_del_timer_sync(struct timer_list *timer) { - tvec_base_t *base; - int i, ret = 0; + timer_base_t *base; + unsigned long flags; + int ret = -1; - check_timer(timer); + base = lock_timer_base(timer, &flags); -del_again: - ret += del_timer(timer); + if (base->running_timer == timer) + goto out; - for_each_online_cpu(i) { - base = &per_cpu(tvec_bases, i); - if (base->running_timer == timer) { - while (base->running_timer == timer) { - cpu_relax(); - preempt_check_resched(); - } - break; - } + ret = 0; + if (timer_pending(timer)) { + detach_timer(timer, 1); + ret = 1; } - smp_rmb(); - if (timer_pending(timer)) - goto del_again; +out: + spin_unlock_irqrestore(&base->lock, flags); return ret; } -EXPORT_SYMBOL(del_timer_sync); /*** - * del_singleshot_timer_sync - deactivate a non-recursive timer + * del_timer_sync - deactivate a timer and wait for the handler to finish. * @timer: the timer to be deactivated * - * This function is an optimization of del_timer_sync for the case where the - * caller can guarantee the timer does not reschedule itself in its timer - * function. + * This function only differs from del_timer() on SMP: besides deactivating + * the timer it also makes sure the handler has finished executing on other + * CPUs. * * Synchronization rules: callers must prevent restarting of the timer, * otherwise this function is meaningless. It must not be called from - * interrupt contexts. The caller must not hold locks which wold prevent - * completion of the timer's handler. Upon exit the timer is not queued and - * the handler is not running on any CPU. + * interrupt contexts. The caller must not hold locks which would prevent + * completion of the timer's handler. The timer's handler must not call + * add_timer_on(). Upon exit the timer is not queued and the handler is + * not running on any CPU. * * The function returns whether it has deactivated a pending timer or not. */ -int del_singleshot_timer_sync(struct timer_list *timer) +int del_timer_sync(struct timer_list *timer) { - int ret = del_timer(timer); + check_timer(timer); - if (!ret) { - ret = del_timer_sync(timer); - BUG_ON(ret); + for (;;) { + int ret = try_to_del_timer_sync(timer); + if (ret >= 0) + return ret; } - - return ret; } -EXPORT_SYMBOL(del_singleshot_timer_sync); + +EXPORT_SYMBOL(del_timer_sync); #endif static int cascade(tvec_base_t *base, tvec_t *tv, int index) @@ -415,7 +439,7 @@ static int cascade(tvec_base_t *base, tvec_t *tv, int index) struct timer_list *tmp; tmp = list_entry(curr, struct timer_list, entry); - BUG_ON(tmp->base != base); + BUG_ON(tmp->base != &base->t_base); curr = curr->next; internal_add_timer(base, tmp); } @@ -437,7 +461,7 @@ static inline void __run_timers(tvec_base_t *base) { struct timer_list *timer; - spin_lock_irq(&base->lock); + spin_lock_irq(&base->t_base.lock); while (time_after_eq(jiffies, base->timer_jiffies)) { struct list_head work_list = LIST_HEAD_INIT(work_list); struct list_head *head = &work_list; @@ -453,8 +477,7 @@ static inline void __run_timers(tvec_base_t *base) cascade(base, &base->tv5, INDEX(3)); ++base->timer_jiffies; list_splice_init(base->tv1.vec + index, &work_list); -repeat: - if (!list_empty(head)) { + while (!list_empty(head)) { void (*fn)(unsigned long); unsigned long data; @@ -462,25 +485,26 @@ repeat: fn = timer->function; data = timer->data; - list_del(&timer->entry); set_running_timer(base, timer); - smp_wmb(); - timer->base = NULL; - spin_unlock_irq(&base->lock); + detach_timer(timer, 1); + spin_unlock_irq(&base->t_base.lock); { - u32 preempt_count = preempt_count(); + int preempt_count = preempt_count(); fn(data); if (preempt_count != preempt_count()) { - printk("huh, entered %p with %08x, exited with %08x?\n", fn, preempt_count, preempt_count()); + printk(KERN_WARNING "huh, entered %p " + "with preempt_count %08x, exited" + " with %08x?\n", + fn, preempt_count, + preempt_count()); BUG(); } } - spin_lock_irq(&base->lock); - goto repeat; + spin_lock_irq(&base->t_base.lock); } } set_running_timer(base, NULL); - spin_unlock_irq(&base->lock); + spin_unlock_irq(&base->t_base.lock); } #ifdef CONFIG_NO_IDLE_HZ @@ -499,7 +523,7 @@ unsigned long next_timer_interrupt(void) int i, j; base = &__get_cpu_var(tvec_bases); - spin_lock(&base->lock); + spin_lock(&base->t_base.lock); expires = base->timer_jiffies + (LONG_MAX >> 1); list = 0; @@ -547,7 +571,7 @@ found: expires = nte->expires; } } - spin_unlock(&base->lock); + spin_unlock(&base->t_base.lock); return expires; } #endif @@ -1286,9 +1310,9 @@ static void __devinit init_timers_cpu(int cpu) { int j; tvec_base_t *base; - + base = &per_cpu(tvec_bases, cpu); - spin_lock_init(&base->lock); + spin_lock_init(&base->t_base.lock); for (j = 0; j < TVN_SIZE; j++) { INIT_LIST_HEAD(base->tv5.vec + j); INIT_LIST_HEAD(base->tv4.vec + j); @@ -1302,22 +1326,16 @@ static void __devinit init_timers_cpu(int cpu) } #ifdef CONFIG_HOTPLUG_CPU -static int migrate_timer_list(tvec_base_t *new_base, struct list_head *head) +static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head) { struct timer_list *timer; while (!list_empty(head)) { timer = list_entry(head->next, struct timer_list, entry); - /* We're locking backwards from __mod_timer order here, - beware deadlock. */ - if (!spin_trylock(&timer->lock)) - return 0; - list_del(&timer->entry); + detach_timer(timer, 0); + timer->base = &new_base->t_base; internal_add_timer(new_base, timer); - timer->base = new_base; - spin_unlock(&timer->lock); } - return 1; } static void __devinit migrate_timers(int cpu) @@ -1331,39 +1349,24 @@ static void __devinit migrate_timers(int cpu) new_base = &get_cpu_var(tvec_bases); local_irq_disable(); -again: - /* Prevent deadlocks via ordering by old_base < new_base. */ - if (old_base < new_base) { - spin_lock(&new_base->lock); - spin_lock(&old_base->lock); - } else { - spin_lock(&old_base->lock); - spin_lock(&new_base->lock); - } + spin_lock(&new_base->t_base.lock); + spin_lock(&old_base->t_base.lock); - if (old_base->running_timer) + if (old_base->t_base.running_timer) BUG(); for (i = 0; i < TVR_SIZE; i++) - if (!migrate_timer_list(new_base, old_base->tv1.vec + i)) - goto unlock_again; - for (i = 0; i < TVN_SIZE; i++) - if (!migrate_timer_list(new_base, old_base->tv2.vec + i) - || !migrate_timer_list(new_base, old_base->tv3.vec + i) - || !migrate_timer_list(new_base, old_base->tv4.vec + i) - || !migrate_timer_list(new_base, old_base->tv5.vec + i)) - goto unlock_again; - spin_unlock(&old_base->lock); - spin_unlock(&new_base->lock); + migrate_timer_list(new_base, old_base->tv1.vec + i); + for (i = 0; i < TVN_SIZE; i++) { + migrate_timer_list(new_base, old_base->tv2.vec + i); + migrate_timer_list(new_base, old_base->tv3.vec + i); + migrate_timer_list(new_base, old_base->tv4.vec + i); + migrate_timer_list(new_base, old_base->tv5.vec + i); + } + + spin_unlock(&old_base->t_base.lock); + spin_unlock(&new_base->t_base.lock); local_irq_enable(); put_cpu_var(tvec_bases); - return; - -unlock_again: - /* Avoid deadlock with __mod_timer, by backing off. */ - spin_unlock(&old_base->lock); - spin_unlock(&new_base->lock); - cpu_relax(); - goto again; } #endif /* CONFIG_HOTPLUG_CPU */ diff --git a/mm/Kconfig b/mm/Kconfig new file mode 100644 index 000000000000..cd379936cac6 --- /dev/null +++ b/mm/Kconfig @@ -0,0 +1,91 @@ +config SELECT_MEMORY_MODEL + def_bool y + depends on EXPERIMENTAL || ARCH_SELECT_MEMORY_MODEL + +choice + prompt "Memory model" + depends on SELECT_MEMORY_MODEL + default DISCONTIGMEM_MANUAL if ARCH_DISCONTIGMEM_DEFAULT + default SPARSEMEM_MANUAL if ARCH_SPARSEMEM_DEFAULT + default FLATMEM_MANUAL + +config FLATMEM_MANUAL + bool "Flat Memory" + depends on !ARCH_DISCONTIGMEM_ENABLE || ARCH_FLATMEM_ENABLE + help + This option allows you to change some of the ways that + Linux manages its memory internally. Most users will + only have one option here: FLATMEM. This is normal + and a correct option. + + Some users of more advanced features like NUMA and + memory hotplug may have different options here. + DISCONTIGMEM is an more mature, better tested system, + but is incompatible with memory hotplug and may suffer + decreased performance over SPARSEMEM. If unsure between + "Sparse Memory" and "Discontiguous Memory", choose + "Discontiguous Memory". + + If unsure, choose this option (Flat Memory) over any other. + +config DISCONTIGMEM_MANUAL + bool "Discontigious Memory" + depends on ARCH_DISCONTIGMEM_ENABLE + help + This option provides enhanced support for discontiguous + memory systems, over FLATMEM. These systems have holes + in their physical address spaces, and this option provides + more efficient handling of these holes. However, the vast + majority of hardware has quite flat address spaces, and + can have degraded performance from extra overhead that + this option imposes. + + Many NUMA configurations will have this as the only option. + + If unsure, choose "Flat Memory" over this option. + +config SPARSEMEM_MANUAL + bool "Sparse Memory" + depends on ARCH_SPARSEMEM_ENABLE + help + This will be the only option for some systems, including + memory hotplug systems. This is normal. + + For many other systems, this will be an alternative to + "Discontigious Memory". This option provides some potential + performance benefits, along with decreased code complexity, + but it is newer, and more experimental. + + If unsure, choose "Discontiguous Memory" or "Flat Memory" + over this option. + +endchoice + +config DISCONTIGMEM + def_bool y + depends on (!SELECT_MEMORY_MODEL && ARCH_DISCONTIGMEM_ENABLE) || DISCONTIGMEM_MANUAL + +config SPARSEMEM + def_bool y + depends on SPARSEMEM_MANUAL + +config FLATMEM + def_bool y + depends on (!DISCONTIGMEM && !SPARSEMEM) || FLATMEM_MANUAL + +config FLAT_NODE_MEM_MAP + def_bool y + depends on !SPARSEMEM + +# +# Both the NUMA code and DISCONTIGMEM use arrays of pg_data_t's +# to represent different areas of memory. This variable allows +# those dependencies to exist individually. +# +config NEED_MULTIPLE_NODES + def_bool y + depends on DISCONTIGMEM || NUMA + +config HAVE_MEMORY_PRESENT + def_bool y + depends on ARCH_HAVE_MEMORY_PRESENT || SPARSEMEM diff --git a/mm/Makefile b/mm/Makefile index 097408064f6a..8f70ffd763c8 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -15,6 +15,7 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o obj-$(CONFIG_HUGETLBFS) += hugetlb.o obj-$(CONFIG_NUMA) += mempolicy.o +obj-$(CONFIG_SPARSEMEM) += sparse.o obj-$(CONFIG_SHMEM) += shmem.o obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o diff --git a/mm/bootmem.c b/mm/bootmem.c index 260e703850d8..f82f7aebbee3 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -256,6 +256,7 @@ found: static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) { struct page *page; + unsigned long pfn; bootmem_data_t *bdata = pgdat->bdata; unsigned long i, count, total = 0; unsigned long idx; @@ -266,7 +267,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) count = 0; /* first extant page of the node */ - page = virt_to_page(phys_to_virt(bdata->node_boot_start)); + pfn = bdata->node_boot_start >> PAGE_SHIFT; idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT); map = bdata->node_bootmem_map; /* Check physaddr is O(LOG2(BITS_PER_LONG)) page aligned */ @@ -275,9 +276,11 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) gofast = 1; for (i = 0; i < idx; ) { unsigned long v = ~map[i / BITS_PER_LONG]; + if (gofast && v == ~0UL) { int j, order; + page = pfn_to_page(pfn); count += BITS_PER_LONG; __ClearPageReserved(page); order = ffs(BITS_PER_LONG) - 1; @@ -292,6 +295,8 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) page += BITS_PER_LONG; } else if (v) { unsigned long m; + + page = pfn_to_page(pfn); for (m = 1; m && i < idx; m<<=1, page++, i++) { if (v & m) { count++; @@ -302,8 +307,8 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) } } else { i+=BITS_PER_LONG; - page += BITS_PER_LONG; } + pfn += BITS_PER_LONG; } total += count; diff --git a/mm/filemap.c b/mm/filemap.c index 4a2fee2cb62b..a3598b542a31 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1827,12 +1827,6 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i if (unlikely(*pos < 0)) return -EINVAL; - if (unlikely(file->f_error)) { - int err = file->f_error; - file->f_error = 0; - return err; - } - if (!isblk) { /* FIXME: this is for backwards compatibility with 2.4 */ if (file->f_flags & O_APPEND) diff --git a/mm/madvise.c b/mm/madvise.c index e3108054733c..54a5d3bc55d5 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -65,7 +65,6 @@ static long madvise_behavior(struct vm_area_struct * vma, /* * vm_flags is protected by the mmap_sem held in write mode. */ - VM_ClearReadHint(vma); vma->vm_flags = new_flags; out: diff --git a/mm/memory.c b/mm/memory.c index da91b7bf9986..30975ef48722 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -58,7 +58,7 @@ #include <linux/swapops.h> #include <linux/elf.h> -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_NEED_MULTIPLE_NODES /* use the per-pgdat data instead for discontigmem - mbligh */ unsigned long max_mapnr; struct page *mem_map; diff --git a/mm/mempool.c b/mm/mempool.c index c9f3d4620428..9a72f7d918fa 100644 --- a/mm/mempool.c +++ b/mm/mempool.c @@ -51,16 +51,23 @@ static void free_pool(mempool_t *pool) * functions might sleep - as long as the mempool_alloc function is not called * from IRQ contexts. */ -mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn, +mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data) { - mempool_t *pool; + return mempool_create_node(min_nr,alloc_fn,free_fn, pool_data,-1); +} +EXPORT_SYMBOL(mempool_create); - pool = kmalloc(sizeof(*pool), GFP_KERNEL); +mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn, + mempool_free_t *free_fn, void *pool_data, int node_id) +{ + mempool_t *pool; + pool = kmalloc_node(sizeof(*pool), GFP_KERNEL, node_id); if (!pool) return NULL; memset(pool, 0, sizeof(*pool)); - pool->elements = kmalloc(min_nr * sizeof(void *), GFP_KERNEL); + pool->elements = kmalloc_node(min_nr * sizeof(void *), + GFP_KERNEL, node_id); if (!pool->elements) { kfree(pool); return NULL; @@ -87,7 +94,7 @@ mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn, } return pool; } -EXPORT_SYMBOL(mempool_create); +EXPORT_SYMBOL(mempool_create_node); /** * mempool_resize - resize an existing memory pool @@ -197,7 +204,7 @@ void * mempool_alloc(mempool_t *pool, unsigned int __nocast gfp_mask) { void *element; unsigned long flags; - DEFINE_WAIT(wait); + wait_queue_t wait; int gfp_temp; might_sleep_if(gfp_mask & __GFP_WAIT); @@ -228,6 +235,7 @@ repeat_alloc: /* Now start performing page reclaim */ gfp_temp = gfp_mask; + init_wait(&wait); prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE); smp_mb(); if (!pool->curr_nr) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 206920796f5f..7ee675ad101e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -68,7 +68,7 @@ EXPORT_SYMBOL(nr_swap_pages); * Used by page_zone() to look up the address of the struct zone whose * id is encoded in the upper bits of page->flags */ -struct zone *zone_table[1 << (ZONES_SHIFT + NODES_SHIFT)]; +struct zone *zone_table[1 << ZONETABLE_SHIFT]; EXPORT_SYMBOL(zone_table); static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" }; @@ -1649,11 +1649,17 @@ static void __init calculate_zone_totalpages(struct pglist_data *pgdat, void __init memmap_init_zone(unsigned long size, int nid, unsigned long zone, unsigned long start_pfn) { - struct page *start = pfn_to_page(start_pfn); struct page *page; + unsigned long end_pfn = start_pfn + size; + unsigned long pfn; - for (page = start; page < (start + size); page++) { - set_page_zone(page, NODEZONE(nid, zone)); + for (pfn = start_pfn; pfn < end_pfn; pfn++, page++) { + if (!early_pfn_valid(pfn)) + continue; + if (!early_pfn_in_nid(pfn, nid)) + continue; + page = pfn_to_page(pfn); + set_page_links(page, zone, nid, pfn); set_page_count(page, 0); reset_page_mapcount(page); SetPageReserved(page); @@ -1677,6 +1683,20 @@ void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone, } } +#define ZONETABLE_INDEX(x, zone_nr) ((x << ZONES_SHIFT) | zone_nr) +void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn, + unsigned long size) +{ + unsigned long snum = pfn_to_section_nr(pfn); + unsigned long end = pfn_to_section_nr(pfn + size); + + if (FLAGS_HAS_NODE) + zone_table[ZONETABLE_INDEX(nid, zid)] = zone; + else + for (; snum <= end; snum++) + zone_table[ZONETABLE_INDEX(snum, zid)] = zone; +} + #ifndef __HAVE_ARCH_MEMMAP_INIT #define memmap_init(size, nid, zone, start_pfn) \ memmap_init_zone((size), (nid), (zone), (start_pfn)) @@ -1742,10 +1762,17 @@ inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) * with interrupts disabled. * * Some NUMA counter updates may also be caught by the boot pagesets. - * These will be discarded when bootup is complete. + * + * The boot_pagesets must be kept even after bootup is complete for + * unused processors and/or zones. They do play a role for bootstrapping + * hotplugged processors. + * + * zoneinfo_show() and maybe other functions do + * not check if the processor is online before following the pageset pointer. + * Other parts of the kernel may not check if the zone is available. */ static struct per_cpu_pageset - boot_pageset[NR_CPUS] __initdata; + boot_pageset[NR_CPUS]; /* * Dynamically allocate memory for the @@ -1854,7 +1881,6 @@ static void __init free_area_init_core(struct pglist_data *pgdat, unsigned long size, realsize; unsigned long batch; - zone_table[NODEZONE(nid, j)] = zone; realsize = size = zones_size[j]; if (zholes_size) realsize -= zholes_size[j]; @@ -1920,6 +1946,8 @@ static void __init free_area_init_core(struct pglist_data *pgdat, memmap_init(size, nid, j, zone_start_pfn); + zonetable_add(zone, nid, j, zone_start_pfn, size); + zone_start_pfn += size; zone_init_free_lists(pgdat, zone, zone->spanned_pages); @@ -1928,24 +1956,30 @@ static void __init free_area_init_core(struct pglist_data *pgdat, static void __init alloc_node_mem_map(struct pglist_data *pgdat) { - unsigned long size; - /* Skip empty nodes */ if (!pgdat->node_spanned_pages) return; +#ifdef CONFIG_FLAT_NODE_MEM_MAP /* ia64 gets its own node_mem_map, before this, without bootmem */ if (!pgdat->node_mem_map) { + unsigned long size; + struct page *map; + size = (pgdat->node_spanned_pages + 1) * sizeof(struct page); - pgdat->node_mem_map = alloc_bootmem_node(pgdat, size); + map = alloc_remap(pgdat->node_id, size); + if (!map) + map = alloc_bootmem_node(pgdat, size); + pgdat->node_mem_map = map; } -#ifndef CONFIG_DISCONTIGMEM +#ifdef CONFIG_FLATMEM /* * With no DISCONTIG, the global mem_map is just set as node 0's */ if (pgdat == NODE_DATA(0)) mem_map = NODE_DATA(0)->node_mem_map; #endif +#endif /* CONFIG_FLAT_NODE_MEM_MAP */ } void __init free_area_init_node(int nid, struct pglist_data *pgdat, @@ -1961,18 +1995,18 @@ void __init free_area_init_node(int nid, struct pglist_data *pgdat, free_area_init_core(pgdat, zones_size, zholes_size); } -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_NEED_MULTIPLE_NODES static bootmem_data_t contig_bootmem_data; struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data }; EXPORT_SYMBOL(contig_page_data); +#endif void __init free_area_init(unsigned long *zones_size) { - free_area_init_node(0, &contig_page_data, zones_size, + free_area_init_node(0, NODE_DATA(0), zones_size, __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL); } -#endif #ifdef CONFIG_PROC_FS diff --git a/mm/slab.c b/mm/slab.c index 93cbbbb39f42..122d031baab2 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -92,6 +92,7 @@ #include <linux/sysctl.h> #include <linux/module.h> #include <linux/rcupdate.h> +#include <linux/string.h> #include <asm/uaccess.h> #include <asm/cacheflush.h> @@ -3082,3 +3083,26 @@ unsigned int ksize(const void *objp) return size; } + + +/* + * kstrdup - allocate space for and copy an existing string + * + * @s: the string to duplicate + * @gfp: the GFP mask used in the kmalloc() call when allocating memory + */ +char *kstrdup(const char *s, int gfp) +{ + size_t len; + char *buf; + + if (!s) + return NULL; + + len = strlen(s) + 1; + buf = kmalloc(len, gfp); + if (buf) + memcpy(buf, s, len); + return buf; +} +EXPORT_SYMBOL(kstrdup); diff --git a/mm/sparse.c b/mm/sparse.c new file mode 100644 index 000000000000..b54e304df4a7 --- /dev/null +++ b/mm/sparse.c @@ -0,0 +1,137 @@ +/* + * sparse memory mappings. + */ +#include <linux/config.h> +#include <linux/mm.h> +#include <linux/mmzone.h> +#include <linux/bootmem.h> +#include <linux/module.h> +#include <asm/dma.h> + +/* + * Permanent SPARSEMEM data: + * + * 1) mem_section - memory sections, mem_map's for valid memory + */ +struct mem_section mem_section[NR_MEM_SECTIONS]; +EXPORT_SYMBOL(mem_section); + +/* Record a memory area against a node. */ +void memory_present(int nid, unsigned long start, unsigned long end) +{ + unsigned long pfn; + + start &= PAGE_SECTION_MASK; + for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { + unsigned long section = pfn_to_section_nr(pfn); + if (!mem_section[section].section_mem_map) + mem_section[section].section_mem_map = SECTION_MARKED_PRESENT; + } +} + +/* + * Only used by the i386 NUMA architecures, but relatively + * generic code. + */ +unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn, + unsigned long end_pfn) +{ + unsigned long pfn; + unsigned long nr_pages = 0; + + for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { + if (nid != early_pfn_to_nid(pfn)) + continue; + + if (pfn_valid(pfn)) + nr_pages += PAGES_PER_SECTION; + } + + return nr_pages * sizeof(struct page); +} + +/* + * Subtle, we encode the real pfn into the mem_map such that + * the identity pfn - section_mem_map will return the actual + * physical page frame number. + */ +static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum) +{ + return (unsigned long)(mem_map - (section_nr_to_pfn(pnum))); +} + +/* + * We need this if we ever free the mem_maps. While not implemented yet, + * this function is included for parity with its sibling. + */ +static __attribute((unused)) +struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum) +{ + return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum); +} + +static int sparse_init_one_section(struct mem_section *ms, + unsigned long pnum, struct page *mem_map) +{ + if (!valid_section(ms)) + return -EINVAL; + + ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum); + + return 1; +} + +static struct page *sparse_early_mem_map_alloc(unsigned long pnum) +{ + struct page *map; + int nid = early_pfn_to_nid(section_nr_to_pfn(pnum)); + + map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION); + if (map) + return map; + + map = alloc_bootmem_node(NODE_DATA(nid), + sizeof(struct page) * PAGES_PER_SECTION); + if (map) + return map; + + printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__); + mem_section[pnum].section_mem_map = 0; + return NULL; +} + +/* + * Allocate the accumulated non-linear sections, allocate a mem_map + * for each and record the physical to section mapping. + */ +void sparse_init(void) +{ + unsigned long pnum; + struct page *map; + + for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { + if (!valid_section_nr(pnum)) + continue; + + map = sparse_early_mem_map_alloc(pnum); + if (map) + sparse_init_one_section(&mem_section[pnum], pnum, map); + } +} + +/* + * returns the number of sections whose mem_maps were properly + * set. If this is <=0, then that means that the passed-in + * map was not consumed and must be freed. + */ +int sparse_add_one_section(unsigned long start_pfn, int nr_pages, struct page *map) +{ + struct mem_section *ms = __pfn_to_section(start_pfn); + + if (ms->section_mem_map & SECTION_MARKED_PRESENT) + return -EEXIST; + + ms->section_mem_map |= SECTION_MARKED_PRESENT; + + return sparse_init_one_section(ms, pfn_to_section_nr(start_pfn), map); +} diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 10d040461021..c34614ea5fce 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -35,6 +35,7 @@ #include <net/datalink.h> #include <net/psnap.h> #include <linux/atalk.h> +#include <linux/delay.h> #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -462,8 +463,7 @@ void aarp_probe_network(struct atalk_iface *atif) aarp_send_probe(atif->dev, &atif->address); /* Defer 1/10th */ - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(HZ / 10); + msleep(100); if (atif->status & ATIF_PROBE_FAIL) break; @@ -510,9 +510,8 @@ int aarp_proxy_probe_network(struct atalk_iface *atif, struct atalk_addr *sa) aarp_send_probe(atif->dev, sa); /* Defer 1/10th */ - current->state = TASK_INTERRUPTIBLE; write_unlock_bh(&aarp_lock); - schedule_timeout(HZ / 10); + msleep(100); write_lock_bh(&aarp_lock); if (entry->status & ATIF_PROBE_FAIL) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 18ebc664769b..c4540144f0f4 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -859,8 +859,7 @@ static int translate_table(struct ebt_replace *repl, if (repl->valid_hooks & (1 << i)) if (check_chainloops(newinfo->hook_entry[i], cl_s, udc_cnt, i, newinfo->entries)) { - if (cl_s) - vfree(cl_s); + vfree(cl_s); return -EINVAL; } @@ -883,8 +882,7 @@ static int translate_table(struct ebt_replace *repl, EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, ebt_cleanup_entry, &i); } - if (cl_s) - vfree(cl_s); + vfree(cl_s); return ret; } @@ -1030,8 +1028,7 @@ static int do_replace(void __user *user, unsigned int len) } vfree(table); - if (counterstmp) - vfree(counterstmp); + vfree(counterstmp); return ret; free_unlock: @@ -1040,8 +1037,7 @@ free_iterate: EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, ebt_cleanup_entry, NULL); free_counterstmp: - if (counterstmp) - vfree(counterstmp); + vfree(counterstmp); /* can be initialized in translate_table() */ if (newinfo->chainstack) { for (i = 0; i < num_possible_cpus(); i++) @@ -1049,11 +1045,9 @@ free_counterstmp: vfree(newinfo->chainstack); } free_entries: - if (newinfo->entries) - vfree(newinfo->entries); + vfree(newinfo->entries); free_newinfo: - if (newinfo) - vfree(newinfo); + vfree(newinfo); return ret; } @@ -1213,8 +1207,7 @@ void ebt_unregister_table(struct ebt_table *table) down(&ebt_mutex); LIST_DELETE(&ebt_tables, table); up(&ebt_mutex); - if (table->private->entries) - vfree(table->private->entries); + vfree(table->private->entries); if (table->private->chainstack) { for (i = 0; i < num_possible_cpus(); i++) vfree(table->private->chainstack[i]); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f6bdcad47da6..851eb927ed97 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -32,6 +32,7 @@ #include <net/sock.h> #include <linux/rtnetlink.h> #include <linux/random.h> +#include <linux/string.h> #define NEIGH_DEBUG 1 @@ -2592,7 +2593,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, t->neigh_vars[17].extra1 = dev; } - dev_name = net_sysctl_strdup(dev_name_source); + dev_name = kstrdup(dev_name_source, GFP_KERNEL); if (!dev_name) { err = -ENOBUFS; goto free; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a119696d5521..c327c9edadc5 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -130,19 +130,20 @@ static int checksum_udp(struct sk_buff *skb, struct udphdr *uh, */ static void poll_napi(struct netpoll *np) { + struct netpoll_info *npinfo = np->dev->npinfo; int budget = 16; if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) && - np->poll_owner != smp_processor_id() && - spin_trylock(&np->poll_lock)) { - np->rx_flags |= NETPOLL_RX_DROP; + npinfo->poll_owner != smp_processor_id() && + spin_trylock(&npinfo->poll_lock)) { + npinfo->rx_flags |= NETPOLL_RX_DROP; atomic_inc(&trapped); np->dev->poll(np->dev, &budget); atomic_dec(&trapped); - np->rx_flags &= ~NETPOLL_RX_DROP; - spin_unlock(&np->poll_lock); + npinfo->rx_flags &= ~NETPOLL_RX_DROP; + spin_unlock(&npinfo->poll_lock); } } @@ -245,6 +246,7 @@ repeat: static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { int status; + struct netpoll_info *npinfo; repeat: if(!np || !np->dev || !netif_running(np->dev)) { @@ -253,8 +255,9 @@ repeat: } /* avoid recursion */ - if(np->poll_owner == smp_processor_id() || - np->dev->xmit_lock_owner == smp_processor_id()) { + npinfo = np->dev->npinfo; + if (npinfo->poll_owner == smp_processor_id() || + np->dev->xmit_lock_owner == smp_processor_id()) { if (np->drop) np->drop(skb); else @@ -341,14 +344,22 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) static void arp_reply(struct sk_buff *skb) { + struct netpoll_info *npinfo = skb->dev->npinfo; struct arphdr *arp; unsigned char *arp_ptr; int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; u32 sip, tip; + unsigned long flags; struct sk_buff *send_skb; - struct netpoll *np = skb->dev->np; + struct netpoll *np = NULL; + + spin_lock_irqsave(&npinfo->rx_lock, flags); + if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev) + np = npinfo->rx_np; + spin_unlock_irqrestore(&npinfo->rx_lock, flags); - if (!np) return; + if (!np) + return; /* No arp on this interface */ if (skb->dev->flags & IFF_NOARP) @@ -429,9 +440,9 @@ int __netpoll_rx(struct sk_buff *skb) int proto, len, ulen; struct iphdr *iph; struct udphdr *uh; - struct netpoll *np = skb->dev->np; + struct netpoll *np = skb->dev->npinfo->rx_np; - if (!np->rx_hook) + if (!np) goto out; if (skb->dev->type != ARPHRD_ETHER) goto out; @@ -611,9 +622,8 @@ int netpoll_setup(struct netpoll *np) { struct net_device *ndev = NULL; struct in_device *in_dev; - - np->poll_lock = SPIN_LOCK_UNLOCKED; - np->poll_owner = -1; + struct netpoll_info *npinfo; + unsigned long flags; if (np->dev_name) ndev = dev_get_by_name(np->dev_name); @@ -624,7 +634,17 @@ int netpoll_setup(struct netpoll *np) } np->dev = ndev; - ndev->np = np; + if (!ndev->npinfo) { + npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); + if (!npinfo) + goto release; + + npinfo->rx_np = NULL; + npinfo->poll_lock = SPIN_LOCK_UNLOCKED; + npinfo->poll_owner = -1; + npinfo->rx_lock = SPIN_LOCK_UNLOCKED; + } else + npinfo = ndev->npinfo; if (!ndev->poll_controller) { printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", @@ -692,13 +712,20 @@ int netpoll_setup(struct netpoll *np) np->name, HIPQUAD(np->local_ip)); } - if(np->rx_hook) - np->rx_flags = NETPOLL_RX_ENABLED; + if (np->rx_hook) { + spin_lock_irqsave(&npinfo->rx_lock, flags); + npinfo->rx_flags |= NETPOLL_RX_ENABLED; + npinfo->rx_np = np; + spin_unlock_irqrestore(&npinfo->rx_lock, flags); + } + /* last thing to do is link it to the net device structure */ + ndev->npinfo = npinfo; return 0; release: - ndev->np = NULL; + if (!ndev->npinfo) + kfree(npinfo); np->dev = NULL; dev_put(ndev); return -1; @@ -706,9 +733,20 @@ int netpoll_setup(struct netpoll *np) void netpoll_cleanup(struct netpoll *np) { - if (np->dev) - np->dev->np = NULL; - dev_put(np->dev); + struct netpoll_info *npinfo; + unsigned long flags; + + if (np->dev) { + npinfo = np->dev->npinfo; + if (npinfo && npinfo->rx_np == np) { + spin_lock_irqsave(&npinfo->rx_lock, flags); + npinfo->rx_np = NULL; + npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; + spin_unlock_irqrestore(&npinfo->rx_lock, flags); + } + dev_put(np->dev); + } + np->dev = NULL; } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index c8be646cb191..880a88815211 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -35,19 +35,6 @@ extern int sysctl_somaxconn; extern char sysctl_divert_version[]; #endif /* CONFIG_NET_DIVERT */ -/* - * This strdup() is used for creating copies of network - * device names to be handed over to sysctl. - */ - -char *net_sysctl_strdup(const char *s) -{ - char *rv = kmalloc(strlen(s)+1, GFP_KERNEL); - if (rv) - strcpy(rv, s); - return rv; -} - ctl_table core_table[] = { #ifdef CONFIG_NET { @@ -177,6 +164,4 @@ ctl_table core_table[] = { { .ctl_name = 0 } }; -EXPORT_SYMBOL(net_sysctl_strdup); - #endif diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 650dcb12d9a1..d8a10e3dd77d 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1471,7 +1471,7 @@ static void devinet_sysctl_register(struct in_device *in_dev, * by sysctl and we wouldn't want anyone to change it under our feet * (see SIOCSIFNAME). */ - dev_name = net_sysctl_strdup(dev_name); + dev_name = kstrdup(dev_name, GFP_KERNEL); if (!dev_name) goto free; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index dc4362b57cfa..9cde8c61f525 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -339,7 +339,7 @@ target(struct sk_buff **pskb, * error messages (RELATED) and information requests (see below) */ if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP && (ctinfo == IP_CT_RELATED - || ctinfo == IP_CT_IS_REPLY+IP_CT_IS_REPLY)) + || ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY)) return IPT_CONTINUE; /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f4d53c919869..80cf633d9f4a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1767,7 +1767,7 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb, struct in_device *in_dev, u32 daddr, u32 saddr, u32 tos) { - struct rtable* rth; + struct rtable* rth = NULL; int err; unsigned hash; @@ -1794,7 +1794,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb, u32 daddr, u32 saddr, u32 tos) { #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - struct rtable* rth; + struct rtable* rth = NULL; unsigned char hop, hopcount, lasthop; int err = -EINVAL; unsigned int hash; @@ -2239,7 +2239,7 @@ static inline int ip_mkroute_output_def(struct rtable **rp, struct net_device *dev_out, unsigned flags) { - struct rtable *rth; + struct rtable *rth = NULL; int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags); unsigned hash; if (err == 0) { @@ -2267,7 +2267,7 @@ static inline int ip_mkroute_output(struct rtable** rp, unsigned char hop; unsigned hash; int err = -EINVAL; - struct rtable *rth; + struct rtable *rth = NULL; if (res->fi && res->fi->fib_nhs > 1) { unsigned char hopcount = res->fi->fib_nhs; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 14f5c53235fe..a54d4ef3fd35 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -57,6 +57,7 @@ #endif #include <linux/delay.h> #include <linux/notifier.h> +#include <linux/string.h> #include <net/sock.h> #include <net/snmp.h> @@ -3437,7 +3438,7 @@ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf * by sysctl and we wouldn't want anyone to change it under our feet * (see SIOCSIFNAME). */ - dev_name = net_sysctl_strdup(dev_name); + dev_name = kstrdup(dev_name, GFP_KERNEL); if (!dev_name) goto free; diff --git a/net/socket.c b/net/socket.c index 38729af09461..6f2a17881972 100644 --- a/net/socket.c +++ b/net/socket.c @@ -383,9 +383,8 @@ int sock_map_fd(struct socket *sock) goto out; } - sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino); + this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino); this.name = name; - this.len = strlen(name); this.hash = SOCK_INODE(sock)->i_ino; file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 2b99b4028d31..d6baf6fdf8a9 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -8,6 +8,7 @@ #include <linux/err.h> #include <linux/seq_file.h> #include <linux/hash.h> +#include <linux/string.h> #define RPCDBG_FACILITY RPCDBG_AUTH @@ -20,14 +21,6 @@ */ -static char *strdup(char *s) -{ - char *rv = kmalloc(strlen(s)+1, GFP_KERNEL); - if (rv) - strcpy(rv, s); - return rv; -} - struct unix_domain { struct auth_domain h; int addr_changes; @@ -55,7 +48,7 @@ struct auth_domain *unix_domain_find(char *name) if (new == NULL) return NULL; cache_init(&new->h.h); - new->h.name = strdup(name); + new->h.name = kstrdup(name, GFP_KERNEL); new->h.flavour = RPC_AUTH_UNIX; new->addr_changes = 0; new->h.h.expiry_time = NEVER; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 2a24b243b841..04bec047fa9a 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -29,6 +29,10 @@ * 2000-11-14 Henner Eisen Closing datalink from NETDEV_GOING_DOWN * 2002-10-06 Arnaldo C. Melo Get rid of cli/sti, move proc stuff to * x25_proc.c, using seq_file + * 2005-04-02 Shaun Pereira Selective sub address matching + * with call user data + * 2005-04-15 Shaun Pereira Fast select with no restriction on + * response */ #include <linux/config.h> @@ -219,7 +223,8 @@ static void x25_insert_socket(struct sock *sk) * Note: if a listening socket has cud set it must only get calls * with matching cud. */ -static struct sock *x25_find_listener(struct x25_address *addr, struct x25_calluserdata *calluserdata) +static struct sock *x25_find_listener(struct x25_address *addr, + struct sk_buff *skb) { struct sock *s; struct sock *next_best; @@ -230,22 +235,23 @@ static struct sock *x25_find_listener(struct x25_address *addr, struct x25_callu sk_for_each(s, node, &x25_list) if ((!strcmp(addr->x25_addr, - x25_sk(s)->source_addr.x25_addr) || - !strcmp(addr->x25_addr, - null_x25_address.x25_addr)) && - s->sk_state == TCP_LISTEN) { - + x25_sk(s)->source_addr.x25_addr) || + !strcmp(addr->x25_addr, + null_x25_address.x25_addr)) && + s->sk_state == TCP_LISTEN) { /* * Found a listening socket, now check the incoming * call user data vs this sockets call user data */ - if (x25_check_calluserdata(&x25_sk(s)->calluserdata, calluserdata)) { - sock_hold(s); - goto found; - } - if (x25_sk(s)->calluserdata.cudlength == 0) { + if(skb->len > 0 && x25_sk(s)->cudmatchlength > 0) { + if((memcmp(x25_sk(s)->calluserdata.cuddata, + skb->data, + x25_sk(s)->cudmatchlength)) == 0) { + sock_hold(s); + goto found; + } + } else next_best = s; - } } if (next_best) { s = next_best; @@ -497,6 +503,9 @@ static int x25_create(struct socket *sock, int protocol) x25->t23 = sysctl_x25_clear_request_timeout; x25->t2 = sysctl_x25_ack_holdback_timeout; x25->state = X25_STATE_0; + x25->cudmatchlength = 0; + x25->accptapprv = X25_DENY_ACCPT_APPRV; /* normally no cud */ + /* on call accept */ x25->facilities.winsize_in = X25_DEFAULT_WINDOW_SIZE; x25->facilities.winsize_out = X25_DEFAULT_WINDOW_SIZE; @@ -545,6 +554,8 @@ static struct sock *x25_make_new(struct sock *osk) x25->t2 = ox25->t2; x25->facilities = ox25->facilities; x25->qbitincl = ox25->qbitincl; + x25->cudmatchlength = ox25->cudmatchlength; + x25->accptapprv = ox25->accptapprv; x25_init_timers(sk); out: @@ -822,7 +833,6 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, struct x25_sock *makex25; struct x25_address source_addr, dest_addr; struct x25_facilities facilities; - struct x25_calluserdata calluserdata; int len, rc; /* @@ -845,19 +855,10 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, skb_pull(skb,len); /* - * Incoming Call User Data. - */ - if (skb->len >= 0) { - memcpy(calluserdata.cuddata, skb->data, skb->len); - calluserdata.cudlength = skb->len; - } - - skb_push(skb,len); - - /* * Find a listener for the particular address/cud pair. */ - sk = x25_find_listener(&source_addr,&calluserdata); + sk = x25_find_listener(&source_addr,skb); + skb_push(skb,len); /* * We can't accept the Call Request. @@ -900,11 +901,23 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, makex25->neighbour = nb; makex25->facilities = facilities; makex25->vc_facil_mask = x25_sk(sk)->vc_facil_mask; - makex25->calluserdata = calluserdata; - - x25_write_internal(make, X25_CALL_ACCEPTED); + /* ensure no reverse facil on accept */ + makex25->vc_facil_mask &= ~X25_MASK_REVERSE; + makex25->cudmatchlength = x25_sk(sk)->cudmatchlength; + + /* Normally all calls are accepted immediatly */ + if(makex25->accptapprv & X25_DENY_ACCPT_APPRV) { + x25_write_internal(make, X25_CALL_ACCEPTED); + makex25->state = X25_STATE_3; + } - makex25->state = X25_STATE_3; + /* + * Incoming Call User Data. + */ + if (skb->len >= 0) { + memcpy(makex25->calluserdata.cuddata, skb->data, skb->len); + makex25->calluserdata.cudlength = skb->len; + } sk->sk_ack_backlog++; @@ -1288,7 +1301,8 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) if (facilities.throughput < 0x03 || facilities.throughput > 0xDD) break; - if (facilities.reverse && facilities.reverse != 1) + if (facilities.reverse && + (facilities.reverse | 0x81)!= 0x81) break; x25->facilities = facilities; rc = 0; @@ -1325,6 +1339,44 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; } + case SIOCX25SCUDMATCHLEN: { + struct x25_subaddr sub_addr; + rc = -EINVAL; + if(sk->sk_state != TCP_CLOSE) + break; + rc = -EFAULT; + if (copy_from_user(&sub_addr, argp, + sizeof(sub_addr))) + break; + rc = -EINVAL; + if(sub_addr.cudmatchlength > X25_MAX_CUD_LEN) + break; + x25->cudmatchlength = sub_addr.cudmatchlength; + rc = 0; + break; + } + + case SIOCX25CALLACCPTAPPRV: { + rc = -EINVAL; + if (sk->sk_state != TCP_CLOSE) + break; + x25->accptapprv = X25_ALLOW_ACCPT_APPRV; + rc = 0; + break; + } + + case SIOCX25SENDCALLACCPT: { + rc = -EINVAL; + if (sk->sk_state != TCP_ESTABLISHED) + break; + if (x25->accptapprv) /* must call accptapprv above */ + break; + x25_write_internal(sk, X25_CALL_ACCEPTED); + x25->state = X25_STATE_3; + rc = 0; + break; + } + default: rc = dev_ioctl(cmd, argp); break; diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index a21bdb95f9a8..54278b962f4c 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -17,6 +17,8 @@ * X.25 001 Split from x25_subr.c * mar/20/00 Daniela Squassoni Disabling/enabling of facilities * negotiation. + * apr/14/05 Shaun Pereira - Allow fast select with no restriction + * on response. */ #include <linux/kernel.h> @@ -43,9 +45,31 @@ int x25_parse_facilities(struct sk_buff *skb, case X25_FAC_CLASS_A: switch (*p) { case X25_FAC_REVERSE: - facilities->reverse = p[1] & 0x01; - *vc_fac_mask |= X25_MASK_REVERSE; - break; + if((p[1] & 0x81) == 0x81) { + facilities->reverse = p[1] & 0x81; + *vc_fac_mask |= X25_MASK_REVERSE; + break; + } + + if((p[1] & 0x01) == 0x01) { + facilities->reverse = p[1] & 0x01; + *vc_fac_mask |= X25_MASK_REVERSE; + break; + } + + if((p[1] & 0x80) == 0x80) { + facilities->reverse = p[1] & 0x80; + *vc_fac_mask |= X25_MASK_REVERSE; + break; + } + + if(p[1] == 0x00) { + facilities->reverse + = X25_DEFAULT_REVERSE; + *vc_fac_mask |= X25_MASK_REVERSE; + break; + } + case X25_FAC_THROUGHPUT: facilities->throughput = p[1]; *vc_fac_mask |= X25_MASK_THROUGHPUT; @@ -122,7 +146,7 @@ int x25_create_facilities(unsigned char *buffer, if (facilities->reverse && (facil_mask & X25_MASK_REVERSE)) { *p++ = X25_FAC_REVERSE; - *p++ = !!facilities->reverse; + *p++ = facilities->reverse; } if (facilities->throughput && (facil_mask & X25_MASK_THROUGHPUT)) { @@ -171,7 +195,7 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk, /* * They want reverse charging, we won't accept it. */ - if (theirs.reverse && ours->reverse) { + if ((theirs.reverse & 0x01 ) && (ours->reverse & 0x01)) { SOCK_DEBUG(sk, "X.25: rejecting reverse charging request"); return -1; } diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index 183fea3bba67..7fd872ad0c20 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c @@ -19,6 +19,8 @@ * mar/20/00 Daniela Squassoni Disabling/enabling of facilities * negotiation. * jun/24/01 Arnaldo C. Melo use skb_queue_purge, cleanups + * apr/04/15 Shaun Pereira Fast select with no + * restriction on response. */ #include <linux/kernel.h> @@ -127,8 +129,12 @@ void x25_write_internal(struct sock *sk, int frametype) len += 1 + X25_ADDR_LEN + X25_MAX_FAC_LEN + X25_MAX_CUD_LEN; break; - case X25_CALL_ACCEPTED: - len += 1 + X25_MAX_FAC_LEN + X25_MAX_CUD_LEN; + case X25_CALL_ACCEPTED: /* fast sel with no restr on resp */ + if(x25->facilities.reverse & 0x80) { + len += 1 + X25_MAX_FAC_LEN + X25_MAX_CUD_LEN; + } else { + len += 1 + X25_MAX_FAC_LEN; + } break; case X25_CLEAR_REQUEST: case X25_RESET_REQUEST: @@ -203,9 +209,16 @@ void x25_write_internal(struct sock *sk, int frametype) x25->vc_facil_mask); dptr = skb_put(skb, len); memcpy(dptr, facilities, len); - dptr = skb_put(skb, x25->calluserdata.cudlength); - memcpy(dptr, x25->calluserdata.cuddata, - x25->calluserdata.cudlength); + + /* fast select with no restriction on response + allows call user data. Userland must + ensure it is ours and not theirs */ + if(x25->facilities.reverse & 0x80) { + dptr = skb_put(skb, + x25->calluserdata.cudlength); + memcpy(dptr, x25->calluserdata.cuddata, + x25->calluserdata.cudlength); + } x25->calluserdata.cudlength = 0; break; @@ -354,21 +367,3 @@ void x25_check_rbuf(struct sock *sk) } } -/* - * Compare 2 calluserdata structures, used to find correct listening sockets - * when call user data is used. - */ -int x25_check_calluserdata(struct x25_calluserdata *ours, struct x25_calluserdata *theirs) -{ - int i; - if (ours->cudlength != theirs->cudlength) - return 0; - - for (i=0;i<ours->cudlength;i++) { - if (ours->cuddata[i] != theirs->cuddata[i]) { - return 0; - } - } - return 1; -} - diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 352d531ee3c1..76ba6be3dfc9 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -176,10 +176,10 @@ endif define rule_cc_o_c $(if $($(quiet)cmd_checksrc),echo ' $($(quiet)cmd_checksrc)';) \ $(cmd_checksrc) \ - $(if $($(quiet)cmd_cc_o_c),echo ' $($(quiet)cmd_cc_o_c)';) \ + $(if $($(quiet)cmd_cc_o_c),echo ' $(subst ','\'',$($(quiet)cmd_cc_o_c))';) \ $(cmd_cc_o_c); \ $(cmd_modversions) \ - scripts/basic/fixdep $(depfile) $@ '$(cmd_cc_o_c)' > $(@D)/.$(@F).tmp; \ + scripts/basic/fixdep $(depfile) $@ '$(subst ','\'',$(cmd_cc_o_c))' > $(@D)/.$(@F).tmp; \ rm -f $(depfile); \ mv -f $(@D)/.$(@F).tmp $(@D)/.$(@F).cmd endef diff --git a/scripts/kconfig/gconf.glade b/scripts/kconfig/gconf.glade index 1e1736d81ee9..ace4706ab251 100644 --- a/scripts/kconfig/gconf.glade +++ b/scripts/kconfig/gconf.glade @@ -432,8 +432,8 @@ <child> <widget class="GtkScrolledWindow" id="scrolledwindow1"> <property name="visible">True</property> - <property name="hscrollbar_policy">GTK_POLICY_ALWAYS</property> - <property name="vscrollbar_policy">GTK_POLICY_ALWAYS</property> + <property name="hscrollbar_policy">GTK_POLICY_AUTOMATIC</property> + <property name="vscrollbar_policy">GTK_POLICY_AUTOMATIC</property> <property name="shadow_type">GTK_SHADOW_IN</property> <property name="window_placement">GTK_CORNER_TOP_LEFT</property> @@ -466,8 +466,8 @@ <child> <widget class="GtkScrolledWindow" id="scrolledwindow2"> <property name="visible">True</property> - <property name="hscrollbar_policy">GTK_POLICY_ALWAYS</property> - <property name="vscrollbar_policy">GTK_POLICY_ALWAYS</property> + <property name="hscrollbar_policy">GTK_POLICY_AUTOMATIC</property> + <property name="vscrollbar_policy">GTK_POLICY_AUTOMATIC</property> <property name="shadow_type">GTK_SHADOW_IN</property> <property name="window_placement">GTK_CORNER_TOP_LEFT</property> @@ -496,7 +496,7 @@ <widget class="GtkScrolledWindow" id="scrolledwindow3"> <property name="visible">True</property> <property name="hscrollbar_policy">GTK_POLICY_NEVER</property> - <property name="vscrollbar_policy">GTK_POLICY_ALWAYS</property> + <property name="vscrollbar_policy">GTK_POLICY_AUTOMATIC</property> <property name="shadow_type">GTK_SHADOW_IN</property> <property name="window_placement">GTK_CORNER_TOP_LEFT</property> diff --git a/security/commoncap.c b/security/commoncap.c index 849b8c338ee8..04c12f58d656 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -149,7 +149,7 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) if (bprm->e_uid != current->uid || bprm->e_gid != current->gid || !cap_issubset (new_permitted, current->cap_permitted)) { - current->mm->dumpable = 0; + current->mm->dumpable = suid_dumpable; if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) { if (!capable(CAP_SETUID)) { diff --git a/security/dummy.c b/security/dummy.c index b32eff146547..6ff887586479 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -130,7 +130,7 @@ static void dummy_bprm_free_security (struct linux_binprm *bprm) static void dummy_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) { if (bprm->e_uid != current->uid || bprm->e_gid != current->gid) { - current->mm->dumpable = 0; + current->mm->dumpable = suid_dumpable; if ((unsafe & ~LSM_UNSAFE_PTRACE_CAP) && !capable(CAP_SETUID)) { bprm->e_uid = current->uid; diff --git a/sound/core/info.c b/sound/core/info.c index 31faffe01cb0..5e122bbe7c92 100644 --- a/sound/core/info.c +++ b/sound/core/info.c @@ -24,6 +24,7 @@ #include <linux/vmalloc.h> #include <linux/time.h> #include <linux/smp_lock.h> +#include <linux/string.h> #include <sound/core.h> #include <sound/minors.h> #include <sound/info.h> @@ -754,7 +755,7 @@ static snd_info_entry_t *snd_info_create_entry(const char *name) entry = kcalloc(1, sizeof(*entry), GFP_KERNEL); if (entry == NULL) return NULL; - entry->name = snd_kmalloc_strdup(name, GFP_KERNEL); + entry->name = kstrdup(name, GFP_KERNEL); if (entry->name == NULL) { kfree(entry); return NULL; diff --git a/sound/core/info_oss.c b/sound/core/info_oss.c index f9e4ce443454..12107968d402 100644 --- a/sound/core/info_oss.c +++ b/sound/core/info_oss.c @@ -22,6 +22,7 @@ #include <sound/driver.h> #include <linux/slab.h> #include <linux/time.h> +#include <linux/string.h> #include <sound/core.h> #include <sound/minors.h> #include <sound/info.h> @@ -51,7 +52,7 @@ int snd_oss_info_register(int dev, int num, char *string) x = NULL; } } else { - x = snd_kmalloc_strdup(string, GFP_KERNEL); + x = kstrdup(string, GFP_KERNEL); if (x == NULL) { up(&strings); return -ENOMEM; diff --git a/sound/core/memory.c b/sound/core/memory.c index 20860fec9364..c1fb28e84330 100644 --- a/sound/core/memory.c +++ b/sound/core/memory.c @@ -184,6 +184,20 @@ void snd_hidden_vfree(void *obj) snd_wrapper_vfree(obj); } +char *snd_hidden_kstrdup(const char *s, int flags) +{ + int len; + char *buf; + + if (!s) return NULL; + + len = strlen(s) + 1; + buf = _snd_kmalloc(len, flags); + if (buf) + memcpy(buf, s, len); + return buf; +} + static void snd_memory_info_read(snd_info_entry_t *entry, snd_info_buffer_t * buffer) { snd_iprintf(buffer, "kmalloc: %li bytes\n", snd_alloc_kmalloc); @@ -214,36 +228,9 @@ int __exit snd_memory_info_done(void) return 0; } -#else - -#define _snd_kmalloc kmalloc - #endif /* CONFIG_SND_DEBUG_MEMORY */ /** - * snd_kmalloc_strdup - copy the string - * @string: the original string - * @flags: allocation conditions, GFP_XXX - * - * Allocates a memory chunk via kmalloc() and copies the string to it. - * - * Returns the pointer, or NULL if no enoguh memory. - */ -char *snd_kmalloc_strdup(const char *string, int flags) -{ - size_t len; - char *ptr; - - if (!string) - return NULL; - len = strlen(string) + 1; - ptr = _snd_kmalloc(len, flags); - if (ptr) - memcpy(ptr, string, len); - return ptr; -} - -/** * copy_to_user_fromio - copy data from mmio-space to user-space * @dst: the destination pointer on user-space * @src: the source pointer on mmio diff --git a/sound/core/oss/mixer_oss.c b/sound/core/oss/mixer_oss.c index 98ed9a9f0da6..98fc0766f885 100644 --- a/sound/core/oss/mixer_oss.c +++ b/sound/core/oss/mixer_oss.c @@ -24,6 +24,7 @@ #include <linux/smp_lock.h> #include <linux/slab.h> #include <linux/time.h> +#include <linux/string.h> #include <sound/core.h> #include <sound/minors.h> #include <sound/control.h> @@ -1137,7 +1138,7 @@ static void snd_mixer_oss_proc_write(snd_info_entry_t *entry, goto __unlock; } tbl->oss_id = ch; - tbl->name = snd_kmalloc_strdup(str, GFP_KERNEL); + tbl->name = kstrdup(str, GFP_KERNEL); if (! tbl->name) { kfree(tbl); goto __unlock; diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index cab30977e7c0..de7444c586f9 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -33,6 +33,7 @@ #include <linux/time.h> #include <linux/vmalloc.h> #include <linux/moduleparam.h> +#include <linux/string.h> #include <sound/core.h> #include <sound/minors.h> #include <sound/pcm.h> @@ -2360,7 +2361,7 @@ static void snd_pcm_oss_proc_write(snd_info_entry_t *entry, for (setup1 = pstr->oss.setup_list; setup1->next; setup1 = setup1->next); setup1->next = setup; } - template.task_name = snd_kmalloc_strdup(task_name, GFP_KERNEL); + template.task_name = kstrdup(task_name, GFP_KERNEL); } else { buffer->error = -ENOMEM; } diff --git a/sound/core/sound.c b/sound/core/sound.c index 0815fadeb3ec..7612884f530b 100644 --- a/sound/core/sound.c +++ b/sound/core/sound.c @@ -399,8 +399,8 @@ EXPORT_SYMBOL(snd_hidden_kcalloc); EXPORT_SYMBOL(snd_hidden_kfree); EXPORT_SYMBOL(snd_hidden_vmalloc); EXPORT_SYMBOL(snd_hidden_vfree); +EXPORT_SYMBOL(snd_hidden_kstrdup); #endif -EXPORT_SYMBOL(snd_kmalloc_strdup); EXPORT_SYMBOL(copy_to_user_fromio); EXPORT_SYMBOL(copy_from_user_toio); /* init.c */ diff --git a/sound/core/timer.c b/sound/core/timer.c index b498e5482d77..cfaccd415b3b 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -26,6 +26,7 @@ #include <linux/slab.h> #include <linux/time.h> #include <linux/moduleparam.h> +#include <linux/string.h> #include <sound/core.h> #include <sound/timer.h> #include <sound/control.h> @@ -100,7 +101,7 @@ static snd_timer_instance_t *snd_timer_instance_new(char *owner, snd_timer_t *ti timeri = kcalloc(1, sizeof(*timeri), GFP_KERNEL); if (timeri == NULL) return NULL; - timeri->owner = snd_kmalloc_strdup(owner, GFP_KERNEL); + timeri->owner = kstrdup(owner, GFP_KERNEL); if (! timeri->owner) { kfree(timeri); return NULL; diff --git a/sound/isa/gus/gus_mem.c b/sound/isa/gus/gus_mem.c index 609838e8ef67..5eb766dd564b 100644 --- a/sound/isa/gus/gus_mem.c +++ b/sound/isa/gus/gus_mem.c @@ -21,6 +21,7 @@ #include <sound/driver.h> #include <linux/slab.h> +#include <linux/string.h> #include <sound/core.h> #include <sound/gus.h> #include <sound/info.h> @@ -213,7 +214,7 @@ snd_gf1_mem_block_t *snd_gf1_mem_alloc(snd_gf1_mem_t * alloc, int owner, if (share_id != NULL) memcpy(&block.share_id, share_id, sizeof(block.share_id)); block.owner = owner; - block.name = snd_kmalloc_strdup(name, GFP_KERNEL); + block.name = kstrdup(name, GFP_KERNEL); nblock = snd_gf1_mem_xalloc(alloc, &block); snd_gf1_mem_lock(alloc, 1); return nblock; @@ -253,13 +254,13 @@ int snd_gf1_mem_init(snd_gus_card_t * gus) if (gus->gf1.enh_mode) { block.ptr = 0; block.size = 1024; - block.name = snd_kmalloc_strdup("InterWave LFOs", GFP_KERNEL); + block.name = kstrdup("InterWave LFOs", GFP_KERNEL); if (snd_gf1_mem_xalloc(alloc, &block) == NULL) return -ENOMEM; } block.ptr = gus->gf1.default_voice_address; block.size = 4; - block.name = snd_kmalloc_strdup("Voice default (NULL's)", GFP_KERNEL); + block.name = kstrdup("Voice default (NULL's)", GFP_KERNEL); if (snd_gf1_mem_xalloc(alloc, &block) == NULL) return -ENOMEM; #ifdef CONFIG_SND_DEBUG diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 9edd558d6bd3..bab89843d850 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1781,7 +1781,7 @@ static int add_control(struct alc_spec *spec, int type, const char *name, unsign knew = &spec->kctl_alloc[spec->num_kctl_used]; *knew = alc880_control_templates[type]; - knew->name = snd_kmalloc_strdup(name, GFP_KERNEL); + knew->name = kstrdup(name, GFP_KERNEL); if (! knew->name) return -ENOMEM; knew->private_value = val; diff --git a/sound/synth/emux/emux.c b/sound/synth/emux/emux.c index 16f3b461627a..60d0b2c66698 100644 --- a/sound/synth/emux/emux.c +++ b/sound/synth/emux/emux.c @@ -22,6 +22,7 @@ #include <linux/wait.h> #include <linux/sched.h> #include <linux/slab.h> +#include <linux/string.h> #include <sound/core.h> #include <sound/emux_synth.h> #include <linux/init.h> @@ -76,7 +77,7 @@ int snd_emux_register(snd_emux_t *emu, snd_card_t *card, int index, char *name) snd_assert(name != NULL, return -EINVAL); emu->card = card; - emu->name = snd_kmalloc_strdup(name, GFP_KERNEL); + emu->name = kstrdup(name, GFP_KERNEL); emu->voices = kcalloc(emu->max_voices, sizeof(snd_emux_voice_t), GFP_KERNEL); if (emu->voices == NULL) return -ENOMEM; |