diff options
105 files changed, 3169 insertions, 1036 deletions
diff --git a/Documentation/filesystems/gfs2-glocks.txt b/Documentation/filesystems/gfs2-glocks.txt index fcc79957be63..1fb12f9dfe48 100644 --- a/Documentation/filesystems/gfs2-glocks.txt +++ b/Documentation/filesystems/gfs2-glocks.txt @@ -5,7 +5,7 @@ This documents the basic principles of the glock state machine internals. Each glock (struct gfs2_glock in fs/gfs2/incore.h) has two main (internal) locks: - 1. A spinlock (gl_spin) which protects the internal state such + 1. A spinlock (gl_lockref.lock) which protects the internal state such as gl_state, gl_target and the list of holders (gl_holders) 2. A non-blocking bit lock, GLF_LOCK, which is used to prevent other threads from making calls to the DLM, etc. at the same time. If a @@ -82,8 +82,8 @@ rather than via the glock. Locking rules for glock operations: -Operation | GLF_LOCK bit lock held | gl_spin spinlock held ------------------------------------------------------------------ +Operation | GLF_LOCK bit lock held | gl_lockref.lock spinlock held +------------------------------------------------------------------------- go_xmote_th | Yes | No go_xmote_bh | Yes | No go_inval | Yes | No diff --git a/arch/nios2/include/asm/cmpxchg.h b/arch/nios2/include/asm/cmpxchg.h index 85938711542d..a7978f14d157 100644 --- a/arch/nios2/include/asm/cmpxchg.h +++ b/arch/nios2/include/asm/cmpxchg.h @@ -9,53 +9,6 @@ #ifndef _ASM_NIOS2_CMPXCHG_H #define _ASM_NIOS2_CMPXCHG_H -#include <linux/irqflags.h> - -#define xchg(ptr, x) \ - ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))) - -struct __xchg_dummy { unsigned long a[100]; }; -#define __xg(x) ((volatile struct __xchg_dummy *)(x)) - -static inline unsigned long __xchg(unsigned long x, volatile void *ptr, - int size) -{ - unsigned long tmp, flags; - - local_irq_save(flags); - - switch (size) { - case 1: - __asm__ __volatile__( - "ldb %0, %2\n" - "stb %1, %2\n" - : "=&r" (tmp) - : "r" (x), "m" (*__xg(ptr)) - : "memory"); - break; - case 2: - __asm__ __volatile__( - "ldh %0, %2\n" - "sth %1, %2\n" - : "=&r" (tmp) - : "r" (x), "m" (*__xg(ptr)) - : "memory"); - break; - case 4: - __asm__ __volatile__( - "ldw %0, %2\n" - "stw %1, %2\n" - : "=&r" (tmp) - : "r" (x), "m" (*__xg(ptr)) - : "memory"); - break; - } - - local_irq_restore(flags); - return tmp; -} - #include <asm-generic/cmpxchg.h> -#include <asm-generic/cmpxchg-local.h> #endif /* _ASM_NIOS2_CMPXCHG_H */ diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c index b101a43d3c5a..a4ff86d58d5c 100644 --- a/arch/nios2/kernel/setup.c +++ b/arch/nios2/kernel/setup.c @@ -104,7 +104,7 @@ asmlinkage void __init nios2_boot_init(unsigned r4, unsigned r5, unsigned r6, unsigned r7) { unsigned dtb_passed = 0; - char cmdline_passed[COMMAND_LINE_SIZE] = { 0, }; + char cmdline_passed[COMMAND_LINE_SIZE] __maybe_unused = { 0, }; #if defined(CONFIG_NIOS2_PASS_CMDLINE) if (r4 == 0x534f494e) { /* r4 is magic NIOS */ diff --git a/arch/nios2/lib/memmove.c b/arch/nios2/lib/memmove.c index c65ef517eb80..866c021f278c 100644 --- a/arch/nios2/lib/memmove.c +++ b/arch/nios2/lib/memmove.c @@ -10,7 +10,6 @@ #include <linux/types.h> #include <linux/string.h> -#ifdef __HAVE_ARCH_MEMMOVE void *memmove(void *d, const void *s, size_t count) { unsigned long dst, src; @@ -79,4 +78,3 @@ restdown: return d; } -#endif /* __HAVE_ARCH_MEMMOVE */ diff --git a/arch/nios2/lib/memset.c b/arch/nios2/lib/memset.c index 65e97802f5cc..c2cfcb121e34 100644 --- a/arch/nios2/lib/memset.c +++ b/arch/nios2/lib/memset.c @@ -10,7 +10,6 @@ #include <linux/types.h> #include <linux/string.h> -#ifdef __HAVE_ARCH_MEMSET void *memset(void *s, int c, size_t count) { int destptr, charcnt, dwordcnt, fill8reg, wrkrega; @@ -78,4 +77,3 @@ void *memset(void *s, int c, size_t count) return s; } -#endif /* __HAVE_ARCH_MEMSET */ diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h index 94710cfc1ce8..0448a2c8eafb 100644 --- a/arch/parisc/include/asm/compat.h +++ b/arch/parisc/include/asm/compat.h @@ -206,10 +206,10 @@ struct compat_ipc64_perm { struct compat_semid64_ds { struct compat_ipc64_perm sem_perm; - compat_time_t sem_otime; unsigned int __unused1; - compat_time_t sem_ctime; + compat_time_t sem_otime; unsigned int __unused2; + compat_time_t sem_ctime; compat_ulong_t sem_nsems; compat_ulong_t __unused3; compat_ulong_t __unused4; diff --git a/arch/parisc/include/uapi/asm/ipcbuf.h b/arch/parisc/include/uapi/asm/ipcbuf.h index bd956c425785..790c4119f647 100644 --- a/arch/parisc/include/uapi/asm/ipcbuf.h +++ b/arch/parisc/include/uapi/asm/ipcbuf.h @@ -1,6 +1,9 @@ #ifndef __PARISC_IPCBUF_H__ #define __PARISC_IPCBUF_H__ +#include <asm/bitsperlong.h> +#include <linux/posix_types.h> + /* * The ipc64_perm structure for PA-RISC is almost identical to * kern_ipc_perm as we have always had 32-bit UIDs and GIDs in the kernel. @@ -10,16 +13,18 @@ struct ipc64_perm { - key_t key; - uid_t uid; - gid_t gid; - uid_t cuid; - gid_t cgid; + __kernel_key_t key; + __kernel_uid_t uid; + __kernel_gid_t gid; + __kernel_uid_t cuid; + __kernel_gid_t cgid; +#if __BITS_PER_LONG != 64 unsigned short int __pad1; - mode_t mode; +#endif + __kernel_mode_t mode; unsigned short int __pad2; unsigned short int seq; - unsigned int __pad3; + unsigned int __pad3; unsigned long long int __unused1; unsigned long long int __unused2; }; diff --git a/arch/parisc/include/uapi/asm/msgbuf.h b/arch/parisc/include/uapi/asm/msgbuf.h index 342138983914..2e83ac758e19 100644 --- a/arch/parisc/include/uapi/asm/msgbuf.h +++ b/arch/parisc/include/uapi/asm/msgbuf.h @@ -27,13 +27,13 @@ struct msqid64_ds { unsigned int __pad3; #endif __kernel_time_t msg_ctime; /* last change time */ - unsigned int msg_cbytes; /* current number of bytes on queue */ - unsigned int msg_qnum; /* number of messages in queue */ - unsigned int msg_qbytes; /* max number of bytes on queue */ + unsigned long msg_cbytes; /* current number of bytes on queue */ + unsigned long msg_qnum; /* number of messages in queue */ + unsigned long msg_qbytes; /* max number of bytes on queue */ __kernel_pid_t msg_lspid; /* pid of last msgsnd */ __kernel_pid_t msg_lrpid; /* last receive pid */ - unsigned int __unused1; - unsigned int __unused2; + unsigned long __unused1; + unsigned long __unused2; }; #endif /* _PARISC_MSGBUF_H */ diff --git a/arch/parisc/include/uapi/asm/posix_types.h b/arch/parisc/include/uapi/asm/posix_types.h index b9344256f76b..f3b5f70b9a5f 100644 --- a/arch/parisc/include/uapi/asm/posix_types.h +++ b/arch/parisc/include/uapi/asm/posix_types.h @@ -7,8 +7,10 @@ * assume GCC is being used. */ +#ifndef __LP64__ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t +#endif typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/parisc/include/uapi/asm/sembuf.h b/arch/parisc/include/uapi/asm/sembuf.h index f01d89e30d73..c20971bf520f 100644 --- a/arch/parisc/include/uapi/asm/sembuf.h +++ b/arch/parisc/include/uapi/asm/sembuf.h @@ -23,9 +23,9 @@ struct semid64_ds { unsigned int __pad2; #endif __kernel_time_t sem_ctime; /* last change time */ - unsigned int sem_nsems; /* no. of semaphores in array */ - unsigned int __unused1; - unsigned int __unused2; + unsigned long sem_nsems; /* no. of semaphores in array */ + unsigned long __unused1; + unsigned long __unused2; }; #endif /* _PARISC_SEMBUF_H */ diff --git a/arch/parisc/include/uapi/asm/shmbuf.h b/arch/parisc/include/uapi/asm/shmbuf.h index 8496c38560c6..750e13e77991 100644 --- a/arch/parisc/include/uapi/asm/shmbuf.h +++ b/arch/parisc/include/uapi/asm/shmbuf.h @@ -30,12 +30,12 @@ struct shmid64_ds { #if __BITS_PER_LONG != 64 unsigned int __pad4; #endif - size_t shm_segsz; /* size of segment (bytes) */ + __kernel_size_t shm_segsz; /* size of segment (bytes) */ __kernel_pid_t shm_cpid; /* pid of creator */ __kernel_pid_t shm_lpid; /* pid of last operator */ - unsigned int shm_nattch; /* no. of current attaches */ - unsigned int __unused1; - unsigned int __unused2; + unsigned long shm_nattch; /* no. of current attaches */ + unsigned long __unused1; + unsigned long __unused2; }; struct shminfo64 { diff --git a/arch/parisc/include/uapi/asm/stat.h b/arch/parisc/include/uapi/asm/stat.h index b606b366d0a7..3310d2a49759 100644 --- a/arch/parisc/include/uapi/asm/stat.h +++ b/arch/parisc/include/uapi/asm/stat.h @@ -36,37 +36,6 @@ struct stat { #define STAT_HAVE_NSEC -struct hpux_stat64 { - unsigned int st_dev; /* dev_t is 32 bits on parisc */ - unsigned int st_ino; /* 32 bits */ - unsigned short st_mode; /* 16 bits */ - unsigned short st_nlink; /* 16 bits */ - unsigned short st_reserved1; /* old st_uid */ - unsigned short st_reserved2; /* old st_gid */ - unsigned int st_rdev; - signed long long st_size; - signed int st_atime; - unsigned int st_spare1; - signed int st_mtime; - unsigned int st_spare2; - signed int st_ctime; - unsigned int st_spare3; - int st_blksize; - unsigned long long st_blocks; - unsigned int __unused1; /* ACL stuff */ - unsigned int __unused2; /* network */ - unsigned int __unused3; /* network */ - unsigned int __unused4; /* cnodes */ - unsigned short __unused5; /* netsite */ - short st_fstype; - unsigned int st_realdev; - unsigned short st_basemode; - unsigned short st_spareshort; - unsigned int st_uid; - unsigned int st_gid; - unsigned int st_spare4[3]; -}; - /* This is the struct that 32-bit userspace applications are expecting. * How 64-bit apps are going to be compiled, I have no idea. But at least * this way, we don't have a wrapper in the kernel. diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index c229427fa546..c5fec4890fdf 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -23,6 +23,7 @@ #include <linux/unistd.h> #include <linux/nodemask.h> /* for node_online_map */ #include <linux/pagemap.h> /* for release_pages and page_cache_release */ +#include <linux/compat.h> #include <asm/pgalloc.h> #include <asm/pgtable.h> @@ -30,6 +31,7 @@ #include <asm/pdc_chassis.h> #include <asm/mmzone.h> #include <asm/sections.h> +#include <asm/msgbuf.h> extern int data_start; extern void parisc_kernel_start(void); /* Kernel entry point in head.S */ @@ -590,6 +592,20 @@ unsigned long pcxl_dma_start __read_mostly; void __init mem_init(void) { + /* Do sanity checks on IPC (compat) structures */ + BUILD_BUG_ON(sizeof(struct ipc64_perm) != 48); +#ifndef CONFIG_64BIT + BUILD_BUG_ON(sizeof(struct semid64_ds) != 80); + BUILD_BUG_ON(sizeof(struct msqid64_ds) != 104); + BUILD_BUG_ON(sizeof(struct shmid64_ds) != 104); +#endif +#ifdef CONFIG_COMPAT + BUILD_BUG_ON(sizeof(struct compat_ipc64_perm) != sizeof(struct ipc64_perm)); + BUILD_BUG_ON(sizeof(struct compat_semid64_ds) != 80); + BUILD_BUG_ON(sizeof(struct compat_msqid64_ds) != 104); + BUILD_BUG_ON(sizeof(struct compat_shmid64_ds) != 104); +#endif + /* Do sanity checks on page table constants */ BUILD_BUG_ON(PTE_ENTRY_SIZE != sizeof(pte_t)); BUILD_BUG_ON(PMD_ENTRY_SIZE != sizeof(pmd_t)); diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 3bd3504a6cc7..82044f732323 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -17,6 +17,7 @@ config XTENSA select HAVE_DMA_API_DEBUG select HAVE_DMA_ATTRS select HAVE_FUNCTION_TRACER + select HAVE_FUTEX_CMPXCHG if !MMU select HAVE_IRQ_TIME_ACCOUNTING select HAVE_OPROFILE select HAVE_PERF_EVENTS @@ -397,6 +398,20 @@ config SIMDISK1_FILENAME source "mm/Kconfig" +config FORCE_MAX_ZONEORDER + int "Maximum zone order" + default "11" + help + The kernel memory allocator divides physically contiguous memory + blocks into "zones", where each zone is a power of two number of + pages. This option selects the largest power of two that the kernel + keeps in the memory allocator. If you need to allocate very large + blocks of physically contiguous memory, then you may need to + increase this value. + + This config option is actually maximum order plus one. For example, + a value of 11 means that the largest free memory block is 2^10 pages. + source "drivers/pcmcia/Kconfig" source "drivers/pci/hotplug/Kconfig" @@ -408,7 +423,7 @@ config DEFAULT_MEM_START hex "Physical address of the default memory area start" depends on PLATFORM_WANT_DEFAULT_MEM default 0x00000000 if MMU - default 0x40000000 if !MMU + default 0x60000000 if !MMU help This is a fallback start address of the default memory area, it is used when no physical memory size is passed through DTB or through diff --git a/arch/xtensa/boot/boot-elf/boot.lds.S b/arch/xtensa/boot/boot-elf/boot.lds.S index 958b33af96b7..e54f2c9df63a 100644 --- a/arch/xtensa/boot/boot-elf/boot.lds.S +++ b/arch/xtensa/boot/boot-elf/boot.lds.S @@ -40,17 +40,4 @@ SECTIONS *(.bss) __bss_end = .; } - -#ifdef CONFIG_MMU - /* - * This is a remapped copy of the Reset Vector Code. - * It keeps gdb in sync with the PC after switching - * to the temporary mapping used while setting up - * the V2 MMU mappings for Linux. - */ - .ResetVector.remapped_text 0x46000000 (INFO): - { - *(.ResetVector.remapped_text) - } -#endif } diff --git a/arch/xtensa/boot/boot-elf/bootstrap.S b/arch/xtensa/boot/boot-elf/bootstrap.S index 9341a5750694..e6bf313613cf 100644 --- a/arch/xtensa/boot/boot-elf/bootstrap.S +++ b/arch/xtensa/boot/boot-elf/bootstrap.S @@ -58,8 +58,6 @@ _SetupMMU: wsr a0, ps rsync - Offset = _SetupMMU - _ResetVector - #ifndef CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX initialize_mmu #endif @@ -74,29 +72,3 @@ reset: movi a3, 0 movi a4, 0 jx a0 - -#ifdef CONFIG_MMU - .align 4 - - .section .ResetVector.remapped_text, "x" - .global _RemappedResetVector - - /* Do org before literals */ - .org 0 - -_RemappedResetVector: - .begin no-absolute-literals - .literal_position - - _j _RemappedSetupMMU - - /* Position Remapped code at the same location as the original code */ - . = _RemappedResetVector + Offset - -_RemappedSetupMMU: -#ifndef CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX - initialize_mmu -#endif - - .end no-absolute-literals -#endif diff --git a/arch/xtensa/boot/dts/kc705_nommu.dts b/arch/xtensa/boot/dts/kc705_nommu.dts new file mode 100644 index 000000000000..65f3d741b964 --- /dev/null +++ b/arch/xtensa/boot/dts/kc705_nommu.dts @@ -0,0 +1,17 @@ +/dts-v1/; +/include/ "xtfpga.dtsi" +/include/ "xtfpga-flash-128m.dtsi" + +/ { + compatible = "cdns,xtensa-kc705"; + chosen { + bootargs = "earlycon=uart8250,mmio32,0x9d050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug"; + }; + memory@0 { + device_type = "memory"; + reg = <0x60000000 0x10000000>; + }; + soc { + ranges = <0x00000000 0x90000000 0x10000000>; + }; +}; diff --git a/arch/xtensa/configs/nommu_kc705_defconfig b/arch/xtensa/configs/nommu_kc705_defconfig new file mode 100644 index 000000000000..337d5ba2d285 --- /dev/null +++ b/arch/xtensa/configs/nommu_kc705_defconfig @@ -0,0 +1,131 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_FHANDLE=y +CONFIG_IRQ_DOMAIN_DEBUG=y +CONFIG_NO_HZ_IDLE=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_IRQ_TIME_ACCOUNTING=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_CGROUP_DEBUG=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CPUSETS=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_MEMCG=y +CONFIG_NAMESPACES=y +CONFIG_SCHED_AUTOGROUP=y +CONFIG_RELAY=y +CONFIG_BLK_DEV_INITRD=y +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set +# CONFIG_RD_XZ is not set +# CONFIG_RD_LZO is not set +# CONFIG_RD_LZ4 is not set +CONFIG_EXPERT=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS_ALL=y +CONFIG_PERF_EVENTS=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +CONFIG_XTENSA_VARIANT_CUSTOM=y +CONFIG_XTENSA_VARIANT_CUSTOM_NAME="de212" +# CONFIG_XTENSA_VARIANT_MMU is not set +CONFIG_XTENSA_UNALIGNED_USER=y +CONFIG_PREEMPT=y +# CONFIG_PCI is not set +CONFIG_XTENSA_PLATFORM_XTFPGA=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="earlycon=uart8250,mmio32,0x9d050020,115200n8 console=ttyS0,115200n8 ip=dhcp root=/dev/nfs rw debug" +CONFIG_USE_OF=y +CONFIG_BUILTIN_DTB="kc705_nommu" +CONFIG_DEFAULT_MEM_SIZE=0x10000000 +CONFIG_BINFMT_FLAT=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +# CONFIG_IPV6 is not set +CONFIG_NETFILTER=y +# CONFIG_WIRELESS is not set +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +# CONFIG_STANDALONE is not set +CONFIG_MTD=y +CONFIG_MTD_CFI=y +CONFIG_MTD_JEDECPROBE=y +CONFIG_MTD_CFI_INTELEXT=y +CONFIG_MTD_CFI_AMDSTD=y +CONFIG_MTD_CFI_STAA=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_NETDEVICES=y +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_BROADCOM is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +# CONFIG_NET_VENDOR_MICREL is not set +# CONFIG_NET_VENDOR_NATSEMI is not set +# CONFIG_NET_VENDOR_SEEQ is not set +# CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WIZNET is not set +CONFIG_MARVELL_PHY=y +# CONFIG_WLAN is not set +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +CONFIG_SERIAL_8250=y +# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_HW_RANDOM=y +# CONFIG_HWMON is not set +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_NOWAYOUT=y +CONFIG_SOFT_WATCHDOG=y +# CONFIG_VGA_CONSOLE is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_EXT3_FS=y +CONFIG_EXT4_FS=y +CONFIG_FANOTIFY=y +CONFIG_VFAT_FS=y +CONFIG_JFFS2_FS=y +CONFIG_NFS_FS=y +CONFIG_NFS_V4=y +CONFIG_NFS_SWAP=y +CONFIG_ROOT_NFS=y +CONFIG_SUNRPC_DEBUG=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +CONFIG_PRINTK_TIME=y +CONFIG_DYNAMIC_DEBUG=y +CONFIG_DEBUG_INFO=y +# CONFIG_FRAME_POINTER is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_VM=y +CONFIG_DEBUG_NOMMU_REGIONS=y +CONFIG_DEBUG_SHIRQ=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_SCHEDSTATS=y +CONFIG_TIMER_STATS=y +CONFIG_DEBUG_RT_MUTEXES=y +CONFIG_DEBUG_SPINLOCK=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_STACKTRACE=y +# CONFIG_RCU_CPU_STALL_INFO is not set +CONFIG_RCU_TRACE=y +# CONFIG_FTRACE is not set +# CONFIG_LD_NO_RELAX is not set +# CONFIG_CRYPTO_ECHAINIV is not set +CONFIG_CRYPTO_ANSI_CPRNG=y diff --git a/arch/xtensa/include/asm/asmmacro.h b/arch/xtensa/include/asm/asmmacro.h index 755320f6e0bc..746dcc8b5abc 100644 --- a/arch/xtensa/include/asm/asmmacro.h +++ b/arch/xtensa/include/asm/asmmacro.h @@ -35,9 +35,10 @@ * __loop as * restart loop. 'as' register must not have been modified! * - * __endla ar, at, incr + * __endla ar, as, incr * ar start address (modified) - * as scratch register used by macro + * as scratch register used by __loops/__loopi macros or + * end address used by __loopt macro * inc increment */ @@ -97,7 +98,7 @@ .endm /* - * loop from ar to ax + * loop from ar to as */ .macro __loopt ar, as, at, incr_log2 diff --git a/arch/xtensa/include/asm/cacheasm.h b/arch/xtensa/include/asm/cacheasm.h index 60e18773ecb8..e0f9e1109c83 100644 --- a/arch/xtensa/include/asm/cacheasm.h +++ b/arch/xtensa/include/asm/cacheasm.h @@ -73,7 +73,9 @@ .macro ___unlock_dcache_all ar at +#if XCHAL_DCACHE_SIZE __loop_cache_all \ar \at diu XCHAL_DCACHE_SIZE XCHAL_DCACHE_LINEWIDTH +#endif .endm @@ -90,30 +92,38 @@ .macro ___flush_invalidate_dcache_all ar at +#if XCHAL_DCACHE_SIZE __loop_cache_all \ar \at diwbi XCHAL_DCACHE_SIZE XCHAL_DCACHE_LINEWIDTH +#endif .endm .macro ___flush_dcache_all ar at +#if XCHAL_DCACHE_SIZE __loop_cache_all \ar \at diwb XCHAL_DCACHE_SIZE XCHAL_DCACHE_LINEWIDTH +#endif .endm .macro ___invalidate_dcache_all ar at +#if XCHAL_DCACHE_SIZE __loop_cache_all \ar \at dii __stringify(DCACHE_WAY_SIZE) \ XCHAL_DCACHE_LINEWIDTH +#endif .endm .macro ___invalidate_icache_all ar at +#if XCHAL_ICACHE_SIZE __loop_cache_all \ar \at iii __stringify(ICACHE_WAY_SIZE) \ XCHAL_ICACHE_LINEWIDTH +#endif .endm @@ -121,28 +131,36 @@ .macro ___flush_invalidate_dcache_range ar as at +#if XCHAL_DCACHE_SIZE __loop_cache_range \ar \as \at dhwbi XCHAL_DCACHE_LINEWIDTH +#endif .endm .macro ___flush_dcache_range ar as at +#if XCHAL_DCACHE_SIZE __loop_cache_range \ar \as \at dhwb XCHAL_DCACHE_LINEWIDTH +#endif .endm .macro ___invalidate_dcache_range ar as at +#if XCHAL_DCACHE_SIZE __loop_cache_range \ar \as \at dhi XCHAL_DCACHE_LINEWIDTH +#endif .endm .macro ___invalidate_icache_range ar as at +#if XCHAL_ICACHE_SIZE __loop_cache_range \ar \as \at ihi XCHAL_ICACHE_LINEWIDTH +#endif .endm @@ -150,27 +168,35 @@ .macro ___flush_invalidate_dcache_page ar as +#if XCHAL_DCACHE_SIZE __loop_cache_page \ar \as dhwbi XCHAL_DCACHE_LINEWIDTH +#endif .endm .macro ___flush_dcache_page ar as +#if XCHAL_DCACHE_SIZE __loop_cache_page \ar \as dhwb XCHAL_DCACHE_LINEWIDTH +#endif .endm .macro ___invalidate_dcache_page ar as +#if XCHAL_DCACHE_SIZE __loop_cache_page \ar \as dhi XCHAL_DCACHE_LINEWIDTH +#endif .endm .macro ___invalidate_icache_page ar as +#if XCHAL_ICACHE_SIZE __loop_cache_page \ar \as ihi XCHAL_ICACHE_LINEWIDTH +#endif .endm diff --git a/arch/xtensa/include/asm/cacheflush.h b/arch/xtensa/include/asm/cacheflush.h index 5f67ace97b32..397d6a1a4224 100644 --- a/arch/xtensa/include/asm/cacheflush.h +++ b/arch/xtensa/include/asm/cacheflush.h @@ -55,9 +55,14 @@ extern void __flush_dcache_range(unsigned long, unsigned long); extern void __flush_invalidate_dcache_page(unsigned long); extern void __flush_invalidate_dcache_range(unsigned long, unsigned long); #else -# define __flush_dcache_range(p,s) do { } while(0) -# define __flush_dcache_page(p) do { } while(0) -# define __flush_invalidate_dcache_page(p) __invalidate_dcache_page(p) +static inline void __flush_dcache_page(unsigned long va) +{ +} +static inline void __flush_dcache_range(unsigned long va, unsigned long sz) +{ +} +# define __flush_invalidate_dcache_all() __invalidate_dcache_all() +# define __flush_invalidate_dcache_page(p) __invalidate_dcache_page(p) # define __flush_invalidate_dcache_range(p,s) __invalidate_dcache_range(p,s) #endif @@ -174,99 +179,4 @@ extern void copy_from_user_page(struct vm_area_struct*, struct page*, #endif -#define XTENSA_CACHEBLK_LOG2 29 -#define XTENSA_CACHEBLK_SIZE (1 << XTENSA_CACHEBLK_LOG2) -#define XTENSA_CACHEBLK_MASK (7 << XTENSA_CACHEBLK_LOG2) - -#if XCHAL_HAVE_CACHEATTR -static inline u32 xtensa_get_cacheattr(void) -{ - u32 r; - asm volatile(" rsr %0, cacheattr" : "=a"(r)); - return r; -} - -static inline u32 xtensa_get_dtlb1(u32 addr) -{ - u32 r = addr & XTENSA_CACHEBLK_MASK; - return r | ((xtensa_get_cacheattr() >> (r >> (XTENSA_CACHEBLK_LOG2-2))) - & 0xF); -} -#else -static inline u32 xtensa_get_dtlb1(u32 addr) -{ - u32 r; - asm volatile(" rdtlb1 %0, %1" : "=a"(r) : "a"(addr)); - asm volatile(" dsync"); - return r; -} - -static inline u32 xtensa_get_cacheattr(void) -{ - u32 r = 0; - u32 a = 0; - do { - a -= XTENSA_CACHEBLK_SIZE; - r = (r << 4) | (xtensa_get_dtlb1(a) & 0xF); - } while (a); - return r; -} -#endif - -static inline int xtensa_need_flush_dma_source(u32 addr) -{ - return (xtensa_get_dtlb1(addr) & ((1 << XCHAL_CA_BITS) - 1)) >= 4; -} - -static inline int xtensa_need_invalidate_dma_destination(u32 addr) -{ - return (xtensa_get_dtlb1(addr) & ((1 << XCHAL_CA_BITS) - 1)) != 2; -} - -static inline void flush_dcache_unaligned(u32 addr, u32 size) -{ - u32 cnt; - if (size) { - cnt = (size + ((XCHAL_DCACHE_LINESIZE - 1) & addr) - + XCHAL_DCACHE_LINESIZE - 1) / XCHAL_DCACHE_LINESIZE; - while (cnt--) { - asm volatile(" dhwb %0, 0" : : "a"(addr)); - addr += XCHAL_DCACHE_LINESIZE; - } - asm volatile(" dsync"); - } -} - -static inline void invalidate_dcache_unaligned(u32 addr, u32 size) -{ - int cnt; - if (size) { - asm volatile(" dhwbi %0, 0 ;" : : "a"(addr)); - cnt = (size + ((XCHAL_DCACHE_LINESIZE - 1) & addr) - - XCHAL_DCACHE_LINESIZE - 1) / XCHAL_DCACHE_LINESIZE; - while (cnt-- > 0) { - asm volatile(" dhi %0, %1" : : "a"(addr), - "n"(XCHAL_DCACHE_LINESIZE)); - addr += XCHAL_DCACHE_LINESIZE; - } - asm volatile(" dhwbi %0, %1" : : "a"(addr), - "n"(XCHAL_DCACHE_LINESIZE)); - asm volatile(" dsync"); - } -} - -static inline void flush_invalidate_dcache_unaligned(u32 addr, u32 size) -{ - u32 cnt; - if (size) { - cnt = (size + ((XCHAL_DCACHE_LINESIZE - 1) & addr) - + XCHAL_DCACHE_LINESIZE - 1) / XCHAL_DCACHE_LINESIZE; - while (cnt--) { - asm volatile(" dhwbi %0, 0" : : "a"(addr)); - addr += XCHAL_DCACHE_LINESIZE; - } - asm volatile(" dsync"); - } -} - #endif /* _XTENSA_CACHEFLUSH_H */ diff --git a/arch/xtensa/include/asm/dma-mapping.h b/arch/xtensa/include/asm/dma-mapping.h index 4427f38b634e..66c9ba261e30 100644 --- a/arch/xtensa/include/asm/dma-mapping.h +++ b/arch/xtensa/include/asm/dma-mapping.h @@ -35,4 +35,14 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction); +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return (dma_addr_t)paddr; +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return (phys_addr_t)daddr; +} + #endif /* _XTENSA_DMA_MAPPING_H */ diff --git a/arch/xtensa/include/asm/initialize_mmu.h b/arch/xtensa/include/asm/initialize_mmu.h index e256f2270ec9..7a1e075969a3 100644 --- a/arch/xtensa/include/asm/initialize_mmu.h +++ b/arch/xtensa/include/asm/initialize_mmu.h @@ -161,7 +161,8 @@ #endif /* defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY */ -#if !defined(CONFIG_MMU) && XCHAL_HAVE_TLBS +#if !defined(CONFIG_MMU) && XCHAL_HAVE_TLBS && \ + (XCHAL_DCACHE_SIZE || XCHAL_ICACHE_SIZE) /* Enable data and instruction cache in the DEFAULT_MEMORY region * if the processor has DTLB and ITLB. */ @@ -175,14 +176,18 @@ 1: sub a9, a9, a8 2: +#if XCHAL_DCACHE_SIZE rdtlb1 a3, a5 - ritlb1 a4, a5 and a3, a3, a6 - and a4, a4, a6 or a3, a3, a7 - or a4, a4, a7 wdtlb a3, a5 +#endif +#if XCHAL_ICACHE_SIZE + ritlb1 a4, a5 + and a4, a4, a6 + or a4, a4, a7 witlb a4, a5 +#endif add a5, a5, a8 bltu a8, a9, 1b diff --git a/arch/xtensa/include/asm/io.h b/arch/xtensa/include/asm/io.h index 867840f5400f..74fed0b4e2c2 100644 --- a/arch/xtensa/include/asm/io.h +++ b/arch/xtensa/include/asm/io.h @@ -25,15 +25,6 @@ #ifdef CONFIG_MMU -#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY && defined(CONFIG_OF) -extern unsigned long xtensa_kio_paddr; - -static inline unsigned long xtensa_get_kio_paddr(void) -{ - return xtensa_kio_paddr; -} -#endif - /* * Return the virtual address for the specified bus memory. * Note that we currently don't support any address outside the KIO segment. diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index a5e929a10c20..fb02fdc5ecee 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -18,7 +18,11 @@ * We only use two ring levels, user and kernel space. */ +#ifdef CONFIG_MMU #define USER_RING 1 /* user ring level */ +#else +#define USER_RING 0 +#endif #define KERNEL_RING 0 /* kernel ring level */ /* diff --git a/arch/xtensa/include/asm/vectors.h b/arch/xtensa/include/asm/vectors.h index a46c53f36113..288c776736d3 100644 --- a/arch/xtensa/include/asm/vectors.h +++ b/arch/xtensa/include/asm/vectors.h @@ -21,13 +21,26 @@ #include <variant/core.h> #include <platform/hardware.h> +#if XCHAL_HAVE_PTP_MMU #define XCHAL_KIO_CACHED_VADDR 0xe0000000 #define XCHAL_KIO_BYPASS_VADDR 0xf0000000 #define XCHAL_KIO_DEFAULT_PADDR 0xf0000000 +#else +#define XCHAL_KIO_BYPASS_VADDR XCHAL_KIO_PADDR +#define XCHAL_KIO_DEFAULT_PADDR 0x90000000 +#endif #define XCHAL_KIO_SIZE 0x10000000 -#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY && defined(CONFIG_OF) +#if (!XCHAL_HAVE_PTP_MMU || XCHAL_HAVE_SPANNING_WAY) && defined(CONFIG_OF) #define XCHAL_KIO_PADDR xtensa_get_kio_paddr() +#ifndef __ASSEMBLY__ +extern unsigned long xtensa_kio_paddr; + +static inline unsigned long xtensa_get_kio_paddr(void) +{ + return xtensa_kio_paddr; +} +#endif #else #define XCHAL_KIO_PADDR XCHAL_KIO_DEFAULT_PADDR #endif @@ -48,6 +61,9 @@ #define LOAD_MEMORY_ADDRESS 0xD0003000 #endif +#define RESET_VECTOR1_VADDR (VIRTUAL_MEMORY_ADDRESS + \ + XCHAL_RESET_VECTOR1_PADDR) + #else /* !defined(CONFIG_MMU) */ /* MMU Not being used - Virtual == Physical */ @@ -60,6 +76,8 @@ /* Loaded just above possibly live vectors */ #define LOAD_MEMORY_ADDRESS (PLATFORM_DEFAULT_MEM_START + 0x3000) +#define RESET_VECTOR1_VADDR (XCHAL_RESET_VECTOR1_VADDR) + #endif /* CONFIG_MMU */ #define XC_VADDR(offset) (VIRTUAL_MEMORY_ADDRESS + offset) @@ -67,14 +85,6 @@ /* Used to set VECBASE register */ #define VECBASE_RESET_VADDR VIRTUAL_MEMORY_ADDRESS -#define RESET_VECTOR_VECOFS (XCHAL_RESET_VECTOR_VADDR - \ - VECBASE_RESET_VADDR) -#define RESET_VECTOR_VADDR XC_VADDR(RESET_VECTOR_VECOFS) - -#define RESET_VECTOR1_VECOFS (XCHAL_RESET_VECTOR1_VADDR - \ - VECBASE_RESET_VADDR) -#define RESET_VECTOR1_VADDR XC_VADDR(RESET_VECTOR1_VECOFS) - #if defined(XCHAL_HAVE_VECBASE) && XCHAL_HAVE_VECBASE #define USER_VECTOR_VADDR XC_VADDR(XCHAL_USER_VECOFS) diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile index 50137bc9e150..4db730290d2d 100644 --- a/arch/xtensa/kernel/Makefile +++ b/arch/xtensa/kernel/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_SMP) += smp.o mxhead.o obj-$(CONFIG_XTENSA_VARIANT_HAVE_PERF_EVENTS) += perf_event.o AFLAGS_head.o += -mtext-section-literals +AFLAGS_mxhead.o += -mtext-section-literals # In the Xtensa architecture, assembly generates literals which must always # precede the L32R instruction with a relative offset less than 256 kB. diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index 504130357597..db5c1765b413 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -367,8 +367,10 @@ common_exception: s32i a2, a1, PT_SYSCALL movi a2, 0 s32i a3, a1, PT_EXCVADDR +#if XCHAL_HAVE_LOOPS xsr a2, lcount s32i a2, a1, PT_LCOUNT +#endif /* It is now save to restore the EXC_TABLE_FIXUP variable. */ @@ -429,11 +431,12 @@ common_exception: rsync # PS.WOE => rsync => overflow /* Save lbeg, lend */ - +#if XCHAL_HAVE_LOOPS rsr a4, lbeg rsr a3, lend s32i a4, a1, PT_LBEG s32i a3, a1, PT_LEND +#endif /* Save SCOMPARE1 */ @@ -724,13 +727,14 @@ common_exception_exit: wsr a3, sar /* Restore LBEG, LEND, LCOUNT */ - +#if XCHAL_HAVE_LOOPS l32i a2, a1, PT_LBEG l32i a3, a1, PT_LEND wsr a2, lbeg l32i a2, a1, PT_LCOUNT wsr a3, lend wsr a2, lcount +#endif /* We control single stepping through the ICOUNTLEVEL register. */ diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index 15a461e2a0ed..9ed55649ac8e 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -249,7 +249,7 @@ ENTRY(_startup) __loopt a2, a3, a4, 2 s32i a0, a2, 0 - __endla a2, a4, 4 + __endla a2, a3, 4 #if XCHAL_DCACHE_IS_WRITEBACK diff --git a/arch/xtensa/kernel/mxhead.S b/arch/xtensa/kernel/mxhead.S index 77a161a112c5..9f3843742726 100644 --- a/arch/xtensa/kernel/mxhead.S +++ b/arch/xtensa/kernel/mxhead.S @@ -48,8 +48,6 @@ _SetupOCD: rsync _SetupMMU: - Offset = _SetupMMU - _SecondaryResetVector - #ifdef CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX initialize_mmu #endif @@ -62,24 +60,3 @@ _SetupMMU: jx a3 .end no-absolute-literals - - - .section .SecondaryResetVector.remapped_text, "ax" - .global _RemappedSecondaryResetVector - - .org 0 # Need to do org before literals - -_RemappedSecondaryResetVector: - .begin no-absolute-literals - .literal_position - - _j _RemappedSetupMMU - . = _RemappedSecondaryResetVector + Offset - -_RemappedSetupMMU: - -#ifdef CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX - initialize_mmu -#endif - - .end no-absolute-literals diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c index fb75ebf1463a..cd66698348ca 100644 --- a/arch/xtensa/kernel/pci-dma.c +++ b/arch/xtensa/kernel/pci-dma.c @@ -15,14 +15,15 @@ * Joe Taylor <joe@tensilica.com, joetylr@yahoo.com> */ -#include <linux/types.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/pci.h> #include <linux/gfp.h> +#include <linux/highmem.h> +#include <linux/mm.h> #include <linux/module.h> -#include <asm/io.h> +#include <linux/pci.h> +#include <linux/string.h> +#include <linux/types.h> #include <asm/cacheflush.h> +#include <asm/io.h> void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction dir) @@ -47,17 +48,36 @@ void dma_cache_sync(struct device *dev, void *vaddr, size_t size, } EXPORT_SYMBOL(dma_cache_sync); +static void do_cache_op(dma_addr_t dma_handle, size_t size, + void (*fn)(unsigned long, unsigned long)) +{ + unsigned long off = dma_handle & (PAGE_SIZE - 1); + unsigned long pfn = PFN_DOWN(dma_handle); + struct page *page = pfn_to_page(pfn); + + if (!PageHighMem(page)) + fn((unsigned long)bus_to_virt(dma_handle), size); + else + while (size > 0) { + size_t sz = min_t(size_t, size, PAGE_SIZE - off); + void *vaddr = kmap_atomic(page); + + fn((unsigned long)vaddr + off, sz); + kunmap_atomic(vaddr); + off = 0; + ++page; + size -= sz; + } +} + static void xtensa_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) { - void *vaddr; - switch (dir) { case DMA_BIDIRECTIONAL: case DMA_FROM_DEVICE: - vaddr = bus_to_virt(dma_handle); - __invalidate_dcache_range((unsigned long)vaddr, size); + do_cache_op(dma_handle, size, __invalidate_dcache_range); break; case DMA_NONE: @@ -73,13 +93,11 @@ static void xtensa_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) { - void *vaddr; - switch (dir) { case DMA_BIDIRECTIONAL: case DMA_TO_DEVICE: - vaddr = bus_to_virt(dma_handle); - __flush_dcache_range((unsigned long)vaddr, size); + if (XCHAL_DCACHE_IS_WRITEBACK) + do_cache_op(dma_handle, size, __flush_dcache_range); break; case DMA_NONE: @@ -171,7 +189,6 @@ static dma_addr_t xtensa_map_page(struct device *dev, struct page *page, { dma_addr_t dma_handle = page_to_phys(page) + offset; - BUG_ON(PageHighMem(page)); xtensa_sync_single_for_device(dev, dma_handle, size, dir); return dma_handle; } diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 28fc57ef5b86..9735691f37f1 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -190,7 +190,7 @@ static int __init parse_bootparam(const bp_tag_t* tag) #ifdef CONFIG_OF bool __initdata dt_memory_scan = false; -#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY +#if !XCHAL_HAVE_PTP_MMU || XCHAL_HAVE_SPANNING_WAY unsigned long xtensa_kio_paddr = XCHAL_KIO_DEFAULT_PADDR; EXPORT_SYMBOL(xtensa_kio_paddr); @@ -334,7 +334,10 @@ extern char _Level5InterruptVector_text_end; extern char _Level6InterruptVector_text_start; extern char _Level6InterruptVector_text_end; #endif - +#ifdef CONFIG_SMP +extern char _SecondaryResetVector_text_start; +extern char _SecondaryResetVector_text_end; +#endif #ifdef CONFIG_S32C1I_SELFTEST @@ -506,6 +509,10 @@ void __init setup_arch(char **cmdline_p) __pa(&_Level6InterruptVector_text_end), 0); #endif +#ifdef CONFIG_SMP + mem_reserve(__pa(&_SecondaryResetVector_text_start), + __pa(&_SecondaryResetVector_text_end), 0); +#endif parse_early_param(); bootmem_init(); diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S index abcdb527f18a..fc25318e75ad 100644 --- a/arch/xtensa/kernel/vectors.S +++ b/arch/xtensa/kernel/vectors.S @@ -478,6 +478,9 @@ _DoubleExceptionVector_handle_exception: ENDPROC(_DoubleExceptionVector) + .end literal_prefix + + .text /* * Fixup handler for TLB miss in double exception handler for window owerflow. * We get here with windowbase set to the window that was being spilled and @@ -587,7 +590,6 @@ ENTRY(window_overflow_restore_a0_fixup) ENDPROC(window_overflow_restore_a0_fixup) - .end literal_prefix /* * Debug interrupt vector * diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index fc1bc2ba8d5d..c417cbe4ec87 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -166,8 +166,6 @@ SECTIONS RELOCATE_ENTRY(_DebugInterruptVector_text, .DebugInterruptVector.text); #if defined(CONFIG_SMP) - RELOCATE_ENTRY(_SecondaryResetVector_literal, - .SecondaryResetVector.literal); RELOCATE_ENTRY(_SecondaryResetVector_text, .SecondaryResetVector.text); #endif @@ -282,17 +280,11 @@ SECTIONS #if defined(CONFIG_SMP) - SECTION_VECTOR (_SecondaryResetVector_literal, - .SecondaryResetVector.literal, - RESET_VECTOR1_VADDR - 4, - SIZEOF(.DoubleExceptionVector.text), - .DoubleExceptionVector.text) - SECTION_VECTOR (_SecondaryResetVector_text, .SecondaryResetVector.text, RESET_VECTOR1_VADDR, - 4, - .SecondaryResetVector.literal) + SIZEOF(.DoubleExceptionVector.text), + .DoubleExceptionVector.text) . = LOADADDR(.SecondaryResetVector.text)+SIZEOF(.SecondaryResetVector.text); @@ -306,31 +298,6 @@ SECTIONS _end = .; - /* only used by the boot loader */ - - . = ALIGN(0x10); - .bootstrap : { *(.bootstrap.literal .bootstrap.text .bootstrap.data) } - - .ResetVector.text RESET_VECTOR_VADDR : - { - *(.ResetVector.text) - } - - - /* - * This is a remapped copy of the Secondary Reset Vector Code. - * It keeps gdb in sync with the PC after switching - * to the temporary mapping used while setting up - * the V2 MMU mappings for Linux. - * - * Only debug information about this section is put in the kernel image. - */ - .SecondaryResetVector.remapped_text 0x46000000 (INFO): - { - *(.SecondaryResetVector.remapped_text) - } - - .xt.lit : { *(.xt.lit) } .xt.prop : { *(.xt.prop) } diff --git a/arch/xtensa/lib/usercopy.S b/arch/xtensa/lib/usercopy.S index ace1892a875e..7ea4dd68893e 100644 --- a/arch/xtensa/lib/usercopy.S +++ b/arch/xtensa/lib/usercopy.S @@ -222,8 +222,8 @@ __xtensa_copy_user: loopnez a7, .Loop2done #else /* !XCHAL_HAVE_LOOPS */ beqz a7, .Loop2done - slli a10, a7, 4 - add a10, a10, a3 # a10 = end of last 16B source chunk + slli a12, a7, 4 + add a12, a12, a3 # a12 = end of last 16B source chunk #endif /* !XCHAL_HAVE_LOOPS */ .Loop2: EX(l32i, a7, a3, 4, l_fixup) @@ -241,7 +241,7 @@ __xtensa_copy_user: EX(s32i, a9, a5, 12, s_fixup) addi a5, a5, 16 #if !XCHAL_HAVE_LOOPS - blt a3, a10, .Loop2 + blt a3, a12, .Loop2 #endif /* !XCHAL_HAVE_LOOPS */ .Loop2done: bbci.l a4, 3, .L12 diff --git a/arch/xtensa/platforms/iss/setup.c b/arch/xtensa/platforms/iss/setup.c index da7d18240866..391820539f0a 100644 --- a/arch/xtensa/platforms/iss/setup.c +++ b/arch/xtensa/platforms/iss/setup.c @@ -61,7 +61,9 @@ void platform_restart(void) #if XCHAL_NUM_IBREAK > 0 "wsr a2, ibreakenable\n\t" #endif +#if XCHAL_HAVE_LOOPS "wsr a2, lcount\n\t" +#endif "movi a2, 0x1f\n\t" "wsr a2, ps\n\t" "isync\n\t" diff --git a/arch/xtensa/platforms/xt2000/setup.c b/arch/xtensa/platforms/xt2000/setup.c index b90555cb8089..87678961a8c8 100644 --- a/arch/xtensa/platforms/xt2000/setup.c +++ b/arch/xtensa/platforms/xt2000/setup.c @@ -72,7 +72,9 @@ void platform_restart(void) #if XCHAL_NUM_IBREAK > 0 "wsr a2, ibreakenable\n\t" #endif +#if XCHAL_HAVE_LOOPS "wsr a2, lcount\n\t" +#endif "movi a2, 0x1f\n\t" "wsr a2, ps\n\t" "isync\n\t" diff --git a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h index 0a55bb9c5420..dbeea2b440a1 100644 --- a/arch/xtensa/platforms/xtfpga/include/platform/hardware.h +++ b/arch/xtensa/platforms/xtfpga/include/platform/hardware.h @@ -12,13 +12,15 @@ * This file contains the hardware configuration of the XTAVNET boards. */ +#include <asm/types.h> + #ifndef __XTENSA_XTAVNET_HARDWARE_H #define __XTENSA_XTAVNET_HARDWARE_H /* Memory configuration. */ -#define PLATFORM_DEFAULT_MEM_START CONFIG_DEFAULT_MEM_START -#define PLATFORM_DEFAULT_MEM_SIZE CONFIG_DEFAULT_MEM_SIZE +#define PLATFORM_DEFAULT_MEM_START __XTENSA_UL(CONFIG_DEFAULT_MEM_START) +#define PLATFORM_DEFAULT_MEM_SIZE __XTENSA_UL(CONFIG_DEFAULT_MEM_SIZE) /* Interrupt configuration. */ diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c index b4cf70e535ab..e9f65f79cf2e 100644 --- a/arch/xtensa/platforms/xtfpga/setup.c +++ b/arch/xtensa/platforms/xtfpga/setup.c @@ -63,7 +63,9 @@ void platform_restart(void) #if XCHAL_NUM_IBREAK > 0 "wsr a2, ibreakenable\n\t" #endif +#if XCHAL_HAVE_LOOPS "wsr a2, lcount\n\t" +#endif "movi a2, 0x1f\n\t" "wsr a2, ps\n\t" "isync\n\t" diff --git a/arch/xtensa/variants/de212/include/variant/core.h b/arch/xtensa/variants/de212/include/variant/core.h new file mode 100644 index 000000000000..59e91e47ef3c --- /dev/null +++ b/arch/xtensa/variants/de212/include/variant/core.h @@ -0,0 +1,594 @@ +/* + * xtensa/config/core-isa.h -- HAL definitions that are dependent on Xtensa + * processor CORE configuration + * + * See <xtensa/config/core.h>, which includes this file, for more details. + */ + +/* Xtensa processor core configuration information. + + Copyright (c) 1999-2015 Tensilica Inc. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef _XTENSA_CORE_CONFIGURATION_H +#define _XTENSA_CORE_CONFIGURATION_H + + +/**************************************************************************** + Parameters Useful for Any Code, USER or PRIVILEGED + ****************************************************************************/ + +/* + * Note: Macros of the form XCHAL_HAVE_*** have a value of 1 if the option is + * configured, and a value of 0 otherwise. These macros are always defined. + */ + + +/*---------------------------------------------------------------------- + ISA + ----------------------------------------------------------------------*/ + +#define XCHAL_HAVE_BE 0 /* big-endian byte ordering */ +#define XCHAL_HAVE_WINDOWED 1 /* windowed registers option */ +#define XCHAL_NUM_AREGS 32 /* num of physical addr regs */ +#define XCHAL_NUM_AREGS_LOG2 5 /* log2(XCHAL_NUM_AREGS) */ +#define XCHAL_MAX_INSTRUCTION_SIZE 3 /* max instr bytes (3..8) */ +#define XCHAL_HAVE_DEBUG 1 /* debug option */ +#define XCHAL_HAVE_DENSITY 1 /* 16-bit instructions */ +#define XCHAL_HAVE_LOOPS 1 /* zero-overhead loops */ +#define XCHAL_LOOP_BUFFER_SIZE 0 /* zero-ov. loop instr buffer size */ +#define XCHAL_HAVE_NSA 1 /* NSA/NSAU instructions */ +#define XCHAL_HAVE_MINMAX 1 /* MIN/MAX instructions */ +#define XCHAL_HAVE_SEXT 1 /* SEXT instruction */ +#define XCHAL_HAVE_DEPBITS 0 /* DEPBITS instruction */ +#define XCHAL_HAVE_CLAMPS 1 /* CLAMPS instruction */ +#define XCHAL_HAVE_MUL16 1 /* MUL16S/MUL16U instructions */ +#define XCHAL_HAVE_MUL32 1 /* MULL instruction */ +#define XCHAL_HAVE_MUL32_HIGH 0 /* MULUH/MULSH instructions */ +#define XCHAL_HAVE_DIV32 1 /* QUOS/QUOU/REMS/REMU instructions */ +#define XCHAL_HAVE_L32R 1 /* L32R instruction */ +#define XCHAL_HAVE_ABSOLUTE_LITERALS 0 /* non-PC-rel (extended) L32R */ +#define XCHAL_HAVE_CONST16 0 /* CONST16 instruction */ +#define XCHAL_HAVE_ADDX 1 /* ADDX#/SUBX# instructions */ +#define XCHAL_HAVE_WIDE_BRANCHES 0 /* B*.W18 or B*.W15 instr's */ +#define XCHAL_HAVE_PREDICTED_BRANCHES 0 /* B[EQ/EQZ/NE/NEZ]T instr's */ +#define XCHAL_HAVE_CALL4AND12 1 /* (obsolete option) */ +#define XCHAL_HAVE_ABS 1 /* ABS instruction */ +/*#define XCHAL_HAVE_POPC 0*/ /* POPC instruction */ +/*#define XCHAL_HAVE_CRC 0*/ /* CRC instruction */ +#define XCHAL_HAVE_RELEASE_SYNC 1 /* L32AI/S32RI instructions */ +#define XCHAL_HAVE_S32C1I 1 /* S32C1I instruction */ +#define XCHAL_HAVE_SPECULATION 0 /* speculation */ +#define XCHAL_HAVE_FULL_RESET 1 /* all regs/state reset */ +#define XCHAL_NUM_CONTEXTS 1 /* */ +#define XCHAL_NUM_MISC_REGS 2 /* num of scratch regs (0..4) */ +#define XCHAL_HAVE_TAP_MASTER 0 /* JTAG TAP control instr's */ +#define XCHAL_HAVE_PRID 1 /* processor ID register */ +#define XCHAL_HAVE_EXTERN_REGS 1 /* WER/RER instructions */ +#define XCHAL_HAVE_MX 0 /* MX core (Tensilica internal) */ +#define XCHAL_HAVE_MP_INTERRUPTS 0 /* interrupt distributor port */ +#define XCHAL_HAVE_MP_RUNSTALL 0 /* core RunStall control port */ +#define XCHAL_HAVE_PSO 0 /* Power Shut-Off */ +#define XCHAL_HAVE_PSO_CDM 0 /* core/debug/mem pwr domains */ +#define XCHAL_HAVE_PSO_FULL_RETENTION 0 /* all regs preserved on PSO */ +#define XCHAL_HAVE_THREADPTR 0 /* THREADPTR register */ +#define XCHAL_HAVE_BOOLEANS 0 /* boolean registers */ +#define XCHAL_HAVE_CP 0 /* CPENABLE reg (coprocessor) */ +#define XCHAL_CP_MAXCFG 0 /* max allowed cp id plus one */ +#define XCHAL_HAVE_MAC16 1 /* MAC16 package */ + +#define XCHAL_HAVE_FUSION 0 /* Fusion*/ +#define XCHAL_HAVE_FUSION_FP 0 /* Fusion FP option */ +#define XCHAL_HAVE_FUSION_LOW_POWER 0 /* Fusion Low Power option */ +#define XCHAL_HAVE_FUSION_AES 0 /* Fusion BLE/Wifi AES-128 CCM option */ +#define XCHAL_HAVE_FUSION_CONVENC 0 /* Fusion Conv Encode option */ +#define XCHAL_HAVE_FUSION_LFSR_CRC 0 /* Fusion LFSR-CRC option */ +#define XCHAL_HAVE_FUSION_BITOPS 0 /* Fusion Bit Operations Support option */ +#define XCHAL_HAVE_FUSION_AVS 0 /* Fusion AVS option */ +#define XCHAL_HAVE_FUSION_16BIT_BASEBAND 0 /* Fusion 16-bit Baseband option */ +#define XCHAL_HAVE_HIFIPRO 0 /* HiFiPro Audio Engine pkg */ +#define XCHAL_HAVE_HIFI4 0 /* HiFi4 Audio Engine pkg */ +#define XCHAL_HAVE_HIFI4_VFPU 0 /* HiFi4 Audio Engine VFPU option */ +#define XCHAL_HAVE_HIFI3 0 /* HiFi3 Audio Engine pkg */ +#define XCHAL_HAVE_HIFI3_VFPU 0 /* HiFi3 Audio Engine VFPU option */ +#define XCHAL_HAVE_HIFI2 0 /* HiFi2 Audio Engine pkg */ +#define XCHAL_HAVE_HIFI2EP 0 /* HiFi2EP */ +#define XCHAL_HAVE_HIFI_MINI 0 + + +#define XCHAL_HAVE_VECTORFPU2005 0 /* vector or user floating-point pkg */ +#define XCHAL_HAVE_USER_DPFPU 0 /* user DP floating-point pkg */ +#define XCHAL_HAVE_USER_SPFPU 0 /* user DP floating-point pkg */ +#define XCHAL_HAVE_FP 0 /* single prec floating point */ +#define XCHAL_HAVE_FP_DIV 0 /* FP with DIV instructions */ +#define XCHAL_HAVE_FP_RECIP 0 /* FP with RECIP instructions */ +#define XCHAL_HAVE_FP_SQRT 0 /* FP with SQRT instructions */ +#define XCHAL_HAVE_FP_RSQRT 0 /* FP with RSQRT instructions */ +#define XCHAL_HAVE_DFP 0 /* double precision FP pkg */ +#define XCHAL_HAVE_DFP_DIV 0 /* DFP with DIV instructions */ +#define XCHAL_HAVE_DFP_RECIP 0 /* DFP with RECIP instructions*/ +#define XCHAL_HAVE_DFP_SQRT 0 /* DFP with SQRT instructions */ +#define XCHAL_HAVE_DFP_RSQRT 0 /* DFP with RSQRT instructions*/ +#define XCHAL_HAVE_DFP_ACCEL 0 /* double precision FP acceleration pkg */ +#define XCHAL_HAVE_DFP_accel XCHAL_HAVE_DFP_ACCEL /* for backward compatibility */ + +#define XCHAL_HAVE_DFPU_SINGLE_ONLY 0 /* DFPU Coprocessor, single precision only */ +#define XCHAL_HAVE_DFPU_SINGLE_DOUBLE 0 /* DFPU Coprocessor, single and double precision */ +#define XCHAL_HAVE_VECTRA1 0 /* Vectra I pkg */ +#define XCHAL_HAVE_VECTRALX 0 /* Vectra LX pkg */ +#define XCHAL_HAVE_PDX4 0 /* PDX4 */ +#define XCHAL_HAVE_CONNXD2 0 /* ConnX D2 pkg */ +#define XCHAL_HAVE_CONNXD2_DUALLSFLIX 0 /* ConnX D2 & Dual LoadStore Flix */ +#define XCHAL_HAVE_BBE16 0 /* ConnX BBE16 pkg */ +#define XCHAL_HAVE_BBE16_RSQRT 0 /* BBE16 & vector recip sqrt */ +#define XCHAL_HAVE_BBE16_VECDIV 0 /* BBE16 & vector divide */ +#define XCHAL_HAVE_BBE16_DESPREAD 0 /* BBE16 & despread */ +#define XCHAL_HAVE_BBENEP 0 /* ConnX BBENEP pkgs */ +#define XCHAL_HAVE_BSP3 0 /* ConnX BSP3 pkg */ +#define XCHAL_HAVE_BSP3_TRANSPOSE 0 /* BSP3 & transpose32x32 */ +#define XCHAL_HAVE_SSP16 0 /* ConnX SSP16 pkg */ +#define XCHAL_HAVE_SSP16_VITERBI 0 /* SSP16 & viterbi */ +#define XCHAL_HAVE_TURBO16 0 /* ConnX Turbo16 pkg */ +#define XCHAL_HAVE_BBP16 0 /* ConnX BBP16 pkg */ +#define XCHAL_HAVE_FLIX3 0 /* basic 3-way FLIX option */ +#define XCHAL_HAVE_GRIVPEP 0 /* GRIVPEP is General Release of IVPEP */ +#define XCHAL_HAVE_GRIVPEP_HISTOGRAM 0 /* Histogram option on GRIVPEP */ + + +/*---------------------------------------------------------------------- + MISC + ----------------------------------------------------------------------*/ + +#define XCHAL_NUM_LOADSTORE_UNITS 1 /* load/store units */ +#define XCHAL_NUM_WRITEBUFFER_ENTRIES 8 /* size of write buffer */ +#define XCHAL_INST_FETCH_WIDTH 4 /* instr-fetch width in bytes */ +#define XCHAL_DATA_WIDTH 4 /* data width in bytes */ +#define XCHAL_DATA_PIPE_DELAY 1 /* d-side pipeline delay + (1 = 5-stage, 2 = 7-stage) */ +#define XCHAL_CLOCK_GATING_GLOBAL 0 /* global clock gating */ +#define XCHAL_CLOCK_GATING_FUNCUNIT 0 /* funct. unit clock gating */ +/* In T1050, applies to selected core load and store instructions (see ISA): */ +#define XCHAL_UNALIGNED_LOAD_EXCEPTION 1 /* unaligned loads cause exc. */ +#define XCHAL_UNALIGNED_STORE_EXCEPTION 1 /* unaligned stores cause exc.*/ +#define XCHAL_UNALIGNED_LOAD_HW 0 /* unaligned loads work in hw */ +#define XCHAL_UNALIGNED_STORE_HW 0 /* unaligned stores work in hw*/ + +#define XCHAL_SW_VERSION 1100002 /* sw version of this header */ + +#define XCHAL_CORE_ID "de212" /* alphanum core name + (CoreID) set in the Xtensa + Processor Generator */ + +#define XCHAL_BUILD_UNIQUE_ID 0x0005A985 /* 22-bit sw build ID */ + +/* + * These definitions describe the hardware targeted by this software. + */ +#define XCHAL_HW_CONFIGID0 0xC283DFFE /* ConfigID hi 32 bits*/ +#define XCHAL_HW_CONFIGID1 0x1C85A985 /* ConfigID lo 32 bits*/ +#define XCHAL_HW_VERSION_NAME "LX6.0.2" /* full version name */ +#define XCHAL_HW_VERSION_MAJOR 2600 /* major ver# of targeted hw */ +#define XCHAL_HW_VERSION_MINOR 2 /* minor ver# of targeted hw */ +#define XCHAL_HW_VERSION 260002 /* major*100+minor */ +#define XCHAL_HW_REL_LX6 1 +#define XCHAL_HW_REL_LX6_0 1 +#define XCHAL_HW_REL_LX6_0_2 1 +#define XCHAL_HW_CONFIGID_RELIABLE 1 +/* If software targets a *range* of hardware versions, these are the bounds: */ +#define XCHAL_HW_MIN_VERSION_MAJOR 2600 /* major v of earliest tgt hw */ +#define XCHAL_HW_MIN_VERSION_MINOR 2 /* minor v of earliest tgt hw */ +#define XCHAL_HW_MIN_VERSION 260002 /* earliest targeted hw */ +#define XCHAL_HW_MAX_VERSION_MAJOR 2600 /* major v of latest tgt hw */ +#define XCHAL_HW_MAX_VERSION_MINOR 2 /* minor v of latest tgt hw */ +#define XCHAL_HW_MAX_VERSION 260002 /* latest targeted hw */ + + +/*---------------------------------------------------------------------- + CACHE + ----------------------------------------------------------------------*/ + +#define XCHAL_ICACHE_LINESIZE 32 /* I-cache line size in bytes */ +#define XCHAL_DCACHE_LINESIZE 32 /* D-cache line size in bytes */ +#define XCHAL_ICACHE_LINEWIDTH 5 /* log2(I line size in bytes) */ +#define XCHAL_DCACHE_LINEWIDTH 5 /* log2(D line size in bytes) */ + +#define XCHAL_ICACHE_SIZE 8192 /* I-cache size in bytes or 0 */ +#define XCHAL_DCACHE_SIZE 8192 /* D-cache size in bytes or 0 */ + +#define XCHAL_DCACHE_IS_WRITEBACK 1 /* writeback feature */ +#define XCHAL_DCACHE_IS_COHERENT 0 /* MP coherence feature */ + +#define XCHAL_HAVE_PREFETCH 0 /* PREFCTL register */ +#define XCHAL_HAVE_PREFETCH_L1 0 /* prefetch to L1 dcache */ +#define XCHAL_PREFETCH_CASTOUT_LINES 0 /* dcache pref. castout bufsz */ +#define XCHAL_PREFETCH_ENTRIES 0 /* cache prefetch entries */ +#define XCHAL_PREFETCH_BLOCK_ENTRIES 0 /* prefetch block streams */ +#define XCHAL_HAVE_CACHE_BLOCKOPS 0 /* block prefetch for caches */ +#define XCHAL_HAVE_ICACHE_TEST 1 /* Icache test instructions */ +#define XCHAL_HAVE_DCACHE_TEST 1 /* Dcache test instructions */ +#define XCHAL_HAVE_ICACHE_DYN_WAYS 0 /* Icache dynamic way support */ +#define XCHAL_HAVE_DCACHE_DYN_WAYS 0 /* Dcache dynamic way support */ + + + + +/**************************************************************************** + Parameters Useful for PRIVILEGED (Supervisory or Non-Virtualized) Code + ****************************************************************************/ + + +#ifndef XTENSA_HAL_NON_PRIVILEGED_ONLY + +/*---------------------------------------------------------------------- + CACHE + ----------------------------------------------------------------------*/ + +#define XCHAL_HAVE_PIF 1 /* any outbound PIF present */ + +/* If present, cache size in bytes == (ways * 2^(linewidth + setwidth)). */ + +/* Number of cache sets in log2(lines per way): */ +#define XCHAL_ICACHE_SETWIDTH 7 +#define XCHAL_DCACHE_SETWIDTH 7 + +/* Cache set associativity (number of ways): */ +#define XCHAL_ICACHE_WAYS 2 +#define XCHAL_DCACHE_WAYS 2 + +/* Cache features: */ +#define XCHAL_ICACHE_LINE_LOCKABLE 1 +#define XCHAL_DCACHE_LINE_LOCKABLE 1 +#define XCHAL_ICACHE_ECC_PARITY 0 +#define XCHAL_DCACHE_ECC_PARITY 0 + +/* Cache access size in bytes (affects operation of SICW instruction): */ +#define XCHAL_ICACHE_ACCESS_SIZE 4 +#define XCHAL_DCACHE_ACCESS_SIZE 4 + +#define XCHAL_DCACHE_BANKS 1 /* number of banks */ + +/* Number of encoded cache attr bits (see <xtensa/hal.h> for decoded bits): */ +#define XCHAL_CA_BITS 4 + +/* Whether MEMCTL register has anything useful */ +#define XCHAL_USE_MEMCTL (((XCHAL_LOOP_BUFFER_SIZE > 0) || \ + XCHAL_DCACHE_IS_COHERENT || \ + XCHAL_HAVE_ICACHE_DYN_WAYS || \ + XCHAL_HAVE_DCACHE_DYN_WAYS) && \ + (XCHAL_HW_MIN_VERSION >= XTENSA_HWVERSION_RE_2012_0)) + + +/*---------------------------------------------------------------------- + INTERNAL I/D RAM/ROMs and XLMI + ----------------------------------------------------------------------*/ + +#define XCHAL_NUM_INSTROM 0 /* number of core instr. ROMs */ +#define XCHAL_NUM_INSTRAM 1 /* number of core instr. RAMs */ +#define XCHAL_NUM_DATAROM 0 /* number of core data ROMs */ +#define XCHAL_NUM_DATARAM 1 /* number of core data RAMs */ +#define XCHAL_NUM_URAM 0 /* number of core unified RAMs*/ +#define XCHAL_NUM_XLMI 1 /* number of core XLMI ports */ + +/* Instruction RAM 0: */ +#define XCHAL_INSTRAM0_VADDR 0x40000000 /* virtual address */ +#define XCHAL_INSTRAM0_PADDR 0x40000000 /* physical address */ +#define XCHAL_INSTRAM0_SIZE 131072 /* size in bytes */ +#define XCHAL_INSTRAM0_ECC_PARITY 0 /* ECC/parity type, 0=none */ + +/* Data RAM 0: */ +#define XCHAL_DATARAM0_VADDR 0x3FFE0000 /* virtual address */ +#define XCHAL_DATARAM0_PADDR 0x3FFE0000 /* physical address */ +#define XCHAL_DATARAM0_SIZE 131072 /* size in bytes */ +#define XCHAL_DATARAM0_ECC_PARITY 0 /* ECC/parity type, 0=none */ +#define XCHAL_DATARAM0_BANKS 1 /* number of banks */ + +/* XLMI Port 0: */ +#define XCHAL_XLMI0_VADDR 0x3FFC0000 /* virtual address */ +#define XCHAL_XLMI0_PADDR 0x3FFC0000 /* physical address */ +#define XCHAL_XLMI0_SIZE 131072 /* size in bytes */ +#define XCHAL_XLMI0_ECC_PARITY 0 /* ECC/parity type, 0=none */ + +#define XCHAL_HAVE_IMEM_LOADSTORE 1 /* can load/store to IROM/IRAM*/ + + +/*---------------------------------------------------------------------- + INTERRUPTS and TIMERS + ----------------------------------------------------------------------*/ + +#define XCHAL_HAVE_INTERRUPTS 1 /* interrupt option */ +#define XCHAL_HAVE_HIGHPRI_INTERRUPTS 1 /* med/high-pri. interrupts */ +#define XCHAL_HAVE_NMI 1 /* non-maskable interrupt */ +#define XCHAL_HAVE_CCOUNT 1 /* CCOUNT reg. (timer option) */ +#define XCHAL_NUM_TIMERS 3 /* number of CCOMPAREn regs */ +#define XCHAL_NUM_INTERRUPTS 22 /* number of interrupts */ +#define XCHAL_NUM_INTERRUPTS_LOG2 5 /* ceil(log2(NUM_INTERRUPTS)) */ +#define XCHAL_NUM_EXTINTERRUPTS 17 /* num of external interrupts */ +#define XCHAL_NUM_INTLEVELS 6 /* number of interrupt levels + (not including level zero) */ +#define XCHAL_EXCM_LEVEL 3 /* level masked by PS.EXCM */ + /* (always 1 in XEA1; levels 2 .. EXCM_LEVEL are "medium priority") */ + +/* Masks of interrupts at each interrupt level: */ +#define XCHAL_INTLEVEL1_MASK 0x001F80FF +#define XCHAL_INTLEVEL2_MASK 0x00000100 +#define XCHAL_INTLEVEL3_MASK 0x00200E00 +#define XCHAL_INTLEVEL4_MASK 0x00001000 +#define XCHAL_INTLEVEL5_MASK 0x00002000 +#define XCHAL_INTLEVEL6_MASK 0x00000000 +#define XCHAL_INTLEVEL7_MASK 0x00004000 + +/* Masks of interrupts at each range 1..n of interrupt levels: */ +#define XCHAL_INTLEVEL1_ANDBELOW_MASK 0x001F80FF +#define XCHAL_INTLEVEL2_ANDBELOW_MASK 0x001F81FF +#define XCHAL_INTLEVEL3_ANDBELOW_MASK 0x003F8FFF +#define XCHAL_INTLEVEL4_ANDBELOW_MASK 0x003F9FFF +#define XCHAL_INTLEVEL5_ANDBELOW_MASK 0x003FBFFF +#define XCHAL_INTLEVEL6_ANDBELOW_MASK 0x003FBFFF +#define XCHAL_INTLEVEL7_ANDBELOW_MASK 0x003FFFFF + +/* Level of each interrupt: */ +#define XCHAL_INT0_LEVEL 1 +#define XCHAL_INT1_LEVEL 1 +#define XCHAL_INT2_LEVEL 1 +#define XCHAL_INT3_LEVEL 1 +#define XCHAL_INT4_LEVEL 1 +#define XCHAL_INT5_LEVEL 1 +#define XCHAL_INT6_LEVEL 1 +#define XCHAL_INT7_LEVEL 1 +#define XCHAL_INT8_LEVEL 2 +#define XCHAL_INT9_LEVEL 3 +#define XCHAL_INT10_LEVEL 3 +#define XCHAL_INT11_LEVEL 3 +#define XCHAL_INT12_LEVEL 4 +#define XCHAL_INT13_LEVEL 5 +#define XCHAL_INT14_LEVEL 7 +#define XCHAL_INT15_LEVEL 1 +#define XCHAL_INT16_LEVEL 1 +#define XCHAL_INT17_LEVEL 1 +#define XCHAL_INT18_LEVEL 1 +#define XCHAL_INT19_LEVEL 1 +#define XCHAL_INT20_LEVEL 1 +#define XCHAL_INT21_LEVEL 3 +#define XCHAL_DEBUGLEVEL 6 /* debug interrupt level */ +#define XCHAL_HAVE_DEBUG_EXTERN_INT 1 /* OCD external db interrupt */ +#define XCHAL_NMILEVEL 7 /* NMI "level" (for use with + EXCSAVE/EPS/EPC_n, RFI n) */ + +/* Type of each interrupt: */ +#define XCHAL_INT0_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT1_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT2_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT3_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT4_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT5_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT6_TYPE XTHAL_INTTYPE_TIMER +#define XCHAL_INT7_TYPE XTHAL_INTTYPE_SOFTWARE +#define XCHAL_INT8_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT9_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT10_TYPE XTHAL_INTTYPE_TIMER +#define XCHAL_INT11_TYPE XTHAL_INTTYPE_SOFTWARE +#define XCHAL_INT12_TYPE XTHAL_INTTYPE_EXTERN_LEVEL +#define XCHAL_INT13_TYPE XTHAL_INTTYPE_TIMER +#define XCHAL_INT14_TYPE XTHAL_INTTYPE_NMI +#define XCHAL_INT15_TYPE XTHAL_INTTYPE_EXTERN_EDGE +#define XCHAL_INT16_TYPE XTHAL_INTTYPE_EXTERN_EDGE +#define XCHAL_INT17_TYPE XTHAL_INTTYPE_EXTERN_EDGE +#define XCHAL_INT18_TYPE XTHAL_INTTYPE_EXTERN_EDGE +#define XCHAL_INT19_TYPE XTHAL_INTTYPE_EXTERN_EDGE +#define XCHAL_INT20_TYPE XTHAL_INTTYPE_EXTERN_EDGE +#define XCHAL_INT21_TYPE XTHAL_INTTYPE_EXTERN_EDGE + +/* Masks of interrupts for each type of interrupt: */ +#define XCHAL_INTTYPE_MASK_UNCONFIGURED 0xFFC00000 +#define XCHAL_INTTYPE_MASK_SOFTWARE 0x00000880 +#define XCHAL_INTTYPE_MASK_EXTERN_EDGE 0x003F8000 +#define XCHAL_INTTYPE_MASK_EXTERN_LEVEL 0x0000133F +#define XCHAL_INTTYPE_MASK_TIMER 0x00002440 +#define XCHAL_INTTYPE_MASK_NMI 0x00004000 +#define XCHAL_INTTYPE_MASK_WRITE_ERROR 0x00000000 +#define XCHAL_INTTYPE_MASK_PROFILING 0x00000000 + +/* Interrupt numbers assigned to specific interrupt sources: */ +#define XCHAL_TIMER0_INTERRUPT 6 /* CCOMPARE0 */ +#define XCHAL_TIMER1_INTERRUPT 10 /* CCOMPARE1 */ +#define XCHAL_TIMER2_INTERRUPT 13 /* CCOMPARE2 */ +#define XCHAL_TIMER3_INTERRUPT XTHAL_TIMER_UNCONFIGURED +#define XCHAL_NMI_INTERRUPT 14 /* non-maskable interrupt */ + +/* Interrupt numbers for levels at which only one interrupt is configured: */ +#define XCHAL_INTLEVEL2_NUM 8 +#define XCHAL_INTLEVEL4_NUM 12 +#define XCHAL_INTLEVEL5_NUM 13 +#define XCHAL_INTLEVEL7_NUM 14 +/* (There are many interrupts each at level(s) 1, 3.) */ + + +/* + * External interrupt mapping. + * These macros describe how Xtensa processor interrupt numbers + * (as numbered internally, eg. in INTERRUPT and INTENABLE registers) + * map to external BInterrupt<n> pins, for those interrupts + * configured as external (level-triggered, edge-triggered, or NMI). + * See the Xtensa processor databook for more details. + */ + +/* Core interrupt numbers mapped to each EXTERNAL BInterrupt pin number: */ +#define XCHAL_EXTINT0_NUM 0 /* (intlevel 1) */ +#define XCHAL_EXTINT1_NUM 1 /* (intlevel 1) */ +#define XCHAL_EXTINT2_NUM 2 /* (intlevel 1) */ +#define XCHAL_EXTINT3_NUM 3 /* (intlevel 1) */ +#define XCHAL_EXTINT4_NUM 4 /* (intlevel 1) */ +#define XCHAL_EXTINT5_NUM 5 /* (intlevel 1) */ +#define XCHAL_EXTINT6_NUM 8 /* (intlevel 2) */ +#define XCHAL_EXTINT7_NUM 9 /* (intlevel 3) */ +#define XCHAL_EXTINT8_NUM 12 /* (intlevel 4) */ +#define XCHAL_EXTINT9_NUM 14 /* (intlevel 7) */ +#define XCHAL_EXTINT10_NUM 15 /* (intlevel 1) */ +#define XCHAL_EXTINT11_NUM 16 /* (intlevel 1) */ +#define XCHAL_EXTINT12_NUM 17 /* (intlevel 1) */ +#define XCHAL_EXTINT13_NUM 18 /* (intlevel 1) */ +#define XCHAL_EXTINT14_NUM 19 /* (intlevel 1) */ +#define XCHAL_EXTINT15_NUM 20 /* (intlevel 1) */ +#define XCHAL_EXTINT16_NUM 21 /* (intlevel 3) */ +/* EXTERNAL BInterrupt pin numbers mapped to each core interrupt number: */ +#define XCHAL_INT0_EXTNUM 0 /* (intlevel 1) */ +#define XCHAL_INT1_EXTNUM 1 /* (intlevel 1) */ +#define XCHAL_INT2_EXTNUM 2 /* (intlevel 1) */ +#define XCHAL_INT3_EXTNUM 3 /* (intlevel 1) */ +#define XCHAL_INT4_EXTNUM 4 /* (intlevel 1) */ +#define XCHAL_INT5_EXTNUM 5 /* (intlevel 1) */ +#define XCHAL_INT8_EXTNUM 6 /* (intlevel 2) */ +#define XCHAL_INT9_EXTNUM 7 /* (intlevel 3) */ +#define XCHAL_INT12_EXTNUM 8 /* (intlevel 4) */ +#define XCHAL_INT14_EXTNUM 9 /* (intlevel 7) */ +#define XCHAL_INT15_EXTNUM 10 /* (intlevel 1) */ +#define XCHAL_INT16_EXTNUM 11 /* (intlevel 1) */ +#define XCHAL_INT17_EXTNUM 12 /* (intlevel 1) */ +#define XCHAL_INT18_EXTNUM 13 /* (intlevel 1) */ +#define XCHAL_INT19_EXTNUM 14 /* (intlevel 1) */ +#define XCHAL_INT20_EXTNUM 15 /* (intlevel 1) */ +#define XCHAL_INT21_EXTNUM 16 /* (intlevel 3) */ + + +/*---------------------------------------------------------------------- + EXCEPTIONS and VECTORS + ----------------------------------------------------------------------*/ + +#define XCHAL_XEA_VERSION 2 /* Xtensa Exception Architecture + number: 1 == XEA1 (old) + 2 == XEA2 (new) + 0 == XEAX (extern) or TX */ +#define XCHAL_HAVE_XEA1 0 /* Exception Architecture 1 */ +#define XCHAL_HAVE_XEA2 1 /* Exception Architecture 2 */ +#define XCHAL_HAVE_XEAX 0 /* External Exception Arch. */ +#define XCHAL_HAVE_EXCEPTIONS 1 /* exception option */ +#define XCHAL_HAVE_HALT 0 /* halt architecture option */ +#define XCHAL_HAVE_BOOTLOADER 0 /* boot loader (for TX) */ +#define XCHAL_HAVE_MEM_ECC_PARITY 0 /* local memory ECC/parity */ +#define XCHAL_HAVE_VECTOR_SELECT 1 /* relocatable vectors */ +#define XCHAL_HAVE_VECBASE 1 /* relocatable vectors */ +#define XCHAL_VECBASE_RESET_VADDR 0x60000000 /* VECBASE reset value */ +#define XCHAL_VECBASE_RESET_PADDR 0x60000000 +#define XCHAL_RESET_VECBASE_OVERLAP 0 + +#define XCHAL_RESET_VECTOR0_VADDR 0x50000000 +#define XCHAL_RESET_VECTOR0_PADDR 0x50000000 +#define XCHAL_RESET_VECTOR1_VADDR 0x40000400 +#define XCHAL_RESET_VECTOR1_PADDR 0x40000400 +#define XCHAL_RESET_VECTOR_VADDR 0x50000000 +#define XCHAL_RESET_VECTOR_PADDR 0x50000000 +#define XCHAL_USER_VECOFS 0x00000340 +#define XCHAL_USER_VECTOR_VADDR 0x60000340 +#define XCHAL_USER_VECTOR_PADDR 0x60000340 +#define XCHAL_KERNEL_VECOFS 0x00000300 +#define XCHAL_KERNEL_VECTOR_VADDR 0x60000300 +#define XCHAL_KERNEL_VECTOR_PADDR 0x60000300 +#define XCHAL_DOUBLEEXC_VECOFS 0x000003C0 +#define XCHAL_DOUBLEEXC_VECTOR_VADDR 0x600003C0 +#define XCHAL_DOUBLEEXC_VECTOR_PADDR 0x600003C0 +#define XCHAL_WINDOW_OF4_VECOFS 0x00000000 +#define XCHAL_WINDOW_UF4_VECOFS 0x00000040 +#define XCHAL_WINDOW_OF8_VECOFS 0x00000080 +#define XCHAL_WINDOW_UF8_VECOFS 0x000000C0 +#define XCHAL_WINDOW_OF12_VECOFS 0x00000100 +#define XCHAL_WINDOW_UF12_VECOFS 0x00000140 +#define XCHAL_WINDOW_VECTORS_VADDR 0x60000000 +#define XCHAL_WINDOW_VECTORS_PADDR 0x60000000 +#define XCHAL_INTLEVEL2_VECOFS 0x00000180 +#define XCHAL_INTLEVEL2_VECTOR_VADDR 0x60000180 +#define XCHAL_INTLEVEL2_VECTOR_PADDR 0x60000180 +#define XCHAL_INTLEVEL3_VECOFS 0x000001C0 +#define XCHAL_INTLEVEL3_VECTOR_VADDR 0x600001C0 +#define XCHAL_INTLEVEL3_VECTOR_PADDR 0x600001C0 +#define XCHAL_INTLEVEL4_VECOFS 0x00000200 +#define XCHAL_INTLEVEL4_VECTOR_VADDR 0x60000200 +#define XCHAL_INTLEVEL4_VECTOR_PADDR 0x60000200 +#define XCHAL_INTLEVEL5_VECOFS 0x00000240 +#define XCHAL_INTLEVEL5_VECTOR_VADDR 0x60000240 +#define XCHAL_INTLEVEL5_VECTOR_PADDR 0x60000240 +#define XCHAL_INTLEVEL6_VECOFS 0x00000280 +#define XCHAL_INTLEVEL6_VECTOR_VADDR 0x60000280 +#define XCHAL_INTLEVEL6_VECTOR_PADDR 0x60000280 +#define XCHAL_DEBUG_VECOFS XCHAL_INTLEVEL6_VECOFS +#define XCHAL_DEBUG_VECTOR_VADDR XCHAL_INTLEVEL6_VECTOR_VADDR +#define XCHAL_DEBUG_VECTOR_PADDR XCHAL_INTLEVEL6_VECTOR_PADDR +#define XCHAL_NMI_VECOFS 0x000002C0 +#define XCHAL_NMI_VECTOR_VADDR 0x600002C0 +#define XCHAL_NMI_VECTOR_PADDR 0x600002C0 +#define XCHAL_INTLEVEL7_VECOFS XCHAL_NMI_VECOFS +#define XCHAL_INTLEVEL7_VECTOR_VADDR XCHAL_NMI_VECTOR_VADDR +#define XCHAL_INTLEVEL7_VECTOR_PADDR XCHAL_NMI_VECTOR_PADDR + + +/*---------------------------------------------------------------------- + DEBUG MODULE + ----------------------------------------------------------------------*/ + +/* Misc */ +#define XCHAL_HAVE_DEBUG_ERI 1 /* ERI to debug module */ +#define XCHAL_HAVE_DEBUG_APB 0 /* APB to debug module */ +#define XCHAL_HAVE_DEBUG_JTAG 1 /* JTAG to debug module */ + +/* On-Chip Debug (OCD) */ +#define XCHAL_HAVE_OCD 1 /* OnChipDebug option */ +#define XCHAL_NUM_IBREAK 2 /* number of IBREAKn regs */ +#define XCHAL_NUM_DBREAK 2 /* number of DBREAKn regs */ +#define XCHAL_HAVE_OCD_DIR_ARRAY 0 /* faster OCD option (to LX4) */ +#define XCHAL_HAVE_OCD_LS32DDR 1 /* L32DDR/S32DDR (faster OCD) */ + +/* TRAX (in core) */ +#define XCHAL_HAVE_TRAX 1 /* TRAX in debug module */ +#define XCHAL_TRAX_MEM_SIZE 262144 /* TRAX memory size in bytes */ +#define XCHAL_TRAX_MEM_SHAREABLE 0 /* start/end regs; ready sig. */ +#define XCHAL_TRAX_ATB_WIDTH 0 /* ATB width (bits), 0=no ATB */ +#define XCHAL_TRAX_TIME_WIDTH 0 /* timestamp bitwidth, 0=none */ + +/* Perf counters */ +#define XCHAL_NUM_PERF_COUNTERS 0 /* performance counters */ + + +/*---------------------------------------------------------------------- + MMU + ----------------------------------------------------------------------*/ + +/* See core-matmap.h header file for more details. */ + +#define XCHAL_HAVE_TLBS 1 /* inverse of HAVE_CACHEATTR */ +#define XCHAL_HAVE_SPANNING_WAY 1 /* one way maps I+D 4GB vaddr */ +#define XCHAL_SPANNING_WAY 0 /* TLB spanning way number */ +#define XCHAL_HAVE_IDENTITY_MAP 1 /* vaddr == paddr always */ +#define XCHAL_HAVE_CACHEATTR 0 /* CACHEATTR register present */ +#define XCHAL_HAVE_MIMIC_CACHEATTR 1 /* region protection */ +#define XCHAL_HAVE_XLT_CACHEATTR 0 /* region prot. w/translation */ +#define XCHAL_HAVE_PTP_MMU 0 /* full MMU (with page table + [autorefill] and protection) + usable for an MMU-based OS */ +/* If none of the above last 4 are set, it's a custom TLB configuration. */ + +#define XCHAL_MMU_ASID_BITS 0 /* number of bits in ASIDs */ +#define XCHAL_MMU_RINGS 1 /* number of rings (1..4) */ +#define XCHAL_MMU_RING_BITS 0 /* num of bits in RING field */ + +#endif /* !XTENSA_HAL_NON_PRIVILEGED_ONLY */ + + +#endif /* _XTENSA_CORE_CONFIGURATION_H */ + diff --git a/arch/xtensa/variants/de212/include/variant/tie-asm.h b/arch/xtensa/variants/de212/include/variant/tie-asm.h new file mode 100644 index 000000000000..77755354f571 --- /dev/null +++ b/arch/xtensa/variants/de212/include/variant/tie-asm.h @@ -0,0 +1,170 @@ +/* + * tie-asm.h -- compile-time HAL assembler definitions dependent on CORE & TIE + * + * NOTE: This header file is not meant to be included directly. + */ + +/* This header file contains assembly-language definitions (assembly + macros, etc.) for this specific Xtensa processor's TIE extensions + and options. It is customized to this Xtensa processor configuration. + + Copyright (c) 1999-2015 Cadence Design Systems Inc. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef _XTENSA_CORE_TIE_ASM_H +#define _XTENSA_CORE_TIE_ASM_H + +/* Selection parameter values for save-area save/restore macros: */ +/* Option vs. TIE: */ +#define XTHAL_SAS_TIE 0x0001 /* custom extension or coprocessor */ +#define XTHAL_SAS_OPT 0x0002 /* optional (and not a coprocessor) */ +#define XTHAL_SAS_ANYOT 0x0003 /* both of the above */ +/* Whether used automatically by compiler: */ +#define XTHAL_SAS_NOCC 0x0004 /* not used by compiler w/o special opts/code */ +#define XTHAL_SAS_CC 0x0008 /* used by compiler without special opts/code */ +#define XTHAL_SAS_ANYCC 0x000C /* both of the above */ +/* ABI handling across function calls: */ +#define XTHAL_SAS_CALR 0x0010 /* caller-saved */ +#define XTHAL_SAS_CALE 0x0020 /* callee-saved */ +#define XTHAL_SAS_GLOB 0x0040 /* global across function calls (in thread) */ +#define XTHAL_SAS_ANYABI 0x0070 /* all of the above three */ +/* Misc */ +#define XTHAL_SAS_ALL 0xFFFF /* include all default NCP contents */ +#define XTHAL_SAS3(optie,ccuse,abi) ( ((optie) & XTHAL_SAS_ANYOT) \ + | ((ccuse) & XTHAL_SAS_ANYCC) \ + | ((abi) & XTHAL_SAS_ANYABI) ) + + + /* + * Macro to store all non-coprocessor (extra) custom TIE and optional state + * (not including zero-overhead loop registers). + * Required parameters: + * ptr Save area pointer address register (clobbered) + * (register must contain a 4 byte aligned address). + * at1..at4 Four temporary address registers (first XCHAL_NCP_NUM_ATMPS + * registers are clobbered, the remaining are unused). + * Optional parameters: + * continue If macro invoked as part of a larger store sequence, set to 1 + * if this is not the first in the sequence. Defaults to 0. + * ofs Offset from start of larger sequence (from value of first ptr + * in sequence) at which to store. Defaults to next available space + * (or 0 if <continue> is 0). + * select Select what category(ies) of registers to store, as a bitmask + * (see XTHAL_SAS_xxx constants). Defaults to all registers. + * alloc Select what category(ies) of registers to allocate; if any + * category is selected here that is not in <select>, space for + * the corresponding registers is skipped without doing any store. + */ + .macro xchal_ncp_store ptr at1 at2 at3 at4 continue=0 ofs=-1 select=XTHAL_SAS_ALL alloc=0 + xchal_sa_start \continue, \ofs + // Optional caller-saved registers used by default by the compiler: + .ifeq (XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_CALR) & ~(\select) + xchal_sa_align \ptr, 0, 1016, 4, 4 + rsr.ACCLO \at1 // MAC16 option + s32i \at1, \ptr, .Lxchal_ofs_+0 + rsr.ACCHI \at1 // MAC16 option + s32i \at1, \ptr, .Lxchal_ofs_+4 + .set .Lxchal_ofs_, .Lxchal_ofs_ + 8 + .elseif ((XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_CALR) & ~(\alloc)) == 0 + xchal_sa_align \ptr, 0, 1016, 4, 4 + .set .Lxchal_ofs_, .Lxchal_ofs_ + 8 + .endif + // Optional caller-saved registers not used by default by the compiler: + .ifeq (XTHAL_SAS_OPT | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\select) + xchal_sa_align \ptr, 0, 1004, 4, 4 + rsr.SCOMPARE1 \at1 // conditional store option + s32i \at1, \ptr, .Lxchal_ofs_+0 + rsr.M0 \at1 // MAC16 option + s32i \at1, \ptr, .Lxchal_ofs_+4 + rsr.M1 \at1 // MAC16 option + s32i \at1, \ptr, .Lxchal_ofs_+8 + rsr.M2 \at1 // MAC16 option + s32i \at1, \ptr, .Lxchal_ofs_+12 + rsr.M3 \at1 // MAC16 option + s32i \at1, \ptr, .Lxchal_ofs_+16 + .set .Lxchal_ofs_, .Lxchal_ofs_ + 20 + .elseif ((XTHAL_SAS_OPT | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\alloc)) == 0 + xchal_sa_align \ptr, 0, 1004, 4, 4 + .set .Lxchal_ofs_, .Lxchal_ofs_ + 20 + .endif + .endm // xchal_ncp_store + + /* + * Macro to load all non-coprocessor (extra) custom TIE and optional state + * (not including zero-overhead loop registers). + * Required parameters: + * ptr Save area pointer address register (clobbered) + * (register must contain a 4 byte aligned address). + * at1..at4 Four temporary address registers (first XCHAL_NCP_NUM_ATMPS + * registers are clobbered, the remaining are unused). + * Optional parameters: + * continue If macro invoked as part of a larger load sequence, set to 1 + * if this is not the first in the sequence. Defaults to 0. + * ofs Offset from start of larger sequence (from value of first ptr + * in sequence) at which to load. Defaults to next available space + * (or 0 if <continue> is 0). + * select Select what category(ies) of registers to load, as a bitmask + * (see XTHAL_SAS_xxx constants). Defaults to all registers. + * alloc Select what category(ies) of registers to allocate; if any + * category is selected here that is not in <select>, space for + * the corresponding registers is skipped without doing any load. + */ + .macro xchal_ncp_load ptr at1 at2 at3 at4 continue=0 ofs=-1 select=XTHAL_SAS_ALL alloc=0 + xchal_sa_start \continue, \ofs + // Optional caller-saved registers used by default by the compiler: + .ifeq (XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_CALR) & ~(\select) + xchal_sa_align \ptr, 0, 1016, 4, 4 + l32i \at1, \ptr, .Lxchal_ofs_+0 + wsr.ACCLO \at1 // MAC16 option + l32i \at1, \ptr, .Lxchal_ofs_+4 + wsr.ACCHI \at1 // MAC16 option + .set .Lxchal_ofs_, .Lxchal_ofs_ + 8 + .elseif ((XTHAL_SAS_OPT | XTHAL_SAS_CC | XTHAL_SAS_CALR) & ~(\alloc)) == 0 + xchal_sa_align \ptr, 0, 1016, 4, 4 + .set .Lxchal_ofs_, .Lxchal_ofs_ + 8 + .endif + // Optional caller-saved registers not used by default by the compiler: + .ifeq (XTHAL_SAS_OPT | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\select) + xchal_sa_align \ptr, 0, 1004, 4, 4 + l32i \at1, \ptr, .Lxchal_ofs_+0 + wsr.SCOMPARE1 \at1 // conditional store option + l32i \at1, \ptr, .Lxchal_ofs_+4 + wsr.M0 \at1 // MAC16 option + l32i \at1, \ptr, .Lxchal_ofs_+8 + wsr.M1 \at1 // MAC16 option + l32i \at1, \ptr, .Lxchal_ofs_+12 + wsr.M2 \at1 // MAC16 option + l32i \at1, \ptr, .Lxchal_ofs_+16 + wsr.M3 \at1 // MAC16 option + .set .Lxchal_ofs_, .Lxchal_ofs_ + 20 + .elseif ((XTHAL_SAS_OPT | XTHAL_SAS_NOCC | XTHAL_SAS_CALR) & ~(\alloc)) == 0 + xchal_sa_align \ptr, 0, 1004, 4, 4 + .set .Lxchal_ofs_, .Lxchal_ofs_ + 20 + .endif + .endm // xchal_ncp_load + + +#define XCHAL_NCP_NUM_ATMPS 1 + +#define XCHAL_SA_NUM_ATMPS 1 + +#endif /*_XTENSA_CORE_TIE_ASM_H*/ + diff --git a/arch/xtensa/variants/de212/include/variant/tie.h b/arch/xtensa/variants/de212/include/variant/tie.h new file mode 100644 index 000000000000..b8a061a3fa10 --- /dev/null +++ b/arch/xtensa/variants/de212/include/variant/tie.h @@ -0,0 +1,136 @@ +/* + * tie.h -- compile-time HAL definitions dependent on CORE & TIE configuration + * + * NOTE: This header file is not meant to be included directly. + */ + +/* This header file describes this specific Xtensa processor's TIE extensions + that extend basic Xtensa core functionality. It is customized to this + Xtensa processor configuration. + + Copyright (c) 1999-2015 Cadence Design Systems Inc. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef _XTENSA_CORE_TIE_H +#define _XTENSA_CORE_TIE_H + +#define XCHAL_CP_NUM 0 /* number of coprocessors */ +#define XCHAL_CP_MAX 0 /* max CP ID + 1 (0 if none) */ +#define XCHAL_CP_MASK 0x00 /* bitmask of all CPs by ID */ +#define XCHAL_CP_PORT_MASK 0x00 /* bitmask of only port CPs */ + +/* Save area for non-coprocessor optional and custom (TIE) state: */ +#define XCHAL_NCP_SA_SIZE 28 +#define XCHAL_NCP_SA_ALIGN 4 + +/* Total save area for optional and custom state (NCP + CPn): */ +#define XCHAL_TOTAL_SA_SIZE 32 /* with 16-byte align padding */ +#define XCHAL_TOTAL_SA_ALIGN 4 /* actual minimum alignment */ + +/* + * Detailed contents of save areas. + * NOTE: caller must define the XCHAL_SA_REG macro (not defined here) + * before expanding the XCHAL_xxx_SA_LIST() macros. + * + * XCHAL_SA_REG(s,ccused,abikind,kind,opt,name,galign,align,asize, + * dbnum,base,regnum,bitsz,gapsz,reset,x...) + * + * s = passed from XCHAL_*_LIST(s), eg. to select how to expand + * ccused = set if used by compiler without special options or code + * abikind = 0 (caller-saved), 1 (callee-saved), or 2 (thread-global) + * kind = 0 (special reg), 1 (TIE user reg), or 2 (TIE regfile reg) + * opt = 0 (custom TIE extension or coprocessor), or 1 (optional reg) + * name = lowercase reg name (no quotes) + * galign = group byte alignment (power of 2) (galign >= align) + * align = register byte alignment (power of 2) + * asize = allocated size in bytes (asize*8 == bitsz + gapsz + padsz) + * (not including any pad bytes required to galign this or next reg) + * dbnum = unique target number f/debug (see <xtensa-libdb-macros.h>) + * base = reg shortname w/o index (or sr=special, ur=TIE user reg) + * regnum = reg index in regfile, or special/TIE-user reg number + * bitsz = number of significant bits (regfile width, or ur/sr mask bits) + * gapsz = intervening bits, if bitsz bits not stored contiguously + * (padsz = pad bits at end [TIE regfile] or at msbits [ur,sr] of asize) + * reset = register reset value (or 0 if undefined at reset) + * x = reserved for future use (0 until then) + * + * To filter out certain registers, e.g. to expand only the non-global + * registers used by the compiler, you can do something like this: + * + * #define XCHAL_SA_REG(s,ccused,p...) SELCC##ccused(p) + * #define SELCC0(p...) + * #define SELCC1(abikind,p...) SELAK##abikind(p) + * #define SELAK0(p...) REG(p) + * #define SELAK1(p...) REG(p) + * #define SELAK2(p...) + * #define REG(kind,tie,name,galn,aln,asz,csz,dbnum,base,rnum,bsz,rst,x...) \ + * ...what you want to expand... + */ + +#define XCHAL_NCP_SA_NUM 7 +#define XCHAL_NCP_SA_LIST(s) \ + XCHAL_SA_REG(s,1,0,0,1, acclo, 4, 4, 4,0x0210, sr,16 , 32,0,0,0) \ + XCHAL_SA_REG(s,1,0,0,1, acchi, 4, 4, 4,0x0211, sr,17 , 8,0,0,0) \ + XCHAL_SA_REG(s,0,0,0,1, scompare1, 4, 4, 4,0x020C, sr,12 , 32,0,0,0) \ + XCHAL_SA_REG(s,0,0,0,1, m0, 4, 4, 4,0x0220, sr,32 , 32,0,0,0) \ + XCHAL_SA_REG(s,0,0,0,1, m1, 4, 4, 4,0x0221, sr,33 , 32,0,0,0) \ + XCHAL_SA_REG(s,0,0,0,1, m2, 4, 4, 4,0x0222, sr,34 , 32,0,0,0) \ + XCHAL_SA_REG(s,0,0,0,1, m3, 4, 4, 4,0x0223, sr,35 , 32,0,0,0) + +#define XCHAL_CP0_SA_NUM 0 +#define XCHAL_CP0_SA_LIST(s) /* empty */ + +#define XCHAL_CP1_SA_NUM 0 +#define XCHAL_CP1_SA_LIST(s) /* empty */ + +#define XCHAL_CP2_SA_NUM 0 +#define XCHAL_CP2_SA_LIST(s) /* empty */ + +#define XCHAL_CP3_SA_NUM 0 +#define XCHAL_CP3_SA_LIST(s) /* empty */ + +#define XCHAL_CP4_SA_NUM 0 +#define XCHAL_CP4_SA_LIST(s) /* empty */ + +#define XCHAL_CP5_SA_NUM 0 +#define XCHAL_CP5_SA_LIST(s) /* empty */ + +#define XCHAL_CP6_SA_NUM 0 +#define XCHAL_CP6_SA_LIST(s) /* empty */ + +#define XCHAL_CP7_SA_NUM 0 +#define XCHAL_CP7_SA_LIST(s) /* empty */ + +/* Byte length of instruction from its first nibble (op0 field), per FLIX. */ +#define XCHAL_OP0_FORMAT_LENGTHS 3,3,3,3,3,3,3,3,2,2,2,2,2,2,3,3 +/* Byte length of instruction from its first byte, per FLIX. */ +#define XCHAL_BYTE0_FORMAT_LENGTHS \ + 3,3,3,3,3,3,3,3,2,2,2,2,2,2,3,3, 3,3,3,3,3,3,3,3,2,2,2,2,2,2,3,3,\ + 3,3,3,3,3,3,3,3,2,2,2,2,2,2,3,3, 3,3,3,3,3,3,3,3,2,2,2,2,2,2,3,3,\ + 3,3,3,3,3,3,3,3,2,2,2,2,2,2,3,3, 3,3,3,3,3,3,3,3,2,2,2,2,2,2,3,3,\ + 3,3,3,3,3,3,3,3,2,2,2,2,2,2,3,3, 3,3,3,3,3,3,3,3,2,2,2,2,2,2,3,3,\ + 3,3,3,3,3,3,3,3,2,2,2,2,2,2,3,3, 3,3,3,3,3,3,3,3,2,2,2,2,2,2,3,3,\ + 3,3,3,3,3,3,3,3,2,2,2,2,2,2,3,3, 3,3,3,3,3,3,3,3,2,2,2,2,2,2,3,3,\ + 3,3,3,3,3,3,3,3,2,2,2,2,2,2,3,3, 3,3,3,3,3,3,3,3,2,2,2,2,2,2,3,3,\ + 3,3,3,3,3,3,3,3,2,2,2,2,2,2,3,3, 3,3,3,3,3,3,3,3,2,2,2,2,2,2,3,3 + +#endif /*_XTENSA_CORE_TIE_H*/ + diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 20462069e513..50d15b7b0ca9 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -404,10 +404,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) PAGE_ALIGN(current->mm->start_brk); #else - /* create a stack and brk area big enough for everyone - * - the brk heap starts at the bottom and works up - * - the stack starts at the top and works down - */ + /* create a stack area and zero-size brk area */ stack_size = (stack_size + PAGE_SIZE - 1) & PAGE_MASK; if (stack_size < PAGE_SIZE * 2) stack_size = PAGE_SIZE * 2; @@ -430,8 +427,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) current->mm->brk = current->mm->start_brk; current->mm->context.end_brk = current->mm->start_brk; - current->mm->context.end_brk += - (stack_size > PAGE_SIZE) ? (stack_size - PAGE_SIZE) : 0; current->mm->start_stack = current->mm->start_brk + stack_size; #endif diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 8d15febd0aa3..4c69c94cafd8 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -684,6 +684,9 @@ struct ext2_inode_info { struct rw_semaphore xattr_sem; #endif rwlock_t i_meta_lock; +#ifdef CONFIG_FS_DAX + struct rw_semaphore dax_sem; +#endif /* * truncate_mutex is for serialising ext2_truncate() against @@ -699,6 +702,14 @@ struct ext2_inode_info { #endif }; +#ifdef CONFIG_FS_DAX +#define dax_sem_down_write(ext2_inode) down_write(&(ext2_inode)->dax_sem) +#define dax_sem_up_write(ext2_inode) up_write(&(ext2_inode)->dax_sem) +#else +#define dax_sem_down_write(ext2_inode) +#define dax_sem_up_write(ext2_inode) +#endif + /* * Inode dynamic state flags */ diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 1982c3f11aec..11a42c5a09ae 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -27,27 +27,103 @@ #include "acl.h" #ifdef CONFIG_FS_DAX +/* + * The lock ordering for ext2 DAX fault paths is: + * + * mmap_sem (MM) + * sb_start_pagefault (vfs, freeze) + * ext2_inode_info->dax_sem + * address_space->i_mmap_rwsem or page_lock (mutually exclusive in DAX) + * ext2_inode_info->truncate_mutex + * + * The default page_lock and i_size verification done by non-DAX fault paths + * is sufficient because ext2 doesn't support hole punching. + */ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { - return dax_fault(vma, vmf, ext2_get_block, NULL); + struct inode *inode = file_inode(vma->vm_file); + struct ext2_inode_info *ei = EXT2_I(inode); + int ret; + + if (vmf->flags & FAULT_FLAG_WRITE) { + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + } + down_read(&ei->dax_sem); + + ret = __dax_fault(vma, vmf, ext2_get_block, NULL); + + up_read(&ei->dax_sem); + if (vmf->flags & FAULT_FLAG_WRITE) + sb_end_pagefault(inode->i_sb); + return ret; } static int ext2_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, unsigned int flags) { - return dax_pmd_fault(vma, addr, pmd, flags, ext2_get_block, NULL); + struct inode *inode = file_inode(vma->vm_file); + struct ext2_inode_info *ei = EXT2_I(inode); + int ret; + + if (flags & FAULT_FLAG_WRITE) { + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + } + down_read(&ei->dax_sem); + + ret = __dax_pmd_fault(vma, addr, pmd, flags, ext2_get_block, NULL); + + up_read(&ei->dax_sem); + if (flags & FAULT_FLAG_WRITE) + sb_end_pagefault(inode->i_sb); + return ret; } static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { - return dax_mkwrite(vma, vmf, ext2_get_block, NULL); + struct inode *inode = file_inode(vma->vm_file); + struct ext2_inode_info *ei = EXT2_I(inode); + int ret; + + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + down_read(&ei->dax_sem); + + ret = __dax_mkwrite(vma, vmf, ext2_get_block, NULL); + + up_read(&ei->dax_sem); + sb_end_pagefault(inode->i_sb); + return ret; +} + +static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vma->vm_file); + struct ext2_inode_info *ei = EXT2_I(inode); + int ret = VM_FAULT_NOPAGE; + loff_t size; + + sb_start_pagefault(inode->i_sb); + file_update_time(vma->vm_file); + down_read(&ei->dax_sem); + + /* check that the faulting page hasn't raced with truncate */ + size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (vmf->pgoff >= size) + ret = VM_FAULT_SIGBUS; + + up_read(&ei->dax_sem); + sb_end_pagefault(inode->i_sb); + return ret; } static const struct vm_operations_struct ext2_dax_vm_ops = { .fault = ext2_dax_fault, .pmd_fault = ext2_dax_pmd_fault, .page_mkwrite = ext2_dax_mkwrite, - .pfn_mkwrite = dax_pfn_mkwrite, + .pfn_mkwrite = ext2_dax_pfn_mkwrite, }; static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma) diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index c60a248c640c..0aa9bf6e6e53 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1085,6 +1085,7 @@ static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int de ext2_free_data(inode, p, q); } +/* dax_sem must be held when calling this function */ static void __ext2_truncate_blocks(struct inode *inode, loff_t offset) { __le32 *i_data = EXT2_I(inode)->i_data; @@ -1100,6 +1101,10 @@ static void __ext2_truncate_blocks(struct inode *inode, loff_t offset) blocksize = inode->i_sb->s_blocksize; iblock = (offset + blocksize-1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb); +#ifdef CONFIG_FS_DAX + WARN_ON(!rwsem_is_locked(&ei->dax_sem)); +#endif + n = ext2_block_to_path(inode, iblock, offsets, NULL); if (n == 0) return; @@ -1185,7 +1190,10 @@ static void ext2_truncate_blocks(struct inode *inode, loff_t offset) return; if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return; + + dax_sem_down_write(EXT2_I(inode)); __ext2_truncate_blocks(inode, offset); + dax_sem_up_write(EXT2_I(inode)); } static int ext2_setsize(struct inode *inode, loff_t newsize) @@ -1213,8 +1221,10 @@ static int ext2_setsize(struct inode *inode, loff_t newsize) if (error) return error; + dax_sem_down_write(EXT2_I(inode)); truncate_setsize(inode, newsize); __ext2_truncate_blocks(inode, newsize); + dax_sem_up_write(EXT2_I(inode)); inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; if (inode_needs_sync(inode)) { diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 900e19cf9ef6..3a71cea68420 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -192,6 +192,9 @@ static void init_once(void *foo) init_rwsem(&ei->xattr_sem); #endif mutex_init(&ei->truncate_mutex); +#ifdef CONFIG_FS_DAX + init_rwsem(&ei->dax_sem); +#endif inode_init_once(&ei->vfs_inode); } diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 487527b42d94..ad8a5b757cc7 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -388,8 +388,13 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip) */ void gfs2_dir_hash_inval(struct gfs2_inode *ip) { - __be64 *hc = ip->i_hash_cache; + __be64 *hc; + + spin_lock(&ip->i_inode.i_lock); + hc = ip->i_hash_cache; ip->i_hash_cache = NULL; + spin_unlock(&ip->i_inode.i_lock); + kvfree(hc); } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 9287a2d17b8c..5e425469f0c2 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -897,8 +897,8 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t if (!(mode & FALLOC_FL_KEEP_SIZE) && (pos + count) > inode->i_size) { i_size_write(inode, pos + count); - /* Marks the inode as dirty */ file_update_time(file); + mark_inode_dirty(inode); } return generic_write_sync(file, pos, count); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 9bd1244caf38..32e74710b1aa 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -246,8 +246,8 @@ static inline void do_error(struct gfs2_glock *gl, const int ret) */ static int do_promote(struct gfs2_glock *gl) -__releases(&gl->gl_spin) -__acquires(&gl->gl_spin) +__releases(&gl->gl_lockref.lock) +__acquires(&gl->gl_lockref.lock) { const struct gfs2_glock_operations *glops = gl->gl_ops; struct gfs2_holder *gh, *tmp; @@ -260,10 +260,10 @@ restart: if (may_grant(gl, gh)) { if (gh->gh_list.prev == &gl->gl_holders && glops->go_lock) { - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); /* FIXME: eliminate this eventually */ ret = glops->go_lock(gh); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); if (ret) { if (ret == 1) return 2; @@ -361,7 +361,7 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) unsigned state = ret & LM_OUT_ST_MASK; int rv; - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); trace_gfs2_glock_state_change(gl, state); state_change(gl, state); gh = find_first_waiter(gl); @@ -405,7 +405,7 @@ retry: pr_err("wanted %u got %u\n", gl->gl_target, state); GLOCK_BUG_ON(gl, 1); } - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); return; } @@ -414,9 +414,9 @@ retry: gfs2_demote_wake(gl); if (state != LM_ST_UNLOCKED) { if (glops->go_xmote_bh) { - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); rv = glops->go_xmote_bh(gl, gh); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); if (rv) { do_error(gl, rv); goto out; @@ -429,7 +429,7 @@ retry: out: clear_bit(GLF_LOCK, &gl->gl_flags); out_locked: - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); } /** @@ -441,8 +441,8 @@ out_locked: */ static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target) -__releases(&gl->gl_spin) -__acquires(&gl->gl_spin) +__releases(&gl->gl_lockref.lock) +__acquires(&gl->gl_lockref.lock) { const struct gfs2_glock_operations *glops = gl->gl_ops; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; @@ -464,7 +464,7 @@ __acquires(&gl->gl_spin) (gl->gl_state == LM_ST_EXCLUSIVE) || (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB))) clear_bit(GLF_BLOCKING, &gl->gl_flags); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (glops->go_sync) glops->go_sync(gl); if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) @@ -485,7 +485,7 @@ __acquires(&gl->gl_spin) gfs2_glock_put(gl); } - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); } /** @@ -513,8 +513,8 @@ static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl) */ static void run_queue(struct gfs2_glock *gl, const int nonblock) -__releases(&gl->gl_spin) -__acquires(&gl->gl_spin) +__releases(&gl->gl_lockref.lock) +__acquires(&gl->gl_lockref.lock) { struct gfs2_holder *gh = NULL; int ret; @@ -596,7 +596,7 @@ static void glock_work_func(struct work_struct *work) finish_xmote(gl, gl->gl_reply); drop_ref = 1; } - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && gl->gl_state != LM_ST_UNLOCKED && gl->gl_demote_state != LM_ST_EXCLUSIVE) { @@ -612,7 +612,7 @@ static void glock_work_func(struct work_struct *work) } } run_queue(gl, 0); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (!delay) gfs2_glock_put(gl); else { @@ -876,8 +876,8 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) */ static inline void add_to_queue(struct gfs2_holder *gh) -__releases(&gl->gl_spin) -__acquires(&gl->gl_spin) +__releases(&gl->gl_lockref.lock) +__acquires(&gl->gl_lockref.lock) { struct gfs2_glock *gl = gh->gh_gl; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; @@ -926,10 +926,10 @@ fail: do_cancel: gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); if (!(gh->gh_flags & LM_FLAG_PRIORITY)) { - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (sdp->sd_lockstruct.ls_ops->lm_cancel) sdp->sd_lockstruct.ls_ops->lm_cancel(gl); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); } return; @@ -967,7 +967,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh) if (test_bit(GLF_LRU, &gl->gl_flags)) gfs2_glock_remove_from_lru(gl); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); add_to_queue(gh); if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) && test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) { @@ -977,7 +977,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh) gl->gl_lockref.count--; } run_queue(gl, 1); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (!(gh->gh_flags & GL_ASYNC)) error = gfs2_glock_wait(gh); @@ -1010,7 +1010,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh) unsigned delay = 0; int fast_path = 0; - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); if (gh->gh_flags & GL_NOCACHE) handle_callback(gl, LM_ST_UNLOCKED, 0, false); @@ -1018,9 +1018,9 @@ void gfs2_glock_dq(struct gfs2_holder *gh) if (find_first_holder(gl) == NULL) { if (glops->go_unlock) { GLOCK_BUG_ON(gl, test_and_set_bit(GLF_LOCK, &gl->gl_flags)); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); glops->go_unlock(gh); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); clear_bit(GLF_LOCK, &gl->gl_flags); } if (list_empty(&gl->gl_holders) && @@ -1033,7 +1033,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh) gfs2_glock_add_to_lru(gl); trace_gfs2_glock_queue(gh, 0); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (likely(fast_path)) return; @@ -1217,9 +1217,9 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) delay = gl->gl_hold_time; } - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); handle_callback(gl, state, delay, true); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) gfs2_glock_put(gl); } @@ -1259,7 +1259,7 @@ static int gfs2_should_freeze(const struct gfs2_glock *gl) * @gl: Pointer to the glock * @ret: The return value from the dlm * - * The gl_reply field is under the gl_spin lock so that it is ok + * The gl_reply field is under the gl_lockref.lock lock so that it is ok * to use a bitfield shared with other glock state fields. */ @@ -1267,20 +1267,20 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) { struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct; - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); gl->gl_reply = ret; if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) { if (gfs2_should_freeze(gl)) { set_bit(GLF_FROZEN, &gl->gl_flags); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); return; } } gl->gl_lockref.count++; set_bit(GLF_REPLY_PENDING, &gl->gl_flags); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put(gl); @@ -1326,14 +1326,14 @@ __acquires(&lru_lock) while(!list_empty(list)) { gl = list_entry(list->next, struct gfs2_glock, gl_lru); list_del_init(&gl->gl_lru); - if (!spin_trylock(&gl->gl_spin)) { + if (!spin_trylock(&gl->gl_lockref.lock)) { add_back_to_lru: list_add(&gl->gl_lru, &lru_list); atomic_inc(&lru_count); continue; } if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); goto add_back_to_lru; } clear_bit(GLF_LRU, &gl->gl_flags); @@ -1343,7 +1343,7 @@ add_back_to_lru: WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags)); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gl->gl_lockref.count--; - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); cond_resched_lock(&lru_lock); } } @@ -1461,10 +1461,10 @@ static void clear_glock(struct gfs2_glock *gl) { gfs2_glock_remove_from_lru(gl); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); if (gl->gl_state != LM_ST_UNLOCKED) handle_callback(gl, LM_ST_UNLOCKED, 0, false); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put(gl); } @@ -1482,9 +1482,9 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp) static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl) { - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); gfs2_dump_glock(seq, gl); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); } static void dump_glock_func(struct gfs2_glock *gl) @@ -1518,10 +1518,10 @@ void gfs2_glock_finish_truncate(struct gfs2_inode *ip) ret = gfs2_truncatei_resume(ip); gfs2_assert_withdraw(gl->gl_name.ln_sbd, ret == 0); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); clear_bit(GLF_LOCK, &gl->gl_flags); run_queue(gl, 1); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); } static const char *state2str(unsigned state) diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 32572f71f027..f7cdaa8b4c83 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -141,7 +141,7 @@ static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock * struct pid *pid; /* Look in glock's list of holders for one with current task as owner */ - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); pid = task_pid(current); list_for_each_entry(gh, &gl->gl_holders, gh_list) { if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) @@ -151,7 +151,7 @@ static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock * } gh = NULL; out: - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); return gh; } diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 1f6c9c3fe5cb..f348cfb6b69a 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -146,11 +146,11 @@ static void rgrp_go_sync(struct gfs2_glock *gl) struct gfs2_rgrpd *rgd; int error; - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); rgd = gl->gl_object; if (rgd) gfs2_rgrp_brelse(rgd); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) return; @@ -162,11 +162,11 @@ static void rgrp_go_sync(struct gfs2_glock *gl) mapping_set_error(mapping, error); gfs2_ail_empty_gl(gl); - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); rgd = gl->gl_object; if (rgd) gfs2_free_clones(rgd); - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); } /** @@ -542,7 +542,7 @@ static int freeze_go_demote_ok(const struct gfs2_glock *gl) * iopen_go_callback - schedule the dcache entry for the inode to be deleted * @gl: the glock * - * gl_spin lock is held while calling this + * gl_lockref.lock lock is held while calling this */ static void iopen_go_callback(struct gfs2_glock *gl, bool remote) { diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 121ed08d9d9f..de7b4f97ac75 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -334,9 +334,8 @@ struct gfs2_glock { struct lm_lockname gl_name; struct lockref gl_lockref; -#define gl_spin gl_lockref.lock - /* State fields protected by gl_spin */ + /* State fields protected by gl_lockref.lock */ unsigned int gl_state:2, /* Current state */ gl_target:2, /* Target state */ gl_demote_state:2, /* State requested by remote node */ diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 241a399bf83d..fb2b42cf46b5 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -50,7 +50,7 @@ static void gfs2_init_glock_once(void *foo) struct gfs2_glock *gl = foo; INIT_HLIST_BL_NODE(&gl->gl_list); - spin_lock_init(&gl->gl_spin); + spin_lock_init(&gl->gl_lockref.lock); INIT_LIST_HEAD(&gl->gl_holders); INIT_LIST_HEAD(&gl->gl_lru); INIT_LIST_HEAD(&gl->gl_ail_list); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 02586e7eb964..baab99b69d8a 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1291,6 +1291,9 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, up_write(&s->s_umount); blkdev_put(bdev, mode); down_write(&s->s_umount); + } else { + /* s_mode must be set before deactivate_locked_super calls */ + s->s_mode = mode; } memset(&args, 0, sizeof(args)); @@ -1314,7 +1317,6 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, } else { char b[BDEVNAME_SIZE]; - s->s_mode = mode; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(bdev)); error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 475985d14758..c134c0462cee 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -729,9 +729,9 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) rb_erase(n, &sdp->sd_rindex_tree); if (gl) { - spin_lock(&gl->gl_spin); + spin_lock(&gl->gl_lockref.lock); gl->gl_object = NULL; - spin_unlock(&gl->gl_spin); + spin_unlock(&gl->gl_lockref.lock); gfs2_glock_add_to_lru(gl); gfs2_glock_put(gl); } @@ -933,8 +933,9 @@ static int read_rindex_entry(struct gfs2_inode *ip) goto fail; rgd->rd_gl->gl_object = rgd; - rgd->rd_gl->gl_vm.start = rgd->rd_addr * bsize; - rgd->rd_gl->gl_vm.end = rgd->rd_gl->gl_vm.start + (rgd->rd_length * bsize) - 1; + rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_CACHE_MASK; + rgd->rd_gl->gl_vm.end = PAGE_CACHE_ALIGN((rgd->rd_addr + + rgd->rd_length) * bsize) - 1; rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED); if (rgd->rd_data > sdp->sd_max_rg_data) diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index b95d0d625f32..0c1bde395062 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -176,6 +176,8 @@ void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh) unlock_buffer(bh); if (bh->b_private == NULL) bd = gfs2_alloc_bufdata(gl, bh, &gfs2_databuf_lops); + else + bd = bh->b_private; lock_buffer(bh); gfs2_log_lock(sdp); } @@ -236,6 +238,8 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) lock_page(bh->b_page); if (bh->b_private == NULL) bd = gfs2_alloc_bufdata(gl, bh, &gfs2_buf_lops); + else + bd = bh->b_private; unlock_page(bh->b_page); lock_buffer(bh); gfs2_log_lock(sdp); diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 9cd4eb3a1e22..ddd0138f410c 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -229,7 +229,7 @@ bl_read_pagelist(struct nfs_pgio_header *header) struct parallel_io *par; loff_t f_offset = header->args.offset; size_t bytes_left = header->args.count; - unsigned int pg_offset, pg_len; + unsigned int pg_offset = header->args.pgbase, pg_len; struct page **pages = header->args.pages; int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; const bool is_dio = (header->dreq != NULL); @@ -262,7 +262,6 @@ bl_read_pagelist(struct nfs_pgio_header *header) extent_length = be.be_length - (isect - be.be_f_offset); } - pg_offset = f_offset & ~PAGE_CACHE_MASK; if (is_dio) { if (pg_offset + bytes_left > PAGE_CACHE_SIZE) pg_len = PAGE_CACHE_SIZE - pg_offset; @@ -273,9 +272,6 @@ bl_read_pagelist(struct nfs_pgio_header *header) pg_len = PAGE_CACHE_SIZE; } - isect += (pg_offset >> SECTOR_SHIFT); - extent_length -= (pg_offset >> SECTOR_SHIFT); - if (is_hole(&be)) { bio = bl_submit_bio(READ, bio); /* Fill hole w/ zeroes w/o accessing device */ @@ -301,6 +297,7 @@ bl_read_pagelist(struct nfs_pgio_header *header) extent_length -= (pg_len >> SECTOR_SHIFT); f_offset += pg_len; bytes_left -= pg_len; + pg_offset = 0; } if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { header->res.eof = 1; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 75f7c0a7538a..a7f2e6e33305 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -99,17 +99,6 @@ nfs4_callback_up(struct svc_serv *serv) } #if defined(CONFIG_NFS_V4_1) -static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net) -{ - /* - * Create an svc_sock for the back channel service that shares the - * fore channel connection. - * Returns the input port (0) and sets the svc_serv bc_xprt on success - */ - return svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0, - SVC_SOCK_ANONYMOUS); -} - /* * The callback service for NFSv4.1 callbacks */ @@ -184,11 +173,6 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, xprt->bc_serv = serv; } #else -static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net) -{ - return 0; -} - static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv, struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) { @@ -259,7 +243,8 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc svc_shutdown_net(serv, net); } -static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct net *net) +static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, + struct net *net, struct rpc_xprt *xprt) { struct nfs_net *nn = net_generic(net, nfs_net_id); int ret; @@ -275,20 +260,11 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct n goto err_bind; } - switch (minorversion) { - case 0: - ret = nfs4_callback_up_net(serv, net); - break; - case 1: - case 2: - ret = nfs41_callback_up_net(serv, net); - break; - default: - printk(KERN_ERR "NFS: unknown callback version: %d\n", - minorversion); - ret = -EINVAL; - break; - } + ret = -EPROTONOSUPPORT; + if (minorversion == 0) + ret = nfs4_callback_up_net(serv, net); + else if (xprt->ops->bc_up) + ret = xprt->ops->bc_up(serv, net); if (ret < 0) { printk(KERN_ERR "NFS: callback service start failed\n"); @@ -364,7 +340,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) goto err_create; } - ret = nfs_callback_up_net(minorversion, serv, net); + ret = nfs_callback_up_net(minorversion, serv, net, xprt); if (ret < 0) goto err_net; diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 84326e9fb47a..ff8195bd75ea 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -61,7 +61,6 @@ struct cb_compound_hdr_res { }; struct cb_getattrargs { - struct sockaddr *addr; struct nfs_fh fh; uint32_t bitmap[2]; }; @@ -76,7 +75,6 @@ struct cb_getattrres { }; struct cb_recallargs { - struct sockaddr *addr; struct nfs_fh fh; nfs4_stateid stateid; uint32_t truncate; @@ -119,9 +117,6 @@ extern __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, struct cb_sequenceres *res, struct cb_process_state *cps); -extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, - const nfs4_stateid *stateid); - #define RCA4_TYPE_MASK_RDATA_DLG 0 #define RCA4_TYPE_MASK_WDATA_DLG 1 #define RCA4_TYPE_MASK_DIR_DLG 2 @@ -134,7 +129,6 @@ extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, #define RCA4_TYPE_MASK_ALL 0xf31f struct cb_recallanyargs { - struct sockaddr *craa_addr; uint32_t craa_objs_to_keep; uint32_t craa_type_mask; }; @@ -144,7 +138,6 @@ extern __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, struct cb_process_state *cps); struct cb_recallslotargs { - struct sockaddr *crsa_addr; uint32_t crsa_target_highest_slotid; }; extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, @@ -152,7 +145,6 @@ extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, struct cb_process_state *cps); struct cb_layoutrecallargs { - struct sockaddr *cbl_addr; uint32_t cbl_recall_type; uint32_t cbl_layout_type; uint32_t cbl_layoutchanged; @@ -196,9 +188,6 @@ extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, #if IS_ENABLED(CONFIG_NFS_V4) extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt); extern void nfs_callback_down(int minorversion, struct net *net); -extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, - const nfs4_stateid *stateid); -extern int nfs4_set_callback_sessionid(struct nfs_client *clp); #endif /* CONFIG_NFS_V4 */ /* * nfs41: Callbacks are expected to not cause substantial latency, @@ -209,6 +198,5 @@ extern int nfs4_set_callback_sessionid(struct nfs_client *clp); #define NFS41_BC_MAX_CALLBACKS 1 extern unsigned int nfs_callback_set_tcpport; -extern unsigned short nfs_callback_tcpport; #endif /* __LINUX_FS_NFS_CALLBACK_H */ diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index b85cf7a30232..807eb6ef4f91 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -17,9 +17,7 @@ #include "nfs4session.h" #include "nfs4trace.h" -#ifdef NFS_DEBUG #define NFSDBG_FACILITY NFSDBG_CALLBACK -#endif __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res, diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 6b1697a01dde..646cdac73488 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -18,19 +18,21 @@ #include "internal.h" #include "nfs4session.h" -#define CB_OP_TAGLEN_MAXSZ (512) -#define CB_OP_HDR_RES_MAXSZ (2 + CB_OP_TAGLEN_MAXSZ) -#define CB_OP_GETATTR_BITMAP_MAXSZ (4) -#define CB_OP_GETATTR_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ - CB_OP_GETATTR_BITMAP_MAXSZ + \ - 2 + 2 + 3 + 3) -#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) +#define CB_OP_TAGLEN_MAXSZ (512) +#define CB_OP_HDR_RES_MAXSZ (2 * 4) // opcode, status +#define CB_OP_GETATTR_BITMAP_MAXSZ (4 * 4) // bitmap length, 3 bitmaps +#define CB_OP_GETATTR_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ + CB_OP_GETATTR_BITMAP_MAXSZ + \ + /* change, size, ctime, mtime */\ + (2 + 2 + 3 + 3) * 4) +#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #if defined(CONFIG_NFS_V4_1) #define CB_OP_LAYOUTRECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #define CB_OP_DEVICENOTIFY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ - 4 + 1 + 3) + NFS4_MAX_SESSIONID_LEN + \ + (1 + 3) * 4) // seqid, 3 slotids #define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #define CB_OP_RECALLSLOT_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #endif /* CONFIG_NFS_V4_1 */ @@ -157,7 +159,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound if (unlikely(status != 0)) return status; /* We do not like overly long tags! */ - if (hdr->taglen > CB_OP_TAGLEN_MAXSZ - 12) { + if (hdr->taglen > CB_OP_TAGLEN_MAXSZ) { printk("NFS: NFSv4 CALLBACK %s: client sent tag of length %u\n", __func__, hdr->taglen); return htonl(NFS4ERR_RESOURCE); @@ -198,7 +200,6 @@ static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr status = decode_fh(xdr, &args->fh); if (unlikely(status != 0)) goto out; - args->addr = svc_addr(rqstp); status = decode_bitmap(xdr, args->bitmap); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); @@ -210,7 +211,6 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, __be32 *p; __be32 status; - args->addr = svc_addr(rqstp); status = decode_stateid(xdr, &args->stateid); if (unlikely(status != 0)) goto out; @@ -236,7 +236,6 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp, __be32 status = 0; uint32_t iomode; - args->cbl_addr = svc_addr(rqstp); p = read_buf(xdr, 4 * sizeof(uint32_t)); if (unlikely(p == NULL)) { status = htonl(NFS4ERR_BADXDR); @@ -383,13 +382,12 @@ static __be32 decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid) { __be32 *p; - int len = NFS4_MAX_SESSIONID_LEN; - p = read_buf(xdr, len); + p = read_buf(xdr, NFS4_MAX_SESSIONID_LEN); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); - memcpy(sid->data, p, len); + memcpy(sid->data, p, NFS4_MAX_SESSIONID_LEN); return 0; } @@ -500,7 +498,6 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp, uint32_t bitmap[2]; __be32 *p, status; - args->craa_addr = svc_addr(rqstp); p = read_buf(xdr, 4); if (unlikely(p == NULL)) return htonl(NFS4ERR_BADXDR); @@ -519,7 +516,6 @@ static __be32 decode_recallslot_args(struct svc_rqst *rqstp, { __be32 *p; - args->crsa_addr = svc_addr(rqstp); p = read_buf(xdr, 4); if (unlikely(p == NULL)) return htonl(NFS4ERR_BADXDR); @@ -684,13 +680,12 @@ static __be32 encode_sessionid(struct xdr_stream *xdr, const struct nfs4_sessionid *sid) { __be32 *p; - int len = NFS4_MAX_SESSIONID_LEN; - p = xdr_reserve_space(xdr, len); + p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); - memcpy(p, sid, len); + memcpy(p, sid, NFS4_MAX_SESSIONID_LEN); return 0; } @@ -704,7 +699,9 @@ static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp, if (unlikely(status != 0)) goto out; - encode_sessionid(xdr, &res->csr_sessionid); + status = encode_sessionid(xdr, &res->csr_sessionid); + if (status) + goto out; p = xdr_reserve_space(xdr, 4 * sizeof(uint32_t)); if (unlikely(p == NULL)) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 57c5a02f6213..d6d5d2a48e83 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -764,6 +764,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, server->time_delta = fsinfo->time_delta; + server->clone_blksize = fsinfo->clone_blksize; /* We're airborne Set socket buffersize */ rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); } diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index be806ead7f4d..5166adcfc0fb 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -721,14 +721,12 @@ int nfs_async_inode_return_delegation(struct inode *inode, struct nfs_client *clp = server->nfs_client; struct nfs_delegation *delegation; - filemap_flush(inode->i_mapping); - rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); if (delegation == NULL) goto out_enoent; - - if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) + if (stateid != NULL && + !clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) goto out_enoent; nfs_mark_return_delegation(server, delegation); rcu_read_unlock(); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 3d8e4ffa0a33..ce5a21861074 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1714,9 +1714,6 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) dfprintk(VFS, "NFS: mknod(%s/%lu), %pd\n", dir->i_sb->s_id, dir->i_ino, dentry); - if (!new_valid_dev(rdev)) - return -EINVAL; - attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index fbc5a56de875..03516c80855a 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -339,6 +339,19 @@ static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls) } } +static void ff_layout_mark_devices_valid(struct nfs4_ff_layout_segment *fls) +{ + struct nfs4_deviceid_node *node; + int i; + + if (!(fls->flags & FF_FLAGS_NO_IO_THRU_MDS)) + return; + for (i = 0; i < fls->mirror_array_cnt; i++) { + node = &fls->mirror_array[i]->mirror_ds->id_node; + clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags); + } +} + static struct pnfs_layout_segment * ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, struct nfs4_layoutget_res *lgr, @@ -499,6 +512,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, rc = ff_layout_check_layout(lgr); if (rc) goto out_err_free; + ff_layout_mark_devices_valid(fls); ret = &fls->generic_hdr; dprintk("<-- %s (success)\n", __func__); @@ -741,17 +755,17 @@ ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg, } static struct nfs4_pnfs_ds * -ff_layout_choose_best_ds_for_read(struct nfs_pageio_descriptor *pgio, +ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, + int start_idx, int *best_idx) { - struct nfs4_ff_layout_segment *fls; + struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); struct nfs4_pnfs_ds *ds; int idx; - fls = FF_LAYOUT_LSEG(pgio->pg_lseg); /* mirrors are sorted by efficiency */ - for (idx = 0; idx < fls->mirror_array_cnt; idx++) { - ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, idx, false); + for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) { + ds = nfs4_ff_layout_prepare_ds(lseg, idx, false); if (ds) { *best_idx = idx; return ds; @@ -782,7 +796,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, if (pgio->pg_lseg == NULL) goto out_mds; - ds = ff_layout_choose_best_ds_for_read(pgio, &ds_idx); + ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx); if (!ds) goto out_mds; mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx); @@ -1035,7 +1049,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, rpc_wake_up(&tbl->slot_tbl_waitq); /* fall through */ default: - if (ff_layout_has_available_ds(lseg)) + if (ff_layout_no_fallback_to_mds(lseg) || + ff_layout_has_available_ds(lseg)) return -NFS4ERR_RESET_TO_PNFS; reset: dprintk("%s Retry through MDS. Error %d\n", __func__, @@ -1153,7 +1168,6 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, } /* NFS_PROTO call done callback routines */ - static int ff_layout_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) { @@ -1171,6 +1185,10 @@ static int ff_layout_read_done_cb(struct rpc_task *task, switch (err) { case -NFS4ERR_RESET_TO_PNFS: + if (ff_layout_choose_best_ds_for_read(hdr->lseg, + hdr->pgio_mirror_idx + 1, + &hdr->pgio_mirror_idx)) + goto out_eagain; set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &hdr->lseg->pls_layout->plh_flags); pnfs_read_resend_pnfs(hdr); @@ -1179,11 +1197,13 @@ static int ff_layout_read_done_cb(struct rpc_task *task, ff_layout_reset_read(hdr); return task->tk_status; case -EAGAIN: - rpc_restart_call_prepare(task); - return -EAGAIN; + goto out_eagain; } return 0; +out_eagain: + rpc_restart_call_prepare(task); + return -EAGAIN; } static bool diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 68cc0d9828f9..2bb08bc6aaf0 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -10,6 +10,7 @@ #define FS_NFS_NFS4FLEXFILELAYOUT_H #define FF_FLAGS_NO_LAYOUTCOMMIT 1 +#define FF_FLAGS_NO_IO_THRU_MDS 2 #include "../pnfs.h" @@ -146,6 +147,12 @@ FF_LAYOUT_MIRROR_COUNT(struct pnfs_layout_segment *lseg) } static inline bool +ff_layout_no_fallback_to_mds(struct pnfs_layout_segment *lseg) +{ + return FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_IO_THRU_MDS; +} + +static inline bool ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node) { return nfs4_test_deviceid_unavailable(node); diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 99a45283b9ee..09b190015df4 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -16,9 +16,7 @@ #include <linux/nfs_fs.h> #include "internal.h" -#ifdef NFS_DEBUG -# define NFSDBG_FACILITY NFSDBG_MOUNT -#endif +#define NFSDBG_FACILITY NFSDBG_MOUNT /* * Defined by RFC 1094, section A.3; and RFC 1813, section 5.1.4 diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index 814c1255f1d2..b587ccd31083 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -17,5 +17,6 @@ int nfs42_proc_deallocate(struct file *, loff_t, loff_t); loff_t nfs42_proc_llseek(struct file *, loff_t, int); int nfs42_proc_layoutstats_generic(struct nfs_server *, struct nfs42_layoutstat_data *); +int nfs42_proc_clone(struct file *, struct file *, loff_t, loff_t, loff_t); #endif /* __LINUX_FS_NFS_NFS4_2_H */ diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 0f020e4d8421..3e92a3cde15d 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -271,3 +271,74 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server, return PTR_ERR(task); return 0; } + +static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, + struct file *dst_f, loff_t src_offset, + loff_t dst_offset, loff_t count) +{ + struct inode *src_inode = file_inode(src_f); + struct inode *dst_inode = file_inode(dst_f); + struct nfs_server *server = NFS_SERVER(dst_inode); + struct nfs42_clone_args args = { + .src_fh = NFS_FH(src_inode), + .dst_fh = NFS_FH(dst_inode), + .src_offset = src_offset, + .dst_offset = dst_offset, + .dst_bitmask = server->cache_consistency_bitmask, + }; + struct nfs42_clone_res res = { + .server = server, + }; + int status; + + msg->rpc_argp = &args; + msg->rpc_resp = &res; + + status = nfs42_set_rw_stateid(&args.src_stateid, src_f, FMODE_READ); + if (status) + return status; + + status = nfs42_set_rw_stateid(&args.dst_stateid, dst_f, FMODE_WRITE); + if (status) + return status; + + res.dst_fattr = nfs_alloc_fattr(); + if (!res.dst_fattr) + return -ENOMEM; + + status = nfs4_call_sync(server->client, server, msg, + &args.seq_args, &res.seq_res, 0); + if (status == 0) + status = nfs_post_op_update_inode(dst_inode, res.dst_fattr); + + kfree(res.dst_fattr); + return status; +} + +int nfs42_proc_clone(struct file *src_f, struct file *dst_f, + loff_t src_offset, loff_t dst_offset, loff_t count) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLONE], + }; + struct inode *inode = file_inode(src_f); + struct nfs_server *server = NFS_SERVER(file_inode(src_f)); + struct nfs4_exception exception = { }; + int err; + + if (!nfs_server_capable(inode, NFS_CAP_CLONE)) + return -EOPNOTSUPP; + + do { + err = _nfs42_proc_clone(&msg, src_f, dst_f, src_offset, + dst_offset, count); + if (err == -ENOTSUPP || err == -EOPNOTSUPP) { + NFS_SERVER(inode)->caps &= ~NFS_CAP_CLONE; + return -EOPNOTSUPP; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + + return err; + +} diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 0eb29e14070d..0ca482a51e53 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -34,6 +34,12 @@ 1 /* opaque devaddr4 length */ + \ XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE)) #define decode_layoutstats_maxsz (op_decode_hdr_maxsz) +#define encode_clone_maxsz (encode_stateid_maxsz + \ + encode_stateid_maxsz + \ + 2 /* src offset */ + \ + 2 /* dst offset */ + \ + 2 /* count */) +#define decode_clone_maxsz (op_decode_hdr_maxsz) #define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -65,7 +71,20 @@ decode_sequence_maxsz + \ decode_putfh_maxsz + \ PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz) - +#define NFS4_enc_clone_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_savefh_maxsz + \ + encode_putfh_maxsz + \ + encode_clone_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_clone_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_savefh_maxsz + \ + decode_putfh_maxsz + \ + decode_clone_maxsz + \ + decode_getattr_maxsz) static void encode_fallocate(struct xdr_stream *xdr, struct nfs42_falloc_args *args) @@ -128,6 +147,21 @@ static void encode_layoutstats(struct xdr_stream *xdr, encode_uint32(xdr, 0); } +static void encode_clone(struct xdr_stream *xdr, + struct nfs42_clone_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + encode_op_hdr(xdr, OP_CLONE, decode_clone_maxsz, hdr); + encode_nfs4_stateid(xdr, &args->src_stateid); + encode_nfs4_stateid(xdr, &args->dst_stateid); + p = reserve_space(xdr, 3*8); + p = xdr_encode_hyper(p, args->src_offset); + p = xdr_encode_hyper(p, args->dst_offset); + xdr_encode_hyper(p, args->count); +} + /* * Encode ALLOCATE request */ @@ -206,6 +240,27 @@ static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode CLONE request + */ +static void nfs4_xdr_enc_clone(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_clone_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->src_fh, &hdr); + encode_savefh(xdr, &hdr); + encode_putfh(xdr, args->dst_fh, &hdr); + encode_clone(xdr, args, &hdr); + encode_getfattr(xdr, args->dst_bitmask, &hdr); + encode_nops(&hdr); +} + static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) { return decode_op_hdr(xdr, OP_ALLOCATE); @@ -243,6 +298,11 @@ static int decode_layoutstats(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_LAYOUTSTATS); } +static int decode_clone(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_CLONE); +} + /* * Decode ALLOCATE request */ @@ -351,4 +411,39 @@ out: return status; } +/* + * Decode CLONE request + */ +static int nfs4_xdr_dec_clone(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_clone_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_savefh(xdr); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_clone(xdr); + if (status) + goto out; + status = decode_getfattr(xdr, res->dst_fattr, res->server); + +out: + res->rpc_status = status; + return status; +} + #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 50cfc4ca7a02..4afdee420d25 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -183,10 +183,12 @@ struct nfs4_state { struct nfs4_exception { - long timeout; - int retry; struct nfs4_state *state; struct inode *inode; + long timeout; + unsigned char delay : 1, + recovering : 1, + retry : 1; }; struct nfs4_state_recovery_ops { diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index b0dbe0abed53..4aa571956cd6 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -4,6 +4,7 @@ * Copyright (C) 1992 Rick Sladkey */ #include <linux/fs.h> +#include <linux/file.h> #include <linux/falloc.h> #include <linux/nfs_fs.h> #include "delegation.h" @@ -192,8 +193,138 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t return nfs42_proc_deallocate(filep, offset, len); return nfs42_proc_allocate(filep, offset, len); } + +static noinline long +nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd, + u64 src_off, u64 dst_off, u64 count) +{ + struct inode *dst_inode = file_inode(dst_file); + struct nfs_server *server = NFS_SERVER(dst_inode); + struct fd src_file; + struct inode *src_inode; + unsigned int bs = server->clone_blksize; + int ret; + + /* dst file must be opened for writing */ + if (!(dst_file->f_mode & FMODE_WRITE)) + return -EINVAL; + + ret = mnt_want_write_file(dst_file); + if (ret) + return ret; + + src_file = fdget(srcfd); + if (!src_file.file) { + ret = -EBADF; + goto out_drop_write; + } + + src_inode = file_inode(src_file.file); + + /* src and dst must be different files */ + ret = -EINVAL; + if (src_inode == dst_inode) + goto out_fput; + + /* src file must be opened for reading */ + if (!(src_file.file->f_mode & FMODE_READ)) + goto out_fput; + + /* src and dst must be regular files */ + ret = -EISDIR; + if (!S_ISREG(src_inode->i_mode) || !S_ISREG(dst_inode->i_mode)) + goto out_fput; + + ret = -EXDEV; + if (src_file.file->f_path.mnt != dst_file->f_path.mnt || + src_inode->i_sb != dst_inode->i_sb) + goto out_fput; + + /* check alignment w.r.t. clone_blksize */ + ret = -EINVAL; + if (bs) { + if (!IS_ALIGNED(src_off, bs) || !IS_ALIGNED(dst_off, bs)) + goto out_fput; + if (!IS_ALIGNED(count, bs) && i_size_read(src_inode) != (src_off + count)) + goto out_fput; + } + + /* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */ + if (dst_inode < src_inode) { + mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD); + } else { + mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_CHILD); + } + + /* flush all pending writes on both src and dst so that server + * has the latest data */ + ret = nfs_sync_inode(src_inode); + if (ret) + goto out_unlock; + ret = nfs_sync_inode(dst_inode); + if (ret) + goto out_unlock; + + ret = nfs42_proc_clone(src_file.file, dst_file, src_off, dst_off, count); + + /* truncate inode page cache of the dst range so that future reads can fetch + * new data from server */ + if (!ret) + truncate_inode_pages_range(&dst_inode->i_data, dst_off, dst_off + count - 1); + +out_unlock: + if (dst_inode < src_inode) { + mutex_unlock(&src_inode->i_mutex); + mutex_unlock(&dst_inode->i_mutex); + } else { + mutex_unlock(&dst_inode->i_mutex); + mutex_unlock(&src_inode->i_mutex); + } +out_fput: + fdput(src_file); +out_drop_write: + mnt_drop_write_file(dst_file); + return ret; +} + +static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp) +{ + struct nfs_ioctl_clone_range_args args; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + + return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_off, args.dst_off, args.count); +} +#else +static long nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd, + u64 src_off, u64 dst_off, u64 count) +{ + return -ENOTTY; +} + +static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp) +{ + return -ENOTTY; +} #endif /* CONFIG_NFS_V4_2 */ +long nfs4_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + void __user *argp = (void __user *)arg; + + switch (cmd) { + case NFS_IOC_CLONE: + return nfs42_ioctl_clone(file, arg, 0, 0, 0); + case NFS_IOC_CLONE_RANGE: + return nfs42_ioctl_clone_range(file, argp); + } + + return -ENOTTY; +} + const struct file_operations nfs4_file_operations = { #ifdef CONFIG_NFS_V4_2 .llseek = nfs4_file_llseek, @@ -216,4 +347,9 @@ const struct file_operations nfs4_file_operations = { #endif /* CONFIG_NFS_V4_2 */ .check_flags = nfs_check_flags, .setlease = simple_nosetlease, +#ifdef CONFIG_COMPAT + .unlocked_ioctl = nfs4_ioctl, +#else + .compat_ioctl = nfs4_ioctl, +#endif /* CONFIG_COMPAT */ }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0e5ff69455c7..ff5bddc49a2a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -78,7 +78,6 @@ struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int _nfs4_recover_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); -static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *, long *); static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label); static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label); @@ -239,6 +238,7 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_FS_LAYOUT_TYPES, FATTR4_WORD2_LAYOUT_BLKSIZE + | FATTR4_WORD2_CLONE_BLKSIZE }; const u32 nfs4_fs_locations_bitmap[3] = { @@ -344,13 +344,16 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) /* This is the error handling routine for processes that are allowed * to sleep. */ -int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +static int nfs4_do_handle_exception(struct nfs_server *server, + int errorcode, struct nfs4_exception *exception) { struct nfs_client *clp = server->nfs_client; struct nfs4_state *state = exception->state; struct inode *inode = exception->inode; int ret = errorcode; + exception->delay = 0; + exception->recovering = 0; exception->retry = 0; switch(errorcode) { case 0: @@ -359,11 +362,9 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: - if (inode && nfs4_have_delegation(inode, FMODE_READ)) { - nfs4_inode_return_delegation(inode); - exception->retry = 1; - return 0; - } + if (inode && nfs_async_inode_return_delegation(inode, + NULL) == 0) + goto wait_on_recovery; if (state == NULL) break; ret = nfs4_schedule_stateid_recovery(server, state); @@ -409,11 +410,12 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ ret = -EBUSY; break; } - case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: - ret = nfs4_delay(server->client, &exception->timeout); - if (ret != 0) - break; + nfs_inc_server_stats(server, NFSIOS_DELAY); + case -NFS4ERR_GRACE: + exception->delay = 1; + return 0; + case -NFS4ERR_RETRY_UNCACHED_REP: case -NFS4ERR_OLD_STATEID: exception->retry = 1; @@ -434,14 +436,85 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ /* We failed to handle the error */ return nfs4_map_errors(ret); wait_on_recovery: - ret = nfs4_wait_clnt_recover(clp); + exception->recovering = 1; + return 0; +} + +/* This is the error handling routine for processes that are allowed + * to sleep. + */ +int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +{ + struct nfs_client *clp = server->nfs_client; + int ret; + + ret = nfs4_do_handle_exception(server, errorcode, exception); + if (exception->delay) { + ret = nfs4_delay(server->client, &exception->timeout); + goto out_retry; + } + if (exception->recovering) { + ret = nfs4_wait_clnt_recover(clp); + if (test_bit(NFS_MIG_FAILED, &server->mig_status)) + return -EIO; + goto out_retry; + } + return ret; +out_retry: + if (ret == 0) + exception->retry = 1; + return ret; +} + +static int +nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server, + int errorcode, struct nfs4_exception *exception) +{ + struct nfs_client *clp = server->nfs_client; + int ret; + + ret = nfs4_do_handle_exception(server, errorcode, exception); + if (exception->delay) { + rpc_delay(task, nfs4_update_delay(&exception->timeout)); + goto out_retry; + } + if (exception->recovering) { + rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); + if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) + rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); + goto out_retry; + } if (test_bit(NFS_MIG_FAILED, &server->mig_status)) - return -EIO; + ret = -EIO; + return ret; +out_retry: if (ret == 0) exception->retry = 1; return ret; } +static int +nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server, + struct nfs4_state *state, long *timeout) +{ + struct nfs4_exception exception = { + .state = state, + }; + + if (task->tk_status >= 0) + return 0; + if (timeout) + exception.timeout = *timeout; + task->tk_status = nfs4_async_handle_exception(task, server, + task->tk_status, + &exception); + if (exception.delay && timeout) + *timeout = exception.timeout; + if (exception.retry) + return -EAGAIN; + return 0; +} + /* * Return 'true' if 'clp' is using an rpc_client that is integrity protected * or 'false' otherwise. @@ -4530,7 +4603,7 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server) #define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE) static int buf_to_pages_noslab(const void *buf, size_t buflen, - struct page **pages, unsigned int *pgbase) + struct page **pages) { struct page *newpage, **spages; int rc = 0; @@ -4674,7 +4747,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu goto out_free; args.acl_len = npages * PAGE_SIZE; - args.acl_pgbase = 0; dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n", __func__, buf, buflen, npages, args.acl_len); @@ -4766,7 +4838,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl return -EOPNOTSUPP; if (npages > ARRAY_SIZE(pages)) return -ERANGE; - i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase); + i = buf_to_pages_noslab(buf, buflen, arg.acl_pages); if (i < 0) return i; nfs4_inode_return_delegation(inode); @@ -4955,79 +5027,6 @@ out: #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ -static int -nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, - struct nfs4_state *state, long *timeout) -{ - struct nfs_client *clp = server->nfs_client; - - if (task->tk_status >= 0) - return 0; - switch(task->tk_status) { - case -NFS4ERR_DELEG_REVOKED: - case -NFS4ERR_ADMIN_REVOKED: - case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_OPENMODE: - if (state == NULL) - break; - if (nfs4_schedule_stateid_recovery(server, state) < 0) - goto recovery_failed; - goto wait_on_recovery; - case -NFS4ERR_EXPIRED: - if (state != NULL) { - if (nfs4_schedule_stateid_recovery(server, state) < 0) - goto recovery_failed; - } - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_STALE_CLIENTID: - nfs4_schedule_lease_recovery(clp); - goto wait_on_recovery; - case -NFS4ERR_MOVED: - if (nfs4_schedule_migration_recovery(server) < 0) - goto recovery_failed; - goto wait_on_recovery; - case -NFS4ERR_LEASE_MOVED: - nfs4_schedule_lease_moved_recovery(clp); - goto wait_on_recovery; -#if defined(CONFIG_NFS_V4_1) - case -NFS4ERR_BADSESSION: - case -NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - case -NFS4ERR_DEADSESSION: - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - case -NFS4ERR_SEQ_FALSE_RETRY: - case -NFS4ERR_SEQ_MISORDERED: - dprintk("%s ERROR %d, Reset session\n", __func__, - task->tk_status); - nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); - goto wait_on_recovery; -#endif /* CONFIG_NFS_V4_1 */ - case -NFS4ERR_DELAY: - nfs_inc_server_stats(server, NFSIOS_DELAY); - rpc_delay(task, nfs4_update_delay(timeout)); - goto restart_call; - case -NFS4ERR_GRACE: - rpc_delay(task, NFS4_POLL_RETRY_MAX); - case -NFS4ERR_RETRY_UNCACHED_REP: - case -NFS4ERR_OLD_STATEID: - goto restart_call; - } - task->tk_status = nfs4_map_errors(task->tk_status); - return 0; -recovery_failed: - task->tk_status = -EIO; - return 0; -wait_on_recovery: - rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); - if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) - rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); - if (test_bit(NFS_MIG_FAILED, &server->mig_status)) - goto recovery_failed; -restart_call: - task->tk_status = 0; - return -EAGAIN; -} - static void nfs4_init_boot_verifier(const struct nfs_client *clp, nfs4_verifier *bootverf) { @@ -5522,7 +5521,7 @@ struct nfs4_unlockdata { struct nfs4_lock_state *lsp; struct nfs_open_context *ctx; struct file_lock fl; - const struct nfs_server *server; + struct nfs_server *server; unsigned long timestamp; }; @@ -8718,7 +8717,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_ALLOCATE | NFS_CAP_DEALLOCATE | NFS_CAP_SEEK - | NFS_CAP_LAYOUTSTATS, + | NFS_CAP_LAYOUTSTATS + | NFS_CAP_CLONE, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 788adf3897c7..dfed4f5c8fcc 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1659,7 +1659,7 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun *p = cpu_to_be32(FATTR4_WORD0_ACL); p = reserve_space(xdr, 4); *p = cpu_to_be32(arg->acl_len); - xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); + xdr_write_pages(xdr, arg->acl_pages, 0, arg->acl_len); } static void @@ -2491,7 +2491,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); xdr_inline_pages(&req->rq_rcv_buf, replen << 2, - args->acl_pages, args->acl_pgbase, args->acl_len); + args->acl_pages, 0, args->acl_len); encode_nops(&hdr); } @@ -4375,6 +4375,11 @@ static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat) goto xdr_error; if ((status = decode_attr_files_total(xdr, bitmap, &fsstat->tfiles)) != 0) goto xdr_error; + + status = -EIO; + if (unlikely(bitmap[0])) + goto xdr_error; + if ((status = decode_attr_space_avail(xdr, bitmap, &fsstat->abytes)) != 0) goto xdr_error; if ((status = decode_attr_space_free(xdr, bitmap, &fsstat->fbytes)) != 0) @@ -4574,6 +4579,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, goto xdr_error; fattr->valid |= status; + status = -EIO; + if (unlikely(bitmap[0])) + goto xdr_error; + status = decode_attr_mode(xdr, bitmap, &fmode); if (status < 0) goto xdr_error; @@ -4627,6 +4636,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, goto xdr_error; fattr->valid |= status; + status = -EIO; + if (unlikely(bitmap[1])) + goto xdr_error; + status = decode_attr_mdsthreshold(xdr, bitmap, fattr->mdsthreshold); if (status < 0) goto xdr_error; @@ -4764,6 +4777,28 @@ static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap, return 0; } +/* + * The granularity of a CLONE operation. + */ +static int decode_attr_clone_blksize(struct xdr_stream *xdr, uint32_t *bitmap, + uint32_t *res) +{ + __be32 *p; + + dprintk("%s: bitmap is %x\n", __func__, bitmap[2]); + *res = 0; + if (bitmap[2] & FATTR4_WORD2_CLONE_BLKSIZE) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) { + print_overflow_msg(__func__, xdr); + return -EIO; + } + *res = be32_to_cpup(p); + bitmap[2] &= ~FATTR4_WORD2_CLONE_BLKSIZE; + } + return 0; +} + static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) { unsigned int savep; @@ -4789,15 +4824,28 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0) goto xdr_error; fsinfo->wtpref = fsinfo->wtmax; + + status = -EIO; + if (unlikely(bitmap[0])) + goto xdr_error; + status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta); if (status != 0) goto xdr_error; status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype); if (status != 0) goto xdr_error; + + status = -EIO; + if (unlikely(bitmap[1])) + goto xdr_error; + status = decode_attr_layout_blksize(xdr, bitmap, &fsinfo->blksize); if (status) goto xdr_error; + status = decode_attr_clone_blksize(xdr, bitmap, &fsinfo->clone_blksize); + if (status) + goto xdr_error; status = verify_attr_len(xdr, savep, attrlen); xdr_error: @@ -7465,6 +7513,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(ALLOCATE, enc_allocate, dec_allocate), PROC(DEALLOCATE, enc_deallocate, dec_deallocate), PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats), + PROC(CLONE, enc_clone, dec_clone), #endif /* CONFIG_NFS_V4_2 */ }; diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 9bc9f04fb7f6..89a15dbe5efc 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -90,7 +90,7 @@ #define NFS_DEF_OPTIONS "vers=2,udp,rsize=4096,wsize=4096" /* Parameters passed from the kernel command line */ -static char nfs_root_parms[256] __initdata = ""; +static char nfs_root_parms[NFS_MAXPATHLEN + 1] __initdata = ""; /* Text-based mount options passed to super.c */ static char nfs_root_options[256] __initdata = NFS_DEF_OPTIONS; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 8abe27165ad0..93496c059837 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1912,12 +1912,13 @@ static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr) */ void pnfs_ld_write_done(struct nfs_pgio_header *hdr) { - trace_nfs4_pnfs_write(hdr, hdr->pnfs_error); - if (!hdr->pnfs_error) { + if (likely(!hdr->pnfs_error)) { pnfs_set_layoutcommit(hdr->inode, hdr->lseg, hdr->mds_offset + hdr->res.count); hdr->mds_ops->rpc_call_done(&hdr->task, hdr); - } else + } + trace_nfs4_pnfs_write(hdr, hdr->pnfs_error); + if (unlikely(hdr->pnfs_error)) pnfs_ld_handle_write_error(hdr); hdr->mds_ops->rpc_release(hdr); } @@ -2028,11 +2029,12 @@ static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr) */ void pnfs_ld_read_done(struct nfs_pgio_header *hdr) { - trace_nfs4_pnfs_read(hdr, hdr->pnfs_error); if (likely(!hdr->pnfs_error)) { __nfs4_read_done_cb(hdr); hdr->mds_ops->rpc_call_done(&hdr->task, hdr); - } else + } + trace_nfs4_pnfs_read(hdr, hdr->pnfs_error); + if (unlikely(hdr->pnfs_error)) pnfs_ld_handle_read_error(hdr); hdr->mds_ops->rpc_release(hdr); } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 01b8cc8e8cfc..0a5e33f33b5c 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -246,6 +246,13 @@ static void nfs_readpage_retry(struct rpc_task *task, nfs_set_pgio_error(hdr, -EIO, argp->offset); return; } + + /* For non rpc-based layout drivers, retry-through-MDS */ + if (!task->tk_ops) { + hdr->pnfs_error = -EAGAIN; + return; + } + /* Yes, so retry the read at the end of the hdr */ hdr->mds_offset += resp->count; argp->offset += resp->count; @@ -268,7 +275,7 @@ static void nfs_readpage_result(struct rpc_task *task, hdr->good_bytes = bound - hdr->io_start; } spin_unlock(&hdr->lock); - } else if (hdr->res.count != hdr->args.count) + } else if (hdr->res.count < hdr->args.count) nfs_readpage_retry(task, hdr); } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 383a027de452..f1268280244e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2816,7 +2816,6 @@ out_invalid_transport_udp: * NFS client for backwards compatibility */ unsigned int nfs_callback_set_tcpport; -unsigned short nfs_callback_tcpport; /* Default cache timeout is 10 minutes */ unsigned int nfs_idmap_cache_timeout = 600; /* Turn off NFSv4 uid/gid mapping when using AUTH_SYS */ @@ -2827,7 +2826,6 @@ char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN] = ""; bool recover_lost_locks = false; EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport); -EXPORT_SYMBOL_GPL(nfs_callback_tcpport); EXPORT_SYMBOL_GPL(nfs_idmap_cache_timeout); EXPORT_SYMBOL_GPL(nfs4_disable_idmapping); EXPORT_SYMBOL_GPL(max_session_slots); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 75ab7622e0cc..7b9316406930 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1505,6 +1505,13 @@ static void nfs_writeback_result(struct rpc_task *task, task->tk_status = -EIO; return; } + + /* For non rpc-based layout drivers, retry-through-MDS */ + if (!task->tk_ops) { + hdr->pnfs_error = -EAGAIN; + return; + } + /* Was this an NFSv2 write or an NFSv3 stable write? */ if (resp->verf->committed != NFS_UNSTABLE) { /* Resend from where the server left off */ diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 00121f298269..e7e78537aea2 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -130,6 +130,7 @@ enum nfs_opnum4 { OP_READ_PLUS = 68, OP_SEEK = 69, OP_WRITE_SAME = 70, + OP_CLONE = 71, OP_ILLEGAL = 10044, }; @@ -421,6 +422,7 @@ enum lock_type4 { #define FATTR4_WORD2_LAYOUT_TYPES (1UL << 0) #define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) #define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) +#define FATTR4_WORD2_CLONE_BLKSIZE (1UL << 13) #define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) /* MDS threshold bitmap bits */ @@ -501,6 +503,7 @@ enum { NFSPROC4_CLNT_ALLOCATE, NFSPROC4_CLNT_DEALLOCATE, NFSPROC4_CLNT_LAYOUTSTATS, + NFSPROC4_CLNT_CLONE, }; /* nfs41 types */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 570a7df2775b..2469ab0bb3a1 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -147,6 +147,7 @@ struct nfs_server { unsigned int acdirmax; unsigned int namelen; unsigned int options; /* extra options enabled by mount */ + unsigned int clone_blksize; /* granularity of a CLONE operation */ #define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */ #define NFS_OPTION_MIGRATION 0x00000002 /* - NFSv4 migration enabled */ @@ -243,5 +244,6 @@ struct nfs_server { #define NFS_CAP_ALLOCATE (1U << 20) #define NFS_CAP_DEALLOCATE (1U << 21) #define NFS_CAP_LAYOUTSTATS (1U << 22) +#define NFS_CAP_CLONE (1U << 23) #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 52faf7e96c65..570d630f98ae 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -141,6 +141,7 @@ struct nfs_fsinfo { __u32 lease_time; /* in seconds */ __u32 layouttype; /* supported pnfs layout driver */ __u32 blksize; /* preferred pnfs io block size */ + __u32 clone_blksize; /* granularity of a CLONE operation */ }; struct nfs_fsstat { @@ -359,6 +360,25 @@ struct nfs42_layoutstat_data { struct nfs42_layoutstat_res res; }; +struct nfs42_clone_args { + struct nfs4_sequence_args seq_args; + struct nfs_fh *src_fh; + struct nfs_fh *dst_fh; + nfs4_stateid src_stateid; + nfs4_stateid dst_stateid; + __u64 src_offset; + __u64 dst_offset; + __u64 count; + const u32 *dst_bitmask; +}; + +struct nfs42_clone_res { + struct nfs4_sequence_res seq_res; + unsigned int rpc_status; + struct nfs_fattr *dst_fattr; + const struct nfs_server *server; +}; + struct stateowner_id { __u64 create_time; __u32 uniquifier; @@ -528,7 +548,7 @@ struct nfs4_delegreturnargs { struct nfs4_delegreturnres { struct nfs4_sequence_res seq_res; struct nfs_fattr * fattr; - const struct nfs_server *server; + struct nfs_server *server; }; /* @@ -601,7 +621,7 @@ struct nfs_removeargs { struct nfs_removeres { struct nfs4_sequence_res seq_res; - const struct nfs_server *server; + struct nfs_server *server; struct nfs_fattr *dir_attr; struct nfs4_change_info cinfo; }; @@ -619,7 +639,7 @@ struct nfs_renameargs { struct nfs_renameres { struct nfs4_sequence_res seq_res; - const struct nfs_server *server; + struct nfs_server *server; struct nfs4_change_info old_cinfo; struct nfs_fattr *old_fattr; struct nfs4_change_info new_cinfo; @@ -685,7 +705,6 @@ struct nfs_setaclargs { struct nfs4_sequence_args seq_args; struct nfs_fh * fh; size_t acl_len; - unsigned int acl_pgbase; struct page ** acl_pages; }; @@ -697,7 +716,6 @@ struct nfs_getaclargs { struct nfs4_sequence_args seq_args; struct nfs_fh * fh; size_t acl_len; - unsigned int acl_pgbase; struct page ** acl_pages; }; diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 8df43c9f11dc..4397a4824c81 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -38,6 +38,11 @@ void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); +/* Socket backchannel transport methods */ +int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs); +void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs); +void xprt_free_bc_rqst(struct rpc_rqst *req); + /* * Determine if a shared backchannel is in use */ diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 1e4438ea2380..f869807a0d0e 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -226,9 +226,13 @@ extern void svc_rdma_put_frmr(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); extern void svc_sq_reap(struct svcxprt_rdma *); extern void svc_rq_reap(struct svcxprt_rdma *); -extern struct svc_xprt_class svc_rdma_class; extern void svc_rdma_prep_reply_hdr(struct svc_rqst *); +extern struct svc_xprt_class svc_rdma_class; +#ifdef CONFIG_SUNRPC_BACKCHANNEL +extern struct svc_xprt_class svc_rdma_bc_class; +#endif + /* svc_rdma.c */ extern int svc_rdma_init(void); extern void svc_rdma_cleanup(void); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 0fb9acbb4780..69ef5b3ab038 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -54,6 +54,8 @@ enum rpc_display_format_t { struct rpc_task; struct rpc_xprt; struct seq_file; +struct svc_serv; +struct net; /* * This describes a complete RPC request @@ -136,6 +138,12 @@ struct rpc_xprt_ops { int (*enable_swap)(struct rpc_xprt *xprt); void (*disable_swap)(struct rpc_xprt *xprt); void (*inject_disconnect)(struct rpc_xprt *xprt); + int (*bc_setup)(struct rpc_xprt *xprt, + unsigned int min_reqs); + int (*bc_up)(struct svc_serv *serv, struct net *net); + void (*bc_free_rqst)(struct rpc_rqst *rqst); + void (*bc_destroy)(struct rpc_xprt *xprt, + unsigned int max_reqs); }; /* @@ -153,6 +161,7 @@ enum xprt_transports { XPRT_TRANSPORT_TCP = IPPROTO_TCP, XPRT_TRANSPORT_BC_TCP = IPPROTO_TCP | XPRT_TRANSPORT_BC, XPRT_TRANSPORT_RDMA = 256, + XPRT_TRANSPORT_BC_RDMA = XPRT_TRANSPORT_RDMA | XPRT_TRANSPORT_BC, XPRT_TRANSPORT_LOCAL = 257, }; diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 357e44c1a46b..0ece4ba06f06 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -44,6 +44,8 @@ struct sock_xprt { */ unsigned long sock_state; struct delayed_work connect_worker; + struct work_struct recv_worker; + struct mutex recv_mutex; struct sockaddr_storage srcaddr; unsigned short srcport; diff --git a/include/uapi/linux/nfs.h b/include/uapi/linux/nfs.h index 5199a36dd574..654bae3f1a38 100644 --- a/include/uapi/linux/nfs.h +++ b/include/uapi/linux/nfs.h @@ -7,6 +7,8 @@ #ifndef _UAPI_LINUX_NFS_H #define _UAPI_LINUX_NFS_H +#include <linux/types.h> + #define NFS_PROGRAM 100003 #define NFS_PORT 2049 #define NFS_MAXDATA 8192 @@ -31,6 +33,17 @@ #define NFS_PIPE_DIRNAME "nfs" +/* NFS ioctls */ +/* Let's follow btrfs lead on CLONE to avoid messing userspace */ +#define NFS_IOC_CLONE _IOW(0x94, 9, int) +#define NFS_IOC_CLONE_RANGE _IOW(0x94, 13, int) + +struct nfs_ioctl_clone_range_args { + __s64 src_fd; + __u64 src_off, count; + __u64 dst_off; +}; + /* * NFS stats. The good thing with these values is that NFSv3 errors are * a superset of NFSv2 errors (with the exception of NFSERR_WFLUSH which diff --git a/kernel/params.c b/kernel/params.c index 93a380a2345d..a6d6149c0fe6 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -223,7 +223,7 @@ char *parse_args(const char *doing, int (*unknown)(char *param, char *val, const char *doing, void *arg)) { - char *param, *val; + char *param, *val, *err = NULL; /* Chew leading spaces */ args = skip_spaces(args); @@ -238,7 +238,7 @@ char *parse_args(const char *doing, args = next_arg(args, ¶m, &val); /* Stop at -- */ if (!val && strcmp(param, "--") == 0) - return args; + return err ?: args; irq_was_disabled = irqs_disabled(); ret = parse_one(param, val, doing, params, num, min_level, max_level, arg, unknown); @@ -247,24 +247,25 @@ char *parse_args(const char *doing, doing, param); switch (ret) { + case 0: + continue; case -ENOENT: pr_err("%s: Unknown parameter `%s'\n", doing, param); - return ERR_PTR(ret); + break; case -ENOSPC: pr_err("%s: `%s' too large for parameter `%s'\n", doing, val ?: "", param); - return ERR_PTR(ret); - case 0: break; default: pr_err("%s: `%s' invalid for parameter `%s'\n", doing, val ?: "", param); - return ERR_PTR(ret); + break; } + + err = ERR_PTR(ret); } - /* All parsed OK. */ - return NULL; + return err; } /* Lazy bastard, eh? */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 16bf3bc25e3e..8c15b29d5adc 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -312,6 +312,15 @@ config DEBUG_SECTION_MISMATCH - Enable verbose reporting from modpost in order to help resolve the section mismatches that are reported. +config SECTION_MISMATCH_WARN_ONLY + bool "Make section mismatch errors non-fatal" + default y + help + If you say N here, the build process will fail if there are any + section mismatch, instead of just throwing warnings. + + If unsure, say Y. + # # Select this config option from the architecture Kconfig, if it # is preferred to always offer frame pointers as a config diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 6255d141133b..229956bf8457 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -138,6 +138,14 @@ out_free: */ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) { + if (!xprt->ops->bc_setup) + return 0; + return xprt->ops->bc_setup(xprt, min_reqs); +} +EXPORT_SYMBOL_GPL(xprt_setup_backchannel); + +int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs) +{ struct rpc_rqst *req; struct list_head tmp_list; int i; @@ -192,7 +200,6 @@ out_free: dprintk("RPC: setup backchannel transport failed\n"); return -ENOMEM; } -EXPORT_SYMBOL_GPL(xprt_setup_backchannel); /** * xprt_destroy_backchannel - Destroys the backchannel preallocated structures. @@ -205,6 +212,13 @@ EXPORT_SYMBOL_GPL(xprt_setup_backchannel); */ void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs) { + if (xprt->ops->bc_destroy) + xprt->ops->bc_destroy(xprt, max_reqs); +} +EXPORT_SYMBOL_GPL(xprt_destroy_backchannel); + +void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs) +{ struct rpc_rqst *req = NULL, *tmp = NULL; dprintk("RPC: destroy backchannel transport\n"); @@ -227,7 +241,6 @@ out: dprintk("RPC: backchannel list empty= %s\n", list_empty(&xprt->bc_pa_list) ? "true" : "false"); } -EXPORT_SYMBOL_GPL(xprt_destroy_backchannel); static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid) { @@ -264,6 +277,13 @@ void xprt_free_bc_request(struct rpc_rqst *req) { struct rpc_xprt *xprt = req->rq_xprt; + xprt->ops->bc_free_rqst(req); +} + +void xprt_free_bc_rqst(struct rpc_rqst *req) +{ + struct rpc_xprt *xprt = req->rq_xprt; + dprintk("RPC: free backchannel req=%p\n", req); req->rq_connect_cookie = xprt->connect_cookie - 1; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index a8f579df14d8..bc5b7b5032ca 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1367,11 +1367,6 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, /* reset result send buffer "put" position */ resv->iov_len = 0; - if (rqstp->rq_prot != IPPROTO_TCP) { - printk(KERN_ERR "No support for Non-TCP transports!\n"); - BUG(); - } - /* * Skip the next two words because they've already been * processed in the transport diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 887f0183b4c6..c88d9bc06f5c 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -76,7 +76,7 @@ static int proc_dodebug(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - char tmpbuf[20], c, *s; + char tmpbuf[20], c, *s = NULL; char __user *p; unsigned int value; size_t left, len; @@ -103,23 +103,24 @@ proc_dodebug(struct ctl_table *table, int write, return -EFAULT; tmpbuf[left] = '\0'; - for (s = tmpbuf, value = 0; '0' <= *s && *s <= '9'; s++, left--) - value = 10 * value + (*s - '0'); - if (*s && !isspace(*s)) - return -EINVAL; - while (left && isspace(*s)) - left--, s++; + value = simple_strtol(tmpbuf, &s, 0); + if (s) { + left -= (s - tmpbuf); + if (left && !isspace(*s)) + return -EINVAL; + while (left && isspace(*s)) + left--, s++; + } else + left = 0; *(unsigned int *) table->data = value; /* Display the RPC tasks on writing to rpc_debug */ if (strcmp(table->procname, "rpc_debug") == 0) rpc_show_tasks(&init_net); } else { - if (!access_ok(VERIFY_WRITE, buffer, left)) - return -EFAULT; - len = sprintf(tmpbuf, "%d", *(unsigned int *) table->data); + len = sprintf(tmpbuf, "0x%04x", *(unsigned int *) table->data); if (len > left) len = left; - if (__copy_to_user(buffer, tmpbuf, len)) + if (copy_to_user(buffer, tmpbuf, len)) return -EFAULT; if ((left -= len) > 0) { if (put_user('\n', (char __user *)buffer + len)) diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile index 48913de240bd..33f99d3004f2 100644 --- a/net/sunrpc/xprtrdma/Makefile +++ b/net/sunrpc/xprtrdma/Makefile @@ -5,3 +5,4 @@ rpcrdma-y := transport.o rpc_rdma.o verbs.o \ svc_rdma.o svc_rdma_transport.o \ svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \ module.o +rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c new file mode 100644 index 000000000000..2dcb44f69e53 --- /dev/null +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2015 Oracle. All rights reserved. + * + * Support for backward direction RPCs on RPC/RDMA. + */ + +#include <linux/module.h> +#include <linux/sunrpc/xprt.h> +#include <linux/sunrpc/svc.h> +#include <linux/sunrpc/svc_xprt.h> + +#include "xprt_rdma.h" + +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) +# define RPCDBG_FACILITY RPCDBG_TRANS +#endif + +#define RPCRDMA_BACKCHANNEL_DEBUG + +static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt, + struct rpc_rqst *rqst) +{ + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); + + spin_lock(&buf->rb_reqslock); + list_del(&req->rl_all); + spin_unlock(&buf->rb_reqslock); + + rpcrdma_destroy_req(&r_xprt->rx_ia, req); + + kfree(rqst); +} + +static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, + struct rpc_rqst *rqst) +{ + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rpcrdma_regbuf *rb; + struct rpcrdma_req *req; + struct xdr_buf *buf; + size_t size; + + req = rpcrdma_create_req(r_xprt); + if (!req) + return -ENOMEM; + req->rl_backchannel = true; + + size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst); + rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL); + if (IS_ERR(rb)) + goto out_fail; + req->rl_rdmabuf = rb; + + size += RPCRDMA_INLINE_READ_THRESHOLD(rqst); + rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL); + if (IS_ERR(rb)) + goto out_fail; + rb->rg_owner = req; + req->rl_sendbuf = rb; + /* so that rpcr_to_rdmar works when receiving a request */ + rqst->rq_buffer = (void *)req->rl_sendbuf->rg_base; + + buf = &rqst->rq_snd_buf; + buf->head[0].iov_base = rqst->rq_buffer; + buf->head[0].iov_len = 0; + buf->tail[0].iov_base = NULL; + buf->tail[0].iov_len = 0; + buf->page_len = 0; + buf->len = 0; + buf->buflen = size; + + return 0; + +out_fail: + rpcrdma_bc_free_rqst(r_xprt, rqst); + return -ENOMEM; +} + +/* Allocate and add receive buffers to the rpcrdma_buffer's + * existing list of rep's. These are released when the + * transport is destroyed. + */ +static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, + unsigned int count) +{ + struct rpcrdma_buffer *buffers = &r_xprt->rx_buf; + struct rpcrdma_rep *rep; + unsigned long flags; + int rc = 0; + + while (count--) { + rep = rpcrdma_create_rep(r_xprt); + if (IS_ERR(rep)) { + pr_err("RPC: %s: reply buffer alloc failed\n", + __func__); + rc = PTR_ERR(rep); + break; + } + + spin_lock_irqsave(&buffers->rb_lock, flags); + list_add(&rep->rr_list, &buffers->rb_recv_bufs); + spin_unlock_irqrestore(&buffers->rb_lock, flags); + } + + return rc; +} + +/** + * xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests + * @xprt: transport associated with these backchannel resources + * @reqs: number of concurrent incoming requests to expect + * + * Returns 0 on success; otherwise a negative errno + */ +int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) +{ + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; + struct rpc_rqst *rqst; + unsigned int i; + int rc; + + /* The backchannel reply path returns each rpc_rqst to the + * bc_pa_list _after_ the reply is sent. If the server is + * faster than the client, it can send another backward + * direction request before the rpc_rqst is returned to the + * list. The client rejects the request in this case. + * + * Twice as many rpc_rqsts are prepared to ensure there is + * always an rpc_rqst available as soon as a reply is sent. + */ + if (reqs > RPCRDMA_BACKWARD_WRS >> 1) + goto out_err; + + for (i = 0; i < (reqs << 1); i++) { + rqst = kzalloc(sizeof(*rqst), GFP_KERNEL); + if (!rqst) { + pr_err("RPC: %s: Failed to create bc rpc_rqst\n", + __func__); + goto out_free; + } + + rqst->rq_xprt = &r_xprt->rx_xprt; + INIT_LIST_HEAD(&rqst->rq_list); + INIT_LIST_HEAD(&rqst->rq_bc_list); + + if (rpcrdma_bc_setup_rqst(r_xprt, rqst)) + goto out_free; + + spin_lock_bh(&xprt->bc_pa_lock); + list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); + spin_unlock_bh(&xprt->bc_pa_lock); + } + + rc = rpcrdma_bc_setup_reps(r_xprt, reqs); + if (rc) + goto out_free; + + rc = rpcrdma_ep_post_extra_recv(r_xprt, reqs); + if (rc) + goto out_free; + + buffer->rb_bc_srv_max_requests = reqs; + request_module("svcrdma"); + + return 0; + +out_free: + xprt_rdma_bc_destroy(xprt, reqs); + +out_err: + pr_err("RPC: %s: setup backchannel transport failed\n", __func__); + return -ENOMEM; +} + +/** + * xprt_rdma_bc_up - Create transport endpoint for backchannel service + * @serv: server endpoint + * @net: network namespace + * + * The "xprt" is an implied argument: it supplies the name of the + * backchannel transport class. + * + * Returns zero on success, negative errno on failure + */ +int xprt_rdma_bc_up(struct svc_serv *serv, struct net *net) +{ + int ret; + + ret = svc_create_xprt(serv, "rdma-bc", net, PF_INET, 0, 0); + if (ret < 0) + return ret; + return 0; +} + +/** + * rpcrdma_bc_marshal_reply - Send backwards direction reply + * @rqst: buffer containing RPC reply data + * + * Returns zero on success. + */ +int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) +{ + struct rpc_xprt *xprt = rqst->rq_xprt; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); + struct rpcrdma_msg *headerp; + size_t rpclen; + + headerp = rdmab_to_msg(req->rl_rdmabuf); + headerp->rm_xid = rqst->rq_xid; + headerp->rm_vers = rpcrdma_version; + headerp->rm_credit = + cpu_to_be32(r_xprt->rx_buf.rb_bc_srv_max_requests); + headerp->rm_type = rdma_msg; + headerp->rm_body.rm_chunks[0] = xdr_zero; + headerp->rm_body.rm_chunks[1] = xdr_zero; + headerp->rm_body.rm_chunks[2] = xdr_zero; + + rpclen = rqst->rq_svec[0].iov_len; + + pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n", + __func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf)); + pr_info("RPC: %s: RPC/RDMA: %*ph\n", + __func__, (int)RPCRDMA_HDRLEN_MIN, headerp); + pr_info("RPC: %s: RPC: %*ph\n", + __func__, (int)rpclen, rqst->rq_svec[0].iov_base); + + req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf); + req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN; + req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf); + + req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf); + req->rl_send_iov[1].length = rpclen; + req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf); + + req->rl_niovs = 2; + return 0; +} + +/** + * xprt_rdma_bc_destroy - Release resources for handling backchannel requests + * @xprt: transport associated with these backchannel resources + * @reqs: number of incoming requests to destroy; ignored + */ +void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs) +{ + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct rpc_rqst *rqst, *tmp; + + spin_lock_bh(&xprt->bc_pa_lock); + list_for_each_entry_safe(rqst, tmp, &xprt->bc_pa_list, rq_bc_pa_list) { + list_del(&rqst->rq_bc_pa_list); + spin_unlock_bh(&xprt->bc_pa_lock); + + rpcrdma_bc_free_rqst(r_xprt, rqst); + + spin_lock_bh(&xprt->bc_pa_lock); + } + spin_unlock_bh(&xprt->bc_pa_lock); +} + +/** + * xprt_rdma_bc_free_rqst - Release a backchannel rqst + * @rqst: request to release + */ +void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) +{ + struct rpc_xprt *xprt = rqst->rq_xprt; + + smp_mb__before_atomic(); + WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)); + clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); + smp_mb__after_atomic(); + + spin_lock_bh(&xprt->bc_pa_lock); + list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); + spin_unlock_bh(&xprt->bc_pa_lock); +} + +/** + * rpcrdma_bc_receive_call - Handle a backward direction call + * @xprt: transport receiving the call + * @rep: receive buffer containing the call + * + * Called in the RPC reply handler, which runs in a tasklet. + * Be quick about it. + * + * Operational assumptions: + * o Backchannel credits are ignored, just as the NFS server + * forechannel currently does + * o The ULP manages a replay cache (eg, NFSv4.1 sessions). + * No replay detection is done at the transport level + */ +void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_rep *rep) +{ + struct rpc_xprt *xprt = &r_xprt->rx_xprt; + struct rpcrdma_msg *headerp; + struct svc_serv *bc_serv; + struct rpcrdma_req *req; + struct rpc_rqst *rqst; + struct xdr_buf *buf; + size_t size; + __be32 *p; + + headerp = rdmab_to_msg(rep->rr_rdmabuf); +#ifdef RPCRDMA_BACKCHANNEL_DEBUG + pr_info("RPC: %s: callback XID %08x, length=%u\n", + __func__, be32_to_cpu(headerp->rm_xid), rep->rr_len); + pr_info("RPC: %s: %*ph\n", __func__, rep->rr_len, headerp); +#endif + + /* Sanity check: + * Need at least enough bytes for RPC/RDMA header, as code + * here references the header fields by array offset. Also, + * backward calls are always inline, so ensure there + * are some bytes beyond the RPC/RDMA header. + */ + if (rep->rr_len < RPCRDMA_HDRLEN_MIN + 24) + goto out_short; + p = (__be32 *)((unsigned char *)headerp + RPCRDMA_HDRLEN_MIN); + size = rep->rr_len - RPCRDMA_HDRLEN_MIN; + + /* Grab a free bc rqst */ + spin_lock(&xprt->bc_pa_lock); + if (list_empty(&xprt->bc_pa_list)) { + spin_unlock(&xprt->bc_pa_lock); + goto out_overflow; + } + rqst = list_first_entry(&xprt->bc_pa_list, + struct rpc_rqst, rq_bc_pa_list); + list_del(&rqst->rq_bc_pa_list); + spin_unlock(&xprt->bc_pa_lock); +#ifdef RPCRDMA_BACKCHANNEL_DEBUG + pr_info("RPC: %s: using rqst %p\n", __func__, rqst); +#endif + + /* Prepare rqst */ + rqst->rq_reply_bytes_recvd = 0; + rqst->rq_bytes_sent = 0; + rqst->rq_xid = headerp->rm_xid; + set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); + + buf = &rqst->rq_rcv_buf; + memset(buf, 0, sizeof(*buf)); + buf->head[0].iov_base = p; + buf->head[0].iov_len = size; + buf->len = size; + + /* The receive buffer has to be hooked to the rpcrdma_req + * so that it can be reposted after the server is done + * parsing it but just before sending the backward + * direction reply. + */ + req = rpcr_to_rdmar(rqst); +#ifdef RPCRDMA_BACKCHANNEL_DEBUG + pr_info("RPC: %s: attaching rep %p to req %p\n", + __func__, rep, req); +#endif + req->rl_reply = rep; + + /* Defeat the retransmit detection logic in send_request */ + req->rl_connect_cookie = 0; + + /* Queue rqst for ULP's callback service */ + bc_serv = xprt->bc_serv; + spin_lock(&bc_serv->sv_cb_lock); + list_add(&rqst->rq_bc_list, &bc_serv->sv_cb_list); + spin_unlock(&bc_serv->sv_cb_lock); + + wake_up(&bc_serv->sv_cb_waitq); + + r_xprt->rx_stats.bcall_count++; + return; + +out_overflow: + pr_warn("RPC/RDMA backchannel overflow\n"); + xprt_disconnect_done(xprt); + /* This receive buffer gets reposted automatically + * when the connection is re-established. + */ + return; + +out_short: + pr_warn("RPC/RDMA short backward direction call\n"); + + if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep)) + xprt_disconnect_done(xprt); + else + pr_warn("RPC: %s: reposting rep %p\n", + __func__, rep); +} diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index a1434447b0d6..88cf9e7269c2 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -256,8 +256,11 @@ frwr_sendcompletion(struct ib_wc *wc) /* WARNING: Only wr_id and status are reliable at this point */ r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; - pr_warn("RPC: %s: frmr %p flushed, status %s (%d)\n", - __func__, r, ib_wc_status_msg(wc->status), wc->status); + if (wc->status == IB_WC_WR_FLUSH_ERR) + dprintk("RPC: %s: frmr %p flushed\n", __func__, r); + else + pr_warn("RPC: %s: frmr %p error, status %s (%d)\n", + __func__, r, ib_wc_status_msg(wc->status), wc->status); r->r.frmr.fr_state = FRMR_IS_STALE; } diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index bc8bd6577467..c10d9699441c 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -441,6 +441,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) enum rpcrdma_chunktype rtype, wtype; struct rpcrdma_msg *headerp; +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)) + return rpcrdma_bc_marshal_reply(rqst); +#endif + /* * rpclen gets amount of data in first buffer, which is the * pre-registered buffer. @@ -711,6 +716,37 @@ rpcrdma_connect_worker(struct work_struct *work) spin_unlock_bh(&xprt->transport_lock); } +#if defined(CONFIG_SUNRPC_BACKCHANNEL) +/* By convention, backchannel calls arrive via rdma_msg type + * messages, and never populate the chunk lists. This makes + * the RPC/RDMA header small and fixed in size, so it is + * straightforward to check the RPC header's direction field. + */ +static bool +rpcrdma_is_bcall(struct rpcrdma_msg *headerp) +{ + __be32 *p = (__be32 *)headerp; + + if (headerp->rm_type != rdma_msg) + return false; + if (headerp->rm_body.rm_chunks[0] != xdr_zero) + return false; + if (headerp->rm_body.rm_chunks[1] != xdr_zero) + return false; + if (headerp->rm_body.rm_chunks[2] != xdr_zero) + return false; + + /* sanity */ + if (p[7] != headerp->rm_xid) + return false; + /* call direction */ + if (p[8] != cpu_to_be32(RPC_CALL)) + return false; + + return true; +} +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ + /* * This function is called when an async event is posted to * the connection which changes the connection state. All it @@ -723,8 +759,8 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep) schedule_delayed_work(&ep->rep_connect_worker, 0); } -/* - * Called as a tasklet to do req/reply match and complete a request +/* Process received RPC/RDMA messages. + * * Errors must result in the RPC task either being awakened, or * allowed to timeout, to discover the errors at that time. */ @@ -741,52 +777,32 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) unsigned long cwnd; u32 credits; - /* Check status. If bad, signal disconnect and return rep to pool */ - if (rep->rr_len == ~0U) { - rpcrdma_recv_buffer_put(rep); - if (r_xprt->rx_ep.rep_connected == 1) { - r_xprt->rx_ep.rep_connected = -EIO; - rpcrdma_conn_func(&r_xprt->rx_ep); - } - return; - } - if (rep->rr_len < RPCRDMA_HDRLEN_MIN) { - dprintk("RPC: %s: short/invalid reply\n", __func__); - goto repost; - } + dprintk("RPC: %s: incoming rep %p\n", __func__, rep); + + if (rep->rr_len == RPCRDMA_BAD_LEN) + goto out_badstatus; + if (rep->rr_len < RPCRDMA_HDRLEN_MIN) + goto out_shortreply; + headerp = rdmab_to_msg(rep->rr_rdmabuf); - if (headerp->rm_vers != rpcrdma_version) { - dprintk("RPC: %s: invalid version %d\n", - __func__, be32_to_cpu(headerp->rm_vers)); - goto repost; - } + if (headerp->rm_vers != rpcrdma_version) + goto out_badversion; +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + if (rpcrdma_is_bcall(headerp)) + goto out_bcall; +#endif - /* Get XID and try for a match. */ - spin_lock(&xprt->transport_lock); + /* Match incoming rpcrdma_rep to an rpcrdma_req to + * get context for handling any incoming chunks. + */ + spin_lock_bh(&xprt->transport_lock); rqst = xprt_lookup_rqst(xprt, headerp->rm_xid); - if (rqst == NULL) { - spin_unlock(&xprt->transport_lock); - dprintk("RPC: %s: reply 0x%p failed " - "to match any request xid 0x%08x len %d\n", - __func__, rep, be32_to_cpu(headerp->rm_xid), - rep->rr_len); -repost: - r_xprt->rx_stats.bad_reply_count++; - if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep)) - rpcrdma_recv_buffer_put(rep); + if (!rqst) + goto out_nomatch; - return; - } - - /* get request object */ req = rpcr_to_rdmar(rqst); - if (req->rl_reply) { - spin_unlock(&xprt->transport_lock); - dprintk("RPC: %s: duplicate reply 0x%p to RPC " - "request 0x%p: xid 0x%08x\n", __func__, rep, req, - be32_to_cpu(headerp->rm_xid)); - goto repost; - } + if (req->rl_reply) + goto out_duplicate; dprintk("RPC: %s: reply 0x%p completes request 0x%p\n" " RPC request 0x%p xid 0x%08x\n", @@ -883,8 +899,50 @@ badheader: if (xprt->cwnd > cwnd) xprt_release_rqst_cong(rqst->rq_task); + xprt_complete_rqst(rqst->rq_task, status); + spin_unlock_bh(&xprt->transport_lock); dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n", __func__, xprt, rqst, status); - xprt_complete_rqst(rqst->rq_task, status); - spin_unlock(&xprt->transport_lock); + return; + +out_badstatus: + rpcrdma_recv_buffer_put(rep); + if (r_xprt->rx_ep.rep_connected == 1) { + r_xprt->rx_ep.rep_connected = -EIO; + rpcrdma_conn_func(&r_xprt->rx_ep); + } + return; + +#if defined(CONFIG_SUNRPC_BACKCHANNEL) +out_bcall: + rpcrdma_bc_receive_call(r_xprt, rep); + return; +#endif + +out_shortreply: + dprintk("RPC: %s: short/invalid reply\n", __func__); + goto repost; + +out_badversion: + dprintk("RPC: %s: invalid version %d\n", + __func__, be32_to_cpu(headerp->rm_vers)); + goto repost; + +out_nomatch: + spin_unlock_bh(&xprt->transport_lock); + dprintk("RPC: %s: no match for incoming xid 0x%08x len %d\n", + __func__, be32_to_cpu(headerp->rm_xid), + rep->rr_len); + goto repost; + +out_duplicate: + spin_unlock_bh(&xprt->transport_lock); + dprintk("RPC: %s: " + "duplicate reply %p to RPC request %p: xid 0x%08x\n", + __func__, rep, req, be32_to_cpu(headerp->rm_xid)); + +repost: + r_xprt->rx_stats.bad_reply_count++; + if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep)) + rpcrdma_recv_buffer_put(rep); } diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 2cd252f023a5..1b7051bdbdc8 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -239,6 +239,9 @@ void svc_rdma_cleanup(void) unregister_sysctl_table(svcrdma_table_header); svcrdma_table_header = NULL; } +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + svc_unreg_xprt_class(&svc_rdma_bc_class); +#endif svc_unreg_xprt_class(&svc_rdma_class); kmem_cache_destroy(svc_rdma_map_cachep); kmem_cache_destroy(svc_rdma_ctxt_cachep); @@ -286,6 +289,9 @@ int svc_rdma_init(void) /* Register RDMA with the SVC transport switch */ svc_reg_xprt_class(&svc_rdma_class); +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + svc_reg_xprt_class(&svc_rdma_bc_class); +#endif return 0; err1: kmem_cache_destroy(svc_rdma_map_cachep); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index a266e870d870..b348b4adef29 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -56,6 +56,7 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT +static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *, int); static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, struct net *net, struct sockaddr *sa, int salen, @@ -95,6 +96,63 @@ struct svc_xprt_class svc_rdma_class = { .xcl_ident = XPRT_TRANSPORT_RDMA, }; +#if defined(CONFIG_SUNRPC_BACKCHANNEL) +static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *, struct net *, + struct sockaddr *, int, int); +static void svc_rdma_bc_detach(struct svc_xprt *); +static void svc_rdma_bc_free(struct svc_xprt *); + +static struct svc_xprt_ops svc_rdma_bc_ops = { + .xpo_create = svc_rdma_bc_create, + .xpo_detach = svc_rdma_bc_detach, + .xpo_free = svc_rdma_bc_free, + .xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr, + .xpo_secure_port = svc_rdma_secure_port, +}; + +struct svc_xprt_class svc_rdma_bc_class = { + .xcl_name = "rdma-bc", + .xcl_owner = THIS_MODULE, + .xcl_ops = &svc_rdma_bc_ops, + .xcl_max_payload = (1024 - RPCRDMA_HDRLEN_MIN) +}; + +static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv, + struct net *net, + struct sockaddr *sa, int salen, + int flags) +{ + struct svcxprt_rdma *cma_xprt; + struct svc_xprt *xprt; + + cma_xprt = rdma_create_xprt(serv, 0); + if (!cma_xprt) + return ERR_PTR(-ENOMEM); + xprt = &cma_xprt->sc_xprt; + + svc_xprt_init(net, &svc_rdma_bc_class, xprt, serv); + serv->sv_bc_xprt = xprt; + + dprintk("svcrdma: %s(%p)\n", __func__, xprt); + return xprt; +} + +static void svc_rdma_bc_detach(struct svc_xprt *xprt) +{ + dprintk("svcrdma: %s(%p)\n", __func__, xprt); +} + +static void svc_rdma_bc_free(struct svc_xprt *xprt) +{ + struct svcxprt_rdma *rdma = + container_of(xprt, struct svcxprt_rdma, sc_xprt); + + dprintk("svcrdma: %s(%p)\n", __func__, xprt); + if (xprt) + kfree(rdma); +} +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ + struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) { struct svc_rdma_op_ctxt *ctxt; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 41e452bc580c..8c545f7d7525 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -676,7 +676,7 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) static int xprt_rdma_enable_swap(struct rpc_xprt *xprt) { - return -EINVAL; + return 0; } static void @@ -705,7 +705,13 @@ static struct rpc_xprt_ops xprt_rdma_procs = { .print_stats = xprt_rdma_print_stats, .enable_swap = xprt_rdma_enable_swap, .disable_swap = xprt_rdma_disable_swap, - .inject_disconnect = xprt_rdma_inject_disconnect + .inject_disconnect = xprt_rdma_inject_disconnect, +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + .bc_setup = xprt_rdma_bc_setup, + .bc_up = xprt_rdma_bc_up, + .bc_free_rqst = xprt_rdma_bc_free_rqst, + .bc_destroy = xprt_rdma_bc_destroy, +#endif }; static struct xprt_class xprt_rdma = { @@ -732,6 +738,7 @@ void xprt_rdma_cleanup(void) dprintk("RPC: %s: xprt_unregister returned %i\n", __func__, rc); + rpcrdma_destroy_wq(); frwr_destroy_recovery_wq(); } @@ -743,8 +750,15 @@ int xprt_rdma_init(void) if (rc) return rc; + rc = rpcrdma_alloc_wq(); + if (rc) { + frwr_destroy_recovery_wq(); + return rc; + } + rc = xprt_register_transport(&xprt_rdma); if (rc) { + rpcrdma_destroy_wq(); frwr_destroy_recovery_wq(); return rc; } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index f63369bd01c5..eadd1655145a 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -68,47 +68,33 @@ * internal functions */ -/* - * handle replies in tasklet context, using a single, global list - * rdma tasklet function -- just turn around and call the func - * for all replies on the list - */ - -static DEFINE_SPINLOCK(rpcrdma_tk_lock_g); -static LIST_HEAD(rpcrdma_tasklets_g); +static struct workqueue_struct *rpcrdma_receive_wq; -static void -rpcrdma_run_tasklet(unsigned long data) +int +rpcrdma_alloc_wq(void) { - struct rpcrdma_rep *rep; - unsigned long flags; - - data = data; - spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); - while (!list_empty(&rpcrdma_tasklets_g)) { - rep = list_entry(rpcrdma_tasklets_g.next, - struct rpcrdma_rep, rr_list); - list_del(&rep->rr_list); - spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); + struct workqueue_struct *recv_wq; - rpcrdma_reply_handler(rep); + recv_wq = alloc_workqueue("xprtrdma_receive", + WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_HIGHPRI, + 0); + if (!recv_wq) + return -ENOMEM; - spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); - } - spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); + rpcrdma_receive_wq = recv_wq; + return 0; } -static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL); - -static void -rpcrdma_schedule_tasklet(struct list_head *sched_list) +void +rpcrdma_destroy_wq(void) { - unsigned long flags; + struct workqueue_struct *wq; - spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); - list_splice_tail(sched_list, &rpcrdma_tasklets_g); - spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); - tasklet_schedule(&rpcrdma_tasklet_g); + if (rpcrdma_receive_wq) { + wq = rpcrdma_receive_wq; + rpcrdma_receive_wq = NULL; + destroy_workqueue(wq); + } } static void @@ -158,63 +144,54 @@ rpcrdma_sendcq_process_wc(struct ib_wc *wc) } } -static int -rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) +/* The common case is a single send completion is waiting. By + * passing two WC entries to ib_poll_cq, a return code of 1 + * means there is exactly one WC waiting and no more. We don't + * have to invoke ib_poll_cq again to know that the CQ has been + * properly drained. + */ +static void +rpcrdma_sendcq_poll(struct ib_cq *cq) { - struct ib_wc *wcs; - int budget, count, rc; + struct ib_wc *pos, wcs[2]; + int count, rc; - budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE; do { - wcs = ep->rep_send_wcs; + pos = wcs; - rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs); - if (rc <= 0) - return rc; + rc = ib_poll_cq(cq, ARRAY_SIZE(wcs), pos); + if (rc < 0) + break; count = rc; while (count-- > 0) - rpcrdma_sendcq_process_wc(wcs++); - } while (rc == RPCRDMA_POLLSIZE && --budget); - return 0; + rpcrdma_sendcq_process_wc(pos++); + } while (rc == ARRAY_SIZE(wcs)); + return; } -/* - * Handle send, fast_reg_mr, and local_inv completions. - * - * Send events are typically suppressed and thus do not result - * in an upcall. Occasionally one is signaled, however. This - * prevents the provider's completion queue from wrapping and - * losing a completion. +/* Handle provider send completion upcalls. */ static void rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context) { - struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context; - int rc; - - rc = rpcrdma_sendcq_poll(cq, ep); - if (rc) { - dprintk("RPC: %s: ib_poll_cq failed: %i\n", - __func__, rc); - return; - } + do { + rpcrdma_sendcq_poll(cq); + } while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | + IB_CQ_REPORT_MISSED_EVENTS) > 0); +} - rc = ib_req_notify_cq(cq, - IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); - if (rc == 0) - return; - if (rc < 0) { - dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", - __func__, rc); - return; - } +static void +rpcrdma_receive_worker(struct work_struct *work) +{ + struct rpcrdma_rep *rep = + container_of(work, struct rpcrdma_rep, rr_work); - rpcrdma_sendcq_poll(cq, ep); + rpcrdma_reply_handler(rep); } static void -rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) +rpcrdma_recvcq_process_wc(struct ib_wc *wc) { struct rpcrdma_rep *rep = (struct rpcrdma_rep *)(unsigned long)wc->wr_id; @@ -237,91 +214,60 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) prefetch(rdmab_to_msg(rep->rr_rdmabuf)); out_schedule: - list_add_tail(&rep->rr_list, sched_list); + queue_work(rpcrdma_receive_wq, &rep->rr_work); return; + out_fail: if (wc->status != IB_WC_WR_FLUSH_ERR) pr_err("RPC: %s: rep %p: %s\n", __func__, rep, ib_wc_status_msg(wc->status)); - rep->rr_len = ~0U; + rep->rr_len = RPCRDMA_BAD_LEN; goto out_schedule; } -static int -rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) +/* The wc array is on stack: automatic memory is always CPU-local. + * + * struct ib_wc is 64 bytes, making the poll array potentially + * large. But this is at the bottom of the call chain. Further + * substantial work is done in another thread. + */ +static void +rpcrdma_recvcq_poll(struct ib_cq *cq) { - struct list_head sched_list; - struct ib_wc *wcs; - int budget, count, rc; + struct ib_wc *pos, wcs[4]; + int count, rc; - INIT_LIST_HEAD(&sched_list); - budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE; do { - wcs = ep->rep_recv_wcs; + pos = wcs; - rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs); - if (rc <= 0) - goto out_schedule; + rc = ib_poll_cq(cq, ARRAY_SIZE(wcs), pos); + if (rc < 0) + break; count = rc; while (count-- > 0) - rpcrdma_recvcq_process_wc(wcs++, &sched_list); - } while (rc == RPCRDMA_POLLSIZE && --budget); - rc = 0; - -out_schedule: - rpcrdma_schedule_tasklet(&sched_list); - return rc; + rpcrdma_recvcq_process_wc(pos++); + } while (rc == ARRAY_SIZE(wcs)); } -/* - * Handle receive completions. - * - * It is reentrant but processes single events in order to maintain - * ordering of receives to keep server credits. - * - * It is the responsibility of the scheduled tasklet to return - * recv buffers to the pool. NOTE: this affects synchronization of - * connection shutdown. That is, the structures required for - * the completion of the reply handler must remain intact until - * all memory has been reclaimed. +/* Handle provider receive completion upcalls. */ static void rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context) { - struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context; - int rc; - - rc = rpcrdma_recvcq_poll(cq, ep); - if (rc) { - dprintk("RPC: %s: ib_poll_cq failed: %i\n", - __func__, rc); - return; - } - - rc = ib_req_notify_cq(cq, - IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); - if (rc == 0) - return; - if (rc < 0) { - dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", - __func__, rc); - return; - } - - rpcrdma_recvcq_poll(cq, ep); + do { + rpcrdma_recvcq_poll(cq); + } while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | + IB_CQ_REPORT_MISSED_EVENTS) > 0); } static void rpcrdma_flush_cqs(struct rpcrdma_ep *ep) { struct ib_wc wc; - LIST_HEAD(sched_list); while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0) - rpcrdma_recvcq_process_wc(&wc, &sched_list); - if (!list_empty(&sched_list)) - rpcrdma_schedule_tasklet(&sched_list); + rpcrdma_recvcq_process_wc(&wc); while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0) rpcrdma_sendcq_process_wc(&wc); } @@ -623,6 +569,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, struct ib_device_attr *devattr = &ia->ri_devattr; struct ib_cq *sendcq, *recvcq; struct ib_cq_init_attr cq_attr = {}; + unsigned int max_qp_wr; int rc, err; if (devattr->max_sge < RPCRDMA_MAX_IOVS) { @@ -631,18 +578,27 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, return -ENOMEM; } + if (devattr->max_qp_wr <= RPCRDMA_BACKWARD_WRS) { + dprintk("RPC: %s: insufficient wqe's available\n", + __func__); + return -ENOMEM; + } + max_qp_wr = devattr->max_qp_wr - RPCRDMA_BACKWARD_WRS; + /* check provider's send/recv wr limits */ - if (cdata->max_requests > devattr->max_qp_wr) - cdata->max_requests = devattr->max_qp_wr; + if (cdata->max_requests > max_qp_wr) + cdata->max_requests = max_qp_wr; ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; ep->rep_attr.qp_context = ep; ep->rep_attr.srq = NULL; ep->rep_attr.cap.max_send_wr = cdata->max_requests; + ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; rc = ia->ri_ops->ro_open(ia, ep, cdata); if (rc) return rc; ep->rep_attr.cap.max_recv_wr = cdata->max_requests; + ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_IOVS; ep->rep_attr.cap.max_recv_sge = 1; ep->rep_attr.cap.max_inline_data = 0; @@ -670,7 +626,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, cq_attr.cqe = ep->rep_attr.cap.max_send_wr + 1; sendcq = ib_create_cq(ia->ri_device, rpcrdma_sendcq_upcall, - rpcrdma_cq_async_error_upcall, ep, &cq_attr); + rpcrdma_cq_async_error_upcall, NULL, &cq_attr); if (IS_ERR(sendcq)) { rc = PTR_ERR(sendcq); dprintk("RPC: %s: failed to create send CQ: %i\n", @@ -687,7 +643,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, cq_attr.cqe = ep->rep_attr.cap.max_recv_wr + 1; recvcq = ib_create_cq(ia->ri_device, rpcrdma_recvcq_upcall, - rpcrdma_cq_async_error_upcall, ep, &cq_attr); + rpcrdma_cq_async_error_upcall, NULL, &cq_attr); if (IS_ERR(recvcq)) { rc = PTR_ERR(recvcq); dprintk("RPC: %s: failed to create recv CQ: %i\n", @@ -886,7 +842,21 @@ retry: } rc = ep->rep_connected; } else { + struct rpcrdma_xprt *r_xprt; + unsigned int extras; + dprintk("RPC: %s: connected\n", __func__); + + r_xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); + extras = r_xprt->rx_buf.rb_bc_srv_max_requests; + + if (extras) { + rc = rpcrdma_ep_post_extra_recv(r_xprt, extras); + if (rc) + pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n", + __func__, rc); + rc = 0; + } } out: @@ -923,20 +893,25 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) } } -static struct rpcrdma_req * +struct rpcrdma_req * rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) { + struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; struct rpcrdma_req *req; req = kzalloc(sizeof(*req), GFP_KERNEL); if (req == NULL) return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&req->rl_free); + spin_lock(&buffer->rb_reqslock); + list_add(&req->rl_all, &buffer->rb_allreqs); + spin_unlock(&buffer->rb_reqslock); req->rl_buffer = &r_xprt->rx_buf; return req; } -static struct rpcrdma_rep * +struct rpcrdma_rep * rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; @@ -958,6 +933,7 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) rep->rr_device = ia->ri_device; rep->rr_rxprt = r_xprt; + INIT_WORK(&rep->rr_work, rpcrdma_receive_worker); return rep; out_free: @@ -971,44 +947,21 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_ia *ia = &r_xprt->rx_ia; - struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; - char *p; - size_t len; int i, rc; - buf->rb_max_requests = cdata->max_requests; + buf->rb_max_requests = r_xprt->rx_data.max_requests; + buf->rb_bc_srv_max_requests = 0; spin_lock_init(&buf->rb_lock); - /* Need to allocate: - * 1. arrays for send and recv pointers - * 2. arrays of struct rpcrdma_req to fill in pointers - * 3. array of struct rpcrdma_rep for replies - * Send/recv buffers in req/rep need to be registered - */ - len = buf->rb_max_requests * - (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); - - p = kzalloc(len, GFP_KERNEL); - if (p == NULL) { - dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n", - __func__, len); - rc = -ENOMEM; - goto out; - } - buf->rb_pool = p; /* for freeing it later */ - - buf->rb_send_bufs = (struct rpcrdma_req **) p; - p = (char *) &buf->rb_send_bufs[buf->rb_max_requests]; - buf->rb_recv_bufs = (struct rpcrdma_rep **) p; - p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests]; - rc = ia->ri_ops->ro_init(r_xprt); if (rc) goto out; + INIT_LIST_HEAD(&buf->rb_send_bufs); + INIT_LIST_HEAD(&buf->rb_allreqs); + spin_lock_init(&buf->rb_reqslock); for (i = 0; i < buf->rb_max_requests; i++) { struct rpcrdma_req *req; - struct rpcrdma_rep *rep; req = rpcrdma_create_req(r_xprt); if (IS_ERR(req)) { @@ -1017,7 +970,13 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) rc = PTR_ERR(req); goto out; } - buf->rb_send_bufs[i] = req; + req->rl_backchannel = false; + list_add(&req->rl_free, &buf->rb_send_bufs); + } + + INIT_LIST_HEAD(&buf->rb_recv_bufs); + for (i = 0; i < buf->rb_max_requests + 2; i++) { + struct rpcrdma_rep *rep; rep = rpcrdma_create_rep(r_xprt); if (IS_ERR(rep)) { @@ -1026,7 +985,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) rc = PTR_ERR(rep); goto out; } - buf->rb_recv_bufs[i] = rep; + list_add(&rep->rr_list, &buf->rb_recv_bufs); } return 0; @@ -1035,22 +994,38 @@ out: return rc; } +static struct rpcrdma_req * +rpcrdma_buffer_get_req_locked(struct rpcrdma_buffer *buf) +{ + struct rpcrdma_req *req; + + req = list_first_entry(&buf->rb_send_bufs, + struct rpcrdma_req, rl_free); + list_del(&req->rl_free); + return req; +} + +static struct rpcrdma_rep * +rpcrdma_buffer_get_rep_locked(struct rpcrdma_buffer *buf) +{ + struct rpcrdma_rep *rep; + + rep = list_first_entry(&buf->rb_recv_bufs, + struct rpcrdma_rep, rr_list); + list_del(&rep->rr_list); + return rep; +} + static void rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep) { - if (!rep) - return; - rpcrdma_free_regbuf(ia, rep->rr_rdmabuf); kfree(rep); } -static void +void rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req) { - if (!req) - return; - rpcrdma_free_regbuf(ia, req->rl_sendbuf); rpcrdma_free_regbuf(ia, req->rl_rdmabuf); kfree(req); @@ -1060,25 +1035,29 @@ void rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) { struct rpcrdma_ia *ia = rdmab_to_ia(buf); - int i; - /* clean up in reverse order from create - * 1. recv mr memory (mr free, then kfree) - * 2. send mr memory (mr free, then kfree) - * 3. MWs - */ - dprintk("RPC: %s: entering\n", __func__); + while (!list_empty(&buf->rb_recv_bufs)) { + struct rpcrdma_rep *rep; - for (i = 0; i < buf->rb_max_requests; i++) { - if (buf->rb_recv_bufs) - rpcrdma_destroy_rep(ia, buf->rb_recv_bufs[i]); - if (buf->rb_send_bufs) - rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]); + rep = rpcrdma_buffer_get_rep_locked(buf); + rpcrdma_destroy_rep(ia, rep); } - ia->ri_ops->ro_destroy(buf); + spin_lock(&buf->rb_reqslock); + while (!list_empty(&buf->rb_allreqs)) { + struct rpcrdma_req *req; + + req = list_first_entry(&buf->rb_allreqs, + struct rpcrdma_req, rl_all); + list_del(&req->rl_all); + + spin_unlock(&buf->rb_reqslock); + rpcrdma_destroy_req(ia, req); + spin_lock(&buf->rb_reqslock); + } + spin_unlock(&buf->rb_reqslock); - kfree(buf->rb_pool); + ia->ri_ops->ro_destroy(buf); } struct rpcrdma_mw * @@ -1110,53 +1089,34 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) spin_unlock(&buf->rb_mwlock); } -static void -rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) -{ - buf->rb_send_bufs[--buf->rb_send_index] = req; - req->rl_niovs = 0; - if (req->rl_reply) { - buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply; - req->rl_reply = NULL; - } -} - /* * Get a set of request/reply buffers. * - * Reply buffer (if needed) is attached to send buffer upon return. - * Rule: - * rb_send_index and rb_recv_index MUST always be pointing to the - * *next* available buffer (non-NULL). They are incremented after - * removing buffers, and decremented *before* returning them. + * Reply buffer (if available) is attached to send buffer upon return. */ struct rpcrdma_req * rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) { struct rpcrdma_req *req; - unsigned long flags; - - spin_lock_irqsave(&buffers->rb_lock, flags); - if (buffers->rb_send_index == buffers->rb_max_requests) { - spin_unlock_irqrestore(&buffers->rb_lock, flags); - dprintk("RPC: %s: out of request buffers\n", __func__); - return ((struct rpcrdma_req *)NULL); - } - - req = buffers->rb_send_bufs[buffers->rb_send_index]; - if (buffers->rb_send_index < buffers->rb_recv_index) { - dprintk("RPC: %s: %d extra receives outstanding (ok)\n", - __func__, - buffers->rb_recv_index - buffers->rb_send_index); - req->rl_reply = NULL; - } else { - req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index]; - buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL; - } - buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; + spin_lock(&buffers->rb_lock); + if (list_empty(&buffers->rb_send_bufs)) + goto out_reqbuf; + req = rpcrdma_buffer_get_req_locked(buffers); + if (list_empty(&buffers->rb_recv_bufs)) + goto out_repbuf; + req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers); + spin_unlock(&buffers->rb_lock); + return req; - spin_unlock_irqrestore(&buffers->rb_lock, flags); +out_reqbuf: + spin_unlock(&buffers->rb_lock); + pr_warn("RPC: %s: out of request buffers\n", __func__); + return NULL; +out_repbuf: + spin_unlock(&buffers->rb_lock); + pr_warn("RPC: %s: out of reply buffers\n", __func__); + req->rl_reply = NULL; return req; } @@ -1168,30 +1128,31 @@ void rpcrdma_buffer_put(struct rpcrdma_req *req) { struct rpcrdma_buffer *buffers = req->rl_buffer; - unsigned long flags; + struct rpcrdma_rep *rep = req->rl_reply; - spin_lock_irqsave(&buffers->rb_lock, flags); - rpcrdma_buffer_put_sendbuf(req, buffers); - spin_unlock_irqrestore(&buffers->rb_lock, flags); + req->rl_niovs = 0; + req->rl_reply = NULL; + + spin_lock(&buffers->rb_lock); + list_add_tail(&req->rl_free, &buffers->rb_send_bufs); + if (rep) + list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs); + spin_unlock(&buffers->rb_lock); } /* * Recover reply buffers from pool. - * This happens when recovering from error conditions. - * Post-increment counter/array index. + * This happens when recovering from disconnect. */ void rpcrdma_recv_buffer_get(struct rpcrdma_req *req) { struct rpcrdma_buffer *buffers = req->rl_buffer; - unsigned long flags; - spin_lock_irqsave(&buffers->rb_lock, flags); - if (buffers->rb_recv_index < buffers->rb_max_requests) { - req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index]; - buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL; - } - spin_unlock_irqrestore(&buffers->rb_lock, flags); + spin_lock(&buffers->rb_lock); + if (!list_empty(&buffers->rb_recv_bufs)) + req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers); + spin_unlock(&buffers->rb_lock); } /* @@ -1202,11 +1163,10 @@ void rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) { struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf; - unsigned long flags; - spin_lock_irqsave(&buffers->rb_lock, flags); - buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep; - spin_unlock_irqrestore(&buffers->rb_lock, flags); + spin_lock(&buffers->rb_lock); + list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs); + spin_unlock(&buffers->rb_lock); } /* @@ -1363,6 +1323,47 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, return rc; } +/** + * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests + * @r_xprt: transport associated with these backchannel resources + * @min_reqs: minimum number of incoming requests expected + * + * Returns zero if all requested buffers were posted, or a negative errno. + */ +int +rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count) +{ + struct rpcrdma_buffer *buffers = &r_xprt->rx_buf; + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rpcrdma_ep *ep = &r_xprt->rx_ep; + struct rpcrdma_rep *rep; + unsigned long flags; + int rc; + + while (count--) { + spin_lock_irqsave(&buffers->rb_lock, flags); + if (list_empty(&buffers->rb_recv_bufs)) + goto out_reqbuf; + rep = rpcrdma_buffer_get_rep_locked(buffers); + spin_unlock_irqrestore(&buffers->rb_lock, flags); + + rc = rpcrdma_ep_post_recv(ia, ep, rep); + if (rc) + goto out_rc; + } + + return 0; + +out_reqbuf: + spin_unlock_irqrestore(&buffers->rb_lock, flags); + pr_warn("%s: no extra receive buffers\n", __func__); + return -ENOMEM; + +out_rc: + rpcrdma_recv_buffer_put(rep); + return rc; +} + /* How many chunk list items fit within our inline buffers? */ unsigned int diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index c82abf44e39d..ac7f8d4f632a 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -77,9 +77,6 @@ struct rpcrdma_ia { * RDMA Endpoint -- one per transport instance */ -#define RPCRDMA_WC_BUDGET (128) -#define RPCRDMA_POLLSIZE (16) - struct rpcrdma_ep { atomic_t rep_cqcount; int rep_cqinit; @@ -89,8 +86,6 @@ struct rpcrdma_ep { struct rdma_conn_param rep_remote_cma; struct sockaddr_storage rep_remote_addr; struct delayed_work rep_connect_worker; - struct ib_wc rep_send_wcs[RPCRDMA_POLLSIZE]; - struct ib_wc rep_recv_wcs[RPCRDMA_POLLSIZE]; }; /* @@ -106,6 +101,16 @@ struct rpcrdma_ep { */ #define RPCRDMA_IGNORE_COMPLETION (0ULL) +/* Pre-allocate extra Work Requests for handling backward receives + * and sends. This is a fixed value because the Work Queues are + * allocated when the forward channel is set up. + */ +#if defined(CONFIG_SUNRPC_BACKCHANNEL) +#define RPCRDMA_BACKWARD_WRS (8) +#else +#define RPCRDMA_BACKWARD_WRS (0) +#endif + /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV * * The below structure appears at the front of a large region of kmalloc'd @@ -169,10 +174,13 @@ struct rpcrdma_rep { unsigned int rr_len; struct ib_device *rr_device; struct rpcrdma_xprt *rr_rxprt; + struct work_struct rr_work; struct list_head rr_list; struct rpcrdma_regbuf *rr_rdmabuf; }; +#define RPCRDMA_BAD_LEN (~0U) + /* * struct rpcrdma_mw - external memory region metadata * @@ -256,6 +264,7 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ #define RPCRDMA_MAX_IOVS (2) struct rpcrdma_req { + struct list_head rl_free; unsigned int rl_niovs; unsigned int rl_nchunks; unsigned int rl_connect_cookie; @@ -265,6 +274,9 @@ struct rpcrdma_req { struct rpcrdma_regbuf *rl_rdmabuf; struct rpcrdma_regbuf *rl_sendbuf; struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS]; + + struct list_head rl_all; + bool rl_backchannel; }; static inline struct rpcrdma_req * @@ -289,12 +301,14 @@ struct rpcrdma_buffer { struct list_head rb_all; char *rb_pool; - spinlock_t rb_lock; /* protect buf arrays */ + spinlock_t rb_lock; /* protect buf lists */ + struct list_head rb_send_bufs; + struct list_head rb_recv_bufs; u32 rb_max_requests; - int rb_send_index; - int rb_recv_index; - struct rpcrdma_req **rb_send_bufs; - struct rpcrdma_rep **rb_recv_bufs; + + u32 rb_bc_srv_max_requests; + spinlock_t rb_reqslock; /* protect rb_allreqs */ + struct list_head rb_allreqs; }; #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) @@ -340,6 +354,7 @@ struct rpcrdma_stats { unsigned long failed_marshal_count; unsigned long bad_reply_count; unsigned long nomsg_call_count; + unsigned long bcall_count; }; /* @@ -415,6 +430,9 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *, /* * Buffer calls - xprtrdma/verbs.c */ +struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); +struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *); +void rpcrdma_destroy_req(struct rpcrdma_ia *, struct rpcrdma_req *); int rpcrdma_buffer_create(struct rpcrdma_xprt *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); @@ -431,10 +449,14 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *); unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *); +int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int); int frwr_alloc_recovery_wq(void); void frwr_destroy_recovery_wq(void); +int rpcrdma_alloc_wq(void); +void rpcrdma_destroy_wq(void); + /* * Wrappers for chunk registration, shared by read/write chunk code. */ @@ -495,6 +517,18 @@ int rpcrdma_marshal_req(struct rpc_rqst *); int xprt_rdma_init(void); void xprt_rdma_cleanup(void); +/* Backchannel calls - xprtrdma/backchannel.c + */ +#if defined(CONFIG_SUNRPC_BACKCHANNEL) +int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int); +int xprt_rdma_bc_up(struct svc_serv *, struct net *); +int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); +void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); +int rpcrdma_bc_marshal_reply(struct rpc_rqst *); +void xprt_rdma_bc_free_rqst(struct rpc_rqst *); +void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ + /* Temporary NFS request map cache. Created in svc_rdma.c */ extern struct kmem_cache *svc_rdma_map_cachep; /* WR context cache. Created in svc_rdma.c */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 1a85e0ed0b48..1d1a70498910 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -360,8 +360,10 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i int flags = XS_SENDMSG_FLAGS; remainder -= len; - if (remainder != 0 || more) + if (more) flags |= MSG_MORE; + if (remainder != 0) + flags |= MSG_SENDPAGE_NOTLAST | MSG_MORE; err = do_sendpage(sock, *ppage, base, len, flags); if (remainder == 0 || err != len) break; @@ -823,6 +825,7 @@ static void xs_reset_transport(struct sock_xprt *transport) kernel_sock_shutdown(sock, SHUT_RDWR); + mutex_lock(&transport->recv_mutex); write_lock_bh(&sk->sk_callback_lock); transport->inet = NULL; transport->sock = NULL; @@ -833,6 +836,7 @@ static void xs_reset_transport(struct sock_xprt *transport) xprt_clear_connected(xprt); write_unlock_bh(&sk->sk_callback_lock); xs_sock_reset_connection_flags(xprt); + mutex_unlock(&transport->recv_mutex); trace_rpc_socket_close(xprt, sock); sock_release(sock); @@ -886,6 +890,7 @@ static void xs_destroy(struct rpc_xprt *xprt) cancel_delayed_work_sync(&transport->connect_worker); xs_close(xprt); + cancel_work_sync(&transport->recv_worker); xs_xprt_free(xprt); module_put(THIS_MODULE); } @@ -906,44 +911,36 @@ static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) } /** - * xs_local_data_ready - "data ready" callback for AF_LOCAL sockets - * @sk: socket with data to read + * xs_local_data_read_skb + * @xprt: transport + * @sk: socket + * @skb: skbuff * * Currently this assumes we can read the whole reply in a single gulp. */ -static void xs_local_data_ready(struct sock *sk) +static void xs_local_data_read_skb(struct rpc_xprt *xprt, + struct sock *sk, + struct sk_buff *skb) { struct rpc_task *task; - struct rpc_xprt *xprt; struct rpc_rqst *rovr; - struct sk_buff *skb; - int err, repsize, copied; + int repsize, copied; u32 _xid; __be32 *xp; - read_lock_bh(&sk->sk_callback_lock); - dprintk("RPC: %s...\n", __func__); - xprt = xprt_from_sock(sk); - if (xprt == NULL) - goto out; - - skb = skb_recv_datagram(sk, 0, 1, &err); - if (skb == NULL) - goto out; - repsize = skb->len - sizeof(rpc_fraghdr); if (repsize < 4) { dprintk("RPC: impossible RPC reply size %d\n", repsize); - goto dropit; + return; } /* Copy the XID from the skb... */ xp = skb_header_pointer(skb, sizeof(rpc_fraghdr), sizeof(_xid), &_xid); if (xp == NULL) - goto dropit; + return; /* Look up and lock the request corresponding to the given XID */ - spin_lock(&xprt->transport_lock); + spin_lock_bh(&xprt->transport_lock); rovr = xprt_lookup_rqst(xprt, *xp); if (!rovr) goto out_unlock; @@ -961,50 +958,68 @@ static void xs_local_data_ready(struct sock *sk) xprt_complete_rqst(task, copied); out_unlock: - spin_unlock(&xprt->transport_lock); - dropit: - skb_free_datagram(sk, skb); - out: - read_unlock_bh(&sk->sk_callback_lock); + spin_unlock_bh(&xprt->transport_lock); +} + +static void xs_local_data_receive(struct sock_xprt *transport) +{ + struct sk_buff *skb; + struct sock *sk; + int err; + + mutex_lock(&transport->recv_mutex); + sk = transport->inet; + if (sk == NULL) + goto out; + for (;;) { + skb = skb_recv_datagram(sk, 0, 1, &err); + if (skb == NULL) + break; + xs_local_data_read_skb(&transport->xprt, sk, skb); + skb_free_datagram(sk, skb); + } +out: + mutex_unlock(&transport->recv_mutex); +} + +static void xs_local_data_receive_workfn(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, recv_worker); + xs_local_data_receive(transport); } /** - * xs_udp_data_ready - "data ready" callback for UDP sockets - * @sk: socket with data to read + * xs_udp_data_read_skb - receive callback for UDP sockets + * @xprt: transport + * @sk: socket + * @skb: skbuff * */ -static void xs_udp_data_ready(struct sock *sk) +static void xs_udp_data_read_skb(struct rpc_xprt *xprt, + struct sock *sk, + struct sk_buff *skb) { struct rpc_task *task; - struct rpc_xprt *xprt; struct rpc_rqst *rovr; - struct sk_buff *skb; - int err, repsize, copied; + int repsize, copied; u32 _xid; __be32 *xp; - read_lock_bh(&sk->sk_callback_lock); - dprintk("RPC: xs_udp_data_ready...\n"); - if (!(xprt = xprt_from_sock(sk))) - goto out; - - if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) - goto out; - repsize = skb->len - sizeof(struct udphdr); if (repsize < 4) { dprintk("RPC: impossible RPC reply size %d!\n", repsize); - goto dropit; + return; } /* Copy the XID from the skb... */ xp = skb_header_pointer(skb, sizeof(struct udphdr), sizeof(_xid), &_xid); if (xp == NULL) - goto dropit; + return; /* Look up and lock the request corresponding to the given XID */ - spin_lock(&xprt->transport_lock); + spin_lock_bh(&xprt->transport_lock); rovr = xprt_lookup_rqst(xprt, *xp); if (!rovr) goto out_unlock; @@ -1025,10 +1040,54 @@ static void xs_udp_data_ready(struct sock *sk) xprt_complete_rqst(task, copied); out_unlock: - spin_unlock(&xprt->transport_lock); - dropit: - skb_free_datagram(sk, skb); - out: + spin_unlock_bh(&xprt->transport_lock); +} + +static void xs_udp_data_receive(struct sock_xprt *transport) +{ + struct sk_buff *skb; + struct sock *sk; + int err; + + mutex_lock(&transport->recv_mutex); + sk = transport->inet; + if (sk == NULL) + goto out; + for (;;) { + skb = skb_recv_datagram(sk, 0, 1, &err); + if (skb == NULL) + break; + xs_udp_data_read_skb(&transport->xprt, sk, skb); + skb_free_datagram(sk, skb); + } +out: + mutex_unlock(&transport->recv_mutex); +} + +static void xs_udp_data_receive_workfn(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, recv_worker); + xs_udp_data_receive(transport); +} + +/** + * xs_data_ready - "data ready" callback for UDP sockets + * @sk: socket with data to read + * + */ +static void xs_data_ready(struct sock *sk) +{ + struct rpc_xprt *xprt; + + read_lock_bh(&sk->sk_callback_lock); + dprintk("RPC: xs_data_ready...\n"); + xprt = xprt_from_sock(sk); + if (xprt != NULL) { + struct sock_xprt *transport = container_of(xprt, + struct sock_xprt, xprt); + queue_work(rpciod_workqueue, &transport->recv_worker); + } read_unlock_bh(&sk->sk_callback_lock); } @@ -1243,12 +1302,12 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid)); /* Find and lock the request corresponding to this xid */ - spin_lock(&xprt->transport_lock); + spin_lock_bh(&xprt->transport_lock); req = xprt_lookup_rqst(xprt, transport->tcp_xid); if (!req) { dprintk("RPC: XID %08x request not found!\n", ntohl(transport->tcp_xid)); - spin_unlock(&xprt->transport_lock); + spin_unlock_bh(&xprt->transport_lock); return -1; } @@ -1257,7 +1316,7 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) xprt_complete_rqst(req->rq_task, transport->tcp_copied); - spin_unlock(&xprt->transport_lock); + spin_unlock_bh(&xprt->transport_lock); return 0; } @@ -1277,10 +1336,10 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt, struct rpc_rqst *req; /* Look up and lock the request corresponding to the given XID */ - spin_lock(&xprt->transport_lock); + spin_lock_bh(&xprt->transport_lock); req = xprt_lookup_bc_request(xprt, transport->tcp_xid); if (req == NULL) { - spin_unlock(&xprt->transport_lock); + spin_unlock_bh(&xprt->transport_lock); printk(KERN_WARNING "Callback slot table overflowed\n"); xprt_force_disconnect(xprt); return -1; @@ -1291,7 +1350,7 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt, if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) xprt_complete_bc_request(req, transport->tcp_copied); - spin_unlock(&xprt->transport_lock); + spin_unlock_bh(&xprt->transport_lock); return 0; } @@ -1306,6 +1365,17 @@ static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, xs_tcp_read_reply(xprt, desc) : xs_tcp_read_callback(xprt, desc); } + +static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net) +{ + int ret; + + ret = svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0, + SVC_SOCK_ANONYMOUS); + if (ret < 0) + return ret; + return 0; +} #else static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) @@ -1391,6 +1461,44 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns return len - desc.count; } +static void xs_tcp_data_receive(struct sock_xprt *transport) +{ + struct rpc_xprt *xprt = &transport->xprt; + struct sock *sk; + read_descriptor_t rd_desc = { + .count = 2*1024*1024, + .arg.data = xprt, + }; + unsigned long total = 0; + int read = 0; + + mutex_lock(&transport->recv_mutex); + sk = transport->inet; + if (sk == NULL) + goto out; + + /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ + for (;;) { + lock_sock(sk); + read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); + release_sock(sk); + if (read <= 0) + break; + total += read; + rd_desc.count = 65536; + } +out: + mutex_unlock(&transport->recv_mutex); + trace_xs_tcp_data_ready(xprt, read, total); +} + +static void xs_tcp_data_receive_workfn(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, recv_worker); + xs_tcp_data_receive(transport); +} + /** * xs_tcp_data_ready - "data ready" callback for TCP sockets * @sk: socket with data to read @@ -1398,34 +1506,24 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns */ static void xs_tcp_data_ready(struct sock *sk) { + struct sock_xprt *transport; struct rpc_xprt *xprt; - read_descriptor_t rd_desc; - int read; - unsigned long total = 0; dprintk("RPC: xs_tcp_data_ready...\n"); read_lock_bh(&sk->sk_callback_lock); - if (!(xprt = xprt_from_sock(sk))) { - read = 0; + if (!(xprt = xprt_from_sock(sk))) goto out; - } + transport = container_of(xprt, struct sock_xprt, xprt); + /* Any data means we had a useful conversation, so * the we don't need to delay the next reconnect */ if (xprt->reestablish_timeout) xprt->reestablish_timeout = 0; + queue_work(rpciod_workqueue, &transport->recv_worker); - /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ - rd_desc.arg.data = xprt; - do { - rd_desc.count = 65536; - read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); - if (read > 0) - total += read; - } while (read > 0); out: - trace_xs_tcp_data_ready(xprt, read, total); read_unlock_bh(&sk->sk_callback_lock); } @@ -1873,7 +1971,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, xs_save_old_callbacks(transport, sk); sk->sk_user_data = xprt; - sk->sk_data_ready = xs_local_data_ready; + sk->sk_data_ready = xs_data_ready; sk->sk_write_space = xs_udp_write_space; sk->sk_error_report = xs_error_report; sk->sk_allocation = GFP_NOIO; @@ -2059,7 +2157,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) xs_save_old_callbacks(transport, sk); sk->sk_user_data = xprt; - sk->sk_data_ready = xs_udp_data_ready; + sk->sk_data_ready = xs_data_ready; sk->sk_write_space = xs_udp_write_space; sk->sk_allocation = GFP_NOIO; @@ -2472,7 +2570,7 @@ static int bc_send_request(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct svc_xprt *xprt; - u32 len; + int len; dprintk("sending request with xid: %08x\n", ntohl(req->rq_xid)); /* @@ -2580,6 +2678,12 @@ static struct rpc_xprt_ops xs_tcp_ops = { .enable_swap = xs_enable_swap, .disable_swap = xs_disable_swap, .inject_disconnect = xs_inject_disconnect, +#ifdef CONFIG_SUNRPC_BACKCHANNEL + .bc_setup = xprt_setup_bc, + .bc_up = xs_tcp_bc_up, + .bc_free_rqst = xprt_free_bc_rqst, + .bc_destroy = xprt_destroy_bc, +#endif }; /* @@ -2650,6 +2754,7 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, } new = container_of(xprt, struct sock_xprt, xprt); + mutex_init(&new->recv_mutex); memcpy(&xprt->addr, args->dstaddr, args->addrlen); xprt->addrlen = args->addrlen; if (args->srcaddr) @@ -2703,6 +2808,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) xprt->ops = &xs_local_ops; xprt->timeout = &xs_local_default_timeout; + INIT_WORK(&transport->recv_worker, xs_local_data_receive_workfn); INIT_DELAYED_WORK(&transport->connect_worker, xs_dummy_setup_socket); @@ -2774,21 +2880,20 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) xprt->timeout = &xs_udp_default_timeout; + INIT_WORK(&transport->recv_worker, xs_udp_data_receive_workfn); + INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_setup_socket); + switch (addr->sa_family) { case AF_INET: if (((struct sockaddr_in *)addr)->sin_port != htons(0)) xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, - xs_udp_setup_socket); xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP); break; case AF_INET6: if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, - xs_udp_setup_socket); xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); break; default: @@ -2853,21 +2958,20 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) xprt->ops = &xs_tcp_ops; xprt->timeout = &xs_tcp_default_timeout; + INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn); + INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket); + switch (addr->sa_family) { case AF_INET: if (((struct sockaddr_in *)addr)->sin_port != htons(0)) xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, - xs_tcp_setup_socket); xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); break; case AF_INET6: if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, - xs_tcp_setup_socket); xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); break; default: diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 69f0a1417e9a..1366a94b6c39 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -77,6 +77,7 @@ modpost = scripts/mod/modpost \ $(if $(KBUILD_EXTRA_SYMBOLS), $(patsubst %, -e %,$(KBUILD_EXTRA_SYMBOLS))) \ $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ $(if $(CONFIG_DEBUG_SECTION_MISMATCH),,-S) \ + $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \ $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w) MODPOST_OPT=$(subst -i,-n,$(filter -i,$(MAKEFLAGS))) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 12d3db3bd46b..e080746e1a6b 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -38,6 +38,7 @@ static int warn_unresolved = 0; /* How a symbol is exported */ static int sec_mismatch_count = 0; static int sec_mismatch_verbose = 1; +static int sec_mismatch_fatal = 0; /* ignore missing files */ static int ignore_missing_files; @@ -833,6 +834,8 @@ static const char *const section_white_list[] = ".xt.lit", /* xtensa */ ".arcextmap*", /* arc */ ".gnu.linkonce.arcext*", /* arc : modules */ + ".cmem*", /* EZchip */ + ".fmt_slot*", /* EZchip */ ".gnu.lto*", NULL }; @@ -2133,6 +2136,11 @@ static void add_staging_flag(struct buffer *b, const char *name) buf_printf(b, "\nMODULE_INFO(staging, \"Y\");\n"); } +/* In kernel, this size is defined in linux/module.h; + * here we use Elf_Addr instead of long for covering cross-compile + */ +#define MODULE_NAME_LEN (64 - sizeof(Elf_Addr)) + /** * Record CRCs for unresolved symbols **/ @@ -2177,6 +2185,12 @@ static int add_versions(struct buffer *b, struct module *mod) s->name, mod->name); continue; } + if (strlen(s->name) >= MODULE_NAME_LEN) { + merror("too long symbol \"%s\" [%s.ko]\n", + s->name, mod->name); + err = 1; + break; + } buf_printf(b, "\t{ %#8x, __VMLINUX_SYMBOL_STR(%s) },\n", s->crc, s->name); } @@ -2374,7 +2388,7 @@ int main(int argc, char **argv) struct ext_sym_list *extsym_iter; struct ext_sym_list *extsym_start = NULL; - while ((opt = getopt(argc, argv, "i:I:e:mnsST:o:awM:K:")) != -1) { + while ((opt = getopt(argc, argv, "i:I:e:mnsST:o:awM:K:E")) != -1) { switch (opt) { case 'i': kernel_read = optarg; @@ -2415,6 +2429,9 @@ int main(int argc, char **argv) case 'w': warn_unresolved = 1; break; + case 'E': + sec_mismatch_fatal = 1; + break; default: exit(1); } @@ -2464,14 +2481,20 @@ int main(int argc, char **argv) sprintf(fname, "%s.mod.c", mod->name); write_if_changed(&buf, fname); } - if (dump_write) write_dump(dump_write); - if (sec_mismatch_count && !sec_mismatch_verbose) - warn("modpost: Found %d section mismatch(es).\n" - "To see full details build your kernel with:\n" - "'make CONFIG_DEBUG_SECTION_MISMATCH=y'\n", - sec_mismatch_count); + if (sec_mismatch_count) { + if (!sec_mismatch_verbose) { + warn("modpost: Found %d section mismatch(es).\n" + "To see full details build your kernel with:\n" + "'make CONFIG_DEBUG_SECTION_MISMATCH=y'\n", + sec_mismatch_count); + } + if (sec_mismatch_fatal) { + fatal("modpost: Section mismatches detected.\n" + "Set CONFIG_SECTION_MISMATCH_WARN_ONLY=y to allow them.\n"); + } + } return err; } |