summaryrefslogtreecommitdiff
path: root/arch/tile
diff options
context:
space:
mode:
Diffstat (limited to 'arch/tile')
-rw-r--r--arch/tile/Kconfig31
-rw-r--r--arch/tile/Kconfig.debug14
-rw-r--r--arch/tile/Makefile4
-rw-r--r--arch/tile/configs/tilegx_defconfig241
-rw-r--r--arch/tile/configs/tilepro_defconfig87
-rw-r--r--arch/tile/gxio/Kconfig5
-rw-r--r--arch/tile/gxio/Makefile1
-rw-r--r--arch/tile/gxio/iorpc_trio.c23
-rw-r--r--arch/tile/gxio/iorpc_uart.c77
-rw-r--r--arch/tile/gxio/uart.c87
-rw-r--r--arch/tile/include/arch/trio.h39
-rw-r--r--arch/tile/include/arch/uart.h300
-rw-r--r--arch/tile/include/arch/uart_def.h120
-rw-r--r--arch/tile/include/asm/Kbuild3
-rw-r--r--arch/tile/include/asm/atomic.h52
-rw-r--r--arch/tile/include/asm/atomic_32.h102
-rw-r--r--arch/tile/include/asm/atomic_64.h42
-rw-r--r--arch/tile/include/asm/barrier.h4
-rw-r--r--arch/tile/include/asm/bitops.h41
-rw-r--r--arch/tile/include/asm/bitops_32.h2
-rw-r--r--arch/tile/include/asm/bitops_64.h8
-rw-r--r--arch/tile/include/asm/cache.h13
-rw-r--r--arch/tile/include/asm/cacheflush.h44
-rw-r--r--arch/tile/include/asm/cmpxchg.h93
-rw-r--r--arch/tile/include/asm/device.h5
-rw-r--r--arch/tile/include/asm/dma-mapping.h27
-rw-r--r--arch/tile/include/asm/elf.h10
-rw-r--r--arch/tile/include/asm/fixmap.h8
-rw-r--r--arch/tile/include/asm/ftrace.h22
-rw-r--r--arch/tile/include/asm/futex.h1
-rw-r--r--arch/tile/include/asm/homecache.h11
-rw-r--r--arch/tile/include/asm/io.h132
-rw-r--r--arch/tile/include/asm/irqflags.h21
-rw-r--r--arch/tile/include/asm/kdebug.h (renamed from arch/tile/include/asm/hw_irq.h)18
-rw-r--r--arch/tile/include/asm/kgdb.h71
-rw-r--r--arch/tile/include/asm/kprobes.h79
-rw-r--r--arch/tile/include/asm/mmu.h1
-rw-r--r--arch/tile/include/asm/mmu_context.h2
-rw-r--r--arch/tile/include/asm/mmzone.h2
-rw-r--r--arch/tile/include/asm/page.h61
-rw-r--r--arch/tile/include/asm/pci.h22
-rw-r--r--arch/tile/include/asm/pgtable_32.h4
-rw-r--r--arch/tile/include/asm/pgtable_64.h27
-rw-r--r--arch/tile/include/asm/processor.h84
-rw-r--r--arch/tile/include/asm/ptrace.h6
-rw-r--r--arch/tile/include/asm/sections.h8
-rw-r--r--arch/tile/include/asm/setup.h3
-rw-r--r--arch/tile/include/asm/smp.h2
-rw-r--r--arch/tile/include/asm/spinlock_64.h4
-rw-r--r--arch/tile/include/asm/string.h2
-rw-r--r--arch/tile/include/asm/thread_info.h6
-rw-r--r--arch/tile/include/asm/traps.h13
-rw-r--r--arch/tile/include/asm/uaccess.h37
-rw-r--r--arch/tile/include/asm/unaligned.h14
-rw-r--r--arch/tile/include/asm/vdso.h49
-rw-r--r--arch/tile/include/gxio/iorpc_trio.h5
-rw-r--r--arch/tile/include/gxio/iorpc_uart.h40
-rw-r--r--arch/tile/include/gxio/uart.h105
-rw-r--r--arch/tile/include/hv/drv_trio_intf.h8
-rw-r--r--arch/tile/include/hv/drv_uart_intf.h33
-rw-r--r--arch/tile/include/hv/hypervisor.h61
-rw-r--r--arch/tile/include/uapi/arch/Kbuild1
-rw-r--r--arch/tile/include/uapi/arch/chip.h4
-rw-r--r--arch/tile/include/uapi/arch/chip_tile64.h258
-rw-r--r--arch/tile/include/uapi/arch/opcode_tilegx.h1
-rw-r--r--arch/tile/include/uapi/arch/opcode_tilepro.h1
-rw-r--r--arch/tile/include/uapi/arch/spr_def_32.h2
-rw-r--r--arch/tile/include/uapi/asm/auxvec.h3
-rw-r--r--arch/tile/include/uapi/asm/cachectl.h4
-rw-r--r--arch/tile/kernel/Makefile16
-rw-r--r--arch/tile/kernel/asm-offsets.c52
-rw-r--r--arch/tile/kernel/compat_signal.c3
-rw-r--r--arch/tile/kernel/early_printk.c47
-rw-r--r--arch/tile/kernel/entry.S16
-rw-r--r--arch/tile/kernel/ftrace.c246
-rw-r--r--arch/tile/kernel/hardwall.c28
-rw-r--r--arch/tile/kernel/head_32.S17
-rw-r--r--arch/tile/kernel/head_64.S46
-rw-r--r--arch/tile/kernel/hvglue.S74
-rw-r--r--arch/tile/kernel/hvglue.lds59
-rw-r--r--arch/tile/kernel/hvglue_trace.c266
-rw-r--r--arch/tile/kernel/intvec_32.S114
-rw-r--r--arch/tile/kernel/intvec_64.S305
-rw-r--r--arch/tile/kernel/irq.c8
-rw-r--r--arch/tile/kernel/kgdb.c499
-rw-r--r--arch/tile/kernel/kprobes.c528
-rw-r--r--arch/tile/kernel/mcount_64.S224
-rw-r--r--arch/tile/kernel/pci-dma.c74
-rw-r--r--arch/tile/kernel/pci.c33
-rw-r--r--arch/tile/kernel/pci_gx.c727
-rw-r--r--arch/tile/kernel/proc.c2
-rw-r--r--arch/tile/kernel/process.c116
-rw-r--r--arch/tile/kernel/ptrace.c19
-rw-r--r--arch/tile/kernel/reboot.c2
-rw-r--r--arch/tile/kernel/regs_32.S4
-rw-r--r--arch/tile/kernel/regs_64.S4
-rw-r--r--arch/tile/kernel/relocate_kernel_32.S27
-rw-r--r--arch/tile/kernel/relocate_kernel_64.S11
-rw-r--r--arch/tile/kernel/setup.c162
-rw-r--r--arch/tile/kernel/signal.c3
-rw-r--r--arch/tile/kernel/single_step.c118
-rw-r--r--arch/tile/kernel/smp.c22
-rw-r--r--arch/tile/kernel/smpboot.c8
-rw-r--r--arch/tile/kernel/stack.c51
-rw-r--r--arch/tile/kernel/sys.c4
-rw-r--r--arch/tile/kernel/sysfs.c76
-rw-r--r--arch/tile/kernel/time.c37
-rw-r--r--arch/tile/kernel/tlb.c8
-rw-r--r--arch/tile/kernel/traps.c89
-rw-r--r--arch/tile/kernel/unaligned.c1609
-rw-r--r--arch/tile/kernel/vdso.c212
-rw-r--r--arch/tile/kernel/vdso/Makefile118
-rw-r--r--arch/tile/kernel/vdso/vdso.S28
-rw-r--r--arch/tile/kernel/vdso/vdso.lds.S87
-rw-r--r--arch/tile/kernel/vdso/vdso32.S28
-rw-r--r--arch/tile/kernel/vdso/vgettimeofday.c107
-rw-r--r--arch/tile/kernel/vdso/vrt_sigreturn.S30
-rw-r--r--arch/tile/kernel/vmlinux.lds.S31
-rw-r--r--arch/tile/lib/Makefile16
-rw-r--r--arch/tile/lib/atomic_32.c133
-rw-r--r--arch/tile/lib/atomic_asm_32.S1
-rw-r--r--arch/tile/lib/cacheflush.c16
-rw-r--r--arch/tile/lib/exports.c7
-rw-r--r--arch/tile/lib/memchr_64.c2
-rw-r--r--arch/tile/lib/memcpy_32.S63
-rw-r--r--arch/tile/lib/memcpy_64.c264
-rw-r--r--arch/tile/lib/memcpy_tile64.c276
-rw-r--r--arch/tile/lib/memcpy_user_64.c2
-rw-r--r--arch/tile/lib/memset_32.c110
-rw-r--r--arch/tile/lib/memset_64.c9
-rw-r--r--arch/tile/lib/strchr_32.c2
-rw-r--r--arch/tile/lib/strchr_64.c2
-rw-r--r--arch/tile/lib/string-endian.h13
-rw-r--r--arch/tile/lib/strlen_32.c2
-rw-r--r--arch/tile/lib/strnlen_32.c47
-rw-r--r--arch/tile/lib/strnlen_64.c48
-rw-r--r--arch/tile/lib/usercopy_32.S36
-rw-r--r--arch/tile/lib/usercopy_64.S36
-rw-r--r--arch/tile/mm/elf.c99
-rw-r--r--arch/tile/mm/fault.c135
-rw-r--r--arch/tile/mm/highmem.c2
-rw-r--r--arch/tile/mm/homecache.c39
-rw-r--r--arch/tile/mm/hugetlbpage.c38
-rw-r--r--arch/tile/mm/init.c96
-rw-r--r--arch/tile/mm/migrate_32.S4
-rw-r--r--arch/tile/mm/migrate_64.S4
-rw-r--r--arch/tile/mm/mmap.c24
-rw-r--r--arch/tile/mm/pgtable.c76
148 files changed, 7824 insertions, 2884 deletions
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 24565a7ffe6d..6e1ed55f6cfc 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -26,6 +26,7 @@ config TILE
select HAVE_SYSCALL_TRACEPOINTS
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select HAVE_DEBUG_STACKOVERFLOW
+ select ARCH_WANT_FRAME_POINTERS
# FIXME: investigate whether we need/want these options.
# select HAVE_IOREMAP_PROT
@@ -64,6 +65,9 @@ config HUGETLB_SUPER_PAGES
depends on HUGETLB_PAGE && TILEGX
def_bool y
+config GENERIC_TIME_VSYSCALL
+ def_bool y
+
# FIXME: tilegx can implement a more efficient rwsem.
config RWSEM_GENERIC_SPINLOCK
def_bool y
@@ -112,10 +116,19 @@ config SMP
config HVC_TILE
depends on TTY
select HVC_DRIVER
+ select HVC_IRQ if TILEGX
def_bool y
config TILEGX
- bool "Building with TILE-Gx (64-bit) compiler and toolchain"
+ bool "Building for TILE-Gx (64-bit) processor"
+ select HAVE_FUNCTION_TRACER
+ select HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ select HAVE_FUNCTION_GRAPH_TRACER
+ select HAVE_DYNAMIC_FTRACE
+ select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_KPROBES
+ select HAVE_KRETPROBES
+ select HAVE_ARCH_KGDB
config TILEPRO
def_bool !TILEGX
@@ -194,7 +207,7 @@ config SYSVIPC_COMPAT
def_bool y
depends on COMPAT && SYSVIPC
-# We do not currently support disabling HIGHMEM on tile64 and tilepro.
+# We do not currently support disabling HIGHMEM on tilepro.
config HIGHMEM
bool # "Support for more than 512 MB of RAM"
default !TILEGX
@@ -300,6 +313,8 @@ config PAGE_OFFSET
source "mm/Kconfig"
+source "kernel/Kconfig.preempt"
+
config CMDLINE_BOOL
bool "Built-in kernel command line"
default n
@@ -396,8 +411,20 @@ config NO_IOMEM
config NO_IOPORT
def_bool !PCI
+config TILE_PCI_IO
+ bool "PCI I/O space support"
+ default n
+ depends on PCI
+ depends on TILEGX
+ ---help---
+ Enable PCI I/O space support on TILEGx. Since the PCI I/O space
+ is used by few modern PCIe endpoint devices, its support is disabled
+ by default to save the TRIO PIO Region resource for other purposes.
+
source "drivers/pci/Kconfig"
+source "drivers/pci/pcie/Kconfig"
+
config TILE_USB
tristate "Tilera USB host adapter support"
default y
diff --git a/arch/tile/Kconfig.debug b/arch/tile/Kconfig.debug
index 9165ea979e85..19734d3ab1e8 100644
--- a/arch/tile/Kconfig.debug
+++ b/arch/tile/Kconfig.debug
@@ -14,14 +14,12 @@ config EARLY_PRINTK
with klogd/syslogd. You should normally N here,
unless you want to debug such a crash.
-config DEBUG_EXTRA_FLAGS
- string "Additional compiler arguments when building with '-g'"
- depends on DEBUG_INFO
- default ""
+config TILE_HVGLUE_TRACE
+ bool "Provide wrapper functions for hypervisor ABI calls"
+ default n
help
- Debug info can be large, and flags like
- `-femit-struct-debug-baseonly' can reduce the kernel file
- size and build time noticeably. Such flags are often
- helpful if the main use of debug info is line number info.
+ Provide wrapper functions for the hypervisor ABI calls
+ defined in arch/tile/kernel/hvglue.S. This allows tracing
+ mechanisms, etc., to have visibility into those calls.
endmenu
diff --git a/arch/tile/Makefile b/arch/tile/Makefile
index 3d15364c6071..4dc380a519d4 100644
--- a/arch/tile/Makefile
+++ b/arch/tile/Makefile
@@ -30,10 +30,6 @@ endif
# In kernel modules, this causes load failures due to unsupported relocations.
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
-ifneq ($(CONFIG_DEBUG_EXTRA_FLAGS),"")
-KBUILD_CFLAGS += $(CONFIG_DEBUG_EXTRA_FLAGS)
-endif
-
LIBGCC_PATH := \
$(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name)
diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig
index 47684815e5c8..730e40d9cf62 100644
--- a/arch/tile/configs/tilegx_defconfig
+++ b/arch/tile/configs/tilegx_defconfig
@@ -1,16 +1,15 @@
CONFIG_TILEGX=y
-CONFIG_EXPERIMENTAL=y
-# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
-CONFIG_FHANDLE=y
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_AUDIT=y
CONFIG_LOG_BUF_SHIFT=19
CONFIG_CGROUPS=y
CONFIG_CGROUP_DEBUG=y
@@ -18,18 +17,18 @@ CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_RESOURCE_COUNTERS=y
-CONFIG_CGROUP_MEMCG=y
-CONFIG_CGROUP_MEMCG_SWAP=y
CONFIG_CGROUP_SCHED=y
CONFIG_RT_GROUP_SCHED=y
CONFIG_BLK_CGROUP=y
CONFIG_NAMESPACES=y
CONFIG_RELAY=y
CONFIG_BLK_DEV_INITRD=y
+CONFIG_RD_XZ=y
CONFIG_SYSCTL_SYSCALL=y
CONFIG_EMBEDDED=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
+CONFIG_KPROBES=y
CONFIG_MODULES=y
CONFIG_MODULE_FORCE_LOAD=y
CONFIG_MODULE_UNLOAD=y
@@ -45,12 +44,12 @@ CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_SGI_PARTITION=y
CONFIG_SUN_PARTITION=y
CONFIG_KARMA_PARTITION=y
-CONFIG_EFI_PARTITION=y
CONFIG_CFQ_GROUP_IOSCHED=y
CONFIG_NR_CPUS=100
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
CONFIG_HZ_100=y
+# CONFIG_COMPACTION is not set
+CONFIG_PREEMPT_VOLUNTARY=y
+CONFIG_TILE_PCI_IO=y
CONFIG_PCI_DEBUG=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=y
@@ -108,150 +107,9 @@ CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IPV6_MROUTE=y
CONFIG_IPV6_PIMSM_V2=y
CONFIG_NETLABEL=y
-CONFIG_NETFILTER=y
-CONFIG_NF_CONNTRACK=m
-CONFIG_NF_CONNTRACK_SECMARK=y
-CONFIG_NF_CONNTRACK_ZONES=y
-CONFIG_NF_CONNTRACK_EVENTS=y
-CONFIG_NF_CT_PROTO_DCCP=m
-CONFIG_NF_CT_PROTO_UDPLITE=m
-CONFIG_NF_CONNTRACK_AMANDA=m
-CONFIG_NF_CONNTRACK_FTP=m
-CONFIG_NF_CONNTRACK_H323=m
-CONFIG_NF_CONNTRACK_IRC=m
-CONFIG_NF_CONNTRACK_NETBIOS_NS=m
-CONFIG_NF_CONNTRACK_PPTP=m
-CONFIG_NF_CONNTRACK_SANE=m
-CONFIG_NF_CONNTRACK_SIP=m
-CONFIG_NF_CONNTRACK_TFTP=m
-CONFIG_NETFILTER_TPROXY=m
-CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
-CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
-CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
-CONFIG_NETFILTER_XT_TARGET_CT=m
-CONFIG_NETFILTER_XT_TARGET_DSCP=m
-CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
-CONFIG_NETFILTER_XT_TARGET_MARK=m
-CONFIG_NETFILTER_XT_TARGET_NFLOG=m
-CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
-CONFIG_NETFILTER_XT_TARGET_TEE=m
-CONFIG_NETFILTER_XT_TARGET_TPROXY=m
-CONFIG_NETFILTER_XT_TARGET_TRACE=m
-CONFIG_NETFILTER_XT_TARGET_SECMARK=m
-CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
-CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
-CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
-CONFIG_NETFILTER_XT_MATCH_COMMENT=m
-CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
-CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
-CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
-CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
-CONFIG_NETFILTER_XT_MATCH_DCCP=m
-CONFIG_NETFILTER_XT_MATCH_DSCP=m
-CONFIG_NETFILTER_XT_MATCH_ESP=m
-CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
-CONFIG_NETFILTER_XT_MATCH_HELPER=m
-CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
-CONFIG_NETFILTER_XT_MATCH_IPVS=m
-CONFIG_NETFILTER_XT_MATCH_LENGTH=m
-CONFIG_NETFILTER_XT_MATCH_LIMIT=m
-CONFIG_NETFILTER_XT_MATCH_MAC=m
-CONFIG_NETFILTER_XT_MATCH_MARK=m
-CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
-CONFIG_NETFILTER_XT_MATCH_OSF=m
-CONFIG_NETFILTER_XT_MATCH_OWNER=m
-CONFIG_NETFILTER_XT_MATCH_POLICY=m
-CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
-CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
-CONFIG_NETFILTER_XT_MATCH_QUOTA=m
-CONFIG_NETFILTER_XT_MATCH_RATEEST=m
-CONFIG_NETFILTER_XT_MATCH_REALM=m
-CONFIG_NETFILTER_XT_MATCH_RECENT=m
-CONFIG_NETFILTER_XT_MATCH_SOCKET=m
-CONFIG_NETFILTER_XT_MATCH_STATE=m
-CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
-CONFIG_NETFILTER_XT_MATCH_STRING=m
-CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
-CONFIG_NETFILTER_XT_MATCH_TIME=m
-CONFIG_NETFILTER_XT_MATCH_U32=m
-CONFIG_IP_VS=m
-CONFIG_IP_VS_IPV6=y
-CONFIG_IP_VS_PROTO_TCP=y
-CONFIG_IP_VS_PROTO_UDP=y
-CONFIG_IP_VS_PROTO_ESP=y
-CONFIG_IP_VS_PROTO_AH=y
-CONFIG_IP_VS_PROTO_SCTP=y
-CONFIG_IP_VS_RR=m
-CONFIG_IP_VS_WRR=m
-CONFIG_IP_VS_LC=m
-CONFIG_IP_VS_WLC=m
-CONFIG_IP_VS_LBLC=m
-CONFIG_IP_VS_LBLCR=m
-CONFIG_IP_VS_SED=m
-CONFIG_IP_VS_NQ=m
-CONFIG_NF_CONNTRACK_IPV4=m
-# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
-CONFIG_IP_NF_QUEUE=m
-CONFIG_IP_NF_IPTABLES=y
-CONFIG_IP_NF_MATCH_AH=m
-CONFIG_IP_NF_MATCH_ECN=m
-CONFIG_IP_NF_MATCH_TTL=m
-CONFIG_IP_NF_FILTER=y
-CONFIG_IP_NF_TARGET_REJECT=y
-CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
-CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_ECN=m
-CONFIG_IP_NF_TARGET_TTL=m
-CONFIG_IP_NF_RAW=m
-CONFIG_IP_NF_SECURITY=m
-CONFIG_IP_NF_ARPTABLES=m
-CONFIG_IP_NF_ARPFILTER=m
-CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_IP6_NF_QUEUE=m
-CONFIG_IP6_NF_IPTABLES=m
-CONFIG_IP6_NF_MATCH_AH=m
-CONFIG_IP6_NF_MATCH_EUI64=m
-CONFIG_IP6_NF_MATCH_FRAG=m
-CONFIG_IP6_NF_MATCH_OPTS=m
-CONFIG_IP6_NF_MATCH_HL=m
-CONFIG_IP6_NF_MATCH_IPV6HEADER=m
-CONFIG_IP6_NF_MATCH_MH=m
-CONFIG_IP6_NF_MATCH_RT=m
-CONFIG_IP6_NF_TARGET_HL=m
-CONFIG_IP6_NF_TARGET_LOG=m
-CONFIG_IP6_NF_FILTER=m
-CONFIG_IP6_NF_TARGET_REJECT=m
-CONFIG_IP6_NF_MANGLE=m
-CONFIG_IP6_NF_RAW=m
-CONFIG_IP6_NF_SECURITY=m
-CONFIG_BRIDGE_NF_EBTABLES=m
-CONFIG_BRIDGE_EBT_BROUTE=m
-CONFIG_BRIDGE_EBT_T_FILTER=m
-CONFIG_BRIDGE_EBT_T_NAT=m
-CONFIG_BRIDGE_EBT_802_3=m
-CONFIG_BRIDGE_EBT_AMONG=m
-CONFIG_BRIDGE_EBT_ARP=m
-CONFIG_BRIDGE_EBT_IP=m
-CONFIG_BRIDGE_EBT_IP6=m
-CONFIG_BRIDGE_EBT_LIMIT=m
-CONFIG_BRIDGE_EBT_MARK=m
-CONFIG_BRIDGE_EBT_PKTTYPE=m
-CONFIG_BRIDGE_EBT_STP=m
-CONFIG_BRIDGE_EBT_VLAN=m
-CONFIG_BRIDGE_EBT_ARPREPLY=m
-CONFIG_BRIDGE_EBT_DNAT=m
-CONFIG_BRIDGE_EBT_MARK_T=m
-CONFIG_BRIDGE_EBT_REDIRECT=m
-CONFIG_BRIDGE_EBT_SNAT=m
-CONFIG_BRIDGE_EBT_LOG=m
-CONFIG_BRIDGE_EBT_ULOG=m
-CONFIG_BRIDGE_EBT_NFLOG=m
CONFIG_RDS=m
CONFIG_RDS_TCP=m
CONFIG_BRIDGE=m
-CONFIG_NET_DSA=y
CONFIG_VLAN_8021Q=m
CONFIG_VLAN_8021Q_GVRP=y
CONFIG_PHONET=m
@@ -292,13 +150,13 @@ CONFIG_NET_ACT_POLICE=m
CONFIG_NET_ACT_GACT=m
CONFIG_GACT_PROB=y
CONFIG_NET_ACT_MIRRED=m
-CONFIG_NET_ACT_IPT=m
CONFIG_NET_ACT_NAT=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_CLS_IND=y
CONFIG_DCB=y
+CONFIG_DNS_RESOLVER=y
# CONFIG_WIRELESS is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
@@ -317,10 +175,12 @@ CONFIG_BLK_DEV_SD=y
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_LOGGING=y
CONFIG_SCSI_SAS_ATA=y
+CONFIG_ISCSI_TCP=m
CONFIG_SCSI_MVSAS=y
# CONFIG_SCSI_MVSAS_DEBUG is not set
CONFIG_SCSI_MVSAS_TASKLET=y
CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
CONFIG_SATA_SIL24=y
# CONFIG_ATA_SFF is not set
CONFIG_MD=y
@@ -343,6 +203,12 @@ CONFIG_DM_MULTIPATH_QL=m
CONFIG_DM_MULTIPATH_ST=m
CONFIG_DM_DELAY=m
CONFIG_DM_UEVENT=y
+CONFIG_TARGET_CORE=m
+CONFIG_TCM_IBLOCK=m
+CONFIG_TCM_FILEIO=m
+CONFIG_TCM_PSCSI=m
+CONFIG_LOOPBACK_TARGET=m
+CONFIG_ISCSI_TARGET=m
CONFIG_FUSION=y
CONFIG_FUSION_SAS=y
CONFIG_NETDEVICES=y
@@ -359,42 +225,8 @@ CONFIG_VETH=m
CONFIG_NET_DSA_MV88E6060=y
CONFIG_NET_DSA_MV88E6131=y
CONFIG_NET_DSA_MV88E6123_61_65=y
-# CONFIG_NET_VENDOR_3COM is not set
-# CONFIG_NET_VENDOR_ADAPTEC is not set
-# CONFIG_NET_VENDOR_ALTEON is not set
-# CONFIG_NET_VENDOR_AMD is not set
-# CONFIG_NET_VENDOR_ATHEROS is not set
-# CONFIG_NET_VENDOR_BROADCOM is not set
-# CONFIG_NET_VENDOR_BROCADE is not set
-# CONFIG_NET_VENDOR_CHELSIO is not set
-# CONFIG_NET_VENDOR_CISCO is not set
-# CONFIG_NET_VENDOR_DEC is not set
-# CONFIG_NET_VENDOR_DLINK is not set
-# CONFIG_NET_VENDOR_EMULEX is not set
-# CONFIG_NET_VENDOR_EXAR is not set
-# CONFIG_NET_VENDOR_HP is not set
-# CONFIG_NET_VENDOR_INTEL is not set
-# CONFIG_NET_VENDOR_MARVELL is not set
-# CONFIG_NET_VENDOR_MELLANOX is not set
-# CONFIG_NET_VENDOR_MICREL is not set
-# CONFIG_NET_VENDOR_MYRI is not set
-# CONFIG_NET_VENDOR_NATSEMI is not set
-# CONFIG_NET_VENDOR_NVIDIA is not set
-# CONFIG_NET_VENDOR_OKI is not set
-# CONFIG_NET_PACKET_ENGINE is not set
-# CONFIG_NET_VENDOR_QLOGIC is not set
-# CONFIG_NET_VENDOR_REALTEK is not set
-# CONFIG_NET_VENDOR_RDC is not set
-# CONFIG_NET_VENDOR_SEEQ is not set
-# CONFIG_NET_VENDOR_SILAN is not set
-# CONFIG_NET_VENDOR_SIS is not set
-# CONFIG_NET_VENDOR_SMSC is not set
-# CONFIG_NET_VENDOR_STMICRO is not set
-# CONFIG_NET_VENDOR_SUN is not set
-# CONFIG_NET_VENDOR_TEHUTI is not set
-# CONFIG_NET_VENDOR_TI is not set
-# CONFIG_TILE_NET is not set
-# CONFIG_NET_VENDOR_VIA is not set
+CONFIG_SKY2=y
+CONFIG_PTP_1588_CLOCK_TILEGX=y
# CONFIG_WLAN is not set
# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
@@ -402,6 +234,7 @@ CONFIG_NET_DSA_MV88E6123_61_65=y
# CONFIG_SERIO is not set
# CONFIG_VT is not set
# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_TILEGX=y
CONFIG_HW_RANDOM=y
CONFIG_HW_RANDOM_TIMERIOMEM=m
CONFIG_I2C=y
@@ -410,13 +243,16 @@ CONFIG_I2C_CHARDEV=y
CONFIG_WATCHDOG=y
CONFIG_WATCHDOG_NOWAYOUT=y
# CONFIG_VGA_ARB is not set
-# CONFIG_HID_SUPPORT is not set
+CONFIG_DRM=m
+CONFIG_DRM_TDFX=m
+CONFIG_DRM_R128=m
+CONFIG_DRM_MGA=m
+CONFIG_DRM_VIA=m
+CONFIG_DRM_SAVAGE=m
CONFIG_USB=y
-# CONFIG_USB_DEVICE_CLASS is not set
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_STORAGE=y
-CONFIG_USB_LIBUSUAL=y
CONFIG_EDAC=y
CONFIG_EDAC_MM_EDAC=y
CONFIG_RTC_CLASS=y
@@ -464,9 +300,8 @@ CONFIG_ECRYPT_FS=m
CONFIG_CRAMFS=m
CONFIG_SQUASHFS=m
CONFIG_NFS_FS=m
-CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
-CONFIG_NFS_V4=y
+CONFIG_NFS_V4=m
CONFIG_NFS_V4_1=y
CONFIG_NFS_FSCACHE=y
CONFIG_NFSD=m
@@ -519,25 +354,28 @@ CONFIG_NLS_ISO8859_15=m
CONFIG_NLS_KOI8_R=m
CONFIG_NLS_KOI8_U=m
CONFIG_NLS_UTF8=m
+CONFIG_DLM=m
CONFIG_DLM_DEBUG=y
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_REDUCED=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
-CONFIG_MAGIC_SYSRQ=y
CONFIG_STRIP_ASM_SYMS=y
CONFIG_DEBUG_FS=y
CONFIG_HEADERS_CHECK=y
+# CONFIG_FRAME_POINTER is not set
+CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_LOCKUP_DETECTOR=y
CONFIG_SCHEDSTATS=y
CONFIG_TIMER_STATS=y
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_INFO_REDUCED=y
-CONFIG_DEBUG_VM=y
-CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_CREDENTIALS=y
-CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
-CONFIG_DYNAMIC_DEBUG=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_ASYNC_RAID6_TEST=m
-CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_KGDB=y
CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITYFS=y
@@ -546,7 +384,6 @@ CONFIG_SECURITY_NETWORK_XFRM=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_DISABLE=y
-CONFIG_CRYPTO_NULL=m
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
@@ -559,14 +396,12 @@ CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_CRC32C=y
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA1=y
-CONFIG_CRYPTO_SHA256=m
CONFIG_CRYPTO_SHA512=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig
index dd2b8f0c631f..80fc32ed0491 100644
--- a/arch/tile/configs/tilepro_defconfig
+++ b/arch/tile/configs/tilepro_defconfig
@@ -1,15 +1,14 @@
-CONFIG_EXPERIMENTAL=y
-# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
-CONFIG_FHANDLE=y
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_AUDIT=y
CONFIG_LOG_BUF_SHIFT=19
CONFIG_CGROUPS=y
CONFIG_CGROUP_DEBUG=y
@@ -17,14 +16,13 @@ CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_RESOURCE_COUNTERS=y
-CONFIG_CGROUP_MEMCG=y
-CONFIG_CGROUP_MEMCG_SWAP=y
CONFIG_CGROUP_SCHED=y
CONFIG_RT_GROUP_SCHED=y
CONFIG_BLK_CGROUP=y
CONFIG_NAMESPACES=y
CONFIG_RELAY=y
CONFIG_BLK_DEV_INITRD=y
+CONFIG_RD_XZ=y
CONFIG_SYSCTL_SYSCALL=y
CONFIG_EMBEDDED=y
# CONFIG_COMPAT_BRK is not set
@@ -44,11 +42,10 @@ CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_SGI_PARTITION=y
CONFIG_SUN_PARTITION=y
CONFIG_KARMA_PARTITION=y
-CONFIG_EFI_PARTITION=y
CONFIG_CFQ_GROUP_IOSCHED=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
CONFIG_HZ_100=y
+# CONFIG_COMPACTION is not set
+CONFIG_PREEMPT_VOLUNTARY=y
CONFIG_PCI_DEBUG=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=y
@@ -122,16 +119,15 @@ CONFIG_NF_CONNTRACK_PPTP=m
CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
-CONFIG_NETFILTER_TPROXY=m
CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
-CONFIG_NETFILTER_XT_TARGET_CT=m
CONFIG_NETFILTER_XT_TARGET_DSCP=m
CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
CONFIG_NETFILTER_XT_TARGET_MARK=m
CONFIG_NETFILTER_XT_TARGET_NFLOG=m
CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_NOTRACK=m
CONFIG_NETFILTER_XT_TARGET_TEE=m
CONFIG_NETFILTER_XT_TARGET_TPROXY=m
CONFIG_NETFILTER_XT_TARGET_TRACE=m
@@ -189,14 +185,12 @@ CONFIG_IP_VS_SED=m
CONFIG_IP_VS_NQ=m
CONFIG_NF_CONNTRACK_IPV4=m
# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
-CONFIG_IP_NF_QUEUE=m
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
CONFIG_IP_NF_MATCH_TTL=m
CONFIG_IP_NF_FILTER=y
CONFIG_IP_NF_TARGET_REJECT=y
-CONFIG_IP_NF_TARGET_LOG=m
CONFIG_IP_NF_TARGET_ULOG=m
CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_ECN=m
@@ -207,8 +201,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_IP6_NF_QUEUE=m
-CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
CONFIG_IP6_NF_MATCH_FRAG=m
@@ -218,7 +210,6 @@ CONFIG_IP6_NF_MATCH_IPV6HEADER=m
CONFIG_IP6_NF_MATCH_MH=m
CONFIG_IP6_NF_MATCH_RT=m
CONFIG_IP6_NF_TARGET_HL=m
-CONFIG_IP6_NF_TARGET_LOG=m
CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP6_NF_MANGLE=m
@@ -249,7 +240,6 @@ CONFIG_BRIDGE_EBT_NFLOG=m
CONFIG_RDS=m
CONFIG_RDS_TCP=m
CONFIG_BRIDGE=m
-CONFIG_NET_DSA=y
CONFIG_VLAN_8021Q=m
CONFIG_VLAN_8021Q_GVRP=y
CONFIG_PHONET=m
@@ -297,6 +287,7 @@ CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_CLS_IND=y
CONFIG_DCB=y
+CONFIG_DNS_RESOLVER=y
# CONFIG_WIRELESS is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
@@ -354,40 +345,7 @@ CONFIG_NET_DSA_MV88E6060=y
CONFIG_NET_DSA_MV88E6131=y
CONFIG_NET_DSA_MV88E6123_61_65=y
# CONFIG_NET_VENDOR_3COM is not set
-# CONFIG_NET_VENDOR_ADAPTEC is not set
-# CONFIG_NET_VENDOR_ALTEON is not set
-# CONFIG_NET_VENDOR_AMD is not set
-# CONFIG_NET_VENDOR_ATHEROS is not set
-# CONFIG_NET_VENDOR_BROADCOM is not set
-# CONFIG_NET_VENDOR_BROCADE is not set
-# CONFIG_NET_VENDOR_CHELSIO is not set
-# CONFIG_NET_VENDOR_CISCO is not set
-# CONFIG_NET_VENDOR_DEC is not set
-# CONFIG_NET_VENDOR_DLINK is not set
-# CONFIG_NET_VENDOR_EMULEX is not set
-# CONFIG_NET_VENDOR_EXAR is not set
-# CONFIG_NET_VENDOR_HP is not set
-# CONFIG_NET_VENDOR_INTEL is not set
-# CONFIG_NET_VENDOR_MARVELL is not set
-# CONFIG_NET_VENDOR_MELLANOX is not set
-# CONFIG_NET_VENDOR_MICREL is not set
-# CONFIG_NET_VENDOR_MYRI is not set
-# CONFIG_NET_VENDOR_NATSEMI is not set
-# CONFIG_NET_VENDOR_NVIDIA is not set
-# CONFIG_NET_VENDOR_OKI is not set
-# CONFIG_NET_PACKET_ENGINE is not set
-# CONFIG_NET_VENDOR_QLOGIC is not set
-# CONFIG_NET_VENDOR_REALTEK is not set
-# CONFIG_NET_VENDOR_RDC is not set
-# CONFIG_NET_VENDOR_SEEQ is not set
-# CONFIG_NET_VENDOR_SILAN is not set
-# CONFIG_NET_VENDOR_SIS is not set
-# CONFIG_NET_VENDOR_SMSC is not set
-# CONFIG_NET_VENDOR_STMICRO is not set
-# CONFIG_NET_VENDOR_SUN is not set
-# CONFIG_NET_VENDOR_TEHUTI is not set
-# CONFIG_NET_VENDOR_TI is not set
-# CONFIG_NET_VENDOR_VIA is not set
+CONFIG_E1000E=y
# CONFIG_WLAN is not set
# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
@@ -403,7 +361,6 @@ CONFIG_I2C_CHARDEV=y
CONFIG_WATCHDOG=y
CONFIG_WATCHDOG_NOWAYOUT=y
# CONFIG_VGA_ARB is not set
-# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_EDAC=y
CONFIG_EDAC_MM_EDAC=y
@@ -448,13 +405,13 @@ CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
+CONFIG_CONFIGFS_FS=m
CONFIG_ECRYPT_FS=m
CONFIG_CRAMFS=m
CONFIG_SQUASHFS=m
CONFIG_NFS_FS=m
-CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
-CONFIG_NFS_V4=y
+CONFIG_NFS_V4=m
CONFIG_NFS_V4_1=y
CONFIG_NFS_FSCACHE=y
CONFIG_NFSD=m
@@ -508,26 +465,29 @@ CONFIG_NLS_ISO8859_15=m
CONFIG_NLS_KOI8_R=m
CONFIG_NLS_KOI8_U=m
CONFIG_NLS_UTF8=m
+CONFIG_DLM=m
CONFIG_DLM_DEBUG=y
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_REDUCED=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
CONFIG_FRAME_WARN=2048
-CONFIG_MAGIC_SYSRQ=y
CONFIG_STRIP_ASM_SYMS=y
CONFIG_DEBUG_FS=y
CONFIG_HEADERS_CHECK=y
+# CONFIG_FRAME_POINTER is not set
+CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_LOCKUP_DETECTOR=y
CONFIG_SCHEDSTATS=y
CONFIG_TIMER_STATS=y
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_INFO_REDUCED=y
-CONFIG_DEBUG_VM=y
-CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_CREDENTIALS=y
-CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
-CONFIG_DYNAMIC_DEBUG=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_ASYNC_RAID6_TEST=m
-CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITYFS=y
@@ -536,7 +496,6 @@ CONFIG_SECURITY_NETWORK_XFRM=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_DISABLE=y
-CONFIG_CRYPTO_NULL=m
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
@@ -549,14 +508,12 @@ CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_CRC32C=y
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA1=y
-CONFIG_CRYPTO_SHA256=m
CONFIG_CRYPTO_SHA512=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
diff --git a/arch/tile/gxio/Kconfig b/arch/tile/gxio/Kconfig
index d221f8d6de8b..d4e10d58071b 100644
--- a/arch/tile/gxio/Kconfig
+++ b/arch/tile/gxio/Kconfig
@@ -26,3 +26,8 @@ config TILE_GXIO_TRIO
config TILE_GXIO_USB_HOST
bool
select TILE_GXIO
+
+# Support direct access to the TILE-Gx UART hardware from kernel space.
+config TILE_GXIO_UART
+ bool
+ select TILE_GXIO
diff --git a/arch/tile/gxio/Makefile b/arch/tile/gxio/Makefile
index 8684bcaa74ea..26ae2c727467 100644
--- a/arch/tile/gxio/Makefile
+++ b/arch/tile/gxio/Makefile
@@ -6,4 +6,5 @@ obj-$(CONFIG_TILE_GXIO) += iorpc_globals.o kiorpc.o
obj-$(CONFIG_TILE_GXIO_DMA) += dma_queue.o
obj-$(CONFIG_TILE_GXIO_MPIPE) += mpipe.o iorpc_mpipe.o iorpc_mpipe_info.o
obj-$(CONFIG_TILE_GXIO_TRIO) += trio.o iorpc_trio.o
+obj-$(CONFIG_TILE_GXIO_UART) += uart.o iorpc_uart.o
obj-$(CONFIG_TILE_GXIO_USB_HOST) += usb_host.o iorpc_usb_host.o
diff --git a/arch/tile/gxio/iorpc_trio.c b/arch/tile/gxio/iorpc_trio.c
index cef4b2209cda..da6e18e049c3 100644
--- a/arch/tile/gxio/iorpc_trio.c
+++ b/arch/tile/gxio/iorpc_trio.c
@@ -61,6 +61,29 @@ int gxio_trio_alloc_memory_maps(gxio_trio_context_t * context,
EXPORT_SYMBOL(gxio_trio_alloc_memory_maps);
+struct alloc_scatter_queues_param {
+ unsigned int count;
+ unsigned int first;
+ unsigned int flags;
+};
+
+int gxio_trio_alloc_scatter_queues(gxio_trio_context_t * context,
+ unsigned int count, unsigned int first,
+ unsigned int flags)
+{
+ struct alloc_scatter_queues_param temp;
+ struct alloc_scatter_queues_param *params = &temp;
+
+ params->count = count;
+ params->first = first;
+ params->flags = flags;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params),
+ GXIO_TRIO_OP_ALLOC_SCATTER_QUEUES);
+}
+
+EXPORT_SYMBOL(gxio_trio_alloc_scatter_queues);
struct alloc_pio_regions_param {
unsigned int count;
diff --git a/arch/tile/gxio/iorpc_uart.c b/arch/tile/gxio/iorpc_uart.c
new file mode 100644
index 000000000000..b9a6d6193d73
--- /dev/null
+++ b/arch/tile/gxio/iorpc_uart.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* This file is machine-generated; DO NOT EDIT! */
+#include "gxio/iorpc_uart.h"
+
+struct cfg_interrupt_param {
+ union iorpc_interrupt interrupt;
+};
+
+int gxio_uart_cfg_interrupt(gxio_uart_context_t *context, int inter_x,
+ int inter_y, int inter_ipi, int inter_event)
+{
+ struct cfg_interrupt_param temp;
+ struct cfg_interrupt_param *params = &temp;
+
+ params->interrupt.kernel.x = inter_x;
+ params->interrupt.kernel.y = inter_y;
+ params->interrupt.kernel.ipi = inter_ipi;
+ params->interrupt.kernel.event = inter_event;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_UART_OP_CFG_INTERRUPT);
+}
+
+EXPORT_SYMBOL(gxio_uart_cfg_interrupt);
+
+struct get_mmio_base_param {
+ HV_PTE base;
+};
+
+int gxio_uart_get_mmio_base(gxio_uart_context_t *context, HV_PTE *base)
+{
+ int __result;
+ struct get_mmio_base_param temp;
+ struct get_mmio_base_param *params = &temp;
+
+ __result =
+ hv_dev_pread(context->fd, 0, (HV_VirtAddr) params, sizeof(*params),
+ GXIO_UART_OP_GET_MMIO_BASE);
+ *base = params->base;
+
+ return __result;
+}
+
+EXPORT_SYMBOL(gxio_uart_get_mmio_base);
+
+struct check_mmio_offset_param {
+ unsigned long offset;
+ unsigned long size;
+};
+
+int gxio_uart_check_mmio_offset(gxio_uart_context_t *context,
+ unsigned long offset, unsigned long size)
+{
+ struct check_mmio_offset_param temp;
+ struct check_mmio_offset_param *params = &temp;
+
+ params->offset = offset;
+ params->size = size;
+
+ return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+ sizeof(*params), GXIO_UART_OP_CHECK_MMIO_OFFSET);
+}
+
+EXPORT_SYMBOL(gxio_uart_check_mmio_offset);
diff --git a/arch/tile/gxio/uart.c b/arch/tile/gxio/uart.c
new file mode 100644
index 000000000000..ba585175ef88
--- /dev/null
+++ b/arch/tile/gxio/uart.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/*
+ * Implementation of UART gxio calls.
+ */
+
+#include <linux/io.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+
+#include <gxio/uart.h>
+#include <gxio/iorpc_globals.h>
+#include <gxio/iorpc_uart.h>
+#include <gxio/kiorpc.h>
+
+int gxio_uart_init(gxio_uart_context_t *context, int uart_index)
+{
+ char file[32];
+ int fd;
+
+ snprintf(file, sizeof(file), "uart/%d/iorpc", uart_index);
+ fd = hv_dev_open((HV_VirtAddr) file, 0);
+ if (fd < 0) {
+ if (fd >= GXIO_ERR_MIN && fd <= GXIO_ERR_MAX)
+ return fd;
+ else
+ return -ENODEV;
+ }
+
+ context->fd = fd;
+
+ /* Map in the MMIO space. */
+ context->mmio_base = (void __force *)
+ iorpc_ioremap(fd, HV_UART_MMIO_OFFSET, HV_UART_MMIO_SIZE);
+
+ if (context->mmio_base == NULL) {
+ hv_dev_close(context->fd);
+ context->fd = -1;
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(gxio_uart_init);
+
+int gxio_uart_destroy(gxio_uart_context_t *context)
+{
+ iounmap((void __force __iomem *)(context->mmio_base));
+ hv_dev_close(context->fd);
+
+ context->mmio_base = NULL;
+ context->fd = -1;
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(gxio_uart_destroy);
+
+/* UART register write wrapper. */
+void gxio_uart_write(gxio_uart_context_t *context, uint64_t offset,
+ uint64_t word)
+{
+ __gxio_mmio_write(context->mmio_base + offset, word);
+}
+
+EXPORT_SYMBOL_GPL(gxio_uart_write);
+
+/* UART register read wrapper. */
+uint64_t gxio_uart_read(gxio_uart_context_t *context, uint64_t offset)
+{
+ return __gxio_mmio_read(context->mmio_base + offset);
+}
+
+EXPORT_SYMBOL_GPL(gxio_uart_read);
diff --git a/arch/tile/include/arch/trio.h b/arch/tile/include/arch/trio.h
index d3000a871a21..c0ddedcae085 100644
--- a/arch/tile/include/arch/trio.h
+++ b/arch/tile/include/arch/trio.h
@@ -23,6 +23,45 @@
#ifndef __ASSEMBLER__
/*
+ * Map SQ Doorbell Format.
+ * This describes the format of the write-only doorbell register that exists
+ * in the last 8-bytes of the MAP_SQ_BASE/LIM range. This register is only
+ * writable from PCIe space. Writes to this register will not be written to
+ * Tile memory space and thus no IO VA translation is required if the last
+ * page of the BASE/LIM range is not otherwise written.
+ */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /*
+ * When written with a 1, the associated MAP_SQ region's doorbell
+ * interrupt will be triggered once all previous writes are visible to
+ * Tile software.
+ */
+ uint_reg_t doorbell : 1;
+ /*
+ * When written with a 1, the descriptor at the head of the associated
+ * MAP_SQ's FIFO will be dequeued.
+ */
+ uint_reg_t pop : 1;
+ /* Reserved. */
+ uint_reg_t __reserved : 62;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved : 62;
+ uint_reg_t pop : 1;
+ uint_reg_t doorbell : 1;
+#endif
+ };
+
+ uint_reg_t word;
+} TRIO_MAP_SQ_DOORBELL_FMT_t;
+
+
+/*
* Tile PIO Region Configuration - CFG Address Format.
* This register describes the address format for PIO accesses when the
* associated region is setup with TYPE=CFG.
diff --git a/arch/tile/include/arch/uart.h b/arch/tile/include/arch/uart.h
new file mode 100644
index 000000000000..07966970adad
--- /dev/null
+++ b/arch/tile/include/arch/uart.h
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* Machine-generated file; do not edit. */
+
+#ifndef __ARCH_UART_H__
+#define __ARCH_UART_H__
+
+#include <arch/abi.h>
+#include <arch/uart_def.h>
+
+#ifndef __ASSEMBLER__
+
+/* Divisor. */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /*
+ * Baud Rate Divisor. Desired_baud_rate = REF_CLK frequency / (baud *
+ * 16).
+ * Note: REF_CLK is always 125 MHz, the default
+ * divisor = 68, baud rate = 125M/(68*16) = 115200 baud.
+ */
+ uint_reg_t divisor : 12;
+ /* Reserved. */
+ uint_reg_t __reserved : 52;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved : 52;
+ uint_reg_t divisor : 12;
+#endif
+ };
+
+ uint_reg_t word;
+} UART_DIVISOR_t;
+
+/* FIFO Count. */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /*
+ * n: n active entries in the receive FIFO (max is 2**8). Each entry has
+ * 8 bits.
+ * 0: no active entry in the receive FIFO (that is empty).
+ */
+ uint_reg_t rfifo_count : 9;
+ /* Reserved. */
+ uint_reg_t __reserved_0 : 7;
+ /*
+ * n: n active entries in the transmit FIFO (max is 2**8). Each entry has
+ * 8 bits.
+ * 0: no active entry in the transmit FIFO (that is empty).
+ */
+ uint_reg_t tfifo_count : 9;
+ /* Reserved. */
+ uint_reg_t __reserved_1 : 7;
+ /*
+ * n: n active entries in the write FIFO (max is 2**2). Each entry has 8
+ * bits.
+ * 0: no active entry in the write FIFO (that is empty).
+ */
+ uint_reg_t wfifo_count : 3;
+ /* Reserved. */
+ uint_reg_t __reserved_2 : 29;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved_2 : 29;
+ uint_reg_t wfifo_count : 3;
+ uint_reg_t __reserved_1 : 7;
+ uint_reg_t tfifo_count : 9;
+ uint_reg_t __reserved_0 : 7;
+ uint_reg_t rfifo_count : 9;
+#endif
+ };
+
+ uint_reg_t word;
+} UART_FIFO_COUNT_t;
+
+/* FLAG. */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /* Reserved. */
+ uint_reg_t __reserved_0 : 1;
+ /* 1: receive FIFO is empty */
+ uint_reg_t rfifo_empty : 1;
+ /* 1: write FIFO is empty. */
+ uint_reg_t wfifo_empty : 1;
+ /* 1: transmit FIFO is empty. */
+ uint_reg_t tfifo_empty : 1;
+ /* 1: receive FIFO is full. */
+ uint_reg_t rfifo_full : 1;
+ /* 1: write FIFO is full. */
+ uint_reg_t wfifo_full : 1;
+ /* 1: transmit FIFO is full. */
+ uint_reg_t tfifo_full : 1;
+ /* Reserved. */
+ uint_reg_t __reserved_1 : 57;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved_1 : 57;
+ uint_reg_t tfifo_full : 1;
+ uint_reg_t wfifo_full : 1;
+ uint_reg_t rfifo_full : 1;
+ uint_reg_t tfifo_empty : 1;
+ uint_reg_t wfifo_empty : 1;
+ uint_reg_t rfifo_empty : 1;
+ uint_reg_t __reserved_0 : 1;
+#endif
+ };
+
+ uint_reg_t word;
+} UART_FLAG_t;
+
+/*
+ * Interrupt Vector Mask.
+ * Each bit in this register corresponds to a specific interrupt. When set,
+ * the associated interrupt will not be dispatched.
+ */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /* Read data FIFO read and no data available */
+ uint_reg_t rdat_err : 1;
+ /* Write FIFO was written but it was full */
+ uint_reg_t wdat_err : 1;
+ /* Stop bit not found when current data was received */
+ uint_reg_t frame_err : 1;
+ /* Parity error was detected when current data was received */
+ uint_reg_t parity_err : 1;
+ /* Data was received but the receive FIFO was full */
+ uint_reg_t rfifo_overflow : 1;
+ /*
+ * An almost full event is reached when data is to be written to the
+ * receive FIFO, and the receive FIFO has more than or equal to
+ * BUFFER_THRESHOLD.RFIFO_AFULL bytes.
+ */
+ uint_reg_t rfifo_afull : 1;
+ /* Reserved. */
+ uint_reg_t __reserved_0 : 1;
+ /* An entry in the transmit FIFO was popped */
+ uint_reg_t tfifo_re : 1;
+ /* An entry has been pushed into the receive FIFO */
+ uint_reg_t rfifo_we : 1;
+ /* An entry of the write FIFO has been popped */
+ uint_reg_t wfifo_re : 1;
+ /* Rshim read receive FIFO in protocol mode */
+ uint_reg_t rfifo_err : 1;
+ /*
+ * An almost empty event is reached when data is to be read from the
+ * transmit FIFO, and the transmit FIFO has less than or equal to
+ * BUFFER_THRESHOLD.TFIFO_AEMPTY bytes.
+ */
+ uint_reg_t tfifo_aempty : 1;
+ /* Reserved. */
+ uint_reg_t __reserved_1 : 52;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved_1 : 52;
+ uint_reg_t tfifo_aempty : 1;
+ uint_reg_t rfifo_err : 1;
+ uint_reg_t wfifo_re : 1;
+ uint_reg_t rfifo_we : 1;
+ uint_reg_t tfifo_re : 1;
+ uint_reg_t __reserved_0 : 1;
+ uint_reg_t rfifo_afull : 1;
+ uint_reg_t rfifo_overflow : 1;
+ uint_reg_t parity_err : 1;
+ uint_reg_t frame_err : 1;
+ uint_reg_t wdat_err : 1;
+ uint_reg_t rdat_err : 1;
+#endif
+ };
+
+ uint_reg_t word;
+} UART_INTERRUPT_MASK_t;
+
+/*
+ * Interrupt vector, write-one-to-clear.
+ * Each bit in this register corresponds to a specific interrupt. Hardware
+ * sets the bit when the associated condition has occurred. Writing a 1
+ * clears the status bit.
+ */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /* Read data FIFO read and no data available */
+ uint_reg_t rdat_err : 1;
+ /* Write FIFO was written but it was full */
+ uint_reg_t wdat_err : 1;
+ /* Stop bit not found when current data was received */
+ uint_reg_t frame_err : 1;
+ /* Parity error was detected when current data was received */
+ uint_reg_t parity_err : 1;
+ /* Data was received but the receive FIFO was full */
+ uint_reg_t rfifo_overflow : 1;
+ /*
+ * Data was received and the receive FIFO is now almost full (more than
+ * BUFFER_THRESHOLD.RFIFO_AFULL bytes in it)
+ */
+ uint_reg_t rfifo_afull : 1;
+ /* Reserved. */
+ uint_reg_t __reserved_0 : 1;
+ /* An entry in the transmit FIFO was popped */
+ uint_reg_t tfifo_re : 1;
+ /* An entry has been pushed into the receive FIFO */
+ uint_reg_t rfifo_we : 1;
+ /* An entry of the write FIFO has been popped */
+ uint_reg_t wfifo_re : 1;
+ /* Rshim read receive FIFO in protocol mode */
+ uint_reg_t rfifo_err : 1;
+ /*
+ * Data was read from the transmit FIFO and now it is almost empty (less
+ * than or equal to BUFFER_THRESHOLD.TFIFO_AEMPTY bytes in it).
+ */
+ uint_reg_t tfifo_aempty : 1;
+ /* Reserved. */
+ uint_reg_t __reserved_1 : 52;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved_1 : 52;
+ uint_reg_t tfifo_aempty : 1;
+ uint_reg_t rfifo_err : 1;
+ uint_reg_t wfifo_re : 1;
+ uint_reg_t rfifo_we : 1;
+ uint_reg_t tfifo_re : 1;
+ uint_reg_t __reserved_0 : 1;
+ uint_reg_t rfifo_afull : 1;
+ uint_reg_t rfifo_overflow : 1;
+ uint_reg_t parity_err : 1;
+ uint_reg_t frame_err : 1;
+ uint_reg_t wdat_err : 1;
+ uint_reg_t rdat_err : 1;
+#endif
+ };
+
+ uint_reg_t word;
+} UART_INTERRUPT_STATUS_t;
+
+/* Type. */
+
+__extension__
+typedef union
+{
+ struct
+ {
+#ifndef __BIG_ENDIAN__
+ /* Number of stop bits, rx and tx */
+ uint_reg_t sbits : 1;
+ /* Reserved. */
+ uint_reg_t __reserved_0 : 1;
+ /* Data word size, rx and tx */
+ uint_reg_t dbits : 1;
+ /* Reserved. */
+ uint_reg_t __reserved_1 : 1;
+ /* Parity selection, rx and tx */
+ uint_reg_t ptype : 3;
+ /* Reserved. */
+ uint_reg_t __reserved_2 : 57;
+#else /* __BIG_ENDIAN__ */
+ uint_reg_t __reserved_2 : 57;
+ uint_reg_t ptype : 3;
+ uint_reg_t __reserved_1 : 1;
+ uint_reg_t dbits : 1;
+ uint_reg_t __reserved_0 : 1;
+ uint_reg_t sbits : 1;
+#endif
+ };
+
+ uint_reg_t word;
+} UART_TYPE_t;
+#endif /* !defined(__ASSEMBLER__) */
+
+#endif /* !defined(__ARCH_UART_H__) */
diff --git a/arch/tile/include/arch/uart_def.h b/arch/tile/include/arch/uart_def.h
new file mode 100644
index 000000000000..42bcaf535379
--- /dev/null
+++ b/arch/tile/include/arch/uart_def.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* Machine-generated file; do not edit. */
+
+#ifndef __ARCH_UART_DEF_H__
+#define __ARCH_UART_DEF_H__
+#define UART_DIVISOR 0x0158
+#define UART_FIFO_COUNT 0x0110
+#define UART_FLAG 0x0108
+#define UART_INTERRUPT_MASK 0x0208
+#define UART_INTERRUPT_MASK__RDAT_ERR_SHIFT 0
+#define UART_INTERRUPT_MASK__RDAT_ERR_WIDTH 1
+#define UART_INTERRUPT_MASK__RDAT_ERR_RESET_VAL 1
+#define UART_INTERRUPT_MASK__RDAT_ERR_RMASK 0x1
+#define UART_INTERRUPT_MASK__RDAT_ERR_MASK 0x1
+#define UART_INTERRUPT_MASK__RDAT_ERR_FIELD 0,0
+#define UART_INTERRUPT_MASK__WDAT_ERR_SHIFT 1
+#define UART_INTERRUPT_MASK__WDAT_ERR_WIDTH 1
+#define UART_INTERRUPT_MASK__WDAT_ERR_RESET_VAL 1
+#define UART_INTERRUPT_MASK__WDAT_ERR_RMASK 0x1
+#define UART_INTERRUPT_MASK__WDAT_ERR_MASK 0x2
+#define UART_INTERRUPT_MASK__WDAT_ERR_FIELD 1,1
+#define UART_INTERRUPT_MASK__FRAME_ERR_SHIFT 2
+#define UART_INTERRUPT_MASK__FRAME_ERR_WIDTH 1
+#define UART_INTERRUPT_MASK__FRAME_ERR_RESET_VAL 1
+#define UART_INTERRUPT_MASK__FRAME_ERR_RMASK 0x1
+#define UART_INTERRUPT_MASK__FRAME_ERR_MASK 0x4
+#define UART_INTERRUPT_MASK__FRAME_ERR_FIELD 2,2
+#define UART_INTERRUPT_MASK__PARITY_ERR_SHIFT 3
+#define UART_INTERRUPT_MASK__PARITY_ERR_WIDTH 1
+#define UART_INTERRUPT_MASK__PARITY_ERR_RESET_VAL 1
+#define UART_INTERRUPT_MASK__PARITY_ERR_RMASK 0x1
+#define UART_INTERRUPT_MASK__PARITY_ERR_MASK 0x8
+#define UART_INTERRUPT_MASK__PARITY_ERR_FIELD 3,3
+#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_SHIFT 4
+#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_WIDTH 1
+#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_RESET_VAL 1
+#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_RMASK 0x1
+#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_MASK 0x10
+#define UART_INTERRUPT_MASK__RFIFO_OVERFLOW_FIELD 4,4
+#define UART_INTERRUPT_MASK__RFIFO_AFULL_SHIFT 5
+#define UART_INTERRUPT_MASK__RFIFO_AFULL_WIDTH 1
+#define UART_INTERRUPT_MASK__RFIFO_AFULL_RESET_VAL 1
+#define UART_INTERRUPT_MASK__RFIFO_AFULL_RMASK 0x1
+#define UART_INTERRUPT_MASK__RFIFO_AFULL_MASK 0x20
+#define UART_INTERRUPT_MASK__RFIFO_AFULL_FIELD 5,5
+#define UART_INTERRUPT_MASK__TFIFO_RE_SHIFT 7
+#define UART_INTERRUPT_MASK__TFIFO_RE_WIDTH 1
+#define UART_INTERRUPT_MASK__TFIFO_RE_RESET_VAL 1
+#define UART_INTERRUPT_MASK__TFIFO_RE_RMASK 0x1
+#define UART_INTERRUPT_MASK__TFIFO_RE_MASK 0x80
+#define UART_INTERRUPT_MASK__TFIFO_RE_FIELD 7,7
+#define UART_INTERRUPT_MASK__RFIFO_WE_SHIFT 8
+#define UART_INTERRUPT_MASK__RFIFO_WE_WIDTH 1
+#define UART_INTERRUPT_MASK__RFIFO_WE_RESET_VAL 1
+#define UART_INTERRUPT_MASK__RFIFO_WE_RMASK 0x1
+#define UART_INTERRUPT_MASK__RFIFO_WE_MASK 0x100
+#define UART_INTERRUPT_MASK__RFIFO_WE_FIELD 8,8
+#define UART_INTERRUPT_MASK__WFIFO_RE_SHIFT 9
+#define UART_INTERRUPT_MASK__WFIFO_RE_WIDTH 1
+#define UART_INTERRUPT_MASK__WFIFO_RE_RESET_VAL 1
+#define UART_INTERRUPT_MASK__WFIFO_RE_RMASK 0x1
+#define UART_INTERRUPT_MASK__WFIFO_RE_MASK 0x200
+#define UART_INTERRUPT_MASK__WFIFO_RE_FIELD 9,9
+#define UART_INTERRUPT_MASK__RFIFO_ERR_SHIFT 10
+#define UART_INTERRUPT_MASK__RFIFO_ERR_WIDTH 1
+#define UART_INTERRUPT_MASK__RFIFO_ERR_RESET_VAL 1
+#define UART_INTERRUPT_MASK__RFIFO_ERR_RMASK 0x1
+#define UART_INTERRUPT_MASK__RFIFO_ERR_MASK 0x400
+#define UART_INTERRUPT_MASK__RFIFO_ERR_FIELD 10,10
+#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_SHIFT 11
+#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_WIDTH 1
+#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_RESET_VAL 1
+#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_RMASK 0x1
+#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_MASK 0x800
+#define UART_INTERRUPT_MASK__TFIFO_AEMPTY_FIELD 11,11
+#define UART_INTERRUPT_STATUS 0x0200
+#define UART_RECEIVE_DATA 0x0148
+#define UART_TRANSMIT_DATA 0x0140
+#define UART_TYPE 0x0160
+#define UART_TYPE__SBITS_SHIFT 0
+#define UART_TYPE__SBITS_WIDTH 1
+#define UART_TYPE__SBITS_RESET_VAL 1
+#define UART_TYPE__SBITS_RMASK 0x1
+#define UART_TYPE__SBITS_MASK 0x1
+#define UART_TYPE__SBITS_FIELD 0,0
+#define UART_TYPE__SBITS_VAL_ONE_SBITS 0x0
+#define UART_TYPE__SBITS_VAL_TWO_SBITS 0x1
+#define UART_TYPE__DBITS_SHIFT 2
+#define UART_TYPE__DBITS_WIDTH 1
+#define UART_TYPE__DBITS_RESET_VAL 0
+#define UART_TYPE__DBITS_RMASK 0x1
+#define UART_TYPE__DBITS_MASK 0x4
+#define UART_TYPE__DBITS_FIELD 2,2
+#define UART_TYPE__DBITS_VAL_EIGHT_DBITS 0x0
+#define UART_TYPE__DBITS_VAL_SEVEN_DBITS 0x1
+#define UART_TYPE__PTYPE_SHIFT 4
+#define UART_TYPE__PTYPE_WIDTH 3
+#define UART_TYPE__PTYPE_RESET_VAL 3
+#define UART_TYPE__PTYPE_RMASK 0x7
+#define UART_TYPE__PTYPE_MASK 0x70
+#define UART_TYPE__PTYPE_FIELD 4,6
+#define UART_TYPE__PTYPE_VAL_NONE 0x0
+#define UART_TYPE__PTYPE_VAL_MARK 0x1
+#define UART_TYPE__PTYPE_VAL_SPACE 0x2
+#define UART_TYPE__PTYPE_VAL_EVEN 0x3
+#define UART_TYPE__PTYPE_VAL_ODD 0x4
+#endif /* !defined(__ARCH_UART_DEF_H__) */
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index b17b9b8e53cd..664d6ad23f80 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -11,12 +11,13 @@ generic-y += errno.h
generic-y += exec.h
generic-y += fb.h
generic-y += fcntl.h
+generic-y += hw_irq.h
generic-y += ioctl.h
generic-y += ioctls.h
generic-y += ipcbuf.h
generic-y += irq_regs.h
-generic-y += kdebug.h
generic-y += local.h
+generic-y += local64.h
generic-y += msgbuf.h
generic-y += mutex.h
generic-y += param.h
diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h
index e71387ab20ca..d385eaadece7 100644
--- a/arch/tile/include/asm/atomic.h
+++ b/arch/tile/include/asm/atomic.h
@@ -114,6 +114,32 @@ static inline int atomic_read(const atomic_t *v)
#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
/**
+ * atomic_xchg - atomically exchange contents of memory with a new value
+ * @v: pointer of type atomic_t
+ * @i: integer value to store in memory
+ *
+ * Atomically sets @v to @i and returns old @v
+ */
+static inline int atomic_xchg(atomic_t *v, int n)
+{
+ return xchg(&v->counter, n);
+}
+
+/**
+ * atomic_cmpxchg - atomically exchange contents of memory if it matches
+ * @v: pointer of type atomic_t
+ * @o: old value that memory should have
+ * @n: new value to write to memory if it matches
+ *
+ * Atomically checks if @v holds @o and replaces it with @n if so.
+ * Returns the old value at @v.
+ */
+static inline int atomic_cmpxchg(atomic_t *v, int o, int n)
+{
+ return cmpxchg(&v->counter, o, n);
+}
+
+/**
* atomic_add_negative - add and test if negative
* @v: pointer of type atomic_t
* @i: integer value to add
@@ -133,6 +159,32 @@ static inline int atomic_read(const atomic_t *v)
#ifndef __ASSEMBLY__
+/**
+ * atomic64_xchg - atomically exchange contents of memory with a new value
+ * @v: pointer of type atomic64_t
+ * @i: integer value to store in memory
+ *
+ * Atomically sets @v to @i and returns old @v
+ */
+static inline u64 atomic64_xchg(atomic64_t *v, u64 n)
+{
+ return xchg64(&v->counter, n);
+}
+
+/**
+ * atomic64_cmpxchg - atomically exchange contents of memory if it matches
+ * @v: pointer of type atomic64_t
+ * @o: old value that memory should have
+ * @n: new value to write to memory if it matches
+ *
+ * Atomically checks if @v holds @o and replaces it with @n if so.
+ * Returns the old value at @v.
+ */
+static inline u64 atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n)
+{
+ return cmpxchg64(&v->counter, o, n);
+}
+
static inline long long atomic64_dec_if_positive(atomic64_t *v)
{
long long c, old, dec;
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index e7fb5cfb9597..0d0395b1b152 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -22,40 +22,6 @@
#ifndef __ASSEMBLY__
-/* Tile-specific routines to support <linux/atomic.h>. */
-int _atomic_xchg(atomic_t *v, int n);
-int _atomic_xchg_add(atomic_t *v, int i);
-int _atomic_xchg_add_unless(atomic_t *v, int a, int u);
-int _atomic_cmpxchg(atomic_t *v, int o, int n);
-
-/**
- * atomic_xchg - atomically exchange contents of memory with a new value
- * @v: pointer of type atomic_t
- * @i: integer value to store in memory
- *
- * Atomically sets @v to @i and returns old @v
- */
-static inline int atomic_xchg(atomic_t *v, int n)
-{
- smp_mb(); /* barrier for proper semantics */
- return _atomic_xchg(v, n);
-}
-
-/**
- * atomic_cmpxchg - atomically exchange contents of memory if it matches
- * @v: pointer of type atomic_t
- * @o: old value that memory should have
- * @n: new value to write to memory if it matches
- *
- * Atomically checks if @v holds @o and replaces it with @n if so.
- * Returns the old value at @v.
- */
-static inline int atomic_cmpxchg(atomic_t *v, int o, int n)
-{
- smp_mb(); /* barrier for proper semantics */
- return _atomic_cmpxchg(v, o, n);
-}
-
/**
* atomic_add - add integer to atomic variable
* @i: integer value to add
@@ -65,7 +31,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int o, int n)
*/
static inline void atomic_add(int i, atomic_t *v)
{
- _atomic_xchg_add(v, i);
+ _atomic_xchg_add(&v->counter, i);
}
/**
@@ -78,7 +44,7 @@ static inline void atomic_add(int i, atomic_t *v)
static inline int atomic_add_return(int i, atomic_t *v)
{
smp_mb(); /* barrier for proper semantics */
- return _atomic_xchg_add(v, i) + i;
+ return _atomic_xchg_add(&v->counter, i) + i;
}
/**
@@ -93,7 +59,7 @@ static inline int atomic_add_return(int i, atomic_t *v)
static inline int __atomic_add_unless(atomic_t *v, int a, int u)
{
smp_mb(); /* barrier for proper semantics */
- return _atomic_xchg_add_unless(v, a, u);
+ return _atomic_xchg_add_unless(&v->counter, a, u);
}
/**
@@ -108,7 +74,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
*/
static inline void atomic_set(atomic_t *v, int n)
{
- _atomic_xchg(v, n);
+ _atomic_xchg(&v->counter, n);
}
/* A 64bit atomic type */
@@ -119,11 +85,6 @@ typedef struct {
#define ATOMIC64_INIT(val) { (val) }
-u64 _atomic64_xchg(atomic64_t *v, u64 n);
-u64 _atomic64_xchg_add(atomic64_t *v, u64 i);
-u64 _atomic64_xchg_add_unless(atomic64_t *v, u64 a, u64 u);
-u64 _atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n);
-
/**
* atomic64_read - read atomic variable
* @v: pointer of type atomic64_t
@@ -137,35 +98,7 @@ static inline u64 atomic64_read(const atomic64_t *v)
* Casting away const is safe since the atomic support routines
* do not write to memory if the value has not been modified.
*/
- return _atomic64_xchg_add((atomic64_t *)v, 0);
-}
-
-/**
- * atomic64_xchg - atomically exchange contents of memory with a new value
- * @v: pointer of type atomic64_t
- * @i: integer value to store in memory
- *
- * Atomically sets @v to @i and returns old @v
- */
-static inline u64 atomic64_xchg(atomic64_t *v, u64 n)
-{
- smp_mb(); /* barrier for proper semantics */
- return _atomic64_xchg(v, n);
-}
-
-/**
- * atomic64_cmpxchg - atomically exchange contents of memory if it matches
- * @v: pointer of type atomic64_t
- * @o: old value that memory should have
- * @n: new value to write to memory if it matches
- *
- * Atomically checks if @v holds @o and replaces it with @n if so.
- * Returns the old value at @v.
- */
-static inline u64 atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n)
-{
- smp_mb(); /* barrier for proper semantics */
- return _atomic64_cmpxchg(v, o, n);
+ return _atomic64_xchg_add((u64 *)&v->counter, 0);
}
/**
@@ -177,7 +110,7 @@ static inline u64 atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n)
*/
static inline void atomic64_add(u64 i, atomic64_t *v)
{
- _atomic64_xchg_add(v, i);
+ _atomic64_xchg_add(&v->counter, i);
}
/**
@@ -190,7 +123,7 @@ static inline void atomic64_add(u64 i, atomic64_t *v)
static inline u64 atomic64_add_return(u64 i, atomic64_t *v)
{
smp_mb(); /* barrier for proper semantics */
- return _atomic64_xchg_add(v, i) + i;
+ return _atomic64_xchg_add(&v->counter, i) + i;
}
/**
@@ -205,7 +138,7 @@ static inline u64 atomic64_add_return(u64 i, atomic64_t *v)
static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u)
{
smp_mb(); /* barrier for proper semantics */
- return _atomic64_xchg_add_unless(v, a, u) != u;
+ return _atomic64_xchg_add_unless(&v->counter, a, u) != u;
}
/**
@@ -220,7 +153,7 @@ static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u)
*/
static inline void atomic64_set(atomic64_t *v, u64 n)
{
- _atomic64_xchg(v, n);
+ _atomic64_xchg(&v->counter, n);
}
#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
@@ -252,21 +185,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n)
* Internal definitions only beyond this point.
*/
-#define ATOMIC_LOCKS_FOUND_VIA_TABLE() \
- (!CHIP_HAS_CBOX_HOME_MAP() && defined(CONFIG_SMP))
-
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
-
-/* Number of entries in atomic_lock_ptr[]. */
-#define ATOMIC_HASH_L1_SHIFT 6
-#define ATOMIC_HASH_L1_SIZE (1 << ATOMIC_HASH_L1_SHIFT)
-
-/* Number of locks in each struct pointed to by atomic_lock_ptr[]. */
-#define ATOMIC_HASH_L2_SHIFT (CHIP_L2_LOG_LINE_SIZE() - 2)
-#define ATOMIC_HASH_L2_SIZE (1 << ATOMIC_HASH_L2_SHIFT)
-
-#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
/*
* Number of atomic locks in atomic_locks[]. Must be a power of two.
* There is no reason for more than PAGE_SIZE / 8 entries, since that
@@ -281,8 +199,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n)
extern int atomic_locks[];
#endif
-#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
/*
* All the code that may fault while holding an atomic lock must
* place the pointer to the lock in ATOMIC_LOCK_REG so the fault code
diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h
index f4500c688ffa..ad220eed05fc 100644
--- a/arch/tile/include/asm/atomic_64.h
+++ b/arch/tile/include/asm/atomic_64.h
@@ -32,25 +32,6 @@
* on any routine which updates memory and returns a value.
*/
-static inline int atomic_cmpxchg(atomic_t *v, int o, int n)
-{
- int val;
- __insn_mtspr(SPR_CMPEXCH_VALUE, o);
- smp_mb(); /* barrier for proper semantics */
- val = __insn_cmpexch4((void *)&v->counter, n);
- smp_mb(); /* barrier for proper semantics */
- return val;
-}
-
-static inline int atomic_xchg(atomic_t *v, int n)
-{
- int val;
- smp_mb(); /* barrier for proper semantics */
- val = __insn_exch4((void *)&v->counter, n);
- smp_mb(); /* barrier for proper semantics */
- return val;
-}
-
static inline void atomic_add(int i, atomic_t *v)
{
__insn_fetchadd4((void *)&v->counter, i);
@@ -72,7 +53,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
if (oldval == u)
break;
guess = oldval;
- oldval = atomic_cmpxchg(v, guess, guess + a);
+ oldval = cmpxchg(&v->counter, guess, guess + a);
} while (guess != oldval);
return oldval;
}
@@ -84,25 +65,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
#define atomic64_read(v) ((v)->counter)
#define atomic64_set(v, i) ((v)->counter = (i))
-static inline long atomic64_cmpxchg(atomic64_t *v, long o, long n)
-{
- long val;
- smp_mb(); /* barrier for proper semantics */
- __insn_mtspr(SPR_CMPEXCH_VALUE, o);
- val = __insn_cmpexch((void *)&v->counter, n);
- smp_mb(); /* barrier for proper semantics */
- return val;
-}
-
-static inline long atomic64_xchg(atomic64_t *v, long n)
-{
- long val;
- smp_mb(); /* barrier for proper semantics */
- val = __insn_exch((void *)&v->counter, n);
- smp_mb(); /* barrier for proper semantics */
- return val;
-}
-
static inline void atomic64_add(long i, atomic64_t *v)
{
__insn_fetchadd((void *)&v->counter, i);
@@ -124,7 +86,7 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
if (oldval == u)
break;
guess = oldval;
- oldval = atomic64_cmpxchg(v, guess, guess + a);
+ oldval = cmpxchg(&v->counter, guess, guess + a);
} while (guess != oldval);
return oldval != u;
}
diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h
index 990a217a0b72..a9a73da5865d 100644
--- a/arch/tile/include/asm/barrier.h
+++ b/arch/tile/include/asm/barrier.h
@@ -77,7 +77,6 @@
#define __sync() __insn_mf()
-#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
#include <hv/syscall_public.h>
/*
* Issue an uncacheable load to each memory controller, then
@@ -96,7 +95,6 @@ static inline void __mb_incoherent(void)
"r20", "r21", "r22", "r23", "r24",
"r25", "r26", "r27", "r28", "r29");
}
-#endif
/* Fence to guarantee visibility of stores to incoherent memory. */
static inline void
@@ -104,7 +102,6 @@ mb_incoherent(void)
{
__insn_mf();
-#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
{
#if CHIP_HAS_TILE_WRITE_PENDING()
const unsigned long WRITE_TIMEOUT_CYCLES = 400;
@@ -116,7 +113,6 @@ mb_incoherent(void)
#endif /* CHIP_HAS_TILE_WRITE_PENDING() */
(void) __mb_incoherent();
}
-#endif /* CHIP_HAS_MF_WAITS_FOR_VICTIMS() */
}
#define fast_wmb() __sync()
diff --git a/arch/tile/include/asm/bitops.h b/arch/tile/include/asm/bitops.h
index bd186c4eaa50..d5a206865036 100644
--- a/arch/tile/include/asm/bitops.h
+++ b/arch/tile/include/asm/bitops.h
@@ -29,17 +29,6 @@
#endif
/**
- * __ffs - find first set bit in word
- * @word: The word to search
- *
- * Undefined if no set bit exists, so code should check against 0 first.
- */
-static inline unsigned long __ffs(unsigned long word)
-{
- return __builtin_ctzl(word);
-}
-
-/**
* ffz - find first zero bit in word
* @word: The word to search
*
@@ -50,33 +39,6 @@ static inline unsigned long ffz(unsigned long word)
return __builtin_ctzl(~word);
}
-/**
- * __fls - find last set bit in word
- * @word: The word to search
- *
- * Undefined if no set bit exists, so code should check against 0 first.
- */
-static inline unsigned long __fls(unsigned long word)
-{
- return (sizeof(word) * 8) - 1 - __builtin_clzl(word);
-}
-
-/**
- * ffs - find first set bit in word
- * @x: the word to search
- *
- * This is defined the same way as the libc and compiler builtin ffs
- * routines, therefore differs in spirit from the other bitops.
- *
- * ffs(value) returns 0 if value is 0 or the position of the first
- * set bit if value is nonzero. The first (least significant) bit
- * is at position 1.
- */
-static inline int ffs(int x)
-{
- return __builtin_ffs(x);
-}
-
static inline int fls64(__u64 w)
{
return (sizeof(__u64) * 8) - __builtin_clzll(w);
@@ -118,6 +80,9 @@ static inline unsigned long __arch_hweight64(__u64 w)
return __builtin_popcountll(w);
}
+#include <asm-generic/bitops/builtin-__ffs.h>
+#include <asm-generic/bitops/builtin-__fls.h>
+#include <asm-generic/bitops/builtin-ffs.h>
#include <asm-generic/bitops/const_hweight.h>
#include <asm-generic/bitops/lock.h>
#include <asm-generic/bitops/find.h>
diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h
index ddc4c1efde43..386865ad2f55 100644
--- a/arch/tile/include/asm/bitops_32.h
+++ b/arch/tile/include/asm/bitops_32.h
@@ -16,7 +16,7 @@
#define _ASM_TILE_BITOPS_32_H
#include <linux/compiler.h>
-#include <linux/atomic.h>
+#include <asm/barrier.h>
/* Tile-specific routines to support <asm/bitops.h>. */
unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask);
diff --git a/arch/tile/include/asm/bitops_64.h b/arch/tile/include/asm/bitops_64.h
index 60b87ee54fb8..ad34cd056085 100644
--- a/arch/tile/include/asm/bitops_64.h
+++ b/arch/tile/include/asm/bitops_64.h
@@ -16,7 +16,7 @@
#define _ASM_TILE_BITOPS_64_H
#include <linux/compiler.h>
-#include <linux/atomic.h>
+#include <asm/cmpxchg.h>
/* See <asm/bitops.h> for API comments. */
@@ -44,8 +44,7 @@ static inline void change_bit(unsigned nr, volatile unsigned long *addr)
oldval = *addr;
do {
guess = oldval;
- oldval = atomic64_cmpxchg((atomic64_t *)addr,
- guess, guess ^ mask);
+ oldval = cmpxchg(addr, guess, guess ^ mask);
} while (guess != oldval);
}
@@ -90,8 +89,7 @@ static inline int test_and_change_bit(unsigned nr,
oldval = *addr;
do {
guess = oldval;
- oldval = atomic64_cmpxchg((atomic64_t *)addr,
- guess, guess ^ mask);
+ oldval = cmpxchg(addr, guess, guess ^ mask);
} while (guess != oldval);
return (oldval & mask) != 0;
}
diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h
index a9a529964e07..6160761d5f61 100644
--- a/arch/tile/include/asm/cache.h
+++ b/arch/tile/include/asm/cache.h
@@ -49,9 +49,16 @@
#define __read_mostly __attribute__((__section__(".data..read_mostly")))
/*
- * Attribute for data that is kept read/write coherent until the end of
- * initialization, then bumped to read/only incoherent for performance.
+ * Originally we used small TLB pages for kernel data and grouped some
+ * things together as "write once", enforcing the property at the end
+ * of initialization by making those pages read-only and non-coherent.
+ * This allowed better cache utilization since cache inclusion did not
+ * need to be maintained. However, to do this requires an extra TLB
+ * entry, which on balance is more of a performance hit than the
+ * non-coherence is a performance gain, so we now just make "read
+ * mostly" and "write once" be synonyms. We keep the attribute
+ * separate in case we change our minds at a future date.
*/
-#define __write_once __attribute__((__section__(".w1data")))
+#define __write_once __read_mostly
#endif /* _ASM_TILE_CACHE_H */
diff --git a/arch/tile/include/asm/cacheflush.h b/arch/tile/include/asm/cacheflush.h
index 0fc63c488edf..92ee4c8a4f76 100644
--- a/arch/tile/include/asm/cacheflush.h
+++ b/arch/tile/include/asm/cacheflush.h
@@ -75,23 +75,6 @@ static inline void copy_to_user_page(struct vm_area_struct *vma,
#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
memcpy((dst), (src), (len))
-/*
- * Invalidate a VA range; pads to L2 cacheline boundaries.
- *
- * Note that on TILE64, __inv_buffer() actually flushes modified
- * cache lines in addition to invalidating them, i.e., it's the
- * same as __finv_buffer().
- */
-static inline void __inv_buffer(void *buffer, size_t size)
-{
- char *next = (char *)((long)buffer & -L2_CACHE_BYTES);
- char *finish = (char *)L2_CACHE_ALIGN((long)buffer + size);
- while (next < finish) {
- __insn_inv(next);
- next += CHIP_INV_STRIDE();
- }
-}
-
/* Flush a VA range; pads to L2 cacheline boundaries. */
static inline void __flush_buffer(void *buffer, size_t size)
{
@@ -115,13 +98,6 @@ static inline void __finv_buffer(void *buffer, size_t size)
}
-/* Invalidate a VA range and wait for it to be complete. */
-static inline void inv_buffer(void *buffer, size_t size)
-{
- __inv_buffer(buffer, size);
- mb();
-}
-
/*
* Flush a locally-homecached VA range and wait for the evicted
* cachelines to hit memory.
@@ -142,6 +118,26 @@ static inline void finv_buffer_local(void *buffer, size_t size)
mb_incoherent();
}
+#ifdef __tilepro__
+/* Invalidate a VA range; pads to L2 cacheline boundaries. */
+static inline void __inv_buffer(void *buffer, size_t size)
+{
+ char *next = (char *)((long)buffer & -L2_CACHE_BYTES);
+ char *finish = (char *)L2_CACHE_ALIGN((long)buffer + size);
+ while (next < finish) {
+ __insn_inv(next);
+ next += CHIP_INV_STRIDE();
+ }
+}
+
+/* Invalidate a VA range and wait for it to be complete. */
+static inline void inv_buffer(void *buffer, size_t size)
+{
+ __inv_buffer(buffer, size);
+ mb();
+}
+#endif
+
/*
* Flush and invalidate a VA range that is homed remotely, waiting
* until the memory controller holds the flushed values. If "hfh" is
diff --git a/arch/tile/include/asm/cmpxchg.h b/arch/tile/include/asm/cmpxchg.h
index 276f067e3640..4001d5eab4bb 100644
--- a/arch/tile/include/asm/cmpxchg.h
+++ b/arch/tile/include/asm/cmpxchg.h
@@ -20,53 +20,108 @@
#ifndef __ASSEMBLY__
-/* Nonexistent functions intended to cause link errors. */
-extern unsigned long __xchg_called_with_bad_pointer(void);
-extern unsigned long __cmpxchg_called_with_bad_pointer(void);
+#include <asm/barrier.h>
-#define xchg(ptr, x) \
+/* Nonexistent functions intended to cause compile errors. */
+extern void __xchg_called_with_bad_pointer(void)
+ __compiletime_error("Bad argument size for xchg");
+extern void __cmpxchg_called_with_bad_pointer(void)
+ __compiletime_error("Bad argument size for cmpxchg");
+
+#ifndef __tilegx__
+
+/* Note the _atomic_xxx() routines include a final mb(). */
+int _atomic_xchg(int *ptr, int n);
+int _atomic_xchg_add(int *v, int i);
+int _atomic_xchg_add_unless(int *v, int a, int u);
+int _atomic_cmpxchg(int *ptr, int o, int n);
+u64 _atomic64_xchg(u64 *v, u64 n);
+u64 _atomic64_xchg_add(u64 *v, u64 i);
+u64 _atomic64_xchg_add_unless(u64 *v, u64 a, u64 u);
+u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n);
+
+#define xchg(ptr, n) \
+ ({ \
+ if (sizeof(*(ptr)) != 4) \
+ __xchg_called_with_bad_pointer(); \
+ smp_mb(); \
+ (typeof(*(ptr)))_atomic_xchg((int *)(ptr), (int)(n)); \
+ })
+
+#define cmpxchg(ptr, o, n) \
+ ({ \
+ if (sizeof(*(ptr)) != 4) \
+ __cmpxchg_called_with_bad_pointer(); \
+ smp_mb(); \
+ (typeof(*(ptr)))_atomic_cmpxchg((int *)ptr, (int)o, (int)n); \
+ })
+
+#define xchg64(ptr, n) \
+ ({ \
+ if (sizeof(*(ptr)) != 8) \
+ __xchg_called_with_bad_pointer(); \
+ smp_mb(); \
+ (typeof(*(ptr)))_atomic64_xchg((u64 *)(ptr), (u64)(n)); \
+ })
+
+#define cmpxchg64(ptr, o, n) \
+ ({ \
+ if (sizeof(*(ptr)) != 8) \
+ __cmpxchg_called_with_bad_pointer(); \
+ smp_mb(); \
+ (typeof(*(ptr)))_atomic64_cmpxchg((u64 *)ptr, (u64)o, (u64)n); \
+ })
+
+#else
+
+#define xchg(ptr, n) \
({ \
typeof(*(ptr)) __x; \
+ smp_mb(); \
switch (sizeof(*(ptr))) { \
case 4: \
- __x = (typeof(__x))(typeof(__x-__x))atomic_xchg( \
- (atomic_t *)(ptr), \
- (u32)(typeof((x)-(x)))(x)); \
+ __x = (typeof(__x))(unsigned long) \
+ __insn_exch4((ptr), (u32)(unsigned long)(n)); \
break; \
case 8: \
- __x = (typeof(__x))(typeof(__x-__x))atomic64_xchg( \
- (atomic64_t *)(ptr), \
- (u64)(typeof((x)-(x)))(x)); \
+ __x = (typeof(__x)) \
+ __insn_exch((ptr), (unsigned long)(n)); \
break; \
default: \
__xchg_called_with_bad_pointer(); \
+ break; \
} \
+ smp_mb(); \
__x; \
})
#define cmpxchg(ptr, o, n) \
({ \
typeof(*(ptr)) __x; \
+ __insn_mtspr(SPR_CMPEXCH_VALUE, (unsigned long)(o)); \
+ smp_mb(); \
switch (sizeof(*(ptr))) { \
case 4: \
- __x = (typeof(__x))(typeof(__x-__x))atomic_cmpxchg( \
- (atomic_t *)(ptr), \
- (u32)(typeof((o)-(o)))(o), \
- (u32)(typeof((n)-(n)))(n)); \
+ __x = (typeof(__x))(unsigned long) \
+ __insn_cmpexch4((ptr), (u32)(unsigned long)(n)); \
break; \
case 8: \
- __x = (typeof(__x))(typeof(__x-__x))atomic64_cmpxchg( \
- (atomic64_t *)(ptr), \
- (u64)(typeof((o)-(o)))(o), \
- (u64)(typeof((n)-(n)))(n)); \
+ __x = (typeof(__x))__insn_cmpexch((ptr), (u64)(n)); \
break; \
default: \
__cmpxchg_called_with_bad_pointer(); \
+ break; \
} \
+ smp_mb(); \
__x; \
})
-#define tas(ptr) (xchg((ptr), 1))
+#define xchg64 xchg
+#define cmpxchg64 cmpxchg
+
+#endif
+
+#define tas(ptr) xchg((ptr), 1)
#endif /* __ASSEMBLY__ */
diff --git a/arch/tile/include/asm/device.h b/arch/tile/include/asm/device.h
index 5182705bd056..6ab8bf146d4c 100644
--- a/arch/tile/include/asm/device.h
+++ b/arch/tile/include/asm/device.h
@@ -23,7 +23,10 @@ struct dev_archdata {
/* Offset of the DMA address from the PA. */
dma_addr_t dma_offset;
- /* Highest DMA address that can be generated by this device. */
+ /*
+ * Highest DMA address that can be generated by devices that
+ * have limited DMA capability, i.e. non 64-bit capable.
+ */
dma_addr_t max_direct_dma_addr;
};
diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h
index f2ff191376b4..1eae359d8315 100644
--- a/arch/tile/include/asm/dma-mapping.h
+++ b/arch/tile/include/asm/dma-mapping.h
@@ -20,9 +20,14 @@
#include <linux/cache.h>
#include <linux/io.h>
+#ifdef __tilegx__
+#define ARCH_HAS_DMA_GET_REQUIRED_MASK
+#endif
+
extern struct dma_map_ops *tile_dma_map_ops;
extern struct dma_map_ops *gx_pci_dma_map_ops;
extern struct dma_map_ops *gx_legacy_pci_dma_map_ops;
+extern struct dma_map_ops *gx_hybrid_pci_dma_map_ops;
static inline struct dma_map_ops *get_dma_ops(struct device *dev)
{
@@ -44,12 +49,12 @@ static inline void set_dma_offset(struct device *dev, dma_addr_t off)
static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
{
- return paddr + get_dma_offset(dev);
+ return paddr;
}
static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
{
- return daddr - get_dma_offset(dev);
+ return daddr;
}
static inline void dma_mark_clean(void *addr, size_t size) {}
@@ -87,11 +92,19 @@ dma_set_mask(struct device *dev, u64 mask)
{
struct dma_map_ops *dma_ops = get_dma_ops(dev);
- /* Handle legacy PCI devices with limited memory addressability. */
- if ((dma_ops == gx_pci_dma_map_ops) && (mask <= DMA_BIT_MASK(32))) {
- set_dma_ops(dev, gx_legacy_pci_dma_map_ops);
- set_dma_offset(dev, 0);
- if (mask > dev->archdata.max_direct_dma_addr)
+ /*
+ * For PCI devices with 64-bit DMA addressing capability, promote
+ * the dma_ops to hybrid, with the consistent memory DMA space limited
+ * to 32-bit. For 32-bit capable devices, limit the streaming DMA
+ * address range to max_direct_dma_addr.
+ */
+ if (dma_ops == gx_pci_dma_map_ops ||
+ dma_ops == gx_hybrid_pci_dma_map_ops ||
+ dma_ops == gx_legacy_pci_dma_map_ops) {
+ if (mask == DMA_BIT_MASK(64) &&
+ dma_ops == gx_legacy_pci_dma_map_ops)
+ set_dma_ops(dev, gx_hybrid_pci_dma_map_ops);
+ else if (mask > dev->archdata.max_direct_dma_addr)
mask = dev->archdata.max_direct_dma_addr;
}
diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h
index ff8a93408823..41d9878a9686 100644
--- a/arch/tile/include/asm/elf.h
+++ b/arch/tile/include/asm/elf.h
@@ -30,7 +30,6 @@ typedef unsigned long elf_greg_t;
#define ELF_NGREG (sizeof(struct pt_regs) / sizeof(elf_greg_t))
typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-#define EM_TILE64 187
#define EM_TILEPRO 188
#define EM_TILEGX 191
@@ -132,6 +131,15 @@ extern int dump_task_regs(struct task_struct *, elf_gregset_t *);
struct linux_binprm;
extern int arch_setup_additional_pages(struct linux_binprm *bprm,
int executable_stack);
+#define ARCH_DLINFO \
+do { \
+ NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_BASE); \
+} while (0)
+
+struct mm_struct;
+extern unsigned long arch_randomize_brk(struct mm_struct *mm);
+#define arch_randomize_brk arch_randomize_brk
+
#ifdef CONFIG_COMPAT
#define COMPAT_ELF_PLATFORM "tilegx-m32"
diff --git a/arch/tile/include/asm/fixmap.h b/arch/tile/include/asm/fixmap.h
index e16dbf929cb5..c6b9c1b38fd1 100644
--- a/arch/tile/include/asm/fixmap.h
+++ b/arch/tile/include/asm/fixmap.h
@@ -78,14 +78,6 @@ enum fixed_addresses {
#endif
};
-extern void __set_fixmap(enum fixed_addresses idx,
- unsigned long phys, pgprot_t flags);
-
-#define set_fixmap(idx, phys) \
- __set_fixmap(idx, phys, PAGE_KERNEL)
-#define clear_fixmap(idx) \
- __set_fixmap(idx, 0, __pgprot(0))
-
#define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
#define __FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
#define FIXADDR_START (FIXADDR_TOP + PAGE_SIZE - __FIXADDR_SIZE)
diff --git a/arch/tile/include/asm/ftrace.h b/arch/tile/include/asm/ftrace.h
index 461459b06d98..13a9bb81a8ab 100644
--- a/arch/tile/include/asm/ftrace.h
+++ b/arch/tile/include/asm/ftrace.h
@@ -15,6 +15,26 @@
#ifndef _ASM_TILE_FTRACE_H
#define _ASM_TILE_FTRACE_H
-/* empty */
+#ifdef CONFIG_FUNCTION_TRACER
+
+#define MCOUNT_ADDR ((unsigned long)(__mcount))
+#define MCOUNT_INSN_SIZE 8 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+extern void __mcount(void);
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+ return addr;
+}
+
+struct dyn_arch_ftrace {
+};
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_FUNCTION_TRACER */
#endif /* _ASM_TILE_FTRACE_H */
diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h
index 5909ac3d7218..1a6ef1b69cb1 100644
--- a/arch/tile/include/asm/futex.h
+++ b/arch/tile/include/asm/futex.h
@@ -43,6 +43,7 @@
".pushsection .fixup,\"ax\"\n" \
"0: { movei %0, %5; j 9f }\n" \
".section __ex_table,\"a\"\n" \
+ ".align 8\n" \
".quad 1b, 0b\n" \
".popsection\n" \
"9:" \
diff --git a/arch/tile/include/asm/homecache.h b/arch/tile/include/asm/homecache.h
index 7b7771328642..7ddd1b8d6910 100644
--- a/arch/tile/include/asm/homecache.h
+++ b/arch/tile/include/asm/homecache.h
@@ -33,8 +33,7 @@ struct zone;
/*
* Is this page immutable (unwritable) and thus able to be cached more
- * widely than would otherwise be possible? On tile64 this means we
- * mark the PTE to cache locally; on tilepro it means we have "nc" set.
+ * widely than would otherwise be possible? This means we have "nc" set.
*/
#define PAGE_HOME_IMMUTABLE -2
@@ -44,16 +43,8 @@ struct zone;
*/
#define PAGE_HOME_INCOHERENT -3
-#if CHIP_HAS_CBOX_HOME_MAP()
/* Home for the page is distributed via hash-for-home. */
#define PAGE_HOME_HASH -4
-#endif
-
-/* Homing is unknown or unspecified. Not valid for page_home(). */
-#define PAGE_HOME_UNKNOWN -5
-
-/* Home on the current cpu. Not valid for page_home(). */
-#define PAGE_HOME_HERE -6
/* Support wrapper to use instead of explicit hv_flush_remote(). */
extern void flush_remote(unsigned long cache_pfn, unsigned long cache_length,
diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h
index 31672918064c..9fe434969fab 100644
--- a/arch/tile/include/asm/io.h
+++ b/arch/tile/include/asm/io.h
@@ -19,7 +19,8 @@
#include <linux/bug.h>
#include <asm/page.h>
-#define IO_SPACE_LIMIT 0xfffffffful
+/* Maximum PCI I/O space address supported. */
+#define IO_SPACE_LIMIT 0xffffffff
/*
* Convert a physical pointer to a virtual kernel pointer for /dev/mem
@@ -254,7 +255,7 @@ static inline void writeq(u64 val, unsigned long addr)
static inline void memset_io(volatile void *dst, int val, size_t len)
{
- int x;
+ size_t x;
BUG_ON((unsigned long)dst & 0x3);
val = (val & 0xff) * 0x01010101;
for (x = 0; x < len; x += 4)
@@ -264,7 +265,7 @@ static inline void memset_io(volatile void *dst, int val, size_t len)
static inline void memcpy_fromio(void *dst, const volatile void __iomem *src,
size_t len)
{
- int x;
+ size_t x;
BUG_ON((unsigned long)src & 0x3);
for (x = 0; x < len; x += 4)
*(u32 *)(dst + x) = readl(src + x);
@@ -273,7 +274,7 @@ static inline void memcpy_fromio(void *dst, const volatile void __iomem *src,
static inline void memcpy_toio(volatile void __iomem *dst, const void *src,
size_t len)
{
- int x;
+ size_t x;
BUG_ON((unsigned long)dst & 0x3);
for (x = 0; x < len; x += 4)
writel(*(u32 *)(src + x), dst + x);
@@ -281,8 +282,108 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src,
#endif
+#if CHIP_HAS_MMIO() && defined(CONFIG_TILE_PCI_IO)
+
+static inline u8 inb(unsigned long addr)
+{
+ return readb((volatile void __iomem *) addr);
+}
+
+static inline u16 inw(unsigned long addr)
+{
+ return readw((volatile void __iomem *) addr);
+}
+
+static inline u32 inl(unsigned long addr)
+{
+ return readl((volatile void __iomem *) addr);
+}
+
+static inline void outb(u8 b, unsigned long addr)
+{
+ writeb(b, (volatile void __iomem *) addr);
+}
+
+static inline void outw(u16 b, unsigned long addr)
+{
+ writew(b, (volatile void __iomem *) addr);
+}
+
+static inline void outl(u32 b, unsigned long addr)
+{
+ writel(b, (volatile void __iomem *) addr);
+}
+
+static inline void insb(unsigned long addr, void *buffer, int count)
+{
+ if (count) {
+ u8 *buf = buffer;
+ do {
+ u8 x = inb(addr);
+ *buf++ = x;
+ } while (--count);
+ }
+}
+
+static inline void insw(unsigned long addr, void *buffer, int count)
+{
+ if (count) {
+ u16 *buf = buffer;
+ do {
+ u16 x = inw(addr);
+ *buf++ = x;
+ } while (--count);
+ }
+}
+
+static inline void insl(unsigned long addr, void *buffer, int count)
+{
+ if (count) {
+ u32 *buf = buffer;
+ do {
+ u32 x = inl(addr);
+ *buf++ = x;
+ } while (--count);
+ }
+}
+
+static inline void outsb(unsigned long addr, const void *buffer, int count)
+{
+ if (count) {
+ const u8 *buf = buffer;
+ do {
+ outb(*buf++, addr);
+ } while (--count);
+ }
+}
+
+static inline void outsw(unsigned long addr, const void *buffer, int count)
+{
+ if (count) {
+ const u16 *buf = buffer;
+ do {
+ outw(*buf++, addr);
+ } while (--count);
+ }
+}
+
+static inline void outsl(unsigned long addr, const void *buffer, int count)
+{
+ if (count) {
+ const u32 *buf = buffer;
+ do {
+ outl(*buf++, addr);
+ } while (--count);
+ }
+}
+
+extern void __iomem *ioport_map(unsigned long port, unsigned int len);
+extern void ioport_unmap(void __iomem *addr);
+
+#else
+
/*
- * The Tile architecture does not support IOPORT, even with PCI.
+ * The TilePro architecture does not support IOPORT, even with PCI.
* Unfortunately we can't yet simply not declare these methods,
* since some generic code that compiles into the kernel, but
* we never run, uses them unconditionally.
@@ -290,7 +391,12 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src,
static inline long ioport_panic(void)
{
+#ifdef __tilegx__
+ panic("PCI IO space support is disabled. Configure the kernel with"
+ " CONFIG_TILE_PCI_IO to enable it");
+#else
panic("inb/outb and friends do not exist on tile");
+#endif
return 0;
}
@@ -335,13 +441,6 @@ static inline void outl(u32 b, unsigned long addr)
ioport_panic();
}
-#define inb_p(addr) inb(addr)
-#define inw_p(addr) inw(addr)
-#define inl_p(addr) inl(addr)
-#define outb_p(x, addr) outb((x), (addr))
-#define outw_p(x, addr) outw((x), (addr))
-#define outl_p(x, addr) outl((x), (addr))
-
static inline void insb(unsigned long addr, void *buffer, int count)
{
ioport_panic();
@@ -372,6 +471,15 @@ static inline void outsl(unsigned long addr, const void *buffer, int count)
ioport_panic();
}
+#endif /* CHIP_HAS_MMIO() && defined(CONFIG_TILE_PCI_IO) */
+
+#define inb_p(addr) inb(addr)
+#define inw_p(addr) inw(addr)
+#define inl_p(addr) inl(addr)
+#define outb_p(x, addr) outb((x), (addr))
+#define outw_p(x, addr) outw((x), (addr))
+#define outl_p(x, addr) outl((x), (addr))
+
#define ioread16be(addr) be16_to_cpu(ioread16(addr))
#define ioread32be(addr) be32_to_cpu(ioread32(addr))
#define iowrite16be(v, addr) iowrite16(be16_to_cpu(v), (addr))
diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h
index c96f9bbb760d..71af5747874d 100644
--- a/arch/tile/include/asm/irqflags.h
+++ b/arch/tile/include/asm/irqflags.h
@@ -124,6 +124,12 @@
DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
#define INITIAL_INTERRUPTS_ENABLED (1ULL << INT_MEM_ERROR)
+#ifdef CONFIG_DEBUG_PREEMPT
+/* Due to inclusion issues, we can't rely on <linux/smp.h> here. */
+extern unsigned int debug_smp_processor_id(void);
+# define smp_processor_id() debug_smp_processor_id()
+#endif
+
/* Disable interrupts. */
#define arch_local_irq_disable() \
interrupt_mask_set_mask(LINUX_MASKABLE_INTERRUPTS)
@@ -132,9 +138,18 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
#define arch_local_irq_disable_all() \
interrupt_mask_set_mask(-1ULL)
+/*
+ * Read the set of maskable interrupts.
+ * We avoid the preemption warning here via __this_cpu_ptr since even
+ * if irqs are already enabled, it's harmless to read the wrong cpu's
+ * enabled mask.
+ */
+#define arch_local_irqs_enabled() \
+ (*__this_cpu_ptr(&interrupts_enabled_mask))
+
/* Re-enable all maskable interrupts. */
#define arch_local_irq_enable() \
- interrupt_mask_reset_mask(__get_cpu_var(interrupts_enabled_mask))
+ interrupt_mask_reset_mask(arch_local_irqs_enabled())
/* Disable or enable interrupts based on flag argument. */
#define arch_local_irq_restore(disabled) do { \
@@ -161,7 +176,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
/* Prevent the given interrupt from being enabled next time we enable irqs. */
#define arch_local_irq_mask(interrupt) \
- (__get_cpu_var(interrupts_enabled_mask) &= ~(1ULL << (interrupt)))
+ this_cpu_and(interrupts_enabled_mask, ~(1ULL << (interrupt)))
/* Prevent the given interrupt from being enabled immediately. */
#define arch_local_irq_mask_now(interrupt) do { \
@@ -171,7 +186,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
/* Allow the given interrupt to be enabled next time we enable irqs. */
#define arch_local_irq_unmask(interrupt) \
- (__get_cpu_var(interrupts_enabled_mask) |= (1ULL << (interrupt)))
+ this_cpu_or(interrupts_enabled_mask, (1ULL << (interrupt)))
/* Allow the given interrupt to be enabled immediately, if !irqs_disabled. */
#define arch_local_irq_unmask_now(interrupt) do { \
diff --git a/arch/tile/include/asm/hw_irq.h b/arch/tile/include/asm/kdebug.h
index 4fac5fbf333e..5bbbfa904c2d 100644
--- a/arch/tile/include/asm/hw_irq.h
+++ b/arch/tile/include/asm/kdebug.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -12,7 +12,17 @@
* more details.
*/
-#ifndef _ASM_TILE_HW_IRQ_H
-#define _ASM_TILE_HW_IRQ_H
+#ifndef _ASM_TILE_KDEBUG_H
+#define _ASM_TILE_KDEBUG_H
-#endif /* _ASM_TILE_HW_IRQ_H */
+#include <linux/notifier.h>
+
+enum die_val {
+ DIE_OOPS = 1,
+ DIE_BREAK,
+ DIE_SSTEPBP,
+ DIE_PAGE_FAULT,
+ DIE_COMPILED_BPT
+};
+
+#endif /* _ASM_TILE_KDEBUG_H */
diff --git a/arch/tile/include/asm/kgdb.h b/arch/tile/include/asm/kgdb.h
new file mode 100644
index 000000000000..280c181cf0db
--- /dev/null
+++ b/arch/tile/include/asm/kgdb.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * TILE-Gx KGDB support.
+ */
+
+#ifndef __TILE_KGDB_H__
+#define __TILE_KGDB_H__
+
+#include <linux/kdebug.h>
+#include <arch/opcode.h>
+
+#define GDB_SIZEOF_REG sizeof(unsigned long)
+
+/*
+ * TILE-Gx gdb is expecting the following register layout:
+ * 56 GPRs(R0 - R52, TP, SP, LR), 8 special GPRs(networks and ZERO),
+ * plus the PC and the faultnum.
+ *
+ * Even though kernel not use the 8 special GPRs, they need to be present
+ * in the registers sent for correct processing in the host-side gdb.
+ *
+ */
+#define DBG_MAX_REG_NUM (56+8+2)
+#define NUMREGBYTES (DBG_MAX_REG_NUM * GDB_SIZEOF_REG)
+
+/*
+ * BUFMAX defines the maximum number of characters in inbound/outbound
+ * buffers at least NUMREGBYTES*2 are needed for register packets,
+ * Longer buffer is needed to list all threads.
+ */
+#define BUFMAX 2048
+
+#define BREAK_INSTR_SIZE TILEGX_BUNDLE_SIZE_IN_BYTES
+
+/*
+ * Require cache flush for set/clear a software breakpoint or write memory.
+ */
+#define CACHE_FLUSH_IS_SAFE 1
+
+/*
+ * The compiled-in breakpoint instruction can be used to "break" into
+ * the debugger via magic system request key (sysrq-G).
+ */
+static tile_bundle_bits compiled_bpt = TILEGX_BPT_BUNDLE | DIE_COMPILED_BPT;
+
+enum tilegx_regnum {
+ TILEGX_PC_REGNUM = TREG_LAST_GPR + 9,
+ TILEGX_FAULTNUM_REGNUM,
+};
+
+/*
+ * Generate a breakpoint exception to "break" into the debugger.
+ */
+static inline void arch_kgdb_breakpoint(void)
+{
+ asm volatile (".quad %0\n\t"
+ ::""(compiled_bpt));
+}
+
+#endif /* __TILE_KGDB_H__ */
diff --git a/arch/tile/include/asm/kprobes.h b/arch/tile/include/asm/kprobes.h
new file mode 100644
index 000000000000..d8f9a83943b1
--- /dev/null
+++ b/arch/tile/include/asm/kprobes.h
@@ -0,0 +1,79 @@
+/*
+ * arch/tile/include/asm/kprobes.h
+ *
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_KPROBES_H
+#define _ASM_TILE_KPROBES_H
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+
+#include <arch/opcode.h>
+
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+#define MAX_INSN_SIZE 2
+
+#define kretprobe_blacklist_size 0
+
+typedef tile_bundle_bits kprobe_opcode_t;
+
+#define flush_insn_slot(p) \
+ flush_icache_range((unsigned long)p->addr, \
+ (unsigned long)p->addr + \
+ (MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
+
+struct kprobe;
+
+/* Architecture specific copy of original instruction. */
+struct arch_specific_insn {
+ kprobe_opcode_t *insn;
+};
+
+struct prev_kprobe {
+ struct kprobe *kp;
+ unsigned long status;
+ unsigned long saved_pc;
+};
+
+#define MAX_JPROBES_STACK_SIZE 128
+#define MAX_JPROBES_STACK_ADDR \
+ (((unsigned long)current_thread_info()) + THREAD_SIZE - 32 \
+ - sizeof(struct pt_regs))
+
+#define MIN_JPROBES_STACK_SIZE(ADDR) \
+ ((((ADDR) + MAX_JPROBES_STACK_SIZE) > MAX_JPROBES_STACK_ADDR) \
+ ? MAX_JPROBES_STACK_ADDR - (ADDR) \
+ : MAX_JPROBES_STACK_SIZE)
+
+/* per-cpu kprobe control block. */
+struct kprobe_ctlblk {
+ unsigned long kprobe_status;
+ unsigned long kprobe_saved_pc;
+ unsigned long jprobe_saved_sp;
+ struct prev_kprobe prev_kprobe;
+ struct pt_regs jprobe_saved_regs;
+ char jprobes_stack[MAX_JPROBES_STACK_SIZE];
+};
+
+extern tile_bundle_bits breakpoint2_insn;
+extern tile_bundle_bits breakpoint_insn;
+
+void arch_remove_kprobe(struct kprobe *);
+
+extern int kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data);
+
+#endif /* _ASM_TILE_KPROBES_H */
diff --git a/arch/tile/include/asm/mmu.h b/arch/tile/include/asm/mmu.h
index e2c789096795..0cab1182bde1 100644
--- a/arch/tile/include/asm/mmu.h
+++ b/arch/tile/include/asm/mmu.h
@@ -22,6 +22,7 @@ struct mm_context {
* semaphore but atomically, but it is conservatively set.
*/
unsigned long priority_cached;
+ unsigned long vdso_base;
};
typedef struct mm_context mm_context_t;
diff --git a/arch/tile/include/asm/mmu_context.h b/arch/tile/include/asm/mmu_context.h
index 37f0b741dee7..4734215e2ad4 100644
--- a/arch/tile/include/asm/mmu_context.h
+++ b/arch/tile/include/asm/mmu_context.h
@@ -45,7 +45,7 @@ static inline void __install_page_table(pgd_t *pgdir, int asid, pgprot_t prot)
static inline void install_page_table(pgd_t *pgdir, int asid)
{
- pte_t *ptep = virt_to_pte(NULL, (unsigned long)pgdir);
+ pte_t *ptep = virt_to_kpte((unsigned long)pgdir);
__install_page_table(pgdir, asid, *ptep);
}
diff --git a/arch/tile/include/asm/mmzone.h b/arch/tile/include/asm/mmzone.h
index 9d3dbce8f953..804f1098b6cd 100644
--- a/arch/tile/include/asm/mmzone.h
+++ b/arch/tile/include/asm/mmzone.h
@@ -42,7 +42,7 @@ static inline int pfn_to_nid(unsigned long pfn)
#define kern_addr_valid(kaddr) virt_addr_valid((void *)kaddr)
-static inline int pfn_valid(int pfn)
+static inline int pfn_valid(unsigned long pfn)
{
int nid = pfn_to_nid(pfn);
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index dd033a4fd627..6346888f7bdc 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -39,6 +39,12 @@
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
/*
+ * We do define AT_SYSINFO_EHDR to support vDSO,
+ * but don't use the gate mechanism.
+ */
+#define __HAVE_ARCH_GATE_AREA 1
+
+/*
* If the Kconfig doesn't specify, set a maximum zone order that
* is enough so that we can create huge pages from small pages given
* the respective sizes of the two page types. See <linux/mmzone.h>.
@@ -142,8 +148,12 @@ static inline __attribute_const__ int get_order(unsigned long size)
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#endif
+/* Allow overriding how much VA or PA the kernel will use. */
+#define MAX_PA_WIDTH CHIP_PA_WIDTH()
+#define MAX_VA_WIDTH CHIP_VA_WIDTH()
+
/* Each memory controller has PAs distinct in their high bits. */
-#define NR_PA_HIGHBIT_SHIFT (CHIP_PA_WIDTH() - CHIP_LOG_NUM_MSHIMS())
+#define NR_PA_HIGHBIT_SHIFT (MAX_PA_WIDTH - CHIP_LOG_NUM_MSHIMS())
#define NR_PA_HIGHBIT_VALUES (1 << CHIP_LOG_NUM_MSHIMS())
#define __pa_to_highbits(pa) ((phys_addr_t)(pa) >> NR_PA_HIGHBIT_SHIFT)
#define __pfn_to_highbits(pfn) ((pfn) >> (NR_PA_HIGHBIT_SHIFT - PAGE_SHIFT))
@@ -154,7 +164,7 @@ static inline __attribute_const__ int get_order(unsigned long size)
* We reserve the lower half of memory for user-space programs, and the
* upper half for system code. We re-map all of physical memory in the
* upper half, which takes a quarter of our VA space. Then we have
- * the vmalloc regions. The supervisor code lives at 0xfffffff700000000,
+ * the vmalloc regions. The supervisor code lives at the highest address,
* with the hypervisor above that.
*
* Loadable kernel modules are placed immediately after the static
@@ -166,26 +176,19 @@ static inline __attribute_const__ int get_order(unsigned long size)
* Similarly, for now we don't play any struct page mapping games.
*/
-#if CHIP_PA_WIDTH() + 2 > CHIP_VA_WIDTH()
+#if MAX_PA_WIDTH + 2 > MAX_VA_WIDTH
# error Too much PA to map with the VA available!
#endif
-#define HALF_VA_SPACE (_AC(1, UL) << (CHIP_VA_WIDTH() - 1))
-#define MEM_LOW_END (HALF_VA_SPACE - 1) /* low half */
-#define MEM_HIGH_START (-HALF_VA_SPACE) /* high half */
-#define PAGE_OFFSET MEM_HIGH_START
-#define FIXADDR_BASE _AC(0xfffffff400000000, UL) /* 4 GB */
-#define FIXADDR_TOP _AC(0xfffffff500000000, UL) /* 4 GB */
+#define PAGE_OFFSET (-(_AC(1, UL) << (MAX_VA_WIDTH - 1)))
+#define KERNEL_HIGH_VADDR _AC(0xfffffff800000000, UL) /* high 32GB */
+#define FIXADDR_BASE (KERNEL_HIGH_VADDR - 0x400000000) /* 4 GB */
+#define FIXADDR_TOP (KERNEL_HIGH_VADDR - 0x300000000) /* 4 GB */
#define _VMALLOC_START FIXADDR_TOP
-#define HUGE_VMAP_BASE _AC(0xfffffff600000000, UL) /* 4 GB */
-#define MEM_SV_START _AC(0xfffffff700000000, UL) /* 256 MB */
-#define MEM_SV_INTRPT MEM_SV_START
-#define MEM_MODULE_START _AC(0xfffffff710000000, UL) /* 256 MB */
+#define HUGE_VMAP_BASE (KERNEL_HIGH_VADDR - 0x200000000) /* 4 GB */
+#define MEM_SV_START (KERNEL_HIGH_VADDR - 0x100000000) /* 256 MB */
+#define MEM_MODULE_START (MEM_SV_START + (256*1024*1024)) /* 256 MB */
#define MEM_MODULE_END (MEM_MODULE_START + (256*1024*1024))
-#define MEM_HV_START _AC(0xfffffff800000000, UL) /* 32 GB */
-
-/* Highest DTLB address we will use */
-#define KERNEL_HIGH_VADDR MEM_SV_START
#else /* !__tilegx__ */
@@ -207,25 +210,18 @@ static inline __attribute_const__ int get_order(unsigned long size)
* values, and after that, we show "typical" values, since the actual
* addresses depend on kernel #defines.
*
- * MEM_HV_INTRPT 0xfe000000
- * MEM_SV_INTRPT (kernel code) 0xfd000000
+ * MEM_HV_START 0xfe000000
+ * MEM_SV_START (kernel code) 0xfd000000
* MEM_USER_INTRPT (user vector) 0xfc000000
- * FIX_KMAP_xxx 0xf8000000 (via NR_CPUS * KM_TYPE_NR)
- * PKMAP_BASE 0xf7000000 (via LAST_PKMAP)
- * HUGE_VMAP 0xf3000000 (via CONFIG_NR_HUGE_VMAPS)
- * VMALLOC_START 0xf0000000 (via __VMALLOC_RESERVE)
+ * FIX_KMAP_xxx 0xfa000000 (via NR_CPUS * KM_TYPE_NR)
+ * PKMAP_BASE 0xf9000000 (via LAST_PKMAP)
+ * VMALLOC_START 0xf7000000 (via VMALLOC_RESERVE)
* mapped LOWMEM 0xc0000000
*/
#define MEM_USER_INTRPT _AC(0xfc000000, UL)
-#if CONFIG_KERNEL_PL == 1
-#define MEM_SV_INTRPT _AC(0xfd000000, UL)
-#define MEM_HV_INTRPT _AC(0xfe000000, UL)
-#else
-#define MEM_GUEST_INTRPT _AC(0xfd000000, UL)
-#define MEM_SV_INTRPT _AC(0xfe000000, UL)
-#define MEM_HV_INTRPT _AC(0xff000000, UL)
-#endif
+#define MEM_SV_START _AC(0xfd000000, UL)
+#define MEM_HV_START _AC(0xfe000000, UL)
#define INTRPT_SIZE 0x4000
@@ -246,7 +242,7 @@ static inline __attribute_const__ int get_order(unsigned long size)
#endif /* __tilegx__ */
-#ifndef __ASSEMBLY__
+#if !defined(__ASSEMBLY__) && !defined(VDSO_BUILD)
#ifdef CONFIG_HIGHMEM
@@ -332,6 +328,7 @@ static inline int pfn_valid(unsigned long pfn)
struct mm_struct;
extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr);
+extern pte_t *virt_to_kpte(unsigned long kaddr);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h
index 54a924208d3c..dfedd7ac7298 100644
--- a/arch/tile/include/asm/pci.h
+++ b/arch/tile/include/asm/pci.h
@@ -17,7 +17,6 @@
#include <linux/dma-mapping.h>
#include <linux/pci.h>
-#include <linux/numa.h>
#include <asm-generic/pci_iomap.h>
#ifndef __tilegx__
@@ -29,7 +28,6 @@ struct pci_controller {
int index; /* PCI domain number */
struct pci_bus *root_bus;
- int first_busno;
int last_busno;
int hv_cfg_fd[2]; /* config{0,1} fds for this PCIe controller */
@@ -124,6 +122,11 @@ static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
* the CPA plus TILE_PCI_MEM_MAP_BASE_OFFSET. To support 32-bit
* devices, we create a separate map region that handles the low
* 4GB.
+ *
+ * This design lets us avoid the "PCI hole" problem where the host bridge
+ * won't pass DMA traffic with target addresses that happen to fall within the
+ * BAR space. This enables us to use all the physical memory for DMA, instead
+ * of wasting the same amount of physical memory as the BAR window size.
*/
#define TILE_PCI_MEM_MAP_BASE_OFFSET (1ULL << CHIP_PA_WIDTH())
@@ -145,6 +148,10 @@ struct pci_controller {
int pio_mem_index; /* PIO region index for memory access */
+#ifdef CONFIG_TILE_PCI_IO
+ int pio_io_index; /* PIO region index for I/O space access */
+#endif
+
/*
* Mem-Map regions for all the memory controllers so that Linux can
* map all of its physical memory space to the PCI bus.
@@ -154,6 +161,10 @@ struct pci_controller {
int index; /* PCI domain number */
struct pci_bus *root_bus;
+ /* PCI I/O space resource for this controller. */
+ struct resource io_space;
+ char io_space_name[32];
+
/* PCI memory space resource for this controller. */
struct resource mem_space;
char mem_space_name[32];
@@ -166,13 +177,11 @@ struct pci_controller {
/* Table that maps the INTx numbers to Linux irq numbers. */
int irq_intx_table[4];
-
- /* Address ranges that are routed to this controller/bridge. */
- struct resource mem_resources[3];
};
extern struct pci_controller pci_controllers[TILEGX_NUM_TRIO * TILEGX_TRIO_PCIES];
extern gxio_trio_context_t trio_contexts[TILEGX_NUM_TRIO];
+extern int num_trio_shims;
extern void pci_iounmap(struct pci_dev *dev, void __iomem *);
@@ -211,7 +220,8 @@ static inline int pcibios_assign_all_busses(void)
}
#define PCIBIOS_MIN_MEM 0
-#define PCIBIOS_MIN_IO 0
+/* Minimum PCI I/O address, starting at the page boundary. */
+#define PCIBIOS_MIN_IO PAGE_SIZE
/* Use any cpu for PCI. */
#define cpumask_of_pcibus(bus) cpu_online_mask
diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h
index 4ce4a7a99c24..63142ab3b3dd 100644
--- a/arch/tile/include/asm/pgtable_32.h
+++ b/arch/tile/include/asm/pgtable_32.h
@@ -84,10 +84,12 @@ extern unsigned long VMALLOC_RESERVE /* = CONFIG_VMALLOC_RESERVE */;
/* We have no pmd or pud since we are strictly a two-level page table */
#include <asm-generic/pgtable-nopmd.h>
+static inline int pud_huge_page(pud_t pud) { return 0; }
+
/* We don't define any pgds for these addresses. */
static inline int pgd_addr_invalid(unsigned long addr)
{
- return addr >= MEM_HV_INTRPT;
+ return addr >= MEM_HV_START;
}
/*
diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h
index 2492fa5478e7..3421177f7370 100644
--- a/arch/tile/include/asm/pgtable_64.h
+++ b/arch/tile/include/asm/pgtable_64.h
@@ -63,6 +63,15 @@
/* We have no pud since we are a three-level page table. */
#include <asm-generic/pgtable-nopud.h>
+/*
+ * pmds are the same as pgds and ptes, so converting is a no-op.
+ */
+#define pmd_pte(pmd) (pmd)
+#define pmdp_ptep(pmdp) (pmdp)
+#define pte_pmd(pte) (pte)
+
+#define pud_pte(pud) ((pud).pgd)
+
static inline int pud_none(pud_t pud)
{
return pud_val(pud) == 0;
@@ -73,6 +82,11 @@ static inline int pud_present(pud_t pud)
return pud_val(pud) & _PAGE_PRESENT;
}
+static inline int pud_huge_page(pud_t pud)
+{
+ return pud_val(pud) & _PAGE_HUGE_PAGE;
+}
+
#define pmd_ERROR(e) \
pr_err("%s:%d: bad pmd 0x%016llx.\n", __FILE__, __LINE__, pmd_val(e))
@@ -89,6 +103,9 @@ static inline int pud_bad(pud_t pud)
/* Return the page-table frame number (ptfn) that a pud_t points at. */
#define pud_ptfn(pud) hv_pte_get_ptfn((pud).pgd)
+/* Return the page frame number (pfn) that a pud_t points at. */
+#define pud_pfn(pud) pte_pfn(pud_pte(pud))
+
/*
* A given kernel pud_t maps to a kernel pmd_t table at a specific
* virtual address. Since kernel pmd_t tables can be aligned at
@@ -123,8 +140,7 @@ static inline unsigned long pgd_addr_normalize(unsigned long addr)
/* We don't define any pgds for these addresses. */
static inline int pgd_addr_invalid(unsigned long addr)
{
- return addr >= MEM_HV_START ||
- (addr > MEM_LOW_END && addr < MEM_HIGH_START);
+ return addr >= KERNEL_HIGH_VADDR || addr != pgd_addr_normalize(addr);
}
/*
@@ -152,13 +168,6 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
return hv_pte(__insn_exch(&ptep->val, 0UL));
}
-/*
- * pmds are the same as pgds and ptes, so converting is a no-op.
- */
-#define pmd_pte(pmd) (pmd)
-#define pmdp_ptep(pmdp) (pmdp)
-#define pte_pmd(pte) (pte)
-
#endif /* __ASSEMBLY__ */
#endif /* _ASM_TILE_PGTABLE_64_H */
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index b3f104953da2..42323636c459 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -15,6 +15,8 @@
#ifndef _ASM_TILE_PROCESSOR_H
#define _ASM_TILE_PROCESSOR_H
+#include <arch/chip.h>
+
#ifndef __ASSEMBLY__
/*
@@ -25,7 +27,6 @@
#include <asm/ptrace.h>
#include <asm/percpu.h>
-#include <arch/chip.h>
#include <arch/spr_def.h>
struct task_struct;
@@ -110,18 +111,16 @@ struct thread_struct {
unsigned long long interrupt_mask;
/* User interrupt-control 0 state */
unsigned long intctrl_0;
-#if CHIP_HAS_PROC_STATUS_SPR()
+ /* Is this task currently doing a backtrace? */
+ bool in_backtrace;
/* Any other miscellaneous processor state bits */
unsigned long proc_status;
-#endif
#if !CHIP_HAS_FIXED_INTVEC_BASE()
/* Interrupt base for PL0 interrupts */
unsigned long interrupt_vector_base;
#endif
-#if CHIP_HAS_TILE_RTF_HWM()
/* Tile cache retry fifo high-water mark */
unsigned long tile_rtf_hwm;
-#endif
#if CHIP_HAS_DSTREAM_PF()
/* Data stream prefetch control */
unsigned long dstream_pf;
@@ -134,21 +133,16 @@ struct thread_struct {
/* Async DMA TLB fault information */
struct async_tlb dma_async_tlb;
#endif
-#if CHIP_HAS_SN_PROC()
- /* Was static network processor when we were switched out? */
- int sn_proc_running;
- /* Async SNI TLB fault information */
- struct async_tlb sn_async_tlb;
-#endif
};
#endif /* !__ASSEMBLY__ */
/*
* Start with "sp" this many bytes below the top of the kernel stack.
- * This preserves the invariant that a called function may write to *sp.
+ * This allows us to be cache-aware when handling the initial save
+ * of the pt_regs value to the stack.
*/
-#define STACK_TOP_DELTA 8
+#define STACK_TOP_DELTA 64
/*
* When entering the kernel via a fault, start with the top of the
@@ -164,7 +158,7 @@ struct thread_struct {
#ifndef __ASSEMBLY__
#ifdef __tilegx__
-#define TASK_SIZE_MAX (MEM_LOW_END + 1)
+#define TASK_SIZE_MAX (_AC(1, UL) << (MAX_VA_WIDTH - 1))
#else
#define TASK_SIZE_MAX PAGE_OFFSET
#endif
@@ -178,10 +172,10 @@ struct thread_struct {
#define TASK_SIZE TASK_SIZE_MAX
#endif
-/* We provide a minimal "vdso" a la x86; just the sigreturn code for now. */
-#define VDSO_BASE (TASK_SIZE - PAGE_SIZE)
+#define VDSO_BASE ((unsigned long)current->active_mm->context.vdso_base)
+#define VDSO_SYM(x) (VDSO_BASE + (unsigned long)(x))
-#define STACK_TOP VDSO_BASE
+#define STACK_TOP TASK_SIZE
/* STACK_TOP_MAX is used temporarily in execve and should not check COMPAT. */
#define STACK_TOP_MAX TASK_SIZE_MAX
@@ -232,21 +226,28 @@ extern int do_work_pending(struct pt_regs *regs, u32 flags);
unsigned long get_wchan(struct task_struct *p);
/* Return initial ksp value for given task. */
-#define task_ksp0(task) ((unsigned long)(task)->stack + THREAD_SIZE)
+#define task_ksp0(task) \
+ ((unsigned long)(task)->stack + THREAD_SIZE - STACK_TOP_DELTA)
/* Return some info about the user process TASK. */
-#define KSTK_TOP(task) (task_ksp0(task) - STACK_TOP_DELTA)
#define task_pt_regs(task) \
- ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1)
+ ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1)
#define current_pt_regs() \
- ((struct pt_regs *)((stack_pointer | (THREAD_SIZE - 1)) - \
- (KSTK_PTREGS_GAP - 1)) - 1)
+ ((struct pt_regs *)((stack_pointer | (THREAD_SIZE - 1)) - \
+ STACK_TOP_DELTA - (KSTK_PTREGS_GAP - 1)) - 1)
#define task_sp(task) (task_pt_regs(task)->sp)
#define task_pc(task) (task_pt_regs(task)->pc)
/* Aliases for pc and sp (used in fs/proc/array.c) */
#define KSTK_EIP(task) task_pc(task)
#define KSTK_ESP(task) task_sp(task)
+/* Fine-grained unaligned JIT support */
+#define GET_UNALIGN_CTL(tsk, adr) get_unalign_ctl((tsk), (adr))
+#define SET_UNALIGN_CTL(tsk, val) set_unalign_ctl((tsk), (val))
+
+extern int get_unalign_ctl(struct task_struct *tsk, unsigned long adr);
+extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val);
+
/* Standard format for printing registers and other word-size data. */
#ifdef __tilegx__
# define REGFMT "0x%016lx"
@@ -275,7 +276,6 @@ extern char chip_model[64];
/* Data on which physical memory controller corresponds to which NUMA node. */
extern int node_controller[];
-#if CHIP_HAS_CBOX_HOME_MAP()
/* Does the heap allocator return hash-for-home pages by default? */
extern int hash_default;
@@ -285,11 +285,6 @@ extern int kstack_hash;
/* Does MAP_ANONYMOUS return hash-for-home pages by default? */
#define uheap_hash hash_default
-#else
-#define hash_default 0
-#define kstack_hash 0
-#define uheap_hash 0
-#endif
/* Are we using huge pages in the TLB for kernel data? */
extern int kdata_huge;
@@ -337,7 +332,6 @@ extern int kdata_huge;
/*
* Provide symbolic constants for PLs.
- * Note that assembly code assumes that USER_PL is zero.
*/
#define USER_PL 0
#if CONFIG_KERNEL_PL == 2
@@ -346,20 +340,38 @@ extern int kdata_huge;
#define KERNEL_PL CONFIG_KERNEL_PL
/* SYSTEM_SAVE_K_0 holds the current cpu number ORed with ksp0. */
-#define CPU_LOG_MASK_VALUE 12
-#define CPU_MASK_VALUE ((1 << CPU_LOG_MASK_VALUE) - 1)
-#if CONFIG_NR_CPUS > CPU_MASK_VALUE
-# error Too many cpus!
+#ifdef __tilegx__
+#define CPU_SHIFT 48
+#if CHIP_VA_WIDTH() > CPU_SHIFT
+# error Too many VA bits!
#endif
+#define MAX_CPU_ID ((1 << (64 - CPU_SHIFT)) - 1)
+#define raw_smp_processor_id() \
+ ((int)(__insn_mfspr(SPR_SYSTEM_SAVE_K_0) >> CPU_SHIFT))
+#define get_current_ksp0() \
+ ((unsigned long)(((long)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) << \
+ (64 - CPU_SHIFT)) >> (64 - CPU_SHIFT)))
+#define next_current_ksp0(task) ({ \
+ unsigned long __ksp0 = task_ksp0(task) & ((1UL << CPU_SHIFT) - 1); \
+ unsigned long __cpu = (long)raw_smp_processor_id() << CPU_SHIFT; \
+ __ksp0 | __cpu; \
+})
+#else
+#define LOG2_NR_CPU_IDS 6
+#define MAX_CPU_ID ((1 << LOG2_NR_CPU_IDS) - 1)
#define raw_smp_processor_id() \
- ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & CPU_MASK_VALUE)
+ ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & MAX_CPU_ID)
#define get_current_ksp0() \
- (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~CPU_MASK_VALUE)
+ (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~MAX_CPU_ID)
#define next_current_ksp0(task) ({ \
unsigned long __ksp0 = task_ksp0(task); \
int __cpu = raw_smp_processor_id(); \
- BUG_ON(__ksp0 & CPU_MASK_VALUE); \
+ BUG_ON(__ksp0 & MAX_CPU_ID); \
__ksp0 | __cpu; \
})
+#endif
+#if CONFIG_NR_CPUS > (MAX_CPU_ID + 1)
+# error Too many cpus!
+#endif
#endif /* _ASM_TILE_PROCESSOR_H */
diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h
index fd412260aff7..b9620c077abc 100644
--- a/arch/tile/include/asm/ptrace.h
+++ b/arch/tile/include/asm/ptrace.h
@@ -33,12 +33,13 @@ typedef unsigned long pt_reg_t;
#ifndef __ASSEMBLY__
+#define regs_return_value(regs) ((regs)->regs[0])
#define instruction_pointer(regs) ((regs)->pc)
#define profile_pc(regs) instruction_pointer(regs)
#define user_stack_pointer(regs) ((regs)->sp)
/* Does the process account for user or for system time? */
-#define user_mode(regs) (EX1_PL((regs)->ex1) == USER_PL)
+#define user_mode(regs) (EX1_PL((regs)->ex1) < KERNEL_PL)
/* Fill in a struct pt_regs with the current kernel registers. */
struct pt_regs *get_pt_regs(struct pt_regs *);
@@ -79,8 +80,7 @@ extern void single_step_execve(void);
struct task_struct;
-extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
- int error_code);
+extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs);
#ifdef __tilegx__
/* We need this since sigval_t has a user pointer in it, for GETSIGINFO etc. */
diff --git a/arch/tile/include/asm/sections.h b/arch/tile/include/asm/sections.h
index 7d8a935a9238..5d5d3b739a6b 100644
--- a/arch/tile/include/asm/sections.h
+++ b/arch/tile/include/asm/sections.h
@@ -25,10 +25,16 @@ extern char _sinitdata[], _einitdata[];
/* Write-once data is writable only till the end of initialization. */
extern char __w1data_begin[], __w1data_end[];
+extern char vdso_start[], vdso_end[];
+#ifdef CONFIG_COMPAT
+extern char vdso32_start[], vdso32_end[];
+#endif
/* Not exactly sections, but PC comparison points in the code. */
extern char __rt_sigreturn[], __rt_sigreturn_end[];
-#ifndef __tilegx__
+#ifdef __tilegx__
+extern char __start_unalign_asm_code[], __end_unalign_asm_code[];
+#else
extern char sys_cmpxchg[], __sys_cmpxchg_end[];
extern char __sys_cmpxchg_grab_lock[];
extern char __start_atomic_asm_code[], __end_atomic_asm_code[];
diff --git a/arch/tile/include/asm/setup.h b/arch/tile/include/asm/setup.h
index d048888c5d9a..e98909033e5b 100644
--- a/arch/tile/include/asm/setup.h
+++ b/arch/tile/include/asm/setup.h
@@ -24,9 +24,8 @@
*/
#define MAXMEM_PFN PFN_DOWN(MAXMEM)
+int tile_console_write(const char *buf, int count);
void early_panic(const char *fmt, ...);
-void warn_early_printk(void);
-void __init disable_early_printk(void);
/* Init-time routine to do tile-specific per-cpu setup. */
void setup_cpu(int boot);
diff --git a/arch/tile/include/asm/smp.h b/arch/tile/include/asm/smp.h
index 1aa759aeb5b3..9a326b64f7ae 100644
--- a/arch/tile/include/asm/smp.h
+++ b/arch/tile/include/asm/smp.h
@@ -101,10 +101,8 @@ void print_disabled_cpus(void);
extern struct cpumask cpu_lotar_map;
#define cpu_is_valid_lotar(cpu) cpumask_test_cpu((cpu), &cpu_lotar_map)
-#if CHIP_HAS_CBOX_HOME_MAP()
/* Which processors are used for hash-for-home mapping */
extern struct cpumask hash_for_home_map;
-#endif
/* Which cpus can have their cache flushed by hv_flush_remote(). */
extern struct cpumask cpu_cacheable_map;
diff --git a/arch/tile/include/asm/spinlock_64.h b/arch/tile/include/asm/spinlock_64.h
index 5f8b6a095fd8..9a12b9c7e5d3 100644
--- a/arch/tile/include/asm/spinlock_64.h
+++ b/arch/tile/include/asm/spinlock_64.h
@@ -27,7 +27,7 @@
* Return the "current" portion of a ticket lock value,
* i.e. the number that currently owns the lock.
*/
-static inline int arch_spin_current(u32 val)
+static inline u32 arch_spin_current(u32 val)
{
return val >> __ARCH_SPIN_CURRENT_SHIFT;
}
@@ -36,7 +36,7 @@ static inline int arch_spin_current(u32 val)
* Return the "next" portion of a ticket lock value,
* i.e. the number that the next task to try to acquire the lock will get.
*/
-static inline int arch_spin_next(u32 val)
+static inline u32 arch_spin_next(u32 val)
{
return val & __ARCH_SPIN_NEXT_MASK;
}
diff --git a/arch/tile/include/asm/string.h b/arch/tile/include/asm/string.h
index 7535cf1a30e4..92b271bd9ebd 100644
--- a/arch/tile/include/asm/string.h
+++ b/arch/tile/include/asm/string.h
@@ -21,8 +21,10 @@
#define __HAVE_ARCH_MEMMOVE
#define __HAVE_ARCH_STRCHR
#define __HAVE_ARCH_STRLEN
+#define __HAVE_ARCH_STRNLEN
extern __kernel_size_t strlen(const char *);
+extern __kernel_size_t strnlen(const char *, __kernel_size_t);
extern char *strchr(const char *s, int c);
extern void *memchr(const void *s, int c, size_t n);
extern void *memset(void *, int, __kernel_size_t);
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index d1733dee98a2..b8aa6df3e102 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -39,6 +39,11 @@ struct thread_info {
struct restart_block restart_block;
struct single_step_state *step_state; /* single step state
(if non-zero) */
+ int align_ctl; /* controls unaligned access */
+#ifdef __tilegx__
+ unsigned long unalign_jit_tmp[4]; /* temp r0..r3 storage */
+ void __user *unalign_jit_base; /* unalign fixup JIT base */
+#endif
};
/*
@@ -56,6 +61,7 @@ struct thread_info {
.fn = do_no_restart_syscall, \
}, \
.step_state = NULL, \
+ .align_ctl = 0, \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h
index e28c3df4176a..4b99a1c3aab2 100644
--- a/arch/tile/include/asm/traps.h
+++ b/arch/tile/include/asm/traps.h
@@ -15,12 +15,13 @@
#ifndef _ASM_TILE_TRAPS_H
#define _ASM_TILE_TRAPS_H
+#ifndef __ASSEMBLY__
#include <arch/chip.h>
/* mm/fault.c */
void do_page_fault(struct pt_regs *, int fault_num,
unsigned long address, unsigned long write);
-#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
+#if CHIP_HAS_TILE_DMA()
void do_async_page_fault(struct pt_regs *);
#endif
@@ -69,6 +70,16 @@ void gx_singlestep_handle(struct pt_regs *, int fault_num);
/* kernel/intvec_64.S */
void fill_ra_stack(void);
+
+/* Handle unalign data fixup. */
+extern void do_unaligned(struct pt_regs *regs, int vecnum);
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#ifdef __tilegx__
+/* 128 byte JIT per unalign fixup. */
+#define UNALIGN_JIT_SHIFT 7
#endif
#endif /* _ASM_TILE_TRAPS_H */
diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h
index e4d44bd7df27..b6cde3209b96 100644
--- a/arch/tile/include/asm/uaccess.h
+++ b/arch/tile/include/asm/uaccess.h
@@ -127,8 +127,10 @@ extern int fixup_exception(struct pt_regs *regs);
#ifdef __LP64__
#define _ASM_PTR ".quad"
+#define _ASM_ALIGN ".align 8"
#else
#define _ASM_PTR ".long"
+#define _ASM_ALIGN ".align 4"
#endif
#define __get_user_asm(OP, x, ptr, ret) \
@@ -137,6 +139,7 @@ extern int fixup_exception(struct pt_regs *regs);
"0: { movei %1, 0; movei %0, %3 }\n" \
"j 9f\n" \
".section __ex_table,\"a\"\n" \
+ _ASM_ALIGN "\n" \
_ASM_PTR " 1b, 0b\n" \
".popsection\n" \
"9:" \
@@ -168,6 +171,7 @@ extern int fixup_exception(struct pt_regs *regs);
"0: { movei %1, 0; movei %2, 0 }\n" \
"{ movei %0, %4; j 9f }\n" \
".section __ex_table,\"a\"\n" \
+ ".align 4\n" \
".word 1b, 0b\n" \
".word 2b, 0b\n" \
".popsection\n" \
@@ -224,6 +228,7 @@ extern int __get_user_bad(void)
".pushsection .fixup,\"ax\"\n" \
"0: { movei %0, %3; j 9f }\n" \
".section __ex_table,\"a\"\n" \
+ _ASM_ALIGN "\n" \
_ASM_PTR " 1b, 0b\n" \
".popsection\n" \
"9:" \
@@ -248,6 +253,7 @@ extern int __get_user_bad(void)
".pushsection .fixup,\"ax\"\n" \
"0: { movei %0, %4; j 9f }\n" \
".section __ex_table,\"a\"\n" \
+ ".align 4\n" \
".word 1b, 0b\n" \
".word 2b, 0b\n" \
".popsection\n" \
@@ -567,37 +573,6 @@ static inline unsigned long __must_check flush_user(
}
/**
- * inv_user: - Invalidate a block of memory in user space from cache.
- * @mem: Destination address, in user space.
- * @len: Number of bytes to invalidate.
- *
- * Returns number of bytes that could not be invalidated.
- * On success, this will be zero.
- *
- * Note that on Tile64, the "inv" operation is in fact a
- * "flush and invalidate", so cache write-backs will occur prior
- * to the cache being marked invalid.
- */
-extern unsigned long inv_user_asm(void __user *mem, unsigned long len);
-static inline unsigned long __must_check __inv_user(
- void __user *mem, unsigned long len)
-{
- int retval;
-
- might_fault();
- retval = inv_user_asm(mem, len);
- mb_incoherent();
- return retval;
-}
-static inline unsigned long __must_check inv_user(
- void __user *mem, unsigned long len)
-{
- if (access_ok(VERIFY_WRITE, mem, len))
- return __inv_user(mem, len);
- return len;
-}
-
-/**
* finv_user: - Flush-inval a block of memory in user space from cache.
* @mem: Destination address, in user space.
* @len: Number of bytes to invalidate.
diff --git a/arch/tile/include/asm/unaligned.h b/arch/tile/include/asm/unaligned.h
index 37dfbe598872..5a58a0d11449 100644
--- a/arch/tile/include/asm/unaligned.h
+++ b/arch/tile/include/asm/unaligned.h
@@ -15,11 +15,15 @@
#ifndef _ASM_TILE_UNALIGNED_H
#define _ASM_TILE_UNALIGNED_H
-#include <linux/unaligned/le_struct.h>
-#include <linux/unaligned/be_byteshift.h>
-#include <linux/unaligned/generic.h>
-#define get_unaligned __get_unaligned_le
-#define put_unaligned __put_unaligned_le
+/*
+ * We could implement faster get_unaligned_[be/le]64 using the ldna
+ * instruction on tilegx; however, we need to either copy all of the
+ * other generic functions to here (which is pretty ugly) or else
+ * modify both the generic code and other arch code to allow arch
+ * specific unaligned data access functions. Given these functions
+ * are not often called, we'll stick with the generic version.
+ */
+#include <asm-generic/unaligned.h>
/*
* Is the kernel doing fixups of unaligned accesses? If <0, no kernel
diff --git a/arch/tile/include/asm/vdso.h b/arch/tile/include/asm/vdso.h
new file mode 100644
index 000000000000..9f6a78d665fa
--- /dev/null
+++ b/arch/tile/include/asm/vdso.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __TILE_VDSO_H__
+#define __TILE_VDSO_H__
+
+#include <linux/types.h>
+
+/*
+ * Note about the vdso_data structure:
+ *
+ * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the
+ * structure is supposed to be known only to the function in the vdso
+ * itself and may change without notice.
+ */
+
+struct vdso_data {
+ __u64 tz_update_count; /* Timezone atomicity ctr */
+ __u64 tb_update_count; /* Timebase atomicity ctr */
+ __u64 xtime_tod_stamp; /* TOD clock for xtime */
+ __u64 xtime_clock_sec; /* Kernel time second */
+ __u64 xtime_clock_nsec; /* Kernel time nanosecond */
+ __u64 wtom_clock_sec; /* Wall to monotonic clock second */
+ __u64 wtom_clock_nsec; /* Wall to monotonic clock nanosecond */
+ __u32 mult; /* Cycle to nanosecond multiplier */
+ __u32 shift; /* Cycle to nanosecond divisor (power of two) */
+ __u32 tz_minuteswest; /* Minutes west of Greenwich */
+ __u32 tz_dsttime; /* Type of dst correction */
+};
+
+extern struct vdso_data *vdso_data;
+
+/* __vdso_rt_sigreturn is defined with the addresses in the vdso page. */
+extern void __vdso_rt_sigreturn(void);
+
+extern int setup_vdso_pages(void);
+
+#endif /* __TILE_VDSO_H__ */
diff --git a/arch/tile/include/gxio/iorpc_trio.h b/arch/tile/include/gxio/iorpc_trio.h
index 58105c31228b..d95b96fd6c93 100644
--- a/arch/tile/include/gxio/iorpc_trio.h
+++ b/arch/tile/include/gxio/iorpc_trio.h
@@ -30,6 +30,7 @@
#define GXIO_TRIO_OP_ALLOC_MEMORY_MAPS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1404)
+#define GXIO_TRIO_OP_ALLOC_SCATTER_QUEUES IORPC_OPCODE(IORPC_FORMAT_NONE, 0x140e)
#define GXIO_TRIO_OP_ALLOC_PIO_REGIONS IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1412)
#define GXIO_TRIO_OP_INIT_PIO_REGION_AUX IORPC_OPCODE(IORPC_FORMAT_NONE, 0x1414)
@@ -54,6 +55,10 @@ int gxio_trio_alloc_memory_maps(gxio_trio_context_t * context,
unsigned int flags);
+int gxio_trio_alloc_scatter_queues(gxio_trio_context_t * context,
+ unsigned int count, unsigned int first,
+ unsigned int flags);
+
int gxio_trio_alloc_pio_regions(gxio_trio_context_t * context,
unsigned int count, unsigned int first,
unsigned int flags);
diff --git a/arch/tile/include/gxio/iorpc_uart.h b/arch/tile/include/gxio/iorpc_uart.h
new file mode 100644
index 000000000000..55429d48ea56
--- /dev/null
+++ b/arch/tile/include/gxio/iorpc_uart.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/* This file is machine-generated; DO NOT EDIT! */
+#ifndef __GXIO_UART_LINUX_RPC_H__
+#define __GXIO_UART_LINUX_RPC_H__
+
+#include <hv/iorpc.h>
+
+#include <hv/drv_uart_intf.h>
+#include <gxio/uart.h>
+#include <gxio/kiorpc.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <asm/pgtable.h>
+
+#define GXIO_UART_OP_CFG_INTERRUPT IORPC_OPCODE(IORPC_FORMAT_KERNEL_INTERRUPT, 0x1900)
+#define GXIO_UART_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000)
+#define GXIO_UART_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001)
+
+int gxio_uart_cfg_interrupt(gxio_uart_context_t *context, int inter_x,
+ int inter_y, int inter_ipi, int inter_event);
+
+int gxio_uart_get_mmio_base(gxio_uart_context_t *context, HV_PTE *base);
+
+int gxio_uart_check_mmio_offset(gxio_uart_context_t *context,
+ unsigned long offset, unsigned long size);
+
+#endif /* !__GXIO_UART_LINUX_RPC_H__ */
diff --git a/arch/tile/include/gxio/uart.h b/arch/tile/include/gxio/uart.h
new file mode 100644
index 000000000000..438ee7e46c7b
--- /dev/null
+++ b/arch/tile/include/gxio/uart.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _GXIO_UART_H_
+#define _GXIO_UART_H_
+
+#include "common.h"
+
+#include <hv/drv_uart_intf.h>
+#include <hv/iorpc.h>
+
+/*
+ *
+ * An API for manipulating UART interface.
+ */
+
+/*
+ *
+ * The Rshim allows access to the processor's UART interface.
+ */
+
+/* A context object used to manage UART resources. */
+typedef struct {
+
+ /* File descriptor for calling up to the hypervisor. */
+ int fd;
+
+ /* The VA at which our MMIO registers are mapped. */
+ char *mmio_base;
+
+} gxio_uart_context_t;
+
+/* Request UART interrupts.
+ *
+ * Request that interrupts be delivered to a tile when the UART's
+ * Receive FIFO is written, or the Write FIFO is read.
+ *
+ * @param context Pointer to a properly initialized gxio_uart_context_t.
+ * @param bind_cpu_x X coordinate of CPU to which interrupt will be delivered.
+ * @param bind_cpu_y Y coordinate of CPU to which interrupt will be delivered.
+ * @param bind_interrupt IPI interrupt number.
+ * @param bind_event Sub-interrupt event bit number; a negative value can
+ * disable the interrupt.
+ * @return Zero if all of the requested UART events were successfully
+ * configured to interrupt.
+ */
+extern int gxio_uart_cfg_interrupt(gxio_uart_context_t *context,
+ int bind_cpu_x,
+ int bind_cpu_y,
+ int bind_interrupt, int bind_event);
+
+/* Initialize a UART context.
+ *
+ * A properly initialized context must be obtained before any of the other
+ * gxio_uart routines may be used.
+ *
+ * @param context Pointer to a gxio_uart_context_t, which will be initialized
+ * by this routine, if it succeeds.
+ * @param uart_index Index of the UART to use.
+ * @return Zero if the context was successfully initialized, else a
+ * GXIO_ERR_xxx error code.
+ */
+extern int gxio_uart_init(gxio_uart_context_t *context, int uart_index);
+
+/* Destroy a UART context.
+ *
+ * Once destroyed, a context may not be used with any gxio_uart routines
+ * other than gxio_uart_init(). After this routine returns, no further
+ * interrupts requested on this context will be delivered. The state and
+ * configuration of the pins which had been attached to this context are
+ * unchanged by this operation.
+ *
+ * @param context Pointer to a gxio_uart_context_t.
+ * @return Zero if the context was successfully destroyed, else a
+ * GXIO_ERR_xxx error code.
+ */
+extern int gxio_uart_destroy(gxio_uart_context_t *context);
+
+/* Write UART register.
+ * @param context Pointer to a gxio_uart_context_t.
+ * @param offset UART register offset.
+ * @param word Data will be wrote to UART reigister.
+ */
+extern void gxio_uart_write(gxio_uart_context_t *context, uint64_t offset,
+ uint64_t word);
+
+/* Read UART register.
+ * @param context Pointer to a gxio_uart_context_t.
+ * @param offset UART register offset.
+ * @return Data read from UART register.
+ */
+extern uint64_t gxio_uart_read(gxio_uart_context_t *context, uint64_t offset);
+
+#endif /* _GXIO_UART_H_ */
diff --git a/arch/tile/include/hv/drv_trio_intf.h b/arch/tile/include/hv/drv_trio_intf.h
index ef9f3f52ee27..237e04dee66c 100644
--- a/arch/tile/include/hv/drv_trio_intf.h
+++ b/arch/tile/include/hv/drv_trio_intf.h
@@ -64,8 +64,9 @@ struct pcie_port_property
* will not consider it an error if the link comes up as a x8 link. */
uint8_t allow_x8: 1;
- /** Reserved. */
- uint8_t reserved: 1;
+ /** If true, this link is connected to a device which may or may not
+ * be present. */
+ uint8_t removable: 1;
};
@@ -167,6 +168,9 @@ pcie_stream_intr_config_sel_t;
struct pcie_trio_ports_property
{
struct pcie_port_property ports[TILEGX_TRIO_PCIES];
+
+ /** Set if this TRIO belongs to a Gx72 device. */
+ uint8_t is_gx72;
};
/* Flags indicating traffic class. */
diff --git a/arch/tile/include/hv/drv_uart_intf.h b/arch/tile/include/hv/drv_uart_intf.h
new file mode 100644
index 000000000000..f5379e2404fd
--- /dev/null
+++ b/arch/tile/include/hv/drv_uart_intf.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/**
+ * Interface definitions for the UART driver.
+ */
+
+#ifndef _SYS_HV_DRV_UART_INTF_H
+#define _SYS_HV_DRV_UART_INTF_H
+
+#include <arch/uart.h>
+
+/** Number of UART ports supported. */
+#define TILEGX_UART_NR 2
+
+/** The mmap file offset (PA) of the UART MMIO region. */
+#define HV_UART_MMIO_OFFSET 0
+
+/** The maximum size of the UARTs MMIO region (64K Bytes). */
+#define HV_UART_MMIO_SIZE (1UL << 16)
+
+#endif /* _SYS_HV_DRV_UART_INTF_H */
diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h
index 837dca5328c2..dfcdeb61ba34 100644
--- a/arch/tile/include/hv/hypervisor.h
+++ b/arch/tile/include/hv/hypervisor.h
@@ -318,8 +318,11 @@
/** hv_set_pte_super_shift */
#define HV_DISPATCH_SET_PTE_SUPER_SHIFT 57
+/** hv_console_set_ipi */
+#define HV_DISPATCH_CONSOLE_SET_IPI 63
+
/** One more than the largest dispatch value */
-#define _HV_DISPATCH_END 58
+#define _HV_DISPATCH_END 64
#ifndef __ASSEMBLER__
@@ -541,14 +544,24 @@ typedef enum {
HV_CONFSTR_CPUMOD_REV = 18,
/** Human-readable CPU module description. */
- HV_CONFSTR_CPUMOD_DESC = 19
+ HV_CONFSTR_CPUMOD_DESC = 19,
+
+ /** Per-tile hypervisor statistics. When this identifier is specified,
+ * the hv_confstr call takes two extra arguments. The first is the
+ * HV_XY_TO_LOTAR of the target tile's coordinates. The second is
+ * a flag word. The only current flag is the lowest bit, which means
+ * "zero out the stats instead of retrieving them"; in this case the
+ * buffer and buffer length are ignored. */
+ HV_CONFSTR_HV_STATS = 20
} HV_ConfstrQuery;
/** Query a configuration string from the hypervisor.
*
* @param query Identifier for the specific string to be retrieved
- * (HV_CONFSTR_xxx).
+ * (HV_CONFSTR_xxx). Some strings may require or permit extra
+ * arguments to be appended which select specific objects to be
+ * described; see the string descriptions above.
* @param buf Buffer in which to place the string.
* @param len Length of the buffer.
* @return If query is valid, then the length of the corresponding string,
@@ -556,21 +569,16 @@ typedef enum {
* was truncated. If query is invalid, HV_EINVAL. If the specified
* buffer is not writable by the client, HV_EFAULT.
*/
-int hv_confstr(HV_ConfstrQuery query, HV_VirtAddr buf, int len);
+int hv_confstr(HV_ConfstrQuery query, HV_VirtAddr buf, int len, ...);
/** Tile coordinate */
typedef struct
{
-#ifndef __BIG_ENDIAN__
/** X coordinate, relative to supervisor's top-left coordinate */
int x;
/** Y coordinate, relative to supervisor's top-left coordinate */
int y;
-#else
- int y;
- int x;
-#endif
} HV_Coord;
@@ -585,6 +593,30 @@ typedef struct
*/
int hv_get_ipi_pte(HV_Coord tile, int pl, HV_PTE* pte);
+/** Configure the console interrupt.
+ *
+ * When the console client interrupt is enabled, the hypervisor will
+ * deliver the specified IPI to the client in the following situations:
+ *
+ * - The console has at least one character available for input.
+ *
+ * - The console can accept new characters for output, and the last call
+ * to hv_console_write() did not write all of the characters requested
+ * by the client.
+ *
+ * Note that in some system configurations, console interrupt will not
+ * be available; clients should be prepared for this routine to fail and
+ * to fall back to periodic console polling in that case.
+ *
+ * @param ipi Index of the IPI register which will receive the interrupt.
+ * @param event IPI event number for console interrupt. If less than 0,
+ * disable the console IPI interrupt.
+ * @param coord Tile to be targeted for console interrupt.
+ * @return 0 on success, otherwise, HV_EINVAL if illegal parameter,
+ * HV_ENOTSUP if console interrupt are not available.
+ */
+int hv_console_set_ipi(int ipi, int event, HV_Coord coord);
+
#else /* !CHIP_HAS_IPI() */
/** A set of interrupts. */
@@ -1092,13 +1124,8 @@ HV_VirtAddrRange hv_inquire_virtual(int idx);
/** A range of ASID values. */
typedef struct
{
-#ifndef __BIG_ENDIAN__
HV_ASID start; /**< First ASID in the range. */
unsigned int size; /**< Number of ASIDs. Zero for an invalid range. */
-#else
- unsigned int size; /**< Number of ASIDs. Zero for an invalid range. */
- HV_ASID start; /**< First ASID in the range. */
-#endif
} HV_ASIDRange;
/** Returns information about a range of ASIDs.
@@ -1422,7 +1449,6 @@ typedef enum
/** Message recipient. */
typedef struct
{
-#ifndef __BIG_ENDIAN__
/** X coordinate, relative to supervisor's top-left coordinate */
unsigned int x:11;
@@ -1431,11 +1457,6 @@ typedef struct
/** Status of this recipient */
HV_Recip_State state:10;
-#else //__BIG_ENDIAN__
- HV_Recip_State state:10;
- unsigned int y:11;
- unsigned int x:11;
-#endif
} HV_Recipient;
/** Send a message to a set of recipients.
diff --git a/arch/tile/include/uapi/arch/Kbuild b/arch/tile/include/uapi/arch/Kbuild
index 4ebc34f4768d..97dfbecec6b6 100644
--- a/arch/tile/include/uapi/arch/Kbuild
+++ b/arch/tile/include/uapi/arch/Kbuild
@@ -1,7 +1,6 @@
# UAPI Header export list
header-y += abi.h
header-y += chip.h
-header-y += chip_tile64.h
header-y += chip_tilegx.h
header-y += chip_tilepro.h
header-y += icache.h
diff --git a/arch/tile/include/uapi/arch/chip.h b/arch/tile/include/uapi/arch/chip.h
index 926d3db0e91e..4c91f90b9369 100644
--- a/arch/tile/include/uapi/arch/chip.h
+++ b/arch/tile/include/uapi/arch/chip.h
@@ -12,9 +12,7 @@
* more details.
*/
-#if __tile_chip__ == 0
-#include <arch/chip_tile64.h>
-#elif __tile_chip__ == 1
+#if __tile_chip__ == 1
#include <arch/chip_tilepro.h>
#elif defined(__tilegx__)
#include <arch/chip_tilegx.h>
diff --git a/arch/tile/include/uapi/arch/chip_tile64.h b/arch/tile/include/uapi/arch/chip_tile64.h
deleted file mode 100644
index 261aaba092d4..000000000000
--- a/arch/tile/include/uapi/arch/chip_tile64.h
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-/*
- * @file
- * Global header file.
- * This header file specifies defines for TILE64.
- */
-
-#ifndef __ARCH_CHIP_H__
-#define __ARCH_CHIP_H__
-
-/** Specify chip version.
- * When possible, prefer the CHIP_xxx symbols below for future-proofing.
- * This is intended for cross-compiling; native compilation should
- * use the predefined __tile_chip__ symbol.
- */
-#define TILE_CHIP 0
-
-/** Specify chip revision.
- * This provides for the case of a respin of a particular chip type;
- * the normal value for this symbol is "0".
- * This is intended for cross-compiling; native compilation should
- * use the predefined __tile_chip_rev__ symbol.
- */
-#define TILE_CHIP_REV 0
-
-/** The name of this architecture. */
-#define CHIP_ARCH_NAME "tile64"
-
-/** The ELF e_machine type for binaries for this chip. */
-#define CHIP_ELF_TYPE() EM_TILE64
-
-/** The alternate ELF e_machine type for binaries for this chip. */
-#define CHIP_COMPAT_ELF_TYPE() 0x2506
-
-/** What is the native word size of the machine? */
-#define CHIP_WORD_SIZE() 32
-
-/** How many bits of a virtual address are used. Extra bits must be
- * the sign extension of the low bits.
- */
-#define CHIP_VA_WIDTH() 32
-
-/** How many bits are in a physical address? */
-#define CHIP_PA_WIDTH() 36
-
-/** Size of the L2 cache, in bytes. */
-#define CHIP_L2_CACHE_SIZE() 65536
-
-/** Log size of an L2 cache line in bytes. */
-#define CHIP_L2_LOG_LINE_SIZE() 6
-
-/** Size of an L2 cache line, in bytes. */
-#define CHIP_L2_LINE_SIZE() (1 << CHIP_L2_LOG_LINE_SIZE())
-
-/** Associativity of the L2 cache. */
-#define CHIP_L2_ASSOC() 2
-
-/** Size of the L1 data cache, in bytes. */
-#define CHIP_L1D_CACHE_SIZE() 8192
-
-/** Log size of an L1 data cache line in bytes. */
-#define CHIP_L1D_LOG_LINE_SIZE() 4
-
-/** Size of an L1 data cache line, in bytes. */
-#define CHIP_L1D_LINE_SIZE() (1 << CHIP_L1D_LOG_LINE_SIZE())
-
-/** Associativity of the L1 data cache. */
-#define CHIP_L1D_ASSOC() 2
-
-/** Size of the L1 instruction cache, in bytes. */
-#define CHIP_L1I_CACHE_SIZE() 8192
-
-/** Log size of an L1 instruction cache line in bytes. */
-#define CHIP_L1I_LOG_LINE_SIZE() 6
-
-/** Size of an L1 instruction cache line, in bytes. */
-#define CHIP_L1I_LINE_SIZE() (1 << CHIP_L1I_LOG_LINE_SIZE())
-
-/** Associativity of the L1 instruction cache. */
-#define CHIP_L1I_ASSOC() 1
-
-/** Stride with which flush instructions must be issued. */
-#define CHIP_FLUSH_STRIDE() CHIP_L2_LINE_SIZE()
-
-/** Stride with which inv instructions must be issued. */
-#define CHIP_INV_STRIDE() CHIP_L1D_LINE_SIZE()
-
-/** Stride with which finv instructions must be issued. */
-#define CHIP_FINV_STRIDE() CHIP_L1D_LINE_SIZE()
-
-/** Can the local cache coherently cache data that is homed elsewhere? */
-#define CHIP_HAS_COHERENT_LOCAL_CACHE() 0
-
-/** How many simultaneous outstanding victims can the L2 cache have? */
-#define CHIP_MAX_OUTSTANDING_VICTIMS() 2
-
-/** Does the TLB support the NC and NOALLOC bits? */
-#define CHIP_HAS_NC_AND_NOALLOC_BITS() 0
-
-/** Does the chip support hash-for-home caching? */
-#define CHIP_HAS_CBOX_HOME_MAP() 0
-
-/** Number of entries in the chip's home map tables. */
-/* #define CHIP_CBOX_HOME_MAP_SIZE() -- does not apply to chip 0 */
-
-/** Do uncacheable requests miss in the cache regardless of whether
- * there is matching data? */
-#define CHIP_HAS_ENFORCED_UNCACHEABLE_REQUESTS() 0
-
-/** Does the mf instruction wait for victims? */
-#define CHIP_HAS_MF_WAITS_FOR_VICTIMS() 1
-
-/** Does the chip have an "inv" instruction that doesn't also flush? */
-#define CHIP_HAS_INV() 0
-
-/** Does the chip have a "wh64" instruction? */
-#define CHIP_HAS_WH64() 0
-
-/** Does this chip have a 'dword_align' instruction? */
-#define CHIP_HAS_DWORD_ALIGN() 0
-
-/** Number of performance counters. */
-#define CHIP_PERFORMANCE_COUNTERS() 2
-
-/** Does this chip have auxiliary performance counters? */
-#define CHIP_HAS_AUX_PERF_COUNTERS() 0
-
-/** Is the CBOX_MSR1 SPR supported? */
-#define CHIP_HAS_CBOX_MSR1() 0
-
-/** Is the TILE_RTF_HWM SPR supported? */
-#define CHIP_HAS_TILE_RTF_HWM() 0
-
-/** Is the TILE_WRITE_PENDING SPR supported? */
-#define CHIP_HAS_TILE_WRITE_PENDING() 0
-
-/** Is the PROC_STATUS SPR supported? */
-#define CHIP_HAS_PROC_STATUS_SPR() 0
-
-/** Is the DSTREAM_PF SPR supported? */
-#define CHIP_HAS_DSTREAM_PF() 0
-
-/** Log of the number of mshims we have. */
-#define CHIP_LOG_NUM_MSHIMS() 2
-
-/** Are the bases of the interrupt vector areas fixed? */
-#define CHIP_HAS_FIXED_INTVEC_BASE() 1
-
-/** Are the interrupt masks split up into 2 SPRs? */
-#define CHIP_HAS_SPLIT_INTR_MASK() 1
-
-/** Is the cycle count split up into 2 SPRs? */
-#define CHIP_HAS_SPLIT_CYCLE() 1
-
-/** Does the chip have a static network? */
-#define CHIP_HAS_SN() 1
-
-/** Does the chip have a static network processor? */
-#define CHIP_HAS_SN_PROC() 1
-
-/** Size of the L1 static network processor instruction cache, in bytes. */
-#define CHIP_L1SNI_CACHE_SIZE() 2048
-
-/** Does the chip have DMA support in each tile? */
-#define CHIP_HAS_TILE_DMA() 1
-
-/** Does the chip have the second revision of the directly accessible
- * dynamic networks? This encapsulates a number of characteristics,
- * including the absence of the catch-all, the absence of inline message
- * tags, the absence of support for network context-switching, and so on.
- */
-#define CHIP_HAS_REV1_XDN() 0
-
-/** Does the chip have cmpexch and similar (fetchadd, exch, etc.)? */
-#define CHIP_HAS_CMPEXCH() 0
-
-/** Does the chip have memory-mapped I/O support? */
-#define CHIP_HAS_MMIO() 0
-
-/** Does the chip have post-completion interrupts? */
-#define CHIP_HAS_POST_COMPLETION_INTERRUPTS() 0
-
-/** Does the chip have native single step support? */
-#define CHIP_HAS_SINGLE_STEP() 0
-
-#ifndef __OPEN_SOURCE__ /* features only relevant to hypervisor-level code */
-
-/** How many entries are present in the instruction TLB? */
-#define CHIP_ITLB_ENTRIES() 8
-
-/** How many entries are present in the data TLB? */
-#define CHIP_DTLB_ENTRIES() 16
-
-/** How many MAF entries does the XAUI shim have? */
-#define CHIP_XAUI_MAF_ENTRIES() 16
-
-/** Does the memory shim have a source-id table? */
-#define CHIP_HAS_MSHIM_SRCID_TABLE() 1
-
-/** Does the L1 instruction cache clear on reset? */
-#define CHIP_HAS_L1I_CLEAR_ON_RESET() 0
-
-/** Does the chip come out of reset with valid coordinates on all tiles?
- * Note that if defined, this also implies that the upper left is 1,1.
- */
-#define CHIP_HAS_VALID_TILE_COORD_RESET() 0
-
-/** Does the chip have unified packet formats? */
-#define CHIP_HAS_UNIFIED_PACKET_FORMATS() 0
-
-/** Does the chip support write reordering? */
-#define CHIP_HAS_WRITE_REORDERING() 0
-
-/** Does the chip support Y-X routing as well as X-Y? */
-#define CHIP_HAS_Y_X_ROUTING() 0
-
-/** Is INTCTRL_3 managed with the correct MPL? */
-#define CHIP_HAS_INTCTRL_3_STATUS_FIX() 0
-
-/** Is it possible to configure the chip to be big-endian? */
-#define CHIP_HAS_BIG_ENDIAN_CONFIG() 0
-
-/** Is the CACHE_RED_WAY_OVERRIDDEN SPR supported? */
-#define CHIP_HAS_CACHE_RED_WAY_OVERRIDDEN() 0
-
-/** Is the DIAG_TRACE_WAY SPR supported? */
-#define CHIP_HAS_DIAG_TRACE_WAY() 0
-
-/** Is the MEM_STRIPE_CONFIG SPR supported? */
-#define CHIP_HAS_MEM_STRIPE_CONFIG() 0
-
-/** Are the TLB_PERF SPRs supported? */
-#define CHIP_HAS_TLB_PERF() 0
-
-/** Is the VDN_SNOOP_SHIM_CTL SPR supported? */
-#define CHIP_HAS_VDN_SNOOP_SHIM_CTL() 0
-
-/** Does the chip support rev1 DMA packets? */
-#define CHIP_HAS_REV1_DMA_PACKETS() 0
-
-/** Does the chip have an IPI shim? */
-#define CHIP_HAS_IPI() 0
-
-#endif /* !__OPEN_SOURCE__ */
-#endif /* __ARCH_CHIP_H__ */
diff --git a/arch/tile/include/uapi/arch/opcode_tilegx.h b/arch/tile/include/uapi/arch/opcode_tilegx.h
index c14d02c81600..d76ff2db745e 100644
--- a/arch/tile/include/uapi/arch/opcode_tilegx.h
+++ b/arch/tile/include/uapi/arch/opcode_tilegx.h
@@ -61,6 +61,7 @@ typedef tilegx_bundle_bits tile_bundle_bits;
#define TILE_BUNDLE_ALIGNMENT_IN_BYTES TILEGX_BUNDLE_ALIGNMENT_IN_BYTES
#define TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES \
TILEGX_LOG2_BUNDLE_ALIGNMENT_IN_BYTES
+#define TILE_BPT_BUNDLE TILEGX_BPT_BUNDLE
/* 64-bit pattern for a { bpt ; nop } bundle. */
#define TILEGX_BPT_BUNDLE 0x286a44ae51485000ULL
diff --git a/arch/tile/include/uapi/arch/opcode_tilepro.h b/arch/tile/include/uapi/arch/opcode_tilepro.h
index 71b763b8ce83..4451cff1a861 100644
--- a/arch/tile/include/uapi/arch/opcode_tilepro.h
+++ b/arch/tile/include/uapi/arch/opcode_tilepro.h
@@ -71,6 +71,7 @@ typedef tilepro_bundle_bits tile_bundle_bits;
#define TILE_BUNDLE_ALIGNMENT_IN_BYTES TILEPRO_BUNDLE_ALIGNMENT_IN_BYTES
#define TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES \
TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES
+#define TILE_BPT_BUNDLE TILEPRO_BPT_BUNDLE
/* 64-bit pattern for a { bpt ; nop } bundle. */
#define TILEPRO_BPT_BUNDLE 0x400b3cae70166000ULL
diff --git a/arch/tile/include/uapi/arch/spr_def_32.h b/arch/tile/include/uapi/arch/spr_def_32.h
index c689446e6284..78daa3146d25 100644
--- a/arch/tile/include/uapi/arch/spr_def_32.h
+++ b/arch/tile/include/uapi/arch/spr_def_32.h
@@ -200,8 +200,6 @@
#define SPR_SIM_CONTROL 0x4e0c
#define SPR_SNCTL 0x0805
#define SPR_SNCTL__FRZFABRIC_MASK 0x1
-#define SPR_SNCTL__FRZPROC_MASK 0x2
-#define SPR_SNPC 0x080b
#define SPR_SNSTATIC 0x080c
#define SPR_SYSTEM_SAVE_0_0 0x4b00
#define SPR_SYSTEM_SAVE_0_1 0x4b01
diff --git a/arch/tile/include/uapi/asm/auxvec.h b/arch/tile/include/uapi/asm/auxvec.h
index 1d393edb0641..c93e92709f14 100644
--- a/arch/tile/include/uapi/asm/auxvec.h
+++ b/arch/tile/include/uapi/asm/auxvec.h
@@ -15,6 +15,7 @@
#ifndef _ASM_TILE_AUXVEC_H
#define _ASM_TILE_AUXVEC_H
-/* No extensions to auxvec */
+/* The vDSO location. */
+#define AT_SYSINFO_EHDR 33
#endif /* _ASM_TILE_AUXVEC_H */
diff --git a/arch/tile/include/uapi/asm/cachectl.h b/arch/tile/include/uapi/asm/cachectl.h
index af4c9f9154d1..572ddcad2090 100644
--- a/arch/tile/include/uapi/asm/cachectl.h
+++ b/arch/tile/include/uapi/asm/cachectl.h
@@ -29,8 +29,8 @@
* to honor the arguments at some point.)
*
* Flush and invalidation of memory can normally be performed with the
- * __insn_flush(), __insn_inv(), and __insn_finv() instructions from
- * userspace. The DCACHE option to the system call allows userspace
+ * __insn_flush() and __insn_finv() instructions from userspace.
+ * The DCACHE option to the system call allows userspace
* to flush the entire L1+L2 data cache from the core. In this case,
* the address and length arguments are not used. The DCACHE flush is
* restricted to the current core, not all cores in the address space.
diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile
index 5334be8e2538..27a2bf39dae8 100644
--- a/arch/tile/kernel/Makefile
+++ b/arch/tile/kernel/Makefile
@@ -3,11 +3,17 @@
#
extra-y := vmlinux.lds head_$(BITS).o
-obj-y := backtrace.o entry.o irq.o messaging.o \
+obj-y := backtrace.o entry.o hvglue.o irq.o messaging.o \
pci-dma.o proc.o process.o ptrace.o reboot.o \
- setup.o signal.o single_step.o stack.o sys.o sysfs.o time.o traps.o \
+ setup.o signal.o single_step.o stack.o sys.o \
+ sysfs.o time.o traps.o unaligned.o vdso.o \
intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_ftrace.o = -pg
+CFLAGS_REMOVE_early_printk.o = -pg
+endif
+
obj-$(CONFIG_HARDWALL) += hardwall.o
obj-$(CONFIG_COMPAT) += compat.o compat_signal.o
obj-$(CONFIG_SMP) += smpboot.o smp.o tlb.o
@@ -20,3 +26,9 @@ else
obj-$(CONFIG_PCI) += pci.o
endif
obj-$(CONFIG_TILE_USB) += usb.o
+obj-$(CONFIG_TILE_HVGLUE_TRACE) += hvglue_trace.o
+obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o mcount_64.o
+obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_KGDB) += kgdb.o
+
+obj-y += vdso/
diff --git a/arch/tile/kernel/asm-offsets.c b/arch/tile/kernel/asm-offsets.c
index 01ddf19cc36d..375e7c321eef 100644
--- a/arch/tile/kernel/asm-offsets.c
+++ b/arch/tile/kernel/asm-offsets.c
@@ -14,13 +14,6 @@
* Generates definitions from c-type structures used by assembly sources.
*/
-#include <linux/kbuild.h>
-#include <linux/thread_info.h>
-#include <linux/sched.h>
-#include <linux/hardirq.h>
-#include <linux/ptrace.h>
-#include <hv/hypervisor.h>
-
/* Check for compatible compiler early in the build. */
#ifdef CONFIG_TILEGX
# ifndef __tilegx__
@@ -31,46 +24,61 @@
# endif
#else
# ifdef __tilegx__
-# error Can not build TILEPro/TILE64 configurations with tilegx compiler
+# error Can not build TILEPro configurations with tilegx compiler
# endif
#endif
+#include <linux/kbuild.h>
+#include <linux/thread_info.h>
+#include <linux/sched.h>
+#include <linux/hardirq.h>
+#include <linux/ptrace.h>
+#include <hv/hypervisor.h>
+
void foo(void)
{
- DEFINE(SINGLESTEP_STATE_BUFFER_OFFSET, \
+ DEFINE(SINGLESTEP_STATE_BUFFER_OFFSET,
offsetof(struct single_step_state, buffer));
- DEFINE(SINGLESTEP_STATE_FLAGS_OFFSET, \
+ DEFINE(SINGLESTEP_STATE_FLAGS_OFFSET,
offsetof(struct single_step_state, flags));
- DEFINE(SINGLESTEP_STATE_ORIG_PC_OFFSET, \
+ DEFINE(SINGLESTEP_STATE_ORIG_PC_OFFSET,
offsetof(struct single_step_state, orig_pc));
- DEFINE(SINGLESTEP_STATE_NEXT_PC_OFFSET, \
+ DEFINE(SINGLESTEP_STATE_NEXT_PC_OFFSET,
offsetof(struct single_step_state, next_pc));
- DEFINE(SINGLESTEP_STATE_BRANCH_NEXT_PC_OFFSET, \
+ DEFINE(SINGLESTEP_STATE_BRANCH_NEXT_PC_OFFSET,
offsetof(struct single_step_state, branch_next_pc));
- DEFINE(SINGLESTEP_STATE_UPDATE_VALUE_OFFSET, \
+ DEFINE(SINGLESTEP_STATE_UPDATE_VALUE_OFFSET,
offsetof(struct single_step_state, update_value));
- DEFINE(THREAD_INFO_TASK_OFFSET, \
+ DEFINE(THREAD_INFO_TASK_OFFSET,
offsetof(struct thread_info, task));
- DEFINE(THREAD_INFO_FLAGS_OFFSET, \
+ DEFINE(THREAD_INFO_FLAGS_OFFSET,
offsetof(struct thread_info, flags));
- DEFINE(THREAD_INFO_STATUS_OFFSET, \
+ DEFINE(THREAD_INFO_STATUS_OFFSET,
offsetof(struct thread_info, status));
- DEFINE(THREAD_INFO_HOMECACHE_CPU_OFFSET, \
+ DEFINE(THREAD_INFO_HOMECACHE_CPU_OFFSET,
offsetof(struct thread_info, homecache_cpu));
- DEFINE(THREAD_INFO_STEP_STATE_OFFSET, \
+ DEFINE(THREAD_INFO_PREEMPT_COUNT_OFFSET,
+ offsetof(struct thread_info, preempt_count));
+ DEFINE(THREAD_INFO_STEP_STATE_OFFSET,
offsetof(struct thread_info, step_state));
+#ifdef __tilegx__
+ DEFINE(THREAD_INFO_UNALIGN_JIT_BASE_OFFSET,
+ offsetof(struct thread_info, unalign_jit_base));
+ DEFINE(THREAD_INFO_UNALIGN_JIT_TMP_OFFSET,
+ offsetof(struct thread_info, unalign_jit_tmp));
+#endif
DEFINE(TASK_STRUCT_THREAD_KSP_OFFSET,
offsetof(struct task_struct, thread.ksp));
DEFINE(TASK_STRUCT_THREAD_PC_OFFSET,
offsetof(struct task_struct, thread.pc));
- DEFINE(HV_TOPOLOGY_WIDTH_OFFSET, \
+ DEFINE(HV_TOPOLOGY_WIDTH_OFFSET,
offsetof(HV_Topology, width));
- DEFINE(HV_TOPOLOGY_HEIGHT_OFFSET, \
+ DEFINE(HV_TOPOLOGY_HEIGHT_OFFSET,
offsetof(HV_Topology, height));
- DEFINE(IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET, \
+ DEFINE(IRQ_CPUSTAT_SYSCALL_COUNT_OFFSET,
offsetof(irq_cpustat_t, irq_syscall_count));
}
diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c
index d0a052e725be..85e00b2f39bf 100644
--- a/arch/tile/kernel/compat_signal.c
+++ b/arch/tile/kernel/compat_signal.c
@@ -32,6 +32,7 @@
#include <asm/ucontext.h>
#include <asm/sigframe.h>
#include <asm/syscalls.h>
+#include <asm/vdso.h>
#include <arch/interrupts.h>
struct compat_ucontext {
@@ -227,7 +228,7 @@ int compat_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
if (err)
goto give_sigsegv;
- restorer = VDSO_BASE;
+ restorer = VDSO_SYM(&__vdso_rt_sigreturn);
if (ka->sa.sa_flags & SA_RESTORER)
restorer = ptr_to_compat_reg(ka->sa.sa_restorer);
diff --git a/arch/tile/kernel/early_printk.c b/arch/tile/kernel/early_printk.c
index 34d72a151bf3..b608e00e7f6d 100644
--- a/arch/tile/kernel/early_printk.c
+++ b/arch/tile/kernel/early_printk.c
@@ -23,19 +23,24 @@
static void early_hv_write(struct console *con, const char *s, unsigned n)
{
- hv_console_write((HV_VirtAddr) s, n);
+ tile_console_write(s, n);
+
+ /*
+ * Convert NL to NLCR (close enough to CRNL) during early boot.
+ * We assume newlines are at the ends of strings, which turns out
+ * to be good enough for early boot console output.
+ */
+ if (n && s[n-1] == '\n')
+ tile_console_write("\r", 1);
}
static struct console early_hv_console = {
.name = "earlyhv",
.write = early_hv_write,
- .flags = CON_PRINTBUFFER,
+ .flags = CON_PRINTBUFFER | CON_BOOT,
.index = -1,
};
-/* Direct interface for emergencies */
-static int early_console_complete;
-
void early_panic(const char *fmt, ...)
{
va_list ap;
@@ -43,51 +48,21 @@ void early_panic(const char *fmt, ...)
va_start(ap, fmt);
early_printk("Kernel panic - not syncing: ");
early_vprintk(fmt, ap);
- early_console->write(early_console, "\n", 1);
+ early_printk("\n");
va_end(ap);
dump_stack();
hv_halt();
}
-static int __initdata keep_early;
-
static int __init setup_early_printk(char *str)
{
if (early_console)
return 1;
- if (str != NULL && strncmp(str, "keep", 4) == 0)
- keep_early = 1;
-
early_console = &early_hv_console;
register_console(early_console);
return 0;
}
-void __init disable_early_printk(void)
-{
- early_console_complete = 1;
- if (!early_console)
- return;
- if (!keep_early) {
- early_printk("disabling early console\n");
- unregister_console(early_console);
- early_console = NULL;
- } else {
- early_printk("keeping early console\n");
- }
-}
-
-void warn_early_printk(void)
-{
- if (early_console_complete || early_console)
- return;
- early_printk("\
-Machine shutting down before console output is fully initialized.\n\
-You may wish to reboot and add the option 'earlyprintk' to your\n\
-boot command line to see any diagnostic early console output.\n\
-");
-}
-
early_param("earlyprintk", setup_early_printk);
diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S
index f116cb0bce20..3d9175992a20 100644
--- a/arch/tile/kernel/entry.S
+++ b/arch/tile/kernel/entry.S
@@ -27,22 +27,6 @@ STD_ENTRY(current_text_addr)
{ move r0, lr; jrp lr }
STD_ENDPROC(current_text_addr)
-/*
- * We don't run this function directly, but instead copy it to a page
- * we map into every user process. See vdso_setup().
- *
- * Note that libc has a copy of this function that it uses to compare
- * against the PC when a stack backtrace ends, so if this code is
- * changed, the libc implementation(s) should also be updated.
- */
- .pushsection .data
-ENTRY(__rt_sigreturn)
- moveli TREG_SYSCALL_NR_NAME,__NR_rt_sigreturn
- swint1
- ENDPROC(__rt_sigreturn)
- ENTRY(__rt_sigreturn_end)
- .popsection
-
STD_ENTRY(dump_stack)
{ move r2, lr; lnk r1 }
{ move r4, r52; addli r1, r1, dump_stack - . }
diff --git a/arch/tile/kernel/ftrace.c b/arch/tile/kernel/ftrace.c
new file mode 100644
index 000000000000..f1c452092eeb
--- /dev/null
+++ b/arch/tile/kernel/ftrace.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * TILE-Gx specific ftrace support
+ */
+
+#include <linux/ftrace.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/ftrace.h>
+#include <asm/sections.h>
+
+#include <arch/opcode.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+static inline tilegx_bundle_bits NOP(void)
+{
+ return create_UnaryOpcodeExtension_X0(FNOP_UNARY_OPCODE_X0) |
+ create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) |
+ create_Opcode_X0(RRR_0_OPCODE_X0) |
+ create_UnaryOpcodeExtension_X1(NOP_UNARY_OPCODE_X1) |
+ create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) |
+ create_Opcode_X1(RRR_0_OPCODE_X1);
+}
+
+static int machine_stopped __read_mostly;
+
+int ftrace_arch_code_modify_prepare(void)
+{
+ machine_stopped = 1;
+ return 0;
+}
+
+int ftrace_arch_code_modify_post_process(void)
+{
+ flush_icache_range(0, CHIP_L1I_CACHE_SIZE());
+ machine_stopped = 0;
+ return 0;
+}
+
+/*
+ * Put { move r10, lr; jal ftrace_caller } in a bundle, this lets dynamic
+ * tracer just add one cycle overhead to every kernel function when disabled.
+ */
+static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr,
+ bool link)
+{
+ tilegx_bundle_bits opcode_x0, opcode_x1;
+ long pcrel_by_instr = (addr - pc) >> TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES;
+
+ if (link) {
+ /* opcode: jal addr */
+ opcode_x1 =
+ create_Opcode_X1(JUMP_OPCODE_X1) |
+ create_JumpOpcodeExtension_X1(JAL_JUMP_OPCODE_X1) |
+ create_JumpOff_X1(pcrel_by_instr);
+ } else {
+ /* opcode: j addr */
+ opcode_x1 =
+ create_Opcode_X1(JUMP_OPCODE_X1) |
+ create_JumpOpcodeExtension_X1(J_JUMP_OPCODE_X1) |
+ create_JumpOff_X1(pcrel_by_instr);
+ }
+
+ if (addr == FTRACE_ADDR) {
+ /* opcode: or r10, lr, zero */
+ opcode_x0 =
+ create_Dest_X0(10) |
+ create_SrcA_X0(TREG_LR) |
+ create_SrcB_X0(TREG_ZERO) |
+ create_RRROpcodeExtension_X0(OR_RRR_0_OPCODE_X0) |
+ create_Opcode_X0(RRR_0_OPCODE_X0);
+ } else {
+ /* opcode: fnop */
+ opcode_x0 =
+ create_UnaryOpcodeExtension_X0(FNOP_UNARY_OPCODE_X0) |
+ create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) |
+ create_Opcode_X0(RRR_0_OPCODE_X0);
+ }
+
+ return opcode_x1 | opcode_x0;
+}
+
+static unsigned long ftrace_nop_replace(struct dyn_ftrace *rec)
+{
+ return NOP();
+}
+
+static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
+{
+ return ftrace_gen_branch(pc, addr, true);
+}
+
+static int ftrace_modify_code(unsigned long pc, unsigned long old,
+ unsigned long new)
+{
+ unsigned long pc_wr;
+
+ /* Check if the address is in kernel text space and module space. */
+ if (!kernel_text_address(pc))
+ return -EINVAL;
+
+ /* Operate on writable kernel text mapping. */
+ pc_wr = pc - MEM_SV_START + PAGE_OFFSET;
+
+ if (probe_kernel_write((void *)pc_wr, &new, MCOUNT_INSN_SIZE))
+ return -EPERM;
+
+ smp_wmb();
+
+ if (!machine_stopped && num_online_cpus() > 1)
+ flush_icache_range(pc, pc + MCOUNT_INSN_SIZE);
+
+ return 0;
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ unsigned long pc, old;
+ unsigned long new;
+ int ret;
+
+ pc = (unsigned long)&ftrace_call;
+ memcpy(&old, &ftrace_call, MCOUNT_INSN_SIZE);
+ new = ftrace_call_replace(pc, (unsigned long)func);
+
+ ret = ftrace_modify_code(pc, old, new);
+
+ return ret;
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long new, old;
+ unsigned long ip = rec->ip;
+
+ old = ftrace_nop_replace(rec);
+ new = ftrace_call_replace(ip, addr);
+
+ return ftrace_modify_code(rec->ip, old, new);
+}
+
+int ftrace_make_nop(struct module *mod,
+ struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long ip = rec->ip;
+ unsigned long old;
+ unsigned long new;
+ int ret;
+
+ old = ftrace_call_replace(ip, addr);
+ new = ftrace_nop_replace(rec);
+ ret = ftrace_modify_code(ip, old, new);
+
+ return ret;
+}
+
+int __init ftrace_dyn_arch_init(void *data)
+{
+ *(unsigned long *)data = 0;
+
+ return 0;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+ unsigned long frame_pointer)
+{
+ unsigned long return_hooker = (unsigned long) &return_to_handler;
+ struct ftrace_graph_ent trace;
+ unsigned long old;
+ int err;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;
+
+ old = *parent;
+ *parent = return_hooker;
+
+ err = ftrace_push_return_trace(old, self_addr, &trace.depth,
+ frame_pointer);
+ if (err == -EBUSY) {
+ *parent = old;
+ return;
+ }
+
+ trace.func = self_addr;
+
+ /* Only trace if the calling function expects to */
+ if (!ftrace_graph_entry(&trace)) {
+ current->curr_ret_stack--;
+ *parent = old;
+ }
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern unsigned long ftrace_graph_call;
+
+static int __ftrace_modify_caller(unsigned long *callsite,
+ void (*func) (void), bool enable)
+{
+ unsigned long caller_fn = (unsigned long) func;
+ unsigned long pc = (unsigned long) callsite;
+ unsigned long branch = ftrace_gen_branch(pc, caller_fn, false);
+ unsigned long nop = NOP();
+ unsigned long old = enable ? nop : branch;
+ unsigned long new = enable ? branch : nop;
+
+ return ftrace_modify_code(pc, old, new);
+}
+
+static int ftrace_modify_graph_caller(bool enable)
+{
+ int ret;
+
+ ret = __ftrace_modify_caller(&ftrace_graph_call,
+ ftrace_graph_caller,
+ enable);
+
+ return ret;
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+ return ftrace_modify_graph_caller(true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+ return ftrace_modify_graph_caller(false);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c
index 38ac189d9575..df27a1fd94a3 100644
--- a/arch/tile/kernel/hardwall.c
+++ b/arch/tile/kernel/hardwall.c
@@ -272,9 +272,9 @@ static void hardwall_setup_func(void *info)
struct hardwall_info *r = info;
struct hardwall_type *hwt = r->type;
- int cpu = smp_processor_id();
- int x = cpu % smp_width;
- int y = cpu / smp_width;
+ int cpu = smp_processor_id(); /* on_each_cpu disables preemption */
+ int x = cpu_x(cpu);
+ int y = cpu_y(cpu);
int bits = 0;
if (x == r->ulhc_x)
bits |= W_PROTECT;
@@ -317,6 +317,7 @@ static void hardwall_protect_rectangle(struct hardwall_info *r)
on_each_cpu_mask(&rect_cpus, hardwall_setup_func, r, 1);
}
+/* Entered from INT_xDN_FIREWALL interrupt vector with irqs disabled. */
void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num)
{
struct hardwall_info *rect;
@@ -325,7 +326,6 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num)
struct siginfo info;
int cpu = smp_processor_id();
int found_processes;
- unsigned long flags;
struct pt_regs *old_regs = set_irq_regs(regs);
irq_enter();
@@ -346,7 +346,7 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num)
BUG_ON(hwt->disabled);
/* This tile trapped a network access; find the rectangle. */
- spin_lock_irqsave(&hwt->lock, flags);
+ spin_lock(&hwt->lock);
list_for_each_entry(rect, &hwt->list, list) {
if (cpumask_test_cpu(cpu, &rect->cpumask))
break;
@@ -401,7 +401,7 @@ void __kprobes do_hardwall_trap(struct pt_regs* regs, int fault_num)
pr_notice("hardwall: no associated processes!\n");
done:
- spin_unlock_irqrestore(&hwt->lock, flags);
+ spin_unlock(&hwt->lock);
/*
* We have to disable firewall interrupts now, or else when we
@@ -540,6 +540,14 @@ static struct hardwall_info *hardwall_create(struct hardwall_type *hwt,
}
}
+ /*
+ * Eliminate cpus that are not part of this Linux client.
+ * Note that this allows for configurations that we might not want to
+ * support, such as one client on every even cpu, another client on
+ * every odd cpu.
+ */
+ cpumask_and(&info->cpumask, &info->cpumask, cpu_online_mask);
+
/* Confirm it doesn't overlap and add it to the list. */
spin_lock_irqsave(&hwt->lock, flags);
list_for_each_entry(iter, &hwt->list, list) {
@@ -612,7 +620,7 @@ static int hardwall_activate(struct hardwall_info *info)
/*
* Deactivate a task's hardwall. Must hold lock for hardwall_type.
- * This method may be called from free_task(), so we don't want to
+ * This method may be called from exit_thread(), so we don't want to
* rely on too many fields of struct task_struct still being valid.
* We assume the cpus_allowed, pid, and comm fields are still valid.
*/
@@ -653,7 +661,7 @@ static int hardwall_deactivate(struct hardwall_type *hwt,
return -EINVAL;
printk(KERN_DEBUG "Pid %d (%s) deactivated for %s hardwall: cpu %d\n",
- task->pid, task->comm, hwt->name, smp_processor_id());
+ task->pid, task->comm, hwt->name, raw_smp_processor_id());
return 0;
}
@@ -795,8 +803,8 @@ static void reset_xdn_network_state(struct hardwall_type *hwt)
/* Reset UDN coordinates to their standard value */
{
unsigned int cpu = smp_processor_id();
- unsigned int x = cpu % smp_width;
- unsigned int y = cpu / smp_width;
+ unsigned int x = cpu_x(cpu);
+ unsigned int y = cpu_y(cpu);
__insn_mtspr(SPR_UDN_TILE_COORD, (x << 18) | (y << 7));
}
diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S
index ac115307e5e4..8d5b40ff2922 100644
--- a/arch/tile/kernel/head_32.S
+++ b/arch/tile/kernel/head_32.S
@@ -39,12 +39,12 @@ ENTRY(_start)
}
{
moveli r0, _HV_VERSION_OLD_HV_INIT
- jal hv_init
+ jal _hv_init
}
/* Get a reasonable default ASID in r0 */
{
move r0, zero
- jal hv_inquire_asid
+ jal _hv_inquire_asid
}
/* Install the default page table */
{
@@ -64,7 +64,7 @@ ENTRY(_start)
auli r0, r0, ha16(swapper_pg_dir - PAGE_OFFSET)
}
{
- inv r6
+ finv r6
move r1, zero /* high 32 bits of CPA is zero */
}
{
@@ -73,12 +73,12 @@ ENTRY(_start)
}
{
auli lr, lr, ha16(1f)
- j hv_install_context
+ j _hv_install_context
}
1:
/* Get our processor number and save it away in SAVE_K_0. */
- jal hv_inquire_topology
+ jal _hv_inquire_topology
mulll_uu r4, r1, r2 /* r1 == y, r2 == width */
add r4, r4, r0 /* r0 == x, so r4 == cpu == y*width + x */
@@ -86,7 +86,7 @@ ENTRY(_start)
/*
* Load up our per-cpu offset. When the first (master) tile
* boots, this value is still zero, so we will load boot_pc
- * with start_kernel, and boot_sp with init_stack + THREAD_SIZE.
+ * with start_kernel, and boot_sp at the top of init_stack.
* The master tile initializes the per-cpu offset array, so that
* when subsequent (secondary) tiles boot, they will instead load
* from their per-cpu versions of boot_sp and boot_pc.
@@ -126,7 +126,6 @@ ENTRY(_start)
lw sp, r1
or r4, sp, r4
mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */
- addi sp, sp, -STACK_TOP_DELTA
{
move lr, zero /* stop backtraces in the called function */
jr r0
@@ -163,8 +162,8 @@ ENTRY(swapper_pg_dir)
.set addr, addr + PGDIR_SIZE
.endr
- /* The true text VAs are mapped as VA = PA + MEM_SV_INTRPT */
- PTE MEM_SV_INTRPT, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \
+ /* The true text VAs are mapped as VA = PA + MEM_SV_START */
+ PTE MEM_SV_START, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \
(1 << (HV_PTE_INDEX_EXECUTABLE - 32))
.org swapper_pg_dir + PGDIR_SIZE
END(swapper_pg_dir)
diff --git a/arch/tile/kernel/head_64.S b/arch/tile/kernel/head_64.S
index 6093964fa5c7..bd0e12f283f3 100644
--- a/arch/tile/kernel/head_64.S
+++ b/arch/tile/kernel/head_64.S
@@ -25,6 +25,15 @@
#include <arch/chip.h>
#include <arch/spr_def.h>
+/* Extract two 32-bit bit values that were read into one register. */
+#ifdef __BIG_ENDIAN__
+#define GET_FIRST_INT(rd, rs) shrsi rd, rs, 32
+#define GET_SECOND_INT(rd, rs) addxi rd, rs, 0
+#else
+#define GET_FIRST_INT(rd, rs) addxi rd, rs, 0
+#define GET_SECOND_INT(rd, rs) shrsi rd, rs, 32
+#endif
+
/*
* This module contains the entry code for kernel images. It performs the
* minimal setup needed to call the generic C routines.
@@ -46,11 +55,11 @@ ENTRY(_start)
movei r2, TILE_CHIP_REV
movei r3, KERNEL_PL
}
- jal hv_init
+ jal _hv_init
/* Get a reasonable default ASID in r0 */
{
move r0, zero
- jal hv_inquire_asid
+ jal _hv_inquire_asid
}
/*
@@ -61,7 +70,7 @@ ENTRY(_start)
* other CPUs should see a properly-constructed page table.
*/
{
- v4int_l r2, zero, r0 /* ASID for hv_install_context */
+ GET_FIRST_INT(r2, r0) /* ASID for hv_install_context */
moveli r4, hw1_last(swapper_pgprot - PAGE_OFFSET)
}
{
@@ -77,7 +86,7 @@ ENTRY(_start)
{
/* After initializing swapper_pgprot, HV_PTE_GLOBAL is set. */
bfextu r7, r1, HV_PTE_INDEX_GLOBAL, HV_PTE_INDEX_GLOBAL
- inv r4
+ finv r4
}
bnez r7, .Lno_write
{
@@ -121,29 +130,24 @@ ENTRY(_start)
}
{
moveli r3, CTX_PAGE_FLAG
- j hv_install_context
+ j _hv_install_context
}
1:
/* Install the interrupt base. */
- moveli r0, hw2_last(MEM_SV_START)
- shl16insli r0, r0, hw1(MEM_SV_START)
- shl16insli r0, r0, hw0(MEM_SV_START)
+ moveli r0, hw2_last(intrpt_start)
+ shl16insli r0, r0, hw1(intrpt_start)
+ shl16insli r0, r0, hw0(intrpt_start)
mtspr SPR_INTERRUPT_VECTOR_BASE_K, r0
- /*
- * Get our processor number and save it away in SAVE_K_0.
- * Extract stuff from the topology structure: r4 = y, r6 = x,
- * r5 = width. FIXME: consider whether we want to just make these
- * 64-bit values (and if so fix smp_topology write below, too).
- */
- jal hv_inquire_topology
+ /* Get our processor number and save it away in SAVE_K_0. */
+ jal _hv_inquire_topology
{
- v4int_l r5, zero, r1 /* r5 = width */
- shrui r4, r0, 32 /* r4 = y */
+ GET_FIRST_INT(r5, r1) /* r5 = width */
+ GET_SECOND_INT(r4, r0) /* r4 = y */
}
{
- v4int_l r6, zero, r0 /* r6 = x */
+ GET_FIRST_INT(r6, r0) /* r6 = x */
mul_lu_lu r4, r4, r5
}
{
@@ -154,7 +158,7 @@ ENTRY(_start)
/*
* Load up our per-cpu offset. When the first (master) tile
* boots, this value is still zero, so we will load boot_pc
- * with start_kernel, and boot_sp with init_stack + THREAD_SIZE.
+ * with start_kernel, and boot_sp with at the top of init_stack.
* The master tile initializes the per-cpu offset array, so that
* when subsequent (secondary) tiles boot, they will instead load
* from their per-cpu versions of boot_sp and boot_pc.
@@ -198,9 +202,9 @@ ENTRY(_start)
}
ld r0, r0
ld sp, r1
- or r4, sp, r4
+ shli r4, r4, CPU_SHIFT
+ bfins r4, sp, 0, CPU_SHIFT-1
mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */
- addi sp, sp, -STACK_TOP_DELTA
{
move lr, zero /* stop backtraces in the called function */
jr r0
diff --git a/arch/tile/kernel/hvglue.S b/arch/tile/kernel/hvglue.S
new file mode 100644
index 000000000000..2ab456622391
--- /dev/null
+++ b/arch/tile/kernel/hvglue.S
@@ -0,0 +1,74 @@
+/* Hypervisor call vector addresses; see <hv/hypervisor.h> */
+.macro gensym sym, val, size
+.org \val
+.global _\sym
+.type _\sym,function
+_\sym:
+.size _\sym,\size
+#ifndef CONFIG_TILE_HVGLUE_TRACE
+.globl \sym
+.set \sym,_\sym
+#endif
+.endm
+
+.section .hvglue,"x",@nobits
+.align 8
+gensym hv_init, 0x20, 32
+gensym hv_install_context, 0x40, 32
+gensym hv_sysconf, 0x60, 32
+gensym hv_get_rtc, 0x80, 32
+gensym hv_set_rtc, 0xa0, 32
+gensym hv_flush_asid, 0xc0, 32
+gensym hv_flush_page, 0xe0, 32
+gensym hv_flush_pages, 0x100, 32
+gensym hv_restart, 0x120, 32
+gensym hv_halt, 0x140, 32
+gensym hv_power_off, 0x160, 32
+gensym hv_inquire_physical, 0x180, 32
+gensym hv_inquire_memory_controller, 0x1a0, 32
+gensym hv_inquire_virtual, 0x1c0, 32
+gensym hv_inquire_asid, 0x1e0, 32
+gensym hv_nanosleep, 0x200, 32
+gensym hv_console_read_if_ready, 0x220, 32
+gensym hv_console_write, 0x240, 32
+gensym hv_downcall_dispatch, 0x260, 32
+gensym hv_inquire_topology, 0x280, 32
+gensym hv_fs_findfile, 0x2a0, 32
+gensym hv_fs_fstat, 0x2c0, 32
+gensym hv_fs_pread, 0x2e0, 32
+gensym hv_physaddr_read64, 0x300, 32
+gensym hv_physaddr_write64, 0x320, 32
+gensym hv_get_command_line, 0x340, 32
+gensym hv_set_caching, 0x360, 32
+gensym hv_bzero_page, 0x380, 32
+gensym hv_register_message_state, 0x3a0, 32
+gensym hv_send_message, 0x3c0, 32
+gensym hv_receive_message, 0x3e0, 32
+gensym hv_inquire_context, 0x400, 32
+gensym hv_start_all_tiles, 0x420, 32
+gensym hv_dev_open, 0x440, 32
+gensym hv_dev_close, 0x460, 32
+gensym hv_dev_pread, 0x480, 32
+gensym hv_dev_pwrite, 0x4a0, 32
+gensym hv_dev_poll, 0x4c0, 32
+gensym hv_dev_poll_cancel, 0x4e0, 32
+gensym hv_dev_preada, 0x500, 32
+gensym hv_dev_pwritea, 0x520, 32
+gensym hv_flush_remote, 0x540, 32
+gensym hv_console_putc, 0x560, 32
+gensym hv_inquire_tiles, 0x580, 32
+gensym hv_confstr, 0x5a0, 32
+gensym hv_reexec, 0x5c0, 32
+gensym hv_set_command_line, 0x5e0, 32
+gensym hv_clear_intr, 0x600, 32
+gensym hv_enable_intr, 0x620, 32
+gensym hv_disable_intr, 0x640, 32
+gensym hv_raise_intr, 0x660, 32
+gensym hv_trigger_ipi, 0x680, 32
+gensym hv_store_mapping, 0x6a0, 32
+gensym hv_inquire_realpa, 0x6c0, 32
+gensym hv_flush_all, 0x6e0, 32
+gensym hv_get_ipi_pte, 0x700, 32
+gensym hv_set_pte_super_shift, 0x720, 32
+gensym hv_console_set_ipi, 0x7e0, 32
+gensym hv_glue_internals, 0x800, 30720
diff --git a/arch/tile/kernel/hvglue.lds b/arch/tile/kernel/hvglue.lds
deleted file mode 100644
index d44c5a67a1ed..000000000000
--- a/arch/tile/kernel/hvglue.lds
+++ /dev/null
@@ -1,59 +0,0 @@
-/* Hypervisor call vector addresses; see <hv/hypervisor.h> */
-hv_init = TEXT_OFFSET + 0x10020;
-hv_install_context = TEXT_OFFSET + 0x10040;
-hv_sysconf = TEXT_OFFSET + 0x10060;
-hv_get_rtc = TEXT_OFFSET + 0x10080;
-hv_set_rtc = TEXT_OFFSET + 0x100a0;
-hv_flush_asid = TEXT_OFFSET + 0x100c0;
-hv_flush_page = TEXT_OFFSET + 0x100e0;
-hv_flush_pages = TEXT_OFFSET + 0x10100;
-hv_restart = TEXT_OFFSET + 0x10120;
-hv_halt = TEXT_OFFSET + 0x10140;
-hv_power_off = TEXT_OFFSET + 0x10160;
-hv_inquire_physical = TEXT_OFFSET + 0x10180;
-hv_inquire_memory_controller = TEXT_OFFSET + 0x101a0;
-hv_inquire_virtual = TEXT_OFFSET + 0x101c0;
-hv_inquire_asid = TEXT_OFFSET + 0x101e0;
-hv_nanosleep = TEXT_OFFSET + 0x10200;
-hv_console_read_if_ready = TEXT_OFFSET + 0x10220;
-hv_console_write = TEXT_OFFSET + 0x10240;
-hv_downcall_dispatch = TEXT_OFFSET + 0x10260;
-hv_inquire_topology = TEXT_OFFSET + 0x10280;
-hv_fs_findfile = TEXT_OFFSET + 0x102a0;
-hv_fs_fstat = TEXT_OFFSET + 0x102c0;
-hv_fs_pread = TEXT_OFFSET + 0x102e0;
-hv_physaddr_read64 = TEXT_OFFSET + 0x10300;
-hv_physaddr_write64 = TEXT_OFFSET + 0x10320;
-hv_get_command_line = TEXT_OFFSET + 0x10340;
-hv_set_caching = TEXT_OFFSET + 0x10360;
-hv_bzero_page = TEXT_OFFSET + 0x10380;
-hv_register_message_state = TEXT_OFFSET + 0x103a0;
-hv_send_message = TEXT_OFFSET + 0x103c0;
-hv_receive_message = TEXT_OFFSET + 0x103e0;
-hv_inquire_context = TEXT_OFFSET + 0x10400;
-hv_start_all_tiles = TEXT_OFFSET + 0x10420;
-hv_dev_open = TEXT_OFFSET + 0x10440;
-hv_dev_close = TEXT_OFFSET + 0x10460;
-hv_dev_pread = TEXT_OFFSET + 0x10480;
-hv_dev_pwrite = TEXT_OFFSET + 0x104a0;
-hv_dev_poll = TEXT_OFFSET + 0x104c0;
-hv_dev_poll_cancel = TEXT_OFFSET + 0x104e0;
-hv_dev_preada = TEXT_OFFSET + 0x10500;
-hv_dev_pwritea = TEXT_OFFSET + 0x10520;
-hv_flush_remote = TEXT_OFFSET + 0x10540;
-hv_console_putc = TEXT_OFFSET + 0x10560;
-hv_inquire_tiles = TEXT_OFFSET + 0x10580;
-hv_confstr = TEXT_OFFSET + 0x105a0;
-hv_reexec = TEXT_OFFSET + 0x105c0;
-hv_set_command_line = TEXT_OFFSET + 0x105e0;
-hv_clear_intr = TEXT_OFFSET + 0x10600;
-hv_enable_intr = TEXT_OFFSET + 0x10620;
-hv_disable_intr = TEXT_OFFSET + 0x10640;
-hv_raise_intr = TEXT_OFFSET + 0x10660;
-hv_trigger_ipi = TEXT_OFFSET + 0x10680;
-hv_store_mapping = TEXT_OFFSET + 0x106a0;
-hv_inquire_realpa = TEXT_OFFSET + 0x106c0;
-hv_flush_all = TEXT_OFFSET + 0x106e0;
-hv_get_ipi_pte = TEXT_OFFSET + 0x10700;
-hv_set_pte_super_shift = TEXT_OFFSET + 0x10720;
-hv_glue_internals = TEXT_OFFSET + 0x10740;
diff --git a/arch/tile/kernel/hvglue_trace.c b/arch/tile/kernel/hvglue_trace.c
new file mode 100644
index 000000000000..85c74ad29312
--- /dev/null
+++ b/arch/tile/kernel/hvglue_trace.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/*
+ * Pull in the hypervisor header so we declare all the ABI functions
+ * with the underscore versions, then undef the names so that we can
+ * provide our own wrapper versions.
+ */
+#define hv_init _hv_init
+#define hv_install_context _hv_install_context
+#define hv_sysconf _hv_sysconf
+#define hv_get_rtc _hv_get_rtc
+#define hv_set_rtc _hv_set_rtc
+#define hv_flush_asid _hv_flush_asid
+#define hv_flush_page _hv_flush_page
+#define hv_flush_pages _hv_flush_pages
+#define hv_restart _hv_restart
+#define hv_halt _hv_halt
+#define hv_power_off _hv_power_off
+#define hv_inquire_physical _hv_inquire_physical
+#define hv_inquire_memory_controller _hv_inquire_memory_controller
+#define hv_inquire_virtual _hv_inquire_virtual
+#define hv_inquire_asid _hv_inquire_asid
+#define hv_nanosleep _hv_nanosleep
+#define hv_console_read_if_ready _hv_console_read_if_ready
+#define hv_console_write _hv_console_write
+#define hv_downcall_dispatch _hv_downcall_dispatch
+#define hv_inquire_topology _hv_inquire_topology
+#define hv_fs_findfile _hv_fs_findfile
+#define hv_fs_fstat _hv_fs_fstat
+#define hv_fs_pread _hv_fs_pread
+#define hv_physaddr_read64 _hv_physaddr_read64
+#define hv_physaddr_write64 _hv_physaddr_write64
+#define hv_get_command_line _hv_get_command_line
+#define hv_set_caching _hv_set_caching
+#define hv_bzero_page _hv_bzero_page
+#define hv_register_message_state _hv_register_message_state
+#define hv_send_message _hv_send_message
+#define hv_receive_message _hv_receive_message
+#define hv_inquire_context _hv_inquire_context
+#define hv_start_all_tiles _hv_start_all_tiles
+#define hv_dev_open _hv_dev_open
+#define hv_dev_close _hv_dev_close
+#define hv_dev_pread _hv_dev_pread
+#define hv_dev_pwrite _hv_dev_pwrite
+#define hv_dev_poll _hv_dev_poll
+#define hv_dev_poll_cancel _hv_dev_poll_cancel
+#define hv_dev_preada _hv_dev_preada
+#define hv_dev_pwritea _hv_dev_pwritea
+#define hv_flush_remote _hv_flush_remote
+#define hv_console_putc _hv_console_putc
+#define hv_inquire_tiles _hv_inquire_tiles
+#define hv_confstr _hv_confstr
+#define hv_reexec _hv_reexec
+#define hv_set_command_line _hv_set_command_line
+#define hv_clear_intr _hv_clear_intr
+#define hv_enable_intr _hv_enable_intr
+#define hv_disable_intr _hv_disable_intr
+#define hv_raise_intr _hv_raise_intr
+#define hv_trigger_ipi _hv_trigger_ipi
+#define hv_store_mapping _hv_store_mapping
+#define hv_inquire_realpa _hv_inquire_realpa
+#define hv_flush_all _hv_flush_all
+#define hv_get_ipi_pte _hv_get_ipi_pte
+#define hv_set_pte_super_shift _hv_set_pte_super_shift
+#define hv_console_set_ipi _hv_console_set_ipi
+#include <hv/hypervisor.h>
+#undef hv_init
+#undef hv_install_context
+#undef hv_sysconf
+#undef hv_get_rtc
+#undef hv_set_rtc
+#undef hv_flush_asid
+#undef hv_flush_page
+#undef hv_flush_pages
+#undef hv_restart
+#undef hv_halt
+#undef hv_power_off
+#undef hv_inquire_physical
+#undef hv_inquire_memory_controller
+#undef hv_inquire_virtual
+#undef hv_inquire_asid
+#undef hv_nanosleep
+#undef hv_console_read_if_ready
+#undef hv_console_write
+#undef hv_downcall_dispatch
+#undef hv_inquire_topology
+#undef hv_fs_findfile
+#undef hv_fs_fstat
+#undef hv_fs_pread
+#undef hv_physaddr_read64
+#undef hv_physaddr_write64
+#undef hv_get_command_line
+#undef hv_set_caching
+#undef hv_bzero_page
+#undef hv_register_message_state
+#undef hv_send_message
+#undef hv_receive_message
+#undef hv_inquire_context
+#undef hv_start_all_tiles
+#undef hv_dev_open
+#undef hv_dev_close
+#undef hv_dev_pread
+#undef hv_dev_pwrite
+#undef hv_dev_poll
+#undef hv_dev_poll_cancel
+#undef hv_dev_preada
+#undef hv_dev_pwritea
+#undef hv_flush_remote
+#undef hv_console_putc
+#undef hv_inquire_tiles
+#undef hv_confstr
+#undef hv_reexec
+#undef hv_set_command_line
+#undef hv_clear_intr
+#undef hv_enable_intr
+#undef hv_disable_intr
+#undef hv_raise_intr
+#undef hv_trigger_ipi
+#undef hv_store_mapping
+#undef hv_inquire_realpa
+#undef hv_flush_all
+#undef hv_get_ipi_pte
+#undef hv_set_pte_super_shift
+#undef hv_console_set_ipi
+
+/*
+ * Provide macros based on <linux/syscalls.h> to provide a wrapper
+ * function that invokes the same function with an underscore prefix.
+ * We can't use the existing __SC_xxx macros because we need to
+ * support up to nine arguments rather than up to six, and also this
+ * way the file stands alone from possible changes in the
+ * implementation of <linux/syscalls.h>.
+ */
+#define HV_WRAP0(type, name) \
+ type name(void); \
+ type name(void) \
+ { \
+ return _##name(); \
+ }
+#define __HV_DECL1(t1, a1) t1 a1
+#define __HV_DECL2(t2, a2, ...) t2 a2, __HV_DECL1(__VA_ARGS__)
+#define __HV_DECL3(t3, a3, ...) t3 a3, __HV_DECL2(__VA_ARGS__)
+#define __HV_DECL4(t4, a4, ...) t4 a4, __HV_DECL3(__VA_ARGS__)
+#define __HV_DECL5(t5, a5, ...) t5 a5, __HV_DECL4(__VA_ARGS__)
+#define __HV_DECL6(t6, a6, ...) t6 a6, __HV_DECL5(__VA_ARGS__)
+#define __HV_DECL7(t7, a7, ...) t7 a7, __HV_DECL6(__VA_ARGS__)
+#define __HV_DECL8(t8, a8, ...) t8 a8, __HV_DECL7(__VA_ARGS__)
+#define __HV_DECL9(t9, a9, ...) t9 a9, __HV_DECL8(__VA_ARGS__)
+#define __HV_PASS1(t1, a1) a1
+#define __HV_PASS2(t2, a2, ...) a2, __HV_PASS1(__VA_ARGS__)
+#define __HV_PASS3(t3, a3, ...) a3, __HV_PASS2(__VA_ARGS__)
+#define __HV_PASS4(t4, a4, ...) a4, __HV_PASS3(__VA_ARGS__)
+#define __HV_PASS5(t5, a5, ...) a5, __HV_PASS4(__VA_ARGS__)
+#define __HV_PASS6(t6, a6, ...) a6, __HV_PASS5(__VA_ARGS__)
+#define __HV_PASS7(t7, a7, ...) a7, __HV_PASS6(__VA_ARGS__)
+#define __HV_PASS8(t8, a8, ...) a8, __HV_PASS7(__VA_ARGS__)
+#define __HV_PASS9(t9, a9, ...) a9, __HV_PASS8(__VA_ARGS__)
+#define HV_WRAPx(x, type, name, ...) \
+ type name(__HV_DECL##x(__VA_ARGS__)); \
+ type name(__HV_DECL##x(__VA_ARGS__)) \
+ { \
+ return _##name(__HV_PASS##x(__VA_ARGS__)); \
+ }
+#define HV_WRAP1(type, name, ...) HV_WRAPx(1, type, name, __VA_ARGS__)
+#define HV_WRAP2(type, name, ...) HV_WRAPx(2, type, name, __VA_ARGS__)
+#define HV_WRAP3(type, name, ...) HV_WRAPx(3, type, name, __VA_ARGS__)
+#define HV_WRAP4(type, name, ...) HV_WRAPx(4, type, name, __VA_ARGS__)
+#define HV_WRAP5(type, name, ...) HV_WRAPx(5, type, name, __VA_ARGS__)
+#define HV_WRAP6(type, name, ...) HV_WRAPx(6, type, name, __VA_ARGS__)
+#define HV_WRAP7(type, name, ...) HV_WRAPx(7, type, name, __VA_ARGS__)
+#define HV_WRAP8(type, name, ...) HV_WRAPx(8, type, name, __VA_ARGS__)
+#define HV_WRAP9(type, name, ...) HV_WRAPx(9, type, name, __VA_ARGS__)
+
+/* List all the hypervisor API functions. */
+HV_WRAP4(void, hv_init, HV_VersionNumber, interface_version_number,
+ int, chip_num, int, chip_rev_num, int, client_pl)
+HV_WRAP1(long, hv_sysconf, HV_SysconfQuery, query)
+HV_WRAP3(int, hv_confstr, HV_ConfstrQuery, query, HV_VirtAddr, buf, int, len)
+#if CHIP_HAS_IPI()
+HV_WRAP3(int, hv_get_ipi_pte, HV_Coord, tile, int, pl, HV_PTE*, pte)
+HV_WRAP3(int, hv_console_set_ipi, int, ipi, int, event, HV_Coord, coord);
+#else
+HV_WRAP1(void, hv_enable_intr, HV_IntrMask, enab_mask)
+HV_WRAP1(void, hv_disable_intr, HV_IntrMask, disab_mask)
+HV_WRAP1(void, hv_clear_intr, HV_IntrMask, clear_mask)
+HV_WRAP1(void, hv_raise_intr, HV_IntrMask, raise_mask)
+HV_WRAP2(HV_Errno, hv_trigger_ipi, HV_Coord, tile, int, interrupt)
+#endif /* !CHIP_HAS_IPI() */
+HV_WRAP3(int, hv_store_mapping, HV_VirtAddr, va, unsigned int, len,
+ HV_PhysAddr, pa)
+HV_WRAP2(HV_PhysAddr, hv_inquire_realpa, HV_PhysAddr, cpa, unsigned int, len)
+HV_WRAP0(HV_RTCTime, hv_get_rtc)
+HV_WRAP1(void, hv_set_rtc, HV_RTCTime, time)
+HV_WRAP4(int, hv_install_context, HV_PhysAddr, page_table, HV_PTE, access,
+ HV_ASID, asid, __hv32, flags)
+HV_WRAP2(int, hv_set_pte_super_shift, int, level, int, log2_count)
+HV_WRAP0(HV_Context, hv_inquire_context)
+HV_WRAP1(int, hv_flush_asid, HV_ASID, asid)
+HV_WRAP2(int, hv_flush_page, HV_VirtAddr, address, HV_PageSize, page_size)
+HV_WRAP3(int, hv_flush_pages, HV_VirtAddr, start, HV_PageSize, page_size,
+ unsigned long, size)
+HV_WRAP1(int, hv_flush_all, int, preserve_global)
+HV_WRAP2(void, hv_restart, HV_VirtAddr, cmd, HV_VirtAddr, args)
+HV_WRAP0(void, hv_halt)
+HV_WRAP0(void, hv_power_off)
+HV_WRAP1(int, hv_reexec, HV_PhysAddr, entry)
+HV_WRAP0(HV_Topology, hv_inquire_topology)
+HV_WRAP3(HV_Errno, hv_inquire_tiles, HV_InqTileSet, set, HV_VirtAddr, cpumask,
+ int, length)
+HV_WRAP1(HV_PhysAddrRange, hv_inquire_physical, int, idx)
+HV_WRAP2(HV_MemoryControllerInfo, hv_inquire_memory_controller, HV_Coord, coord,
+ int, controller)
+HV_WRAP1(HV_VirtAddrRange, hv_inquire_virtual, int, idx)
+HV_WRAP1(HV_ASIDRange, hv_inquire_asid, int, idx)
+HV_WRAP1(void, hv_nanosleep, int, nanosecs)
+HV_WRAP0(int, hv_console_read_if_ready)
+HV_WRAP1(void, hv_console_putc, int, byte)
+HV_WRAP2(int, hv_console_write, HV_VirtAddr, bytes, int, len)
+HV_WRAP0(void, hv_downcall_dispatch)
+HV_WRAP1(int, hv_fs_findfile, HV_VirtAddr, filename)
+HV_WRAP1(HV_FS_StatInfo, hv_fs_fstat, int, inode)
+HV_WRAP4(int, hv_fs_pread, int, inode, HV_VirtAddr, buf,
+ int, length, int, offset)
+HV_WRAP2(unsigned long long, hv_physaddr_read64, HV_PhysAddr, addr,
+ HV_PTE, access)
+HV_WRAP3(void, hv_physaddr_write64, HV_PhysAddr, addr, HV_PTE, access,
+ unsigned long long, val)
+HV_WRAP2(int, hv_get_command_line, HV_VirtAddr, buf, int, length)
+HV_WRAP2(HV_Errno, hv_set_command_line, HV_VirtAddr, buf, int, length)
+HV_WRAP1(void, hv_set_caching, unsigned long, bitmask)
+HV_WRAP2(void, hv_bzero_page, HV_VirtAddr, va, unsigned int, size)
+HV_WRAP1(HV_Errno, hv_register_message_state, HV_MsgState*, msgstate)
+HV_WRAP4(int, hv_send_message, HV_Recipient *, recips, int, nrecip,
+ HV_VirtAddr, buf, int, buflen)
+HV_WRAP3(HV_RcvMsgInfo, hv_receive_message, HV_MsgState, msgstate,
+ HV_VirtAddr, buf, int, buflen)
+HV_WRAP0(void, hv_start_all_tiles)
+HV_WRAP2(int, hv_dev_open, HV_VirtAddr, name, __hv32, flags)
+HV_WRAP1(int, hv_dev_close, int, devhdl)
+HV_WRAP5(int, hv_dev_pread, int, devhdl, __hv32, flags, HV_VirtAddr, va,
+ __hv32, len, __hv64, offset)
+HV_WRAP5(int, hv_dev_pwrite, int, devhdl, __hv32, flags, HV_VirtAddr, va,
+ __hv32, len, __hv64, offset)
+HV_WRAP3(int, hv_dev_poll, int, devhdl, __hv32, events, HV_IntArg, intarg)
+HV_WRAP1(int, hv_dev_poll_cancel, int, devhdl)
+HV_WRAP6(int, hv_dev_preada, int, devhdl, __hv32, flags, __hv32, sgl_len,
+ HV_SGL *, sglp, __hv64, offset, HV_IntArg, intarg)
+HV_WRAP6(int, hv_dev_pwritea, int, devhdl, __hv32, flags, __hv32, sgl_len,
+ HV_SGL *, sglp, __hv64, offset, HV_IntArg, intarg)
+HV_WRAP9(int, hv_flush_remote, HV_PhysAddr, cache_pa,
+ unsigned long, cache_control, unsigned long*, cache_cpumask,
+ HV_VirtAddr, tlb_va, unsigned long, tlb_length,
+ unsigned long, tlb_pgsize, unsigned long*, tlb_cpumask,
+ HV_Remote_ASID*, asids, int, asidcount)
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index cb52d66343ed..088d5c141e68 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -28,20 +28,10 @@
#include <arch/interrupts.h>
#include <arch/spr_def.h>
-#ifdef CONFIG_PREEMPT
-# error "No support for kernel preemption currently"
-#endif
-
#define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg)
#define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR)
-#if !CHIP_HAS_WH64()
- /* By making this an empty macro, we can use wh64 in the code. */
- .macro wh64 reg
- .endm
-#endif
-
.macro push_reg reg, ptr=sp, delta=-4
{
sw \ptr, \reg
@@ -189,7 +179,7 @@ intvec_\vecname:
* point sp at the top aligned address on the actual stack page.
*/
mfspr r0, SPR_SYSTEM_SAVE_K_0
- mm r0, r0, zero, LOG2_THREAD_SIZE, 31
+ mm r0, r0, zero, LOG2_NR_CPU_IDS, 31
0:
/*
@@ -207,6 +197,9 @@ intvec_\vecname:
* cache line 1: r14...r29
* cache line 0: 2 x frame, r0..r13
*/
+#if STACK_TOP_DELTA != 64
+#error STACK_TOP_DELTA must be 64 for assumptions here and in task_pt_regs()
+#endif
andi r0, r0, -64
/*
@@ -326,18 +319,14 @@ intvec_\vecname:
movei r3, -1 /* not used, but set for consistency */
}
.else
-#if CHIP_HAS_AUX_PERF_COUNTERS()
.ifc \c_routine, op_handle_aux_perf_interrupt
{
mfspr r2, AUX_PERF_COUNT_STS
movei r3, -1 /* not used, but set for consistency */
}
.else
-#endif
movei r3, 0
-#if CHIP_HAS_AUX_PERF_COUNTERS()
.endif
-#endif
.endif
.endif
.endif
@@ -354,7 +343,7 @@ intvec_\vecname:
#ifdef __COLLECT_LINKER_FEEDBACK__
.pushsection .text.intvec_feedback,"ax"
.org (\vecnum << 5)
- FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt1, 1 << 8)
+ FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt, 1 << 8)
jrp lr
.popsection
#endif
@@ -468,7 +457,7 @@ intvec_\vecname:
}
{
auli r21, r21, ha16(__per_cpu_offset)
- mm r20, r20, zero, 0, LOG2_THREAD_SIZE-1
+ mm r20, r20, zero, 0, LOG2_NR_CPU_IDS-1
}
s2a r20, r20, r21
lw tp, r20
@@ -562,7 +551,6 @@ intvec_\vecname:
.endif
mtspr INTERRUPT_CRITICAL_SECTION, zero
-#if CHIP_HAS_WH64()
/*
* Prepare the first 256 stack bytes to be rapidly accessible
* without having to fetch the background data. We don't really
@@ -583,7 +571,6 @@ intvec_\vecname:
addi r52, r52, -64
}
wh64 r52
-#endif
#ifdef CONFIG_TRACE_IRQFLAGS
.ifnc \function,handle_nmi
@@ -762,7 +749,7 @@ intvec_\vecname:
.macro dc_dispatch vecnum, vecname
.org (\vecnum << 8)
intvec_\vecname:
- j hv_downcall_dispatch
+ j _hv_downcall_dispatch
ENDPROC(intvec_\vecname)
.endm
@@ -812,17 +799,34 @@ STD_ENTRY(interrupt_return)
}
lw r29, r29
andi r29, r29, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ bzt r29, .Lresume_userspace
+
+#ifdef CONFIG_PREEMPT
+ /* Returning to kernel space. Check if we need preemption. */
+ GET_THREAD_INFO(r29)
+ addli r28, r29, THREAD_INFO_FLAGS_OFFSET
{
- bzt r29, .Lresume_userspace
- PTREGS_PTR(r29, PTREGS_OFFSET_PC)
+ lw r28, r28
+ addli r29, r29, THREAD_INFO_PREEMPT_COUNT_OFFSET
}
+ {
+ andi r28, r28, _TIF_NEED_RESCHED
+ lw r29, r29
+ }
+ bzt r28, 1f
+ bnz r29, 1f
+ jal preempt_schedule_irq
+ FEEDBACK_REENTER(interrupt_return)
+1:
+#endif
/* If we're resuming to _cpu_idle_nap, bump PC forward by 8. */
{
- lw r28, r29
+ PTREGS_PTR(r29, PTREGS_OFFSET_PC)
moveli r27, lo16(_cpu_idle_nap)
}
{
+ lw r28, r29
auli r27, r27, ha16(_cpu_idle_nap)
}
{
@@ -1420,7 +1424,6 @@ handle_ill:
{
lw r0, r0 /* indirect thru thread_info to get task_info*/
addi r1, sp, C_ABI_SAVE_AREA_SIZE /* put ptregs pointer into r1 */
- move r2, zero /* load error code into r2 */
}
jal send_sigtrap /* issue a SIGTRAP */
@@ -1518,12 +1521,10 @@ STD_ENTRY(_sys_clone)
__HEAD
.align 64
/* Align much later jump on the start of a cache line. */
-#if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
nop
#if PAGE_SIZE >= 0x10000
nop
#endif
-#endif
ENTRY(sys_cmpxchg)
/*
@@ -1557,45 +1558,6 @@ ENTRY(sys_cmpxchg)
# error Code here assumes PAGE_OFFSET can be loaded with just hi16()
#endif
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
- {
- /* Check for unaligned input. */
- bnz sp, .Lcmpxchg_badaddr
- mm r25, r0, zero, 3, PAGE_SHIFT-1
- }
- {
- crc32_32 r25, zero, r25
- moveli r21, lo16(atomic_lock_ptr)
- }
- {
- auli r21, r21, ha16(atomic_lock_ptr)
- auli r23, zero, hi16(PAGE_OFFSET) /* hugepage-aligned */
- }
- {
- shri r20, r25, 32 - ATOMIC_HASH_L1_SHIFT
- slt_u r23, r0, r23
- lw r26, r0 /* see comment in the "#else" for the "lw r26". */
- }
- {
- s2a r21, r20, r21
- bbns r23, .Lcmpxchg_badaddr
- }
- {
- lw r21, r21
- seqi r23, TREG_SYSCALL_NR_NAME, __NR_FAST_cmpxchg64
- andi r25, r25, ATOMIC_HASH_L2_SIZE - 1
- }
- {
- /* Branch away at this point if we're doing a 64-bit cmpxchg. */
- bbs r23, .Lcmpxchg64
- andi r23, r0, 7 /* Precompute alignment for cmpxchg64. */
- }
- {
- s2a ATOMIC_LOCK_REG_NAME, r25, r21
- j .Lcmpxchg32_tns /* see comment in the #else for the jump. */
- }
-
-#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
{
/* Check for unaligned input. */
bnz sp, .Lcmpxchg_badaddr
@@ -1609,7 +1571,7 @@ ENTRY(sys_cmpxchg)
* Because of C pointer arithmetic, we want to compute this:
*
* ((char*)atomic_locks +
- * (((r0 >> 3) & (1 << (ATOMIC_HASH_SIZE - 1))) << 2))
+ * (((r0 >> 3) & ((1 << ATOMIC_HASH_SHIFT) - 1)) << 2))
*
* Instead of two shifts we just ">> 1", and use 'mm'
* to ignore the low and high bits we don't want.
@@ -1620,12 +1582,9 @@ ENTRY(sys_cmpxchg)
/*
* Ensure that the TLB is loaded before we take out the lock.
- * On tilepro, this will start fetching the value all the way
- * into our L1 as well (and if it gets modified before we
- * grab the lock, it will be invalidated from our cache
- * before we reload it). On tile64, we'll start fetching it
- * into our L1 if we're the home, and if we're not, we'll
- * still at least start fetching it into the home's L2.
+ * This will start fetching the value all the way into our L1
+ * as well (and if it gets modified before we grab the lock,
+ * it will be invalidated from our cache before we reload it).
*/
lw r26, r0
}
@@ -1668,8 +1627,6 @@ ENTRY(sys_cmpxchg)
j .Lcmpxchg32_tns
}
-#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
/* Symbol for do_page_fault_ics() to use to compare against the PC. */
.global __sys_cmpxchg_grab_lock
__sys_cmpxchg_grab_lock:
@@ -1807,9 +1764,6 @@ __sys_cmpxchg_grab_lock:
.align 64
.Lcmpxchg64:
{
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
- s2a ATOMIC_LOCK_REG_NAME, r25, r21
-#endif
bzt r23, .Lcmpxchg64_tns
}
j .Lcmpxchg_badaddr
@@ -1875,8 +1829,8 @@ int_unalign:
push_extra_callee_saves r0
j do_trap
-/* Include .intrpt1 array of interrupt vectors */
- .section ".intrpt1", "ax"
+/* Include .intrpt array of interrupt vectors */
+ .section ".intrpt", "ax"
#define op_handle_perf_interrupt bad_intr
#define op_handle_aux_perf_interrupt bad_intr
@@ -1944,10 +1898,8 @@ int_unalign:
do_page_fault
int_hand INT_SN_CPL, SN_CPL, bad_intr
int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap
-#if CHIP_HAS_AUX_PERF_COUNTERS()
int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \
op_handle_aux_perf_interrupt, handle_nmi
-#endif
/* Synthetic interrupt delivered only by the simulator */
int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index 85d483957027..ec755d3f3734 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -17,25 +17,33 @@
#include <linux/linkage.h>
#include <linux/errno.h>
#include <linux/unistd.h>
+#include <linux/init.h>
#include <asm/ptrace.h>
#include <asm/thread_info.h>
#include <asm/irqflags.h>
#include <asm/asm-offsets.h>
#include <asm/types.h>
+#include <asm/traps.h>
#include <asm/signal.h>
#include <hv/hypervisor.h>
#include <arch/abi.h>
#include <arch/interrupts.h>
#include <arch/spr_def.h>
-#ifdef CONFIG_PREEMPT
-# error "No support for kernel preemption currently"
-#endif
-
#define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg)
#define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR)
+#if CONFIG_KERNEL_PL == 1 || CONFIG_KERNEL_PL == 2
+/*
+ * Set "result" non-zero if ex1 holds the PL of the kernel
+ * (with or without ICS being set). Note this works only
+ * because we never find the PL at level 3.
+ */
+# define IS_KERNEL_EX1(result, ex1) andi result, ex1, CONFIG_KERNEL_PL
+#else
+# error Recode IS_KERNEL_EX1 for CONFIG_KERNEL_PL
+#endif
.macro push_reg reg, ptr=sp, delta=-8
{
@@ -98,6 +106,185 @@
}
.endm
+ /*
+ * Unalign data exception fast handling: In order to handle
+ * unaligned data access, a fast JIT version is generated and stored
+ * in a specific area in user space. We first need to do a quick poke
+ * to see if the JIT is available. We use certain bits in the fault
+ * PC (3 to 9 is used for 16KB page size) as index to address the JIT
+ * code area. The first 64bit word is the fault PC, and the 2nd one is
+ * the fault bundle itself. If these 2 words both match, then we
+ * directly "iret" to JIT code. If not, a slow path is invoked to
+ * generate new JIT code. Note: the current JIT code WILL be
+ * overwritten if it existed. So, ideally we can handle 128 unalign
+ * fixups via JIT. For lookup efficiency and to effectively support
+ * tight loops with multiple unaligned reference, a simple
+ * direct-mapped cache is used.
+ *
+ * SPR_EX_CONTEXT_K_0 is modified to return to JIT code.
+ * SPR_EX_CONTEXT_K_1 has ICS set.
+ * SPR_EX_CONTEXT_0_0 is setup to user program's next PC.
+ * SPR_EX_CONTEXT_0_1 = 0.
+ */
+ .macro int_hand_unalign_fast vecnum, vecname
+ .org (\vecnum << 8)
+intvec_\vecname:
+ /* Put r3 in SPR_SYSTEM_SAVE_K_1. */
+ mtspr SPR_SYSTEM_SAVE_K_1, r3
+
+ mfspr r3, SPR_EX_CONTEXT_K_1
+ /*
+ * Examine if exception comes from user without ICS set.
+ * If not, just go directly to the slow path.
+ */
+ bnez r3, hand_unalign_slow_nonuser
+
+ mfspr r3, SPR_SYSTEM_SAVE_K_0
+
+ /* Get &thread_info->unalign_jit_tmp[0] in r3. */
+ bfexts r3, r3, 0, CPU_SHIFT-1
+ mm r3, zero, LOG2_THREAD_SIZE, 63
+ addli r3, r3, THREAD_INFO_UNALIGN_JIT_TMP_OFFSET
+
+ /*
+ * Save r0, r1, r2 into thread_info array r3 points to
+ * from low to high memory in order.
+ */
+ st_add r3, r0, 8
+ st_add r3, r1, 8
+ {
+ st_add r3, r2, 8
+ andi r2, sp, 7
+ }
+
+ /* Save stored r3 value so we can revert it on a page fault. */
+ mfspr r1, SPR_SYSTEM_SAVE_K_1
+ st r3, r1
+
+ {
+ /* Generate a SIGBUS if sp is not 8-byte aligned. */
+ bnez r2, hand_unalign_slow_badsp
+ }
+
+ /*
+ * Get the thread_info in r0; load r1 with pc. Set the low bit of sp
+ * as an indicator to the page fault code in case we fault.
+ */
+ {
+ ori sp, sp, 1
+ mfspr r1, SPR_EX_CONTEXT_K_0
+ }
+
+ /* Add the jit_info offset in thread_info; extract r1 [3:9] into r2. */
+ {
+ addli r0, r3, THREAD_INFO_UNALIGN_JIT_BASE_OFFSET - \
+ (THREAD_INFO_UNALIGN_JIT_TMP_OFFSET + (3 * 8))
+ bfextu r2, r1, 3, (2 + PAGE_SHIFT - UNALIGN_JIT_SHIFT)
+ }
+
+ /* Load the jit_info; multiply r2 by 128. */
+ {
+ ld r0, r0
+ shli r2, r2, UNALIGN_JIT_SHIFT
+ }
+
+ /*
+ * If r0 is NULL, the JIT page is not mapped, so go to slow path;
+ * add offset r2 to r0 at the same time.
+ */
+ {
+ beqz r0, hand_unalign_slow
+ add r2, r0, r2
+ }
+
+ /*
+ * We are loading from userspace (both the JIT info PC and
+ * instruction word, and the instruction word we executed)
+ * and since either could fault while holding the interrupt
+ * critical section, we must tag this region and check it in
+ * do_page_fault() to handle it properly.
+ */
+ENTRY(__start_unalign_asm_code)
+
+ /* Load first word of JIT in r0 and increment r2 by 8. */
+ ld_add r0, r2, 8
+
+ /*
+ * Compare the PC with the 1st word in JIT; load the fault bundle
+ * into r1.
+ */
+ {
+ cmpeq r0, r0, r1
+ ld r1, r1
+ }
+
+ /* Go to slow path if PC doesn't match. */
+ beqz r0, hand_unalign_slow
+
+ /*
+ * Load the 2nd word of JIT, which is supposed to be the fault
+ * bundle for a cache hit. Increment r2; after this bundle r2 will
+ * point to the potential start of the JIT code we want to run.
+ */
+ ld_add r0, r2, 8
+
+ /* No further accesses to userspace are done after this point. */
+ENTRY(__end_unalign_asm_code)
+
+ /* Compare the real bundle with what is saved in the JIT area. */
+ {
+ cmpeq r0, r1, r0
+ mtspr SPR_EX_CONTEXT_0_1, zero
+ }
+
+ /* Go to slow path if the fault bundle does not match. */
+ beqz r0, hand_unalign_slow
+
+ /*
+ * A cache hit is found.
+ * r2 points to start of JIT code (3rd word).
+ * r0 is the fault pc.
+ * r1 is the fault bundle.
+ * Reset the low bit of sp.
+ */
+ {
+ mfspr r0, SPR_EX_CONTEXT_K_0
+ andi sp, sp, ~1
+ }
+
+ /* Write r2 into EX_CONTEXT_K_0 and increment PC. */
+ {
+ mtspr SPR_EX_CONTEXT_K_0, r2
+ addi r0, r0, 8
+ }
+
+ /*
+ * Set ICS on kernel EX_CONTEXT_K_1 in order to "iret" to
+ * user with ICS set. This way, if the JIT fixup causes another
+ * unalign exception (which shouldn't be possible) the user
+ * process will be terminated with SIGBUS. Also, our fixup will
+ * run without interleaving with external interrupts.
+ * Each fixup is at most 14 bundles, so it won't hold ICS for long.
+ */
+ {
+ movei r1, PL_ICS_EX1(USER_PL, 1)
+ mtspr SPR_EX_CONTEXT_0_0, r0
+ }
+
+ {
+ mtspr SPR_EX_CONTEXT_K_1, r1
+ addi r3, r3, -(3 * 8)
+ }
+
+ /* Restore r0..r3. */
+ ld_add r0, r3, 8
+ ld_add r1, r3, 8
+ ld_add r2, r3, 8
+ ld r3, r3
+
+ iret
+ ENDPROC(intvec_\vecname)
+ .endm
#ifdef __COLLECT_LINKER_FEEDBACK__
.pushsection .text.intvec_feedback,"ax"
@@ -118,15 +305,21 @@ intvec_feedback:
* The "processing" argument specifies the code for processing
* the interrupt. Defaults to "handle_interrupt".
*/
- .macro int_hand vecnum, vecname, c_routine, processing=handle_interrupt
- .org (\vecnum << 8)
+ .macro __int_hand vecnum, vecname, c_routine,processing=handle_interrupt
intvec_\vecname:
/* Temporarily save a register so we have somewhere to work. */
mtspr SPR_SYSTEM_SAVE_K_1, r0
mfspr r0, SPR_EX_CONTEXT_K_1
- andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ /*
+ * The unalign data fastpath code sets the low bit in sp to
+ * force us to reset it here on fault.
+ */
+ {
+ blbs sp, 2f
+ IS_KERNEL_EX1(r0, r0)
+ }
.ifc \vecnum, INT_DOUBLE_FAULT
/*
@@ -176,15 +369,15 @@ intvec_\vecname:
}
.endif
-
+2:
/*
- * SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and
- * the current stack top in the higher bits. So we recover
- * our stack top by just masking off the low bits, then
+ * SYSTEM_SAVE_K_0 holds the cpu number in the high bits, and
+ * the current stack top in the lower bits. So we recover
+ * our starting stack value by sign-extending the low bits, then
* point sp at the top aligned address on the actual stack page.
*/
mfspr r0, SPR_SYSTEM_SAVE_K_0
- mm r0, zero, LOG2_THREAD_SIZE, 63
+ bfexts r0, r0, 0, CPU_SHIFT-1
0:
/*
@@ -206,6 +399,9 @@ intvec_\vecname:
* cache line 1: r6...r13
* cache line 0: 2 x frame, r0..r5
*/
+#if STACK_TOP_DELTA != 64
+#error STACK_TOP_DELTA must be 64 for assumptions here and in task_pt_regs()
+#endif
andi r0, r0, -64
/*
@@ -305,7 +501,7 @@ intvec_\vecname:
mfspr r3, SPR_SYSTEM_SAVE_K_2 /* info about page fault */
.else
.ifc \vecnum, INT_ILL_TRANS
- mfspr r2, ILL_TRANS_REASON
+ mfspr r2, ILL_VA_PC
.else
.ifc \vecnum, INT_DOUBLE_FAULT
mfspr r2, SPR_SYSTEM_SAVE_K_2 /* double fault info from HV */
@@ -315,12 +511,10 @@ intvec_\vecname:
.else
.ifc \c_routine, op_handle_perf_interrupt
mfspr r2, PERF_COUNT_STS
-#if CHIP_HAS_AUX_PERF_COUNTERS()
.else
.ifc \c_routine, op_handle_aux_perf_interrupt
mfspr r2, AUX_PERF_COUNT_STS
.endif
-#endif
.endif
.endif
.endif
@@ -339,7 +533,7 @@ intvec_\vecname:
#ifdef __COLLECT_LINKER_FEEDBACK__
.pushsection .text.intvec_feedback,"ax"
.org (\vecnum << 5)
- FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt1, 1 << 8)
+ FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt, 1 << 8)
jrp lr
.popsection
#endif
@@ -455,11 +649,12 @@ intvec_\vecname:
/*
* If we will be returning to the kernel, we will need to
* reset the interrupt masks to the state they had before.
- * Set DISABLE_IRQ in flags iff we came from PL1 with irqs disabled.
+ * Set DISABLE_IRQ in flags iff we came from kernel pl with
+ * irqs disabled.
*/
mfspr r32, SPR_EX_CONTEXT_K_1
{
- andi r32, r32, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ IS_KERNEL_EX1(r22, r22)
PTREGS_PTR(r21, PTREGS_OFFSET_FLAGS)
}
beqzt r32, 1f /* zero if from user space */
@@ -503,7 +698,7 @@ intvec_\vecname:
}
{
shl16insli r21, r21, hw1(__per_cpu_offset)
- bfextu r20, r20, 0, LOG2_THREAD_SIZE-1
+ bfextu r20, r20, CPU_SHIFT, 63
}
shl16insli r21, r21, hw0(__per_cpu_offset)
shl3add r20, r20, r21
@@ -585,7 +780,7 @@ intvec_\vecname:
.macro dc_dispatch vecnum, vecname
.org (\vecnum << 8)
intvec_\vecname:
- j hv_downcall_dispatch
+ j _hv_downcall_dispatch
ENDPROC(intvec_\vecname)
.endm
@@ -626,14 +821,36 @@ STD_ENTRY(interrupt_return)
PTREGS_PTR(r29, PTREGS_OFFSET_EX1)
}
ld r29, r29
- andi r29, r29, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ IS_KERNEL_EX1(r29, r29)
{
beqzt r29, .Lresume_userspace
- PTREGS_PTR(r29, PTREGS_OFFSET_PC)
+ move r29, sp
+ }
+
+#ifdef CONFIG_PREEMPT
+ /* Returning to kernel space. Check if we need preemption. */
+ EXTRACT_THREAD_INFO(r29)
+ addli r28, r29, THREAD_INFO_FLAGS_OFFSET
+ {
+ ld r28, r28
+ addli r29, r29, THREAD_INFO_PREEMPT_COUNT_OFFSET
+ }
+ {
+ andi r28, r28, _TIF_NEED_RESCHED
+ ld4s r29, r29
}
+ beqzt r28, 1f
+ bnez r29, 1f
+ jal preempt_schedule_irq
+ FEEDBACK_REENTER(interrupt_return)
+1:
+#endif
/* If we're resuming to _cpu_idle_nap, bump PC forward by 8. */
- moveli r27, hw2_last(_cpu_idle_nap)
+ {
+ moveli r27, hw2_last(_cpu_idle_nap)
+ PTREGS_PTR(r29, PTREGS_OFFSET_PC)
+ }
{
ld r28, r29
shl16insli r27, r27, hw1(_cpu_idle_nap)
@@ -728,7 +945,7 @@ STD_ENTRY(interrupt_return)
PTREGS_PTR(r32, PTREGS_OFFSET_FLAGS)
}
{
- andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK
+ IS_KERNEL_EX1(r0, r0)
ld r32, r32
}
bnez r0, 1f
@@ -799,7 +1016,7 @@ STD_ENTRY(interrupt_return)
pop_reg r21, sp, PTREGS_OFFSET_REG(31) - PTREGS_OFFSET_PC
{
mtspr SPR_EX_CONTEXT_K_1, lr
- andi lr, lr, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ IS_KERNEL_EX1(lr, lr)
}
{
mtspr SPR_EX_CONTEXT_K_0, r21
@@ -1223,10 +1440,31 @@ STD_ENTRY(_sys_clone)
j sys_clone
STD_ENDPROC(_sys_clone)
-/* The single-step support may need to read all the registers. */
+ /*
+ * Recover r3, r2, r1 and r0 here saved by unalign fast vector.
+ * The vector area limit is 32 bundles, so we handle the reload here.
+ * r0, r1, r2 are in thread_info from low to high memory in order.
+ * r3 points to location the original r3 was saved.
+ * We put this code in the __HEAD section so it can be reached
+ * via a conditional branch from the fast path.
+ */
+ __HEAD
+hand_unalign_slow:
+ andi sp, sp, ~1
+hand_unalign_slow_badsp:
+ addi r3, r3, -(3 * 8)
+ ld_add r0, r3, 8
+ ld_add r1, r3, 8
+ ld r2, r3
+hand_unalign_slow_nonuser:
+ mfspr r3, SPR_SYSTEM_SAVE_K_1
+ __int_hand INT_UNALIGN_DATA, UNALIGN_DATA_SLOW, int_unalign
+
+/* The unaligned data support needs to read all the registers. */
int_unalign:
push_extra_callee_saves r0
- j do_trap
+ j do_unaligned
+ENDPROC(hand_unalign_slow)
/* Fill the return address stack with nonzero entries. */
STD_ENTRY(fill_ra_stack)
@@ -1240,8 +1478,15 @@ STD_ENTRY(fill_ra_stack)
4: jrp r0
STD_ENDPROC(fill_ra_stack)
-/* Include .intrpt1 array of interrupt vectors */
- .section ".intrpt1", "ax"
+ .macro int_hand vecnum, vecname, c_routine, processing=handle_interrupt
+ .org (\vecnum << 8)
+ __int_hand \vecnum, \vecname, \c_routine, \processing
+ .endm
+
+/* Include .intrpt array of interrupt vectors */
+ .section ".intrpt", "ax"
+ .global intrpt_start
+intrpt_start:
#define op_handle_perf_interrupt bad_intr
#define op_handle_aux_perf_interrupt bad_intr
@@ -1272,7 +1517,7 @@ STD_ENTRY(fill_ra_stack)
int_hand INT_SWINT_1, SWINT_1, SYSCALL, handle_syscall
int_hand INT_SWINT_0, SWINT_0, do_trap
int_hand INT_ILL_TRANS, ILL_TRANS, do_trap
- int_hand INT_UNALIGN_DATA, UNALIGN_DATA, int_unalign
+ int_hand_unalign_fast INT_UNALIGN_DATA, UNALIGN_DATA
int_hand INT_DTLB_MISS, DTLB_MISS, do_page_fault
int_hand INT_DTLB_ACCESS, DTLB_ACCESS, do_page_fault
int_hand INT_IDN_FIREWALL, IDN_FIREWALL, do_hardwall_trap
diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c
index 3ccf2cd7182e..0586fdb9352d 100644
--- a/arch/tile/kernel/irq.c
+++ b/arch/tile/kernel/irq.c
@@ -55,7 +55,8 @@ static DEFINE_PER_CPU(int, irq_depth);
/* State for allocating IRQs on Gx. */
#if CHIP_HAS_IPI()
-static unsigned long available_irqs = ~(1UL << IRQ_RESCHEDULE);
+static unsigned long available_irqs = ((1UL << NR_IRQS) - 1) &
+ (~(1UL << IRQ_RESCHEDULE));
static DEFINE_SPINLOCK(available_irqs_lock);
#endif
@@ -73,7 +74,8 @@ static DEFINE_SPINLOCK(available_irqs_lock);
/*
* The interrupt handling path, implemented in terms of HV interrupt
- * emulation on TILE64 and TILEPro, and IPI hardware on TILE-Gx.
+ * emulation on TILEPro, and IPI hardware on TILE-Gx.
+ * Entered with interrupts disabled.
*/
void tile_dev_intr(struct pt_regs *regs, int intnum)
{
@@ -233,7 +235,7 @@ void tile_irq_activate(unsigned int irq, int tile_irq_type)
{
/*
* We use handle_level_irq() by default because the pending
- * interrupt vector (whether modeled by the HV on TILE64 and
+ * interrupt vector (whether modeled by the HV on
* TILEPro or implemented in hardware on TILE-Gx) has
* level-style semantics for each bit. An interrupt fires
* whenever a bit is high, not just at edges.
diff --git a/arch/tile/kernel/kgdb.c b/arch/tile/kernel/kgdb.c
new file mode 100644
index 000000000000..4cd88381a83e
--- /dev/null
+++ b/arch/tile/kernel/kgdb.c
@@ -0,0 +1,499 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * TILE-Gx KGDB support.
+ */
+
+#include <linux/ptrace.h>
+#include <linux/kgdb.h>
+#include <linux/kdebug.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <asm/cacheflush.h>
+
+static tile_bundle_bits singlestep_insn = TILEGX_BPT_BUNDLE | DIE_SSTEPBP;
+static unsigned long stepped_addr;
+static tile_bundle_bits stepped_instr;
+
+struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
+ { "r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[0])},
+ { "r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[1])},
+ { "r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[2])},
+ { "r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[3])},
+ { "r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[4])},
+ { "r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[5])},
+ { "r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[6])},
+ { "r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[7])},
+ { "r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[8])},
+ { "r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[9])},
+ { "r10", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[10])},
+ { "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[11])},
+ { "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[12])},
+ { "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[13])},
+ { "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[14])},
+ { "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[15])},
+ { "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[16])},
+ { "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[17])},
+ { "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[18])},
+ { "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[19])},
+ { "r20", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[20])},
+ { "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[21])},
+ { "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[22])},
+ { "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[23])},
+ { "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[24])},
+ { "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[25])},
+ { "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[26])},
+ { "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[27])},
+ { "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[28])},
+ { "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[29])},
+ { "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[30])},
+ { "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[31])},
+ { "r32", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[32])},
+ { "r33", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[33])},
+ { "r34", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[34])},
+ { "r35", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[35])},
+ { "r36", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[36])},
+ { "r37", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[37])},
+ { "r38", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[38])},
+ { "r39", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[39])},
+ { "r40", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[40])},
+ { "r41", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[41])},
+ { "r42", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[42])},
+ { "r43", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[43])},
+ { "r44", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[44])},
+ { "r45", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[45])},
+ { "r46", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[46])},
+ { "r47", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[47])},
+ { "r48", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[48])},
+ { "r49", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[49])},
+ { "r50", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[50])},
+ { "r51", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[51])},
+ { "r52", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[52])},
+ { "tp", GDB_SIZEOF_REG, offsetof(struct pt_regs, tp)},
+ { "sp", GDB_SIZEOF_REG, offsetof(struct pt_regs, sp)},
+ { "lr", GDB_SIZEOF_REG, offsetof(struct pt_regs, lr)},
+ { "sn", GDB_SIZEOF_REG, -1},
+ { "idn0", GDB_SIZEOF_REG, -1},
+ { "idn1", GDB_SIZEOF_REG, -1},
+ { "udn0", GDB_SIZEOF_REG, -1},
+ { "udn1", GDB_SIZEOF_REG, -1},
+ { "udn2", GDB_SIZEOF_REG, -1},
+ { "udn3", GDB_SIZEOF_REG, -1},
+ { "zero", GDB_SIZEOF_REG, -1},
+ { "pc", GDB_SIZEOF_REG, offsetof(struct pt_regs, pc)},
+ { "faultnum", GDB_SIZEOF_REG, offsetof(struct pt_regs, faultnum)},
+};
+
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+ if (regno >= DBG_MAX_REG_NUM || regno < 0)
+ return NULL;
+
+ if (dbg_reg_def[regno].offset != -1)
+ memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
+ dbg_reg_def[regno].size);
+ else
+ memset(mem, 0, dbg_reg_def[regno].size);
+ return dbg_reg_def[regno].name;
+}
+
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+ if (regno >= DBG_MAX_REG_NUM || regno < 0)
+ return -EINVAL;
+
+ if (dbg_reg_def[regno].offset != -1)
+ memcpy((void *)regs + dbg_reg_def[regno].offset, mem,
+ dbg_reg_def[regno].size);
+ return 0;
+}
+
+/*
+ * Similar to pt_regs_to_gdb_regs() except that process is sleeping and so
+ * we may not be able to get all the info.
+ */
+void
+sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task)
+{
+ int reg;
+ struct pt_regs *thread_regs;
+ unsigned long *ptr = gdb_regs;
+
+ if (task == NULL)
+ return;
+
+ /* Initialize to zero. */
+ memset(gdb_regs, 0, NUMREGBYTES);
+
+ thread_regs = task_pt_regs(task);
+ for (reg = 0; reg <= TREG_LAST_GPR; reg++)
+ *(ptr++) = thread_regs->regs[reg];
+
+ gdb_regs[TILEGX_PC_REGNUM] = thread_regs->pc;
+ gdb_regs[TILEGX_FAULTNUM_REGNUM] = thread_regs->faultnum;
+}
+
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+ regs->pc = pc;
+}
+
+static void kgdb_call_nmi_hook(void *ignored)
+{
+ kgdb_nmicallback(raw_smp_processor_id(), NULL);
+}
+
+void kgdb_roundup_cpus(unsigned long flags)
+{
+ local_irq_enable();
+ smp_call_function(kgdb_call_nmi_hook, NULL, 0);
+ local_irq_disable();
+}
+
+/*
+ * Convert a kernel address to the writable kernel text mapping.
+ */
+static unsigned long writable_address(unsigned long addr)
+{
+ unsigned long ret = 0;
+
+ if (core_kernel_text(addr))
+ ret = addr - MEM_SV_START + PAGE_OFFSET;
+ else if (is_module_text_address(addr))
+ ret = addr;
+ else
+ pr_err("Unknown virtual address 0x%lx\n", addr);
+
+ return ret;
+}
+
+/*
+ * Calculate the new address for after a step.
+ */
+static unsigned long get_step_address(struct pt_regs *regs)
+{
+ int src_reg;
+ int jump_off;
+ int br_off;
+ unsigned long addr;
+ unsigned int opcode;
+ tile_bundle_bits bundle;
+
+ /* Move to the next instruction by default. */
+ addr = regs->pc + TILEGX_BUNDLE_SIZE_IN_BYTES;
+ bundle = *(unsigned long *)instruction_pointer(regs);
+
+ /* 0: X mode, Otherwise: Y mode. */
+ if (bundle & TILEGX_BUNDLE_MODE_MASK) {
+ if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 &&
+ get_RRROpcodeExtension_Y1(bundle) ==
+ UNARY_RRR_1_OPCODE_Y1) {
+ opcode = get_UnaryOpcodeExtension_Y1(bundle);
+
+ switch (opcode) {
+ case JALR_UNARY_OPCODE_Y1:
+ case JALRP_UNARY_OPCODE_Y1:
+ case JR_UNARY_OPCODE_Y1:
+ case JRP_UNARY_OPCODE_Y1:
+ src_reg = get_SrcA_Y1(bundle);
+ dbg_get_reg(src_reg, &addr, regs);
+ break;
+ }
+ }
+ } else if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) {
+ if (get_RRROpcodeExtension_X1(bundle) ==
+ UNARY_RRR_0_OPCODE_X1) {
+ opcode = get_UnaryOpcodeExtension_X1(bundle);
+
+ switch (opcode) {
+ case JALR_UNARY_OPCODE_X1:
+ case JALRP_UNARY_OPCODE_X1:
+ case JR_UNARY_OPCODE_X1:
+ case JRP_UNARY_OPCODE_X1:
+ src_reg = get_SrcA_X1(bundle);
+ dbg_get_reg(src_reg, &addr, regs);
+ break;
+ }
+ }
+ } else if (get_Opcode_X1(bundle) == JUMP_OPCODE_X1) {
+ opcode = get_JumpOpcodeExtension_X1(bundle);
+
+ switch (opcode) {
+ case JAL_JUMP_OPCODE_X1:
+ case J_JUMP_OPCODE_X1:
+ jump_off = sign_extend(get_JumpOff_X1(bundle), 27);
+ addr = regs->pc +
+ (jump_off << TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES);
+ break;
+ }
+ } else if (get_Opcode_X1(bundle) == BRANCH_OPCODE_X1) {
+ br_off = 0;
+ opcode = get_BrType_X1(bundle);
+
+ switch (opcode) {
+ case BEQZT_BRANCH_OPCODE_X1:
+ case BEQZ_BRANCH_OPCODE_X1:
+ if (get_SrcA_X1(bundle) == 0)
+ br_off = get_BrOff_X1(bundle);
+ break;
+ case BGEZT_BRANCH_OPCODE_X1:
+ case BGEZ_BRANCH_OPCODE_X1:
+ if (get_SrcA_X1(bundle) >= 0)
+ br_off = get_BrOff_X1(bundle);
+ break;
+ case BGTZT_BRANCH_OPCODE_X1:
+ case BGTZ_BRANCH_OPCODE_X1:
+ if (get_SrcA_X1(bundle) > 0)
+ br_off = get_BrOff_X1(bundle);
+ break;
+ case BLBCT_BRANCH_OPCODE_X1:
+ case BLBC_BRANCH_OPCODE_X1:
+ if (!(get_SrcA_X1(bundle) & 1))
+ br_off = get_BrOff_X1(bundle);
+ break;
+ case BLBST_BRANCH_OPCODE_X1:
+ case BLBS_BRANCH_OPCODE_X1:
+ if (get_SrcA_X1(bundle) & 1)
+ br_off = get_BrOff_X1(bundle);
+ break;
+ case BLEZT_BRANCH_OPCODE_X1:
+ case BLEZ_BRANCH_OPCODE_X1:
+ if (get_SrcA_X1(bundle) <= 0)
+ br_off = get_BrOff_X1(bundle);
+ break;
+ case BLTZT_BRANCH_OPCODE_X1:
+ case BLTZ_BRANCH_OPCODE_X1:
+ if (get_SrcA_X1(bundle) < 0)
+ br_off = get_BrOff_X1(bundle);
+ break;
+ case BNEZT_BRANCH_OPCODE_X1:
+ case BNEZ_BRANCH_OPCODE_X1:
+ if (get_SrcA_X1(bundle) != 0)
+ br_off = get_BrOff_X1(bundle);
+ break;
+ }
+
+ if (br_off != 0) {
+ br_off = sign_extend(br_off, 17);
+ addr = regs->pc +
+ (br_off << TILEGX_LOG2_BUNDLE_SIZE_IN_BYTES);
+ }
+ }
+
+ return addr;
+}
+
+/*
+ * Replace the next instruction after the current instruction with a
+ * breakpoint instruction.
+ */
+static void do_single_step(struct pt_regs *regs)
+{
+ unsigned long addr_wr;
+
+ /* Determine where the target instruction will send us to. */
+ stepped_addr = get_step_address(regs);
+ probe_kernel_read((char *)&stepped_instr, (char *)stepped_addr,
+ BREAK_INSTR_SIZE);
+
+ addr_wr = writable_address(stepped_addr);
+ probe_kernel_write((char *)addr_wr, (char *)&singlestep_insn,
+ BREAK_INSTR_SIZE);
+ smp_wmb();
+ flush_icache_range(stepped_addr, stepped_addr + BREAK_INSTR_SIZE);
+}
+
+static void undo_single_step(struct pt_regs *regs)
+{
+ unsigned long addr_wr;
+
+ if (stepped_instr == 0)
+ return;
+
+ addr_wr = writable_address(stepped_addr);
+ probe_kernel_write((char *)addr_wr, (char *)&stepped_instr,
+ BREAK_INSTR_SIZE);
+ stepped_instr = 0;
+ smp_wmb();
+ flush_icache_range(stepped_addr, stepped_addr + BREAK_INSTR_SIZE);
+}
+
+/*
+ * Calls linux_debug_hook before the kernel dies. If KGDB is enabled,
+ * then try to fall into the debugger.
+ */
+static int
+kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
+{
+ int ret;
+ unsigned long flags;
+ struct die_args *args = (struct die_args *)ptr;
+ struct pt_regs *regs = args->regs;
+
+#ifdef CONFIG_KPROBES
+ /*
+ * Return immediately if the kprobes fault notifier has set
+ * DIE_PAGE_FAULT.
+ */
+ if (cmd == DIE_PAGE_FAULT)
+ return NOTIFY_DONE;
+#endif /* CONFIG_KPROBES */
+
+ switch (cmd) {
+ case DIE_BREAK:
+ case DIE_COMPILED_BPT:
+ break;
+ case DIE_SSTEPBP:
+ local_irq_save(flags);
+ kgdb_handle_exception(0, SIGTRAP, 0, regs);
+ local_irq_restore(flags);
+ return NOTIFY_STOP;
+ default:
+ /* Userspace events, ignore. */
+ if (user_mode(regs))
+ return NOTIFY_DONE;
+ }
+
+ local_irq_save(flags);
+ ret = kgdb_handle_exception(args->trapnr, args->signr, args->err, regs);
+ local_irq_restore(flags);
+ if (ret)
+ return NOTIFY_DONE;
+
+ return NOTIFY_STOP;
+}
+
+static struct notifier_block kgdb_notifier = {
+ .notifier_call = kgdb_notify,
+};
+
+/*
+ * kgdb_arch_handle_exception - Handle architecture specific GDB packets.
+ * @vector: The error vector of the exception that happened.
+ * @signo: The signal number of the exception that happened.
+ * @err_code: The error code of the exception that happened.
+ * @remcom_in_buffer: The buffer of the packet we have read.
+ * @remcom_out_buffer: The buffer of %BUFMAX bytes to write a packet into.
+ * @regs: The &struct pt_regs of the current process.
+ *
+ * This function MUST handle the 'c' and 's' command packets,
+ * as well packets to set / remove a hardware breakpoint, if used.
+ * If there are additional packets which the hardware needs to handle,
+ * they are handled here. The code should return -1 if it wants to
+ * process more packets, and a %0 or %1 if it wants to exit from the
+ * kgdb callback.
+ */
+int kgdb_arch_handle_exception(int vector, int signo, int err_code,
+ char *remcom_in_buffer, char *remcom_out_buffer,
+ struct pt_regs *regs)
+{
+ char *ptr;
+ unsigned long address;
+
+ /* Undo any stepping we may have done. */
+ undo_single_step(regs);
+
+ switch (remcom_in_buffer[0]) {
+ case 'c':
+ case 's':
+ case 'D':
+ case 'k':
+ /*
+ * Try to read optional parameter, pc unchanged if no parm.
+ * If this was a compiled-in breakpoint, we need to move
+ * to the next instruction or we will just breakpoint
+ * over and over again.
+ */
+ ptr = &remcom_in_buffer[1];
+ if (kgdb_hex2long(&ptr, &address))
+ regs->pc = address;
+ else if (*(unsigned long *)regs->pc == compiled_bpt)
+ regs->pc += BREAK_INSTR_SIZE;
+
+ if (remcom_in_buffer[0] == 's') {
+ do_single_step(regs);
+ kgdb_single_step = 1;
+ atomic_set(&kgdb_cpu_doing_single_step,
+ raw_smp_processor_id());
+ } else
+ atomic_set(&kgdb_cpu_doing_single_step, -1);
+
+ return 0;
+ }
+
+ return -1; /* this means that we do not want to exit from the handler */
+}
+
+struct kgdb_arch arch_kgdb_ops;
+
+/*
+ * kgdb_arch_init - Perform any architecture specific initalization.
+ *
+ * This function will handle the initalization of any architecture
+ * specific callbacks.
+ */
+int kgdb_arch_init(void)
+{
+ tile_bundle_bits bundle = TILEGX_BPT_BUNDLE;
+
+ memcpy(arch_kgdb_ops.gdb_bpt_instr, &bundle, BREAK_INSTR_SIZE);
+ return register_die_notifier(&kgdb_notifier);
+}
+
+/*
+ * kgdb_arch_exit - Perform any architecture specific uninitalization.
+ *
+ * This function will handle the uninitalization of any architecture
+ * specific callbacks, for dynamic registration and unregistration.
+ */
+void kgdb_arch_exit(void)
+{
+ unregister_die_notifier(&kgdb_notifier);
+}
+
+int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
+{
+ int err;
+ unsigned long addr_wr = writable_address(bpt->bpt_addr);
+
+ if (addr_wr == 0)
+ return -1;
+
+ err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
+ BREAK_INSTR_SIZE);
+ if (err)
+ return err;
+
+ err = probe_kernel_write((char *)addr_wr, arch_kgdb_ops.gdb_bpt_instr,
+ BREAK_INSTR_SIZE);
+ smp_wmb();
+ flush_icache_range((unsigned long)bpt->bpt_addr,
+ (unsigned long)bpt->bpt_addr + BREAK_INSTR_SIZE);
+ return err;
+}
+
+int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
+{
+ int err;
+ unsigned long addr_wr = writable_address(bpt->bpt_addr);
+
+ if (addr_wr == 0)
+ return -1;
+
+ err = probe_kernel_write((char *)addr_wr, (char *)bpt->saved_instr,
+ BREAK_INSTR_SIZE);
+ smp_wmb();
+ flush_icache_range((unsigned long)bpt->bpt_addr,
+ (unsigned long)bpt->bpt_addr + BREAK_INSTR_SIZE);
+ return err;
+}
diff --git a/arch/tile/kernel/kprobes.c b/arch/tile/kernel/kprobes.c
new file mode 100644
index 000000000000..27cdcacbe81d
--- /dev/null
+++ b/arch/tile/kernel/kprobes.c
@@ -0,0 +1,528 @@
+/*
+ * arch/tile/kernel/kprobes.c
+ * Kprobes on TILE-Gx
+ *
+ * Some portions copied from the MIPS version.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ * Copyright 2006 Sony Corp.
+ * Copyright 2010 Cavium Networks
+ *
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+
+#include <arch/opcode.h>
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+tile_bundle_bits breakpoint_insn = TILEGX_BPT_BUNDLE;
+tile_bundle_bits breakpoint2_insn = TILEGX_BPT_BUNDLE | DIE_SSTEPBP;
+
+/*
+ * Check whether instruction is branch or jump, or if executing it
+ * has different results depending on where it is executed (e.g. lnk).
+ */
+static int __kprobes insn_has_control(kprobe_opcode_t insn)
+{
+ if (get_Mode(insn) != 0) { /* Y-format bundle */
+ if (get_Opcode_Y1(insn) != RRR_1_OPCODE_Y1 ||
+ get_RRROpcodeExtension_Y1(insn) != UNARY_RRR_1_OPCODE_Y1)
+ return 0;
+
+ switch (get_UnaryOpcodeExtension_Y1(insn)) {
+ case JALRP_UNARY_OPCODE_Y1:
+ case JALR_UNARY_OPCODE_Y1:
+ case JRP_UNARY_OPCODE_Y1:
+ case JR_UNARY_OPCODE_Y1:
+ case LNK_UNARY_OPCODE_Y1:
+ return 1;
+ default:
+ return 0;
+ }
+ }
+
+ switch (get_Opcode_X1(insn)) {
+ case BRANCH_OPCODE_X1: /* branch instructions */
+ case JUMP_OPCODE_X1: /* jump instructions: j and jal */
+ return 1;
+
+ case RRR_0_OPCODE_X1: /* other jump instructions */
+ if (get_RRROpcodeExtension_X1(insn) != UNARY_RRR_0_OPCODE_X1)
+ return 0;
+ switch (get_UnaryOpcodeExtension_X1(insn)) {
+ case JALRP_UNARY_OPCODE_X1:
+ case JALR_UNARY_OPCODE_X1:
+ case JRP_UNARY_OPCODE_X1:
+ case JR_UNARY_OPCODE_X1:
+ case LNK_UNARY_OPCODE_X1:
+ return 1;
+ default:
+ return 0;
+ }
+ default:
+ return 0;
+ }
+}
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+ unsigned long addr = (unsigned long)p->addr;
+
+ if (addr & (sizeof(kprobe_opcode_t) - 1))
+ return -EINVAL;
+
+ if (insn_has_control(*p->addr)) {
+ pr_notice("Kprobes for control instructions are not "
+ "supported\n");
+ return -EINVAL;
+ }
+
+ /* insn: must be on special executable page on tile. */
+ p->ainsn.insn = get_insn_slot();
+ if (!p->ainsn.insn)
+ return -ENOMEM;
+
+ /*
+ * In the kprobe->ainsn.insn[] array we store the original
+ * instruction at index zero and a break trap instruction at
+ * index one.
+ */
+ memcpy(&p->ainsn.insn[0], p->addr, sizeof(kprobe_opcode_t));
+ p->ainsn.insn[1] = breakpoint2_insn;
+ p->opcode = *p->addr;
+
+ return 0;
+}
+
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+ unsigned long addr_wr;
+
+ /* Operate on writable kernel text mapping. */
+ addr_wr = (unsigned long)p->addr - MEM_SV_START + PAGE_OFFSET;
+
+ if (probe_kernel_write((void *)addr_wr, &breakpoint_insn,
+ sizeof(breakpoint_insn)))
+ pr_err("%s: failed to enable kprobe\n", __func__);
+
+ smp_wmb();
+ flush_insn_slot(p);
+}
+
+void __kprobes arch_disarm_kprobe(struct kprobe *kp)
+{
+ unsigned long addr_wr;
+
+ /* Operate on writable kernel text mapping. */
+ addr_wr = (unsigned long)kp->addr - MEM_SV_START + PAGE_OFFSET;
+
+ if (probe_kernel_write((void *)addr_wr, &kp->opcode,
+ sizeof(kp->opcode)))
+ pr_err("%s: failed to enable kprobe\n", __func__);
+
+ smp_wmb();
+ flush_insn_slot(kp);
+}
+
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+ if (p->ainsn.insn) {
+ free_insn_slot(p->ainsn.insn, 0);
+ p->ainsn.insn = NULL;
+ }
+}
+
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ kcb->prev_kprobe.kp = kprobe_running();
+ kcb->prev_kprobe.status = kcb->kprobe_status;
+ kcb->prev_kprobe.saved_pc = kcb->kprobe_saved_pc;
+}
+
+static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
+ kcb->kprobe_status = kcb->prev_kprobe.status;
+ kcb->kprobe_saved_pc = kcb->prev_kprobe.saved_pc;
+}
+
+static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ __this_cpu_write(current_kprobe, p);
+ kcb->kprobe_saved_pc = regs->pc;
+}
+
+static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+{
+ /* Single step inline if the instruction is a break. */
+ if (p->opcode == breakpoint_insn ||
+ p->opcode == breakpoint2_insn)
+ regs->pc = (unsigned long)p->addr;
+ else
+ regs->pc = (unsigned long)&p->ainsn.insn[0];
+}
+
+static int __kprobes kprobe_handler(struct pt_regs *regs)
+{
+ struct kprobe *p;
+ int ret = 0;
+ kprobe_opcode_t *addr;
+ struct kprobe_ctlblk *kcb;
+
+ addr = (kprobe_opcode_t *)regs->pc;
+
+ /*
+ * We don't want to be preempted for the entire
+ * duration of kprobe processing.
+ */
+ preempt_disable();
+ kcb = get_kprobe_ctlblk();
+
+ /* Check we're not actually recursing. */
+ if (kprobe_running()) {
+ p = get_kprobe(addr);
+ if (p) {
+ if (kcb->kprobe_status == KPROBE_HIT_SS &&
+ p->ainsn.insn[0] == breakpoint_insn) {
+ goto no_kprobe;
+ }
+ /*
+ * We have reentered the kprobe_handler(), since
+ * another probe was hit while within the handler.
+ * We here save the original kprobes variables and
+ * just single step on the instruction of the new probe
+ * without calling any user handlers.
+ */
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p, regs, kcb);
+ kprobes_inc_nmissed_count(p);
+ prepare_singlestep(p, regs);
+ kcb->kprobe_status = KPROBE_REENTER;
+ return 1;
+ } else {
+ if (*addr != breakpoint_insn) {
+ /*
+ * The breakpoint instruction was removed by
+ * another cpu right after we hit, no further
+ * handling of this interrupt is appropriate.
+ */
+ ret = 1;
+ goto no_kprobe;
+ }
+ p = __this_cpu_read(current_kprobe);
+ if (p->break_handler && p->break_handler(p, regs))
+ goto ss_probe;
+ }
+ goto no_kprobe;
+ }
+
+ p = get_kprobe(addr);
+ if (!p) {
+ if (*addr != breakpoint_insn) {
+ /*
+ * The breakpoint instruction was removed right
+ * after we hit it. Another cpu has removed
+ * either a probepoint or a debugger breakpoint
+ * at this address. In either case, no further
+ * handling of this interrupt is appropriate.
+ */
+ ret = 1;
+ }
+ /* Not one of ours: let kernel handle it. */
+ goto no_kprobe;
+ }
+
+ set_current_kprobe(p, regs, kcb);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+ if (p->pre_handler && p->pre_handler(p, regs)) {
+ /* Handler has already set things up, so skip ss setup. */
+ return 1;
+ }
+
+ss_probe:
+ prepare_singlestep(p, regs);
+ kcb->kprobe_status = KPROBE_HIT_SS;
+ return 1;
+
+no_kprobe:
+ preempt_enable_no_resched();
+ return ret;
+}
+
+/*
+ * Called after single-stepping. p->addr is the address of the
+ * instruction that has been replaced by the breakpoint. To avoid the
+ * SMP problems that can occur when we temporarily put back the
+ * original opcode to single-step, we single-stepped a copy of the
+ * instruction. The address of this copy is p->ainsn.insn.
+ *
+ * This function prepares to return from the post-single-step
+ * breakpoint trap.
+ */
+static void __kprobes resume_execution(struct kprobe *p,
+ struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ unsigned long orig_pc = kcb->kprobe_saved_pc;
+ regs->pc = orig_pc + 8;
+}
+
+static inline int post_kprobe_handler(struct pt_regs *regs)
+{
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if (!cur)
+ return 0;
+
+ if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ cur->post_handler(cur, regs, 0);
+ }
+
+ resume_execution(cur, regs, kcb);
+
+ /* Restore back the original saved kprobes variables and continue. */
+ if (kcb->kprobe_status == KPROBE_REENTER) {
+ restore_previous_kprobe(kcb);
+ goto out;
+ }
+ reset_current_kprobe();
+out:
+ preempt_enable_no_resched();
+
+ return 1;
+}
+
+static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+ return 1;
+
+ if (kcb->kprobe_status & KPROBE_HIT_SS) {
+ /*
+ * We are here because the instruction being single
+ * stepped caused a page fault. We reset the current
+ * kprobe and the ip points back to the probe address
+ * and allow the page fault handler to continue as a
+ * normal page fault.
+ */
+ resume_execution(cur, regs, kcb);
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ }
+ return 0;
+}
+
+/*
+ * Wrapper routine for handling exceptions.
+ */
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ struct die_args *args = (struct die_args *)data;
+ int ret = NOTIFY_DONE;
+
+ switch (val) {
+ case DIE_BREAK:
+ if (kprobe_handler(args->regs))
+ ret = NOTIFY_STOP;
+ break;
+ case DIE_SSTEPBP:
+ if (post_kprobe_handler(args->regs))
+ ret = NOTIFY_STOP;
+ break;
+ case DIE_PAGE_FAULT:
+ /* kprobe_running() needs smp_processor_id(). */
+ preempt_disable();
+
+ if (kprobe_running()
+ && kprobe_fault_handler(args->regs, args->trapnr))
+ ret = NOTIFY_STOP;
+ preempt_enable();
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct jprobe *jp = container_of(p, struct jprobe, kp);
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ kcb->jprobe_saved_regs = *regs;
+ kcb->jprobe_saved_sp = regs->sp;
+
+ memcpy(kcb->jprobes_stack, (void *)kcb->jprobe_saved_sp,
+ MIN_JPROBES_STACK_SIZE(kcb->jprobe_saved_sp));
+
+ regs->pc = (unsigned long)(jp->entry);
+
+ return 1;
+}
+
+/* Defined in the inline asm below. */
+void jprobe_return_end(void);
+
+void __kprobes jprobe_return(void)
+{
+ asm volatile(
+ "bpt\n\t"
+ ".globl jprobe_return_end\n"
+ "jprobe_return_end:\n");
+}
+
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if (regs->pc >= (unsigned long)jprobe_return &&
+ regs->pc <= (unsigned long)jprobe_return_end) {
+ *regs = kcb->jprobe_saved_regs;
+ memcpy((void *)kcb->jprobe_saved_sp, kcb->jprobes_stack,
+ MIN_JPROBES_STACK_SIZE(kcb->jprobe_saved_sp));
+ preempt_enable_no_resched();
+
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Function return probe trampoline:
+ * - init_kprobes() establishes a probepoint here
+ * - When the probed function returns, this probe causes the
+ * handlers to fire
+ */
+static void __used kretprobe_trampoline_holder(void)
+{
+ asm volatile(
+ "nop\n\t"
+ ".global kretprobe_trampoline\n"
+ "kretprobe_trampoline:\n\t"
+ "nop\n\t"
+ : : : "memory");
+}
+
+void kretprobe_trampoline(void);
+
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
+ struct pt_regs *regs)
+{
+ ri->ret_addr = (kprobe_opcode_t *) regs->lr;
+
+ /* Replace the return addr with trampoline addr */
+ regs->lr = (unsigned long)kretprobe_trampoline;
+}
+
+/*
+ * Called when the probe at kretprobe trampoline is hit.
+ */
+static int __kprobes trampoline_probe_handler(struct kprobe *p,
+ struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head, empty_rp;
+ struct hlist_node *tmp;
+ unsigned long flags, orig_ret_address = 0;
+ unsigned long trampoline_address = (unsigned long)kretprobe_trampoline;
+
+ INIT_HLIST_HEAD(&empty_rp);
+ kretprobe_hash_lock(current, &head, &flags);
+
+ /*
+ * It is possible to have multiple instances associated with a given
+ * task either because multiple functions in the call path have
+ * a return probe installed on them, and/or more than one return
+ * return probe was registered for a target function.
+ *
+ * We can handle this because:
+ * - instances are always inserted at the head of the list
+ * - when multiple return probes are registered for the same
+ * function, the first instance's ret_addr will point to the
+ * real return address, and all the rest will point to
+ * kretprobe_trampoline
+ */
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ if (ri->rp && ri->rp->handler)
+ ri->rp->handler(ri, regs);
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
+ recycle_rp_inst(ri, &empty_rp);
+
+ if (orig_ret_address != trampoline_address) {
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+ }
+
+ kretprobe_assert(ri, orig_ret_address, trampoline_address);
+ instruction_pointer(regs) = orig_ret_address;
+
+ reset_current_kprobe();
+ kretprobe_hash_unlock(current, &flags);
+ preempt_enable_no_resched();
+
+ hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
+ hlist_del(&ri->hlist);
+ kfree(ri);
+ }
+ /*
+ * By returning a non-zero value, we are telling
+ * kprobe_handler() that we don't want the post_handler
+ * to run (and have re-enabled preemption)
+ */
+ return 1;
+}
+
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+ if (p->addr == (kprobe_opcode_t *)kretprobe_trampoline)
+ return 1;
+
+ return 0;
+}
+
+static struct kprobe trampoline_p = {
+ .addr = (kprobe_opcode_t *)kretprobe_trampoline,
+ .pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+ register_kprobe(&trampoline_p);
+ return 0;
+}
diff --git a/arch/tile/kernel/mcount_64.S b/arch/tile/kernel/mcount_64.S
new file mode 100644
index 000000000000..70d7bb0c4d8f
--- /dev/null
+++ b/arch/tile/kernel/mcount_64.S
@@ -0,0 +1,224 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * TILE-Gx specific __mcount support
+ */
+
+#include <linux/linkage.h>
+#include <asm/ftrace.h>
+
+#define REGSIZE 8
+
+ .text
+ .global __mcount
+
+ .macro MCOUNT_SAVE_REGS
+ addli sp, sp, -REGSIZE
+ {
+ st sp, lr
+ addli r29, sp, - (12 * REGSIZE)
+ }
+ {
+ addli sp, sp, - (13 * REGSIZE)
+ st r29, sp
+ }
+ addli r29, r29, REGSIZE
+ { st r29, r0; addli r29, r29, REGSIZE }
+ { st r29, r1; addli r29, r29, REGSIZE }
+ { st r29, r2; addli r29, r29, REGSIZE }
+ { st r29, r3; addli r29, r29, REGSIZE }
+ { st r29, r4; addli r29, r29, REGSIZE }
+ { st r29, r5; addli r29, r29, REGSIZE }
+ { st r29, r6; addli r29, r29, REGSIZE }
+ { st r29, r7; addli r29, r29, REGSIZE }
+ { st r29, r8; addli r29, r29, REGSIZE }
+ { st r29, r9; addli r29, r29, REGSIZE }
+ { st r29, r10; addli r29, r29, REGSIZE }
+ .endm
+
+ .macro MCOUNT_RESTORE_REGS
+ addli r29, sp, (2 * REGSIZE)
+ { ld r0, r29; addli r29, r29, REGSIZE }
+ { ld r1, r29; addli r29, r29, REGSIZE }
+ { ld r2, r29; addli r29, r29, REGSIZE }
+ { ld r3, r29; addli r29, r29, REGSIZE }
+ { ld r4, r29; addli r29, r29, REGSIZE }
+ { ld r5, r29; addli r29, r29, REGSIZE }
+ { ld r6, r29; addli r29, r29, REGSIZE }
+ { ld r7, r29; addli r29, r29, REGSIZE }
+ { ld r8, r29; addli r29, r29, REGSIZE }
+ { ld r9, r29; addli r29, r29, REGSIZE }
+ { ld r10, r29; addli lr, sp, (13 * REGSIZE) }
+ { ld lr, lr; addli sp, sp, (14 * REGSIZE) }
+ .endm
+
+ .macro RETURN_BACK
+ { move r12, lr; move lr, r10 }
+ jrp r12
+ .endm
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+ .align 64
+STD_ENTRY(__mcount)
+__mcount:
+ j ftrace_stub
+STD_ENDPROC(__mcount)
+
+ .align 64
+STD_ENTRY(ftrace_caller)
+ moveli r11, hw2_last(function_trace_stop)
+ { shl16insli r11, r11, hw1(function_trace_stop); move r12, lr }
+ { shl16insli r11, r11, hw0(function_trace_stop); move lr, r10 }
+ ld r11, r11
+ beqz r11, 1f
+ jrp r12
+
+1:
+ { move r10, lr; move lr, r12 }
+ MCOUNT_SAVE_REGS
+
+ /* arg1: self return address */
+ /* arg2: parent's return address */
+ { move r0, lr; move r1, r10 }
+
+ .global ftrace_call
+ftrace_call:
+ /*
+ * a placeholder for the call to a real tracing function, i.e.
+ * ftrace_trace_function()
+ */
+ nop
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ .global ftrace_graph_call
+ftrace_graph_call:
+ /*
+ * a placeholder for the call to a real tracing function, i.e.
+ * ftrace_graph_caller()
+ */
+ nop
+#endif
+ MCOUNT_RESTORE_REGS
+ .global ftrace_stub
+ftrace_stub:
+ RETURN_BACK
+STD_ENDPROC(ftrace_caller)
+
+#else /* ! CONFIG_DYNAMIC_FTRACE */
+
+ .align 64
+STD_ENTRY(__mcount)
+ moveli r11, hw2_last(function_trace_stop)
+ { shl16insli r11, r11, hw1(function_trace_stop); move r12, lr }
+ { shl16insli r11, r11, hw0(function_trace_stop); move lr, r10 }
+ ld r11, r11
+ beqz r11, 1f
+ jrp r12
+
+1:
+ { move r10, lr; move lr, r12 }
+ {
+ moveli r11, hw2_last(ftrace_trace_function)
+ moveli r13, hw2_last(ftrace_stub)
+ }
+ {
+ shl16insli r11, r11, hw1(ftrace_trace_function)
+ shl16insli r13, r13, hw1(ftrace_stub)
+ }
+ {
+ shl16insli r11, r11, hw0(ftrace_trace_function)
+ shl16insli r13, r13, hw0(ftrace_stub)
+ }
+
+ ld r11, r11
+ sub r14, r13, r11
+ bnez r14, static_trace
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ moveli r15, hw2_last(ftrace_graph_return)
+ shl16insli r15, r15, hw1(ftrace_graph_return)
+ shl16insli r15, r15, hw0(ftrace_graph_return)
+ ld r15, r15
+ sub r15, r15, r13
+ bnez r15, ftrace_graph_caller
+
+ {
+ moveli r16, hw2_last(ftrace_graph_entry)
+ moveli r17, hw2_last(ftrace_graph_entry_stub)
+ }
+ {
+ shl16insli r16, r16, hw1(ftrace_graph_entry)
+ shl16insli r17, r17, hw1(ftrace_graph_entry_stub)
+ }
+ {
+ shl16insli r16, r16, hw0(ftrace_graph_entry)
+ shl16insli r17, r17, hw0(ftrace_graph_entry_stub)
+ }
+ ld r16, r16
+ sub r17, r16, r17
+ bnez r17, ftrace_graph_caller
+
+#endif
+ RETURN_BACK
+
+static_trace:
+ MCOUNT_SAVE_REGS
+
+ /* arg1: self return address */
+ /* arg2: parent's return address */
+ { move r0, lr; move r1, r10 }
+
+ /* call ftrace_trace_function() */
+ jalr r11
+
+ MCOUNT_RESTORE_REGS
+
+ .global ftrace_stub
+ftrace_stub:
+ RETURN_BACK
+STD_ENDPROC(__mcount)
+
+#endif /* ! CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+STD_ENTRY(ftrace_graph_caller)
+ftrace_graph_caller:
+#ifndef CONFIG_DYNAMIC_FTRACE
+ MCOUNT_SAVE_REGS
+#endif
+
+ /* arg1: Get the location of the parent's return address */
+ addi r0, sp, 12 * REGSIZE
+ /* arg2: Get self return address */
+ move r1, lr
+
+ jal prepare_ftrace_return
+
+ MCOUNT_RESTORE_REGS
+ RETURN_BACK
+STD_ENDPROC(ftrace_graph_caller)
+
+ .global return_to_handler
+return_to_handler:
+ MCOUNT_SAVE_REGS
+
+ jal ftrace_return_to_handler
+ /* restore the real parent address */
+ move r11, r0
+
+ MCOUNT_RESTORE_REGS
+ jr r11
+
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c
index b9fe80ec1089..09b58703ac26 100644
--- a/arch/tile/kernel/pci-dma.c
+++ b/arch/tile/kernel/pci-dma.c
@@ -36,8 +36,9 @@ static void *tile_dma_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp,
struct dma_attrs *attrs)
{
- u64 dma_mask = dev->coherent_dma_mask ?: DMA_BIT_MASK(32);
- int node = dev_to_node(dev);
+ u64 dma_mask = (dev && dev->coherent_dma_mask) ?
+ dev->coherent_dma_mask : DMA_BIT_MASK(32);
+ int node = dev ? dev_to_node(dev) : 0;
int order = get_order(size);
struct page *pg;
dma_addr_t addr;
@@ -256,7 +257,7 @@ static void tile_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
BUG_ON(!valid_dma_direction(direction));
__dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
- dma_address & PAGE_OFFSET, size, direction);
+ dma_address & (PAGE_SIZE - 1), size, direction);
}
static void tile_dma_sync_single_for_cpu(struct device *dev,
@@ -357,7 +358,7 @@ static void *tile_pci_dma_alloc_coherent(struct device *dev, size_t size,
addr = page_to_phys(pg);
- *dma_handle = phys_to_dma(dev, addr);
+ *dma_handle = addr + get_dma_offset(dev);
return page_address(pg);
}
@@ -387,7 +388,7 @@ static int tile_pci_dma_map_sg(struct device *dev, struct scatterlist *sglist,
sg->dma_address = sg_phys(sg);
__dma_prep_pa_range(sg->dma_address, sg->length, direction);
- sg->dma_address = phys_to_dma(dev, sg->dma_address);
+ sg->dma_address = sg->dma_address + get_dma_offset(dev);
#ifdef CONFIG_NEED_SG_DMA_LENGTH
sg->dma_length = sg->length;
#endif
@@ -422,7 +423,7 @@ static dma_addr_t tile_pci_dma_map_page(struct device *dev, struct page *page,
BUG_ON(offset + size > PAGE_SIZE);
__dma_prep_page(page, offset, size, direction);
- return phys_to_dma(dev, page_to_pa(page) + offset);
+ return page_to_pa(page) + offset + get_dma_offset(dev);
}
static void tile_pci_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
@@ -432,10 +433,10 @@ static void tile_pci_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
{
BUG_ON(!valid_dma_direction(direction));
- dma_address = dma_to_phys(dev, dma_address);
+ dma_address -= get_dma_offset(dev);
__dma_complete_page(pfn_to_page(PFN_DOWN(dma_address)),
- dma_address & PAGE_OFFSET, size, direction);
+ dma_address & (PAGE_SIZE - 1), size, direction);
}
static void tile_pci_dma_sync_single_for_cpu(struct device *dev,
@@ -445,7 +446,7 @@ static void tile_pci_dma_sync_single_for_cpu(struct device *dev,
{
BUG_ON(!valid_dma_direction(direction));
- dma_handle = dma_to_phys(dev, dma_handle);
+ dma_handle -= get_dma_offset(dev);
__dma_complete_pa_range(dma_handle, size, direction);
}
@@ -456,7 +457,7 @@ static void tile_pci_dma_sync_single_for_device(struct device *dev,
enum dma_data_direction
direction)
{
- dma_handle = dma_to_phys(dev, dma_handle);
+ dma_handle -= get_dma_offset(dev);
__dma_prep_pa_range(dma_handle, size, direction);
}
@@ -558,22 +559,47 @@ static struct dma_map_ops pci_swiotlb_dma_ops = {
.mapping_error = swiotlb_dma_mapping_error,
};
+static struct dma_map_ops pci_hybrid_dma_ops = {
+ .alloc = tile_swiotlb_alloc_coherent,
+ .free = tile_swiotlb_free_coherent,
+ .map_page = tile_pci_dma_map_page,
+ .unmap_page = tile_pci_dma_unmap_page,
+ .map_sg = tile_pci_dma_map_sg,
+ .unmap_sg = tile_pci_dma_unmap_sg,
+ .sync_single_for_cpu = tile_pci_dma_sync_single_for_cpu,
+ .sync_single_for_device = tile_pci_dma_sync_single_for_device,
+ .sync_sg_for_cpu = tile_pci_dma_sync_sg_for_cpu,
+ .sync_sg_for_device = tile_pci_dma_sync_sg_for_device,
+ .mapping_error = tile_pci_dma_mapping_error,
+ .dma_supported = tile_pci_dma_supported
+};
+
struct dma_map_ops *gx_legacy_pci_dma_map_ops = &pci_swiotlb_dma_ops;
+struct dma_map_ops *gx_hybrid_pci_dma_map_ops = &pci_hybrid_dma_ops;
#else
struct dma_map_ops *gx_legacy_pci_dma_map_ops;
+struct dma_map_ops *gx_hybrid_pci_dma_map_ops;
#endif
EXPORT_SYMBOL(gx_legacy_pci_dma_map_ops);
+EXPORT_SYMBOL(gx_hybrid_pci_dma_map_ops);
#ifdef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK
int dma_set_coherent_mask(struct device *dev, u64 mask)
{
struct dma_map_ops *dma_ops = get_dma_ops(dev);
- /* Handle legacy PCI devices with limited memory addressability. */
- if (((dma_ops == gx_pci_dma_map_ops) ||
- (dma_ops == gx_legacy_pci_dma_map_ops)) &&
- (mask <= DMA_BIT_MASK(32))) {
- if (mask > dev->archdata.max_direct_dma_addr)
+ /*
+ * For PCI devices with 64-bit DMA addressing capability, promote
+ * the dma_ops to full capability for both streams and consistent
+ * memory access. For 32-bit capable devices, limit the consistent
+ * memory DMA range to max_direct_dma_addr.
+ */
+ if (dma_ops == gx_pci_dma_map_ops ||
+ dma_ops == gx_hybrid_pci_dma_map_ops ||
+ dma_ops == gx_legacy_pci_dma_map_ops) {
+ if (mask == DMA_BIT_MASK(64))
+ set_dma_ops(dev, gx_pci_dma_map_ops);
+ else if (mask > dev->archdata.max_direct_dma_addr)
mask = dev->archdata.max_direct_dma_addr;
}
@@ -584,3 +610,21 @@ int dma_set_coherent_mask(struct device *dev, u64 mask)
}
EXPORT_SYMBOL(dma_set_coherent_mask);
#endif
+
+#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
+/*
+ * The generic dma_get_required_mask() uses the highest physical address
+ * (max_pfn) to provide the hint to the PCI drivers regarding 32-bit or
+ * 64-bit DMA configuration. Since TILEGx has I/O TLB/MMU, allowing the
+ * DMAs to use the full 64-bit PCI address space and not limited by
+ * the physical memory space, we always let the PCI devices use
+ * 64-bit DMA if they have that capability, by returning the 64-bit
+ * DMA mask here. The device driver has the option to use 32-bit DMA if
+ * the device is not capable of 64-bit DMA.
+ */
+u64 dma_get_required_mask(struct device *dev)
+{
+ return DMA_BIT_MASK(64);
+}
+EXPORT_SYMBOL_GPL(dma_get_required_mask);
+#endif
diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c
index 67237d34c2e2..b7180e6e900d 100644
--- a/arch/tile/kernel/pci.c
+++ b/arch/tile/kernel/pci.c
@@ -20,7 +20,6 @@
#include <linux/capability.h>
#include <linux/sched.h>
#include <linux/errno.h>
-#include <linux/bootmem.h>
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/uaccess.h>
@@ -52,6 +51,8 @@
*
*/
+static int pci_probe = 1;
+
/*
* This flag tells if the platform is TILEmpower that needs
* special configuration for the PLX switch chip.
@@ -144,6 +145,11 @@ int __init tile_pci_init(void)
{
int i;
+ if (!pci_probe) {
+ pr_info("PCI: disabled by boot argument\n");
+ return 0;
+ }
+
pr_info("PCI: Searching for controllers...\n");
/* Re-init number of PCIe controllers to support hot-plug feature. */
@@ -192,7 +198,6 @@ int __init tile_pci_init(void)
controller->hv_cfg_fd[0] = hv_cfg_fd0;
controller->hv_cfg_fd[1] = hv_cfg_fd1;
controller->hv_mem_fd = hv_mem_fd;
- controller->first_busno = 0;
controller->last_busno = 0xff;
controller->ops = &tile_cfg_ops;
@@ -283,7 +288,7 @@ int __init pcibios_init(void)
* known to require at least 20ms here, but we use a more
* conservative value.
*/
- mdelay(250);
+ msleep(250);
/* Scan all of the recorded PCI controllers. */
for (i = 0; i < TILE_NUM_PCIE; i++) {
@@ -304,18 +309,10 @@ int __init pcibios_init(void)
pr_info("PCI: initializing controller #%d\n", i);
- /*
- * This comes from the generic Linux PCI driver.
- *
- * It reads the PCI tree for this bus into the Linux
- * data structures.
- *
- * This is inlined in linux/pci.h and calls into
- * pci_scan_bus_parented() in probe.c.
- */
pci_add_resource(&resources, &ioport_resource);
pci_add_resource(&resources, &iomem_resource);
- bus = pci_scan_root_bus(NULL, 0, controller->ops, controller, &resources);
+ bus = pci_scan_root_bus(NULL, 0, controller->ops,
+ controller, &resources);
controller->root_bus = bus;
controller->last_busno = bus->busn_res.end;
}
@@ -388,6 +385,16 @@ void pcibios_set_master(struct pci_dev *dev)
/* No special bus mastering setup handling. */
}
+/* Process any "pci=" kernel boot arguments. */
+char *__init pcibios_setup(char *str)
+{
+ if (!strcmp(str, "off")) {
+ pci_probe = 0;
+ return NULL;
+ }
+ return str;
+}
+
/*
* Enable memory and/or address decoding, as appropriate, for the
* device described by the 'dev' struct.
diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c
index 6640e7bbeaa2..a97a6452b812 100644
--- a/arch/tile/kernel/pci_gx.c
+++ b/arch/tile/kernel/pci_gx.c
@@ -69,19 +69,32 @@ static int pcie_rc[TILEGX_NUM_TRIO][TILEGX_TRIO_PCIES];
* a HW PCIe link-training bug. The exact delay is specified with
* a kernel boot argument in the form of "pcie_rc_delay=T,P,S",
* where T is the TRIO instance number, P is the port number and S is
- * the delay in seconds. If the delay is not provided, the value
- * will be DEFAULT_RC_DELAY.
+ * the delay in seconds. If the argument is specified, but the delay is
+ * not provided, the value will be DEFAULT_RC_DELAY.
*/
static int rc_delay[TILEGX_NUM_TRIO][TILEGX_TRIO_PCIES];
/* Default number of seconds that the PCIe RC port probe can be delayed. */
#define DEFAULT_RC_DELAY 10
-/* Max number of seconds that the PCIe RC port probe can be delayed. */
-#define MAX_RC_DELAY 20
+/* The PCI I/O space size in each PCI domain. */
+#define IO_SPACE_SIZE 0x10000
+
+/* Provide shorter versions of some very long constant names. */
+#define AUTO_CONFIG_RC \
+ TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_RC
+#define AUTO_CONFIG_RC_G1 \
+ TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_RC_G1
+#define AUTO_CONFIG_EP \
+ TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_ENDPOINT
+#define AUTO_CONFIG_EP_G1 \
+ TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_ENDPOINT_G1
/* Array of the PCIe ports configuration info obtained from the BIB. */
-struct pcie_port_property pcie_ports[TILEGX_NUM_TRIO][TILEGX_TRIO_PCIES];
+struct pcie_trio_ports_property pcie_ports[TILEGX_NUM_TRIO];
+
+/* Number of configured TRIO instances. */
+int num_trio_shims;
/* All drivers share the TRIO contexts defined here. */
gxio_trio_context_t trio_contexts[TILEGX_NUM_TRIO];
@@ -89,24 +102,21 @@ gxio_trio_context_t trio_contexts[TILEGX_NUM_TRIO];
/* Pointer to an array of PCIe RC controllers. */
struct pci_controller pci_controllers[TILEGX_NUM_TRIO * TILEGX_TRIO_PCIES];
int num_rc_controllers;
-static int num_ep_controllers;
static struct pci_ops tile_cfg_ops;
/* Mask of CPUs that should receive PCIe interrupts. */
static struct cpumask intr_cpus_map;
-/*
- * We don't need to worry about the alignment of resources.
- */
+/* We don't need to worry about the alignment of resources. */
resource_size_t pcibios_align_resource(void *data, const struct resource *res,
- resource_size_t size, resource_size_t align)
+ resource_size_t size,
+ resource_size_t align)
{
return res->start;
}
EXPORT_SYMBOL(pcibios_align_resource);
-
/*
* Pick a CPU to receive and handle the PCIe interrupts, based on the IRQ #.
* For now, we simply send interrupts to non-dataplane CPUs.
@@ -134,24 +144,19 @@ static int tile_irq_cpu(int irq)
return cpu;
}
-/*
- * Open a file descriptor to the TRIO shim.
- */
+/* Open a file descriptor to the TRIO shim. */
static int tile_pcie_open(int trio_index)
{
gxio_trio_context_t *context = &trio_contexts[trio_index];
int ret;
+ int mac;
- /*
- * This opens a file descriptor to the TRIO shim.
- */
+ /* This opens a file descriptor to the TRIO shim. */
ret = gxio_trio_init(context, trio_index);
if (ret < 0)
- return ret;
+ goto gxio_trio_init_failure;
- /*
- * Allocate an ASID for the kernel.
- */
+ /* Allocate an ASID for the kernel. */
ret = gxio_trio_alloc_asids(context, 1, 0, 0);
if (ret < 0) {
pr_err("PCI: ASID alloc failure on TRIO %d, give up\n",
@@ -189,31 +194,97 @@ static int tile_pcie_open(int trio_index)
}
#endif
+ /* Get the properties of the PCIe ports on this TRIO instance. */
+ ret = gxio_trio_get_port_property(context, &pcie_ports[trio_index]);
+ if (ret < 0) {
+ pr_err("PCI: PCIE_GET_PORT_PROPERTY failure, error %d,"
+ " on TRIO %d\n", ret, trio_index);
+ goto get_port_property_failure;
+ }
+
+ context->mmio_base_mac =
+ iorpc_ioremap(context->fd, 0, HV_TRIO_CONFIG_IOREMAP_SIZE);
+ if (context->mmio_base_mac == NULL) {
+ pr_err("PCI: TRIO config space mapping failure, error %d,"
+ " on TRIO %d\n", ret, trio_index);
+ ret = -ENOMEM;
+
+ goto trio_mmio_mapping_failure;
+ }
+
+ /* Check the port strap state which will override the BIB setting. */
+ for (mac = 0; mac < TILEGX_TRIO_PCIES; mac++) {
+ TRIO_PCIE_INTFC_PORT_CONFIG_t port_config;
+ unsigned int reg_offset;
+
+ /* Ignore ports that are not specified in the BIB. */
+ if (!pcie_ports[trio_index].ports[mac].allow_rc &&
+ !pcie_ports[trio_index].ports[mac].allow_ep)
+ continue;
+
+ reg_offset =
+ (TRIO_PCIE_INTFC_PORT_CONFIG <<
+ TRIO_CFG_REGION_ADDR__REG_SHIFT) |
+ (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE <<
+ TRIO_CFG_REGION_ADDR__INTFC_SHIFT) |
+ (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
+
+ port_config.word =
+ __gxio_mmio_read(context->mmio_base_mac + reg_offset);
+
+ if (port_config.strap_state != AUTO_CONFIG_RC &&
+ port_config.strap_state != AUTO_CONFIG_RC_G1) {
+ /*
+ * If this is really intended to be an EP port, record
+ * it so that the endpoint driver will know about it.
+ */
+ if (port_config.strap_state == AUTO_CONFIG_EP ||
+ port_config.strap_state == AUTO_CONFIG_EP_G1)
+ pcie_ports[trio_index].ports[mac].allow_ep = 1;
+ }
+ }
+
return ret;
+trio_mmio_mapping_failure:
+get_port_property_failure:
asid_alloc_failure:
#ifdef USE_SHARED_PCIE_CONFIG_REGION
pio_alloc_failure:
#endif
hv_dev_close(context->fd);
+gxio_trio_init_failure:
+ context->fd = -1;
return ret;
}
-static void
-tilegx_legacy_irq_ack(struct irq_data *d)
+static int __init tile_trio_init(void)
+{
+ int i;
+
+ /* We loop over all the TRIO shims. */
+ for (i = 0; i < TILEGX_NUM_TRIO; i++) {
+ if (tile_pcie_open(i) < 0)
+ continue;
+ num_trio_shims++;
+ }
+
+ return 0;
+}
+postcore_initcall(tile_trio_init);
+
+static void tilegx_legacy_irq_ack(struct irq_data *d)
{
__insn_mtspr(SPR_IPI_EVENT_RESET_K, 1UL << d->irq);
}
-static void
-tilegx_legacy_irq_mask(struct irq_data *d)
+static void tilegx_legacy_irq_mask(struct irq_data *d)
{
__insn_mtspr(SPR_IPI_MASK_SET_K, 1UL << d->irq);
}
-static void
-tilegx_legacy_irq_unmask(struct irq_data *d)
+static void tilegx_legacy_irq_unmask(struct irq_data *d)
{
__insn_mtspr(SPR_IPI_MASK_RESET_K, 1UL << d->irq);
}
@@ -234,8 +305,7 @@ static struct irq_chip tilegx_legacy_irq_chip = {
* to Linux which just calls handle_level_irq() after clearing the
* MAC INTx Assert status bit associated with this interrupt.
*/
-static void
-trio_handle_level_irq(unsigned int irq, struct irq_desc *desc)
+static void trio_handle_level_irq(unsigned int irq, struct irq_desc *desc)
{
struct pci_controller *controller = irq_desc_get_handler_data(desc);
gxio_trio_context_t *trio_context = controller->trio;
@@ -301,9 +371,7 @@ static int tile_init_irqs(struct pci_controller *controller)
goto free_irqs;
}
- /*
- * Register the IRQ handler with the kernel.
- */
+ /* Register the IRQ handler with the kernel. */
irq_set_chip_and_handler(irq, &tilegx_legacy_irq_chip,
trio_handle_level_irq);
irq_set_chip_data(irq, (void *)(uint64_t)i);
@@ -320,14 +388,39 @@ free_irqs:
}
/*
+ * Return 1 if the port is strapped to operate in RC mode.
+ */
+static int
+strapped_for_rc(gxio_trio_context_t *trio_context, int mac)
+{
+ TRIO_PCIE_INTFC_PORT_CONFIG_t port_config;
+ unsigned int reg_offset;
+
+ /* Check the port configuration. */
+ reg_offset =
+ (TRIO_PCIE_INTFC_PORT_CONFIG <<
+ TRIO_CFG_REGION_ADDR__REG_SHIFT) |
+ (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE <<
+ TRIO_CFG_REGION_ADDR__INTFC_SHIFT) |
+ (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
+ port_config.word =
+ __gxio_mmio_read(trio_context->mmio_base_mac + reg_offset);
+
+ if (port_config.strap_state == AUTO_CONFIG_RC ||
+ port_config.strap_state == AUTO_CONFIG_RC_G1)
+ return 1;
+ else
+ return 0;
+}
+
+/*
* Find valid controllers and fill in pci_controller structs for each
* of them.
*
- * Returns the number of controllers discovered.
+ * Return the number of controllers discovered.
*/
int __init tile_pci_init(void)
{
- int num_trio_shims = 0;
int ctl_index = 0;
int i, j;
@@ -338,64 +431,62 @@ int __init tile_pci_init(void)
pr_info("PCI: Searching for controllers...\n");
- /*
- * We loop over all the TRIO shims.
- */
- for (i = 0; i < TILEGX_NUM_TRIO; i++) {
- int ret;
-
- ret = tile_pcie_open(i);
- if (ret < 0)
- continue;
-
- num_trio_shims++;
- }
-
if (num_trio_shims == 0 || sim_is_simulator())
return 0;
/*
- * Now determine which PCIe ports are configured to operate in RC mode.
- * We look at the Board Information Block first and then see if there
- * are any overriding configuration by the HW strapping pin.
+ * Now determine which PCIe ports are configured to operate in RC
+ * mode. There is a differece in the port configuration capability
+ * between the Gx36 and Gx72 devices.
+ *
+ * The Gx36 has configuration capability for each of the 3 PCIe
+ * interfaces (disable, auto endpoint, auto RC, etc.).
+ * On the Gx72, you can only select one of the 3 PCIe interfaces per
+ * TRIO to train automatically. Further, the allowable training modes
+ * are reduced to four options (auto endpoint, auto RC, stream x1,
+ * stream x4).
+ *
+ * For Gx36 ports, it must be allowed to be in RC mode by the
+ * Board Information Block, and the hardware strapping pins must be
+ * set to RC mode.
+ *
+ * For Gx72 ports, the port will operate in RC mode if either of the
+ * following is true:
+ * 1. It is allowed to be in RC mode by the Board Information Block,
+ * and the BIB doesn't allow the EP mode.
+ * 2. It is allowed to be in either the RC or the EP mode by the BIB,
+ * and the hardware strapping pin is set to RC mode.
*/
for (i = 0; i < TILEGX_NUM_TRIO; i++) {
gxio_trio_context_t *context = &trio_contexts[i];
- int ret;
if (context->fd < 0)
continue;
- ret = hv_dev_pread(context->fd, 0,
- (HV_VirtAddr)&pcie_ports[i][0],
- sizeof(struct pcie_port_property) * TILEGX_TRIO_PCIES,
- GXIO_TRIO_OP_GET_PORT_PROPERTY);
- if (ret < 0) {
- pr_err("PCI: PCIE_GET_PORT_PROPERTY failure, error %d,"
- " on TRIO %d\n", ret, i);
- continue;
- }
-
for (j = 0; j < TILEGX_TRIO_PCIES; j++) {
- if (pcie_ports[i][j].allow_rc) {
+ int is_rc = 0;
+
+ if (pcie_ports[i].is_gx72 &&
+ pcie_ports[i].ports[j].allow_rc) {
+ if (!pcie_ports[i].ports[j].allow_ep ||
+ strapped_for_rc(context, j))
+ is_rc = 1;
+ } else if (pcie_ports[i].ports[j].allow_rc &&
+ strapped_for_rc(context, j)) {
+ is_rc = 1;
+ }
+ if (is_rc) {
pcie_rc[i][j] = 1;
num_rc_controllers++;
}
- else if (pcie_ports[i][j].allow_ep) {
- num_ep_controllers++;
- }
}
}
- /*
- * Return if no PCIe ports are configured to operate in RC mode.
- */
+ /* Return if no PCIe ports are configured to operate in RC mode. */
if (num_rc_controllers == 0)
return 0;
- /*
- * Set the TRIO pointer and MAC index for each PCIe RC port.
- */
+ /* Set the TRIO pointer and MAC index for each PCIe RC port. */
for (i = 0; i < TILEGX_NUM_TRIO; i++) {
for (j = 0; j < TILEGX_TRIO_PCIES; j++) {
if (pcie_rc[i][j]) {
@@ -411,26 +502,32 @@ int __init tile_pci_init(void)
}
out:
- /*
- * Configure each PCIe RC port.
- */
+ /* Configure each PCIe RC port. */
for (i = 0; i < num_rc_controllers; i++) {
- /*
- * Configure the PCIe MAC to run in RC mode.
- */
+ /* Configure the PCIe MAC to run in RC mode. */
struct pci_controller *controller = &pci_controllers[i];
controller->index = i;
controller->ops = &tile_cfg_ops;
+ controller->io_space.start = PCIBIOS_MIN_IO +
+ (i * IO_SPACE_SIZE);
+ controller->io_space.end = controller->io_space.start +
+ IO_SPACE_SIZE - 1;
+ BUG_ON(controller->io_space.end > IO_SPACE_LIMIT);
+ controller->io_space.flags = IORESOURCE_IO;
+ snprintf(controller->io_space_name,
+ sizeof(controller->io_space_name),
+ "PCI I/O domain %d", i);
+ controller->io_space.name = controller->io_space_name;
+
/*
* The PCI memory resource is located above the PA space.
* For every host bridge, the BAR window or the MMIO aperture
* is in range [3GB, 4GB - 1] of a 4GB space beyond the
* PA space.
*/
-
controller->mem_offset = TILE_PCI_MEM_START +
(i * TILE_PCI_BAR_WINDOW_TOP);
controller->mem_space.start = controller->mem_offset +
@@ -458,7 +555,6 @@ static int tile_map_irq(const struct pci_dev *dev, u8 device, u8 pin)
return controller->irq_intx_table[pin - 1];
}
-
static void fixup_read_and_payload_sizes(struct pci_controller *controller)
{
gxio_trio_context_t *trio_context = controller->trio;
@@ -472,9 +568,7 @@ static void fixup_read_and_payload_sizes(struct pci_controller *controller)
mac = controller->mac;
- /*
- * Set our max read request size to be 4KB.
- */
+ /* Set our max read request size to be 4KB. */
reg_offset =
(TRIO_PCIE_RC_DEVICE_CONTROL <<
TRIO_CFG_REGION_ADDR__REG_SHIFT) |
@@ -483,10 +577,10 @@ static void fixup_read_and_payload_sizes(struct pci_controller *controller)
(mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
dev_control.word = __gxio_mmio_read32(trio_context->mmio_base_mac +
- reg_offset);
+ reg_offset);
dev_control.max_read_req_sz = 5;
__gxio_mmio_write32(trio_context->mmio_base_mac + reg_offset,
- dev_control.word);
+ dev_control.word);
/*
* Set the max payload size supported by this Gx PCIe MAC.
@@ -502,10 +596,10 @@ static void fixup_read_and_payload_sizes(struct pci_controller *controller)
(mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
rc_dev_cap.word = __gxio_mmio_read32(trio_context->mmio_base_mac +
- reg_offset);
+ reg_offset);
rc_dev_cap.mps_sup = 1;
__gxio_mmio_write32(trio_context->mmio_base_mac + reg_offset,
- rc_dev_cap.word);
+ rc_dev_cap.word);
/* Configure PCI Express MPS setting. */
list_for_each_entry(child, &root_bus->children, node)
@@ -528,7 +622,7 @@ static void fixup_read_and_payload_sizes(struct pci_controller *controller)
dev_control.max_payload_size,
dev_control.max_read_req_sz,
mac);
- if (err < 0) {
+ if (err < 0) {
pr_err("PCI: PCIE_CONFIGURE_MAC_MPS_MRS failure, "
"MAC %d on TRIO %d\n",
mac, controller->trio_index);
@@ -565,21 +659,14 @@ static int setup_pcie_rc_delay(char *str)
if (!isdigit(*str))
return -EINVAL;
delay = simple_strtoul(str, (char **)&str, 10);
- if (delay > MAX_RC_DELAY)
- return -EINVAL;
}
rc_delay[trio_index][mac] = delay ? : DEFAULT_RC_DELAY;
- pr_info("Delaying PCIe RC link training for %u sec"
- " on MAC %lu on TRIO %lu\n", rc_delay[trio_index][mac],
- mac, trio_index);
return 0;
}
early_param("pcie_rc_delay", setup_pcie_rc_delay);
-/*
- * PCI initialization entry point, called by subsys_initcall.
- */
+/* PCI initialization entry point, called by subsys_initcall. */
int __init pcibios_init(void)
{
resource_size_t offset;
@@ -589,35 +676,10 @@ int __init pcibios_init(void)
tile_pci_init();
- if (num_rc_controllers == 0 && num_ep_controllers == 0)
+ if (num_rc_controllers == 0)
return 0;
/*
- * We loop over all the TRIO shims and set up the MMIO mappings.
- */
- for (i = 0; i < TILEGX_NUM_TRIO; i++) {
- gxio_trio_context_t *context = &trio_contexts[i];
-
- if (context->fd < 0)
- continue;
-
- /*
- * Map in the MMIO space for the MAC.
- */
- offset = 0;
- context->mmio_base_mac =
- iorpc_ioremap(context->fd, offset,
- HV_TRIO_CONFIG_IOREMAP_SIZE);
- if (context->mmio_base_mac == NULL) {
- pr_err("PCI: MAC map failure on TRIO %d\n", i);
-
- hv_dev_close(context->fd);
- context->fd = -1;
- continue;
- }
- }
-
- /*
* Delay a bit in case devices aren't ready. Some devices are
* known to require at least 20ms here, but we use a more
* conservative value.
@@ -628,7 +690,6 @@ int __init pcibios_init(void)
for (next_busno = 0, i = 0; i < num_rc_controllers; i++) {
struct pci_controller *controller = &pci_controllers[i];
gxio_trio_context_t *trio_context = controller->trio;
- TRIO_PCIE_INTFC_PORT_CONFIG_t port_config;
TRIO_PCIE_INTFC_PORT_STATUS_t port_status;
TRIO_PCIE_INTFC_TX_FIFO_CTL_t tx_fifo_ctl;
struct pci_bus *bus;
@@ -645,75 +706,64 @@ int __init pcibios_init(void)
mac = controller->mac;
/*
- * Check the port strap state which will override the BIB
- * setting.
+ * Check for PCIe link-up status to decide if we need
+ * to force the link to come up.
*/
-
reg_offset =
- (TRIO_PCIE_INTFC_PORT_CONFIG <<
+ (TRIO_PCIE_INTFC_PORT_STATUS <<
TRIO_CFG_REGION_ADDR__REG_SHIFT) |
(TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE <<
- TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) |
+ TRIO_CFG_REGION_ADDR__INTFC_SHIFT) |
(mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
- port_config.word =
+ port_status.word =
__gxio_mmio_read(trio_context->mmio_base_mac +
reg_offset);
-
- if ((port_config.strap_state !=
- TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_RC) &&
- (port_config.strap_state !=
- TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_RC_G1)) {
- /*
- * If this is really intended to be an EP port,
- * record it so that the endpoint driver will know about it.
- */
- if (port_config.strap_state ==
- TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_ENDPOINT ||
- port_config.strap_state ==
- TRIO_PCIE_INTFC_PORT_CONFIG__STRAP_STATE_VAL_AUTO_CONFIG_ENDPOINT_G1)
- pcie_ports[trio_index][mac].allow_ep = 1;
-
- continue;
+ if (!port_status.dl_up) {
+ if (rc_delay[trio_index][mac]) {
+ pr_info("Delaying PCIe RC TRIO init %d sec"
+ " on MAC %d on TRIO %d\n",
+ rc_delay[trio_index][mac], mac,
+ trio_index);
+ msleep(rc_delay[trio_index][mac] * 1000);
+ }
+ ret = gxio_trio_force_rc_link_up(trio_context, mac);
+ if (ret < 0)
+ pr_err("PCI: PCIE_FORCE_LINK_UP failure, "
+ "MAC %d on TRIO %d\n", mac, trio_index);
}
- /*
- * Delay the RC link training if needed.
- */
- if (rc_delay[trio_index][mac])
- msleep(rc_delay[trio_index][mac] * 1000);
-
- ret = gxio_trio_force_rc_link_up(trio_context, mac);
- if (ret < 0)
- pr_err("PCI: PCIE_FORCE_LINK_UP failure, "
- "MAC %d on TRIO %d\n", mac, trio_index);
-
pr_info("PCI: Found PCI controller #%d on TRIO %d MAC %d\n", i,
trio_index, controller->mac);
- /*
- * Wait a bit here because some EP devices take longer
- * to come up.
- */
- msleep(1000);
-
- /*
- * Check for PCIe link-up status.
- */
-
- reg_offset =
- (TRIO_PCIE_INTFC_PORT_STATUS <<
- TRIO_CFG_REGION_ADDR__REG_SHIFT) |
- (TRIO_CFG_REGION_ADDR__INTFC_VAL_MAC_INTERFACE <<
- TRIO_CFG_REGION_ADDR__INTFC_SHIFT ) |
- (mac << TRIO_CFG_REGION_ADDR__MAC_SEL_SHIFT);
+ /* Delay the bus probe if needed. */
+ if (rc_delay[trio_index][mac]) {
+ pr_info("Delaying PCIe RC bus enumerating %d sec"
+ " on MAC %d on TRIO %d\n",
+ rc_delay[trio_index][mac], mac,
+ trio_index);
+ msleep(rc_delay[trio_index][mac] * 1000);
+ } else {
+ /*
+ * Wait a bit here because some EP devices
+ * take longer to come up.
+ */
+ msleep(1000);
+ }
+ /* Check for PCIe link-up status again. */
port_status.word =
__gxio_mmio_read(trio_context->mmio_base_mac +
reg_offset);
if (!port_status.dl_up) {
- pr_err("PCI: link is down, MAC %d on TRIO %d\n",
- mac, trio_index);
+ if (pcie_ports[trio_index].ports[mac].removable) {
+ pr_info("PCI: link is down, MAC %d on TRIO %d\n",
+ mac, trio_index);
+ pr_info("This is expected if no PCIe card"
+ " is connected to this link\n");
+ } else
+ pr_err("PCI: link is down, MAC %d on TRIO %d\n",
+ mac, trio_index);
continue;
}
@@ -739,7 +789,6 @@ int __init pcibios_init(void)
* Change the device ID so that Linux bus crawl doesn't confuse
* the internal bridge with any Tilera endpoints.
*/
-
reg_offset =
(TRIO_PCIE_RC_DEVICE_ID_VEN_ID <<
TRIO_CFG_REGION_ADDR__REG_SHIFT) |
@@ -752,10 +801,7 @@ int __init pcibios_init(void)
TRIO_PCIE_RC_DEVICE_ID_VEN_ID__DEV_ID_SHIFT) |
TILERA_VENDOR_ID);
- /*
- * Set the internal P2P bridge class code.
- */
-
+ /* Set the internal P2P bridge class code. */
reg_offset =
(TRIO_PCIE_RC_REVISION_ID <<
TRIO_CFG_REGION_ADDR__REG_SHIFT) |
@@ -766,26 +812,22 @@ int __init pcibios_init(void)
class_code_revision =
__gxio_mmio_read32(trio_context->mmio_base_mac +
reg_offset);
- class_code_revision = (class_code_revision & 0xff ) |
- (PCI_CLASS_BRIDGE_PCI << 16);
+ class_code_revision = (class_code_revision & 0xff) |
+ (PCI_CLASS_BRIDGE_PCI << 16);
__gxio_mmio_write32(trio_context->mmio_base_mac +
reg_offset, class_code_revision);
#ifdef USE_SHARED_PCIE_CONFIG_REGION
- /*
- * Map in the MMIO space for the PIO region.
- */
+ /* Map in the MMIO space for the PIO region. */
offset = HV_TRIO_PIO_OFFSET(trio_context->pio_cfg_index) |
(((unsigned long long)mac) <<
TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT);
#else
- /*
- * Alloc a PIO region for PCI config access per MAC.
- */
+ /* Alloc a PIO region for PCI config access per MAC. */
ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0);
if (ret < 0) {
pr_err("PCI: PCI CFG PIO alloc failure for mac %d "
@@ -796,9 +838,7 @@ int __init pcibios_init(void)
trio_context->pio_cfg_index[mac] = ret;
- /*
- * For PIO CFG, the bus_address_hi parameter is 0.
- */
+ /* For PIO CFG, the bus_address_hi parameter is 0. */
ret = gxio_trio_init_pio_region_aux(trio_context,
trio_context->pio_cfg_index[mac],
mac, 0, HV_TRIO_PIO_FLAG_CONFIG_SPACE);
@@ -815,9 +855,15 @@ int __init pcibios_init(void)
#endif
+ /*
+ * To save VMALLOC space, we take advantage of the fact that
+ * bit 29 in the PIO CFG address format is reserved 0. With
+ * TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT being 30,
+ * this cuts VMALLOC space usage from 1GB to 512MB per mac.
+ */
trio_context->mmio_base_pio_cfg[mac] =
- iorpc_ioremap(trio_context->fd, offset,
- (1 << TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT));
+ iorpc_ioremap(trio_context->fd, offset, (1UL <<
+ (TRIO_TILE_PIO_REGION_SETUP_CFG_ADDR__MAC_SHIFT - 1)));
if (trio_context->mmio_base_pio_cfg[mac] == NULL) {
pr_err("PCI: PIO map failure for mac %d on TRIO %d\n",
mac, trio_index);
@@ -825,9 +871,7 @@ int __init pcibios_init(void)
continue;
}
- /*
- * Initialize the PCIe interrupts.
- */
+ /* Initialize the PCIe interrupts. */
if (tile_init_irqs(controller)) {
pr_err("PCI: IRQs init failure for mac %d on TRIO %d\n",
mac, trio_index);
@@ -838,17 +882,16 @@ int __init pcibios_init(void)
/*
* The PCI memory resource is located above the PA space.
* The memory range for the PCI root bus should not overlap
- * with the physical RAM
+ * with the physical RAM.
*/
pci_add_resource_offset(&resources, &controller->mem_space,
controller->mem_offset);
-
+ pci_add_resource(&resources, &controller->io_space);
controller->first_busno = next_busno;
bus = pci_scan_root_bus(NULL, next_busno, controller->ops,
controller, &resources);
controller->root_bus = bus;
next_busno = bus->busn_res.end + 1;
-
}
/* Do machine dependent PCI interrupt routing */
@@ -860,7 +903,6 @@ int __init pcibios_init(void)
* It allocates all of the resources (I/O memory, etc)
* associated with the devices read in above.
*/
-
pci_assign_unassigned_resources();
/* Record the I/O resources in the PCI controller structure. */
@@ -868,9 +910,6 @@ int __init pcibios_init(void)
struct pci_controller *controller = &pci_controllers[i];
gxio_trio_context_t *trio_context = controller->trio;
struct pci_bus *root_bus = pci_controllers[i].root_bus;
- struct pci_bus *next_bus;
- uint32_t bus_address_hi;
- struct pci_dev *dev;
int ret;
int j;
@@ -884,43 +923,12 @@ int __init pcibios_init(void)
/* Configure the max_payload_size values for this domain. */
fixup_read_and_payload_sizes(controller);
- list_for_each_entry(dev, &root_bus->devices, bus_list) {
- /* Find the PCI host controller, ie. the 1st bridge. */
- if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI &&
- (PCI_SLOT(dev->devfn) == 0)) {
- next_bus = dev->subordinate;
- pci_controllers[i].mem_resources[0] =
- *next_bus->resource[0];
- pci_controllers[i].mem_resources[1] =
- *next_bus->resource[1];
- pci_controllers[i].mem_resources[2] =
- *next_bus->resource[2];
-
- break;
- }
- }
-
- if (pci_controllers[i].mem_resources[1].flags & IORESOURCE_MEM)
- bus_address_hi =
- pci_controllers[i].mem_resources[1].start >> 32;
- else if (pci_controllers[i].mem_resources[2].flags & IORESOURCE_PREFETCH)
- bus_address_hi =
- pci_controllers[i].mem_resources[2].start >> 32;
- else {
- /* This is unlikely. */
- pr_err("PCI: no memory resources on TRIO %d mac %d\n",
- controller->trio_index, controller->mac);
- continue;
- }
-
- /*
- * Alloc a PIO region for PCI memory access for each RC port.
- */
+ /* Alloc a PIO region for PCI memory access for each RC port. */
ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0);
if (ret < 0) {
pr_err("PCI: MEM PIO alloc failure on TRIO %d mac %d, "
- "give up\n", controller->trio_index,
- controller->mac);
+ "give up\n", controller->trio_index,
+ controller->mac);
continue;
}
@@ -938,12 +946,45 @@ int __init pcibios_init(void)
0);
if (ret < 0) {
pr_err("PCI: MEM PIO init failure on TRIO %d mac %d, "
- "give up\n", controller->trio_index,
- controller->mac);
+ "give up\n", controller->trio_index,
+ controller->mac);
continue;
}
+#ifdef CONFIG_TILE_PCI_IO
+ /*
+ * Alloc a PIO region for PCI I/O space access for each RC port.
+ */
+ ret = gxio_trio_alloc_pio_regions(trio_context, 1, 0, 0);
+ if (ret < 0) {
+ pr_err("PCI: I/O PIO alloc failure on TRIO %d mac %d, "
+ "give up\n", controller->trio_index,
+ controller->mac);
+
+ continue;
+ }
+
+ controller->pio_io_index = ret;
+
+ /*
+ * For PIO IO, the bus_address_hi parameter is hard-coded 0
+ * because PCI I/O address space is 32-bit.
+ */
+ ret = gxio_trio_init_pio_region_aux(trio_context,
+ controller->pio_io_index,
+ controller->mac,
+ 0,
+ HV_TRIO_PIO_FLAG_IO_SPACE);
+ if (ret < 0) {
+ pr_err("PCI: I/O PIO init failure on TRIO %d mac %d, "
+ "give up\n", controller->trio_index,
+ controller->mac);
+
+ continue;
+ }
+#endif
+
/*
* Configure a Mem-Map region for each memory controller so
* that Linux can map all of its PA space to the PCI bus.
@@ -958,9 +999,9 @@ int __init pcibios_init(void)
0);
if (ret < 0) {
pr_err("PCI: Mem-Map alloc failure on TRIO %d "
- "mac %d for MC %d, give up\n",
- controller->trio_index,
- controller->mac, j);
+ "mac %d for MC %d, give up\n",
+ controller->trio_index,
+ controller->mac, j);
goto alloc_mem_map_failed;
}
@@ -991,9 +1032,9 @@ int __init pcibios_init(void)
GXIO_TRIO_ORDER_MODE_UNORDERED);
if (ret < 0) {
pr_err("PCI: Mem-Map init failure on TRIO %d "
- "mac %d for MC %d, give up\n",
- controller->trio_index,
- controller->mac, j);
+ "mac %d for MC %d, give up\n",
+ controller->trio_index,
+ controller->mac, j);
goto alloc_mem_map_failed;
}
@@ -1002,23 +1043,19 @@ int __init pcibios_init(void)
alloc_mem_map_failed:
break;
}
-
}
return 0;
}
subsys_initcall(pcibios_init);
-/* Note: to be deleted after Linux 3.6 merge. */
+/* No bus fixups needed. */
void pcibios_fixup_bus(struct pci_bus *bus)
{
}
-/*
- * This can be called from the generic PCI layer, but doesn't need to
- * do anything.
- */
-char *pcibios_setup(char *str)
+/* Process any "pci=" kernel boot arguments. */
+char *__init pcibios_setup(char *str)
{
if (!strcmp(str, "off")) {
pci_probe = 0;
@@ -1029,8 +1066,7 @@ char *pcibios_setup(char *str)
/*
* Enable memory address decoding, as appropriate, for the
- * device described by the 'dev' struct. The I/O decoding
- * is disabled, though the TILE-Gx supports I/O addressing.
+ * device described by the 'dev' struct.
*
* This is called from the generic PCI layer, and can be called
* for bridges or endpoints.
@@ -1040,13 +1076,24 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
return pci_enable_resources(dev, mask);
}
-/* Called for each device after PCI setup is done. */
+/*
+ * Called for each device after PCI setup is done.
+ * We initialize the PCI device capabilities conservatively, assuming that
+ * all devices can only address the 32-bit DMA space. The exception here is
+ * that the device dma_offset is set to the value that matches the 64-bit
+ * capable devices. This is OK because dma_offset is not used by legacy
+ * dma_ops, nor by the hybrid dma_ops's streaming DMAs, which are 64-bit ops.
+ * This implementation matches the kernel design of setting PCI devices'
+ * coherent_dma_mask to 0xffffffffull by default, allowing the device drivers
+ * to skip calling pci_set_consistent_dma_mask(DMA_BIT_MASK(32)).
+ */
static void pcibios_fixup_final(struct pci_dev *pdev)
{
- set_dma_ops(&pdev->dev, gx_pci_dma_map_ops);
+ set_dma_ops(&pdev->dev, gx_legacy_pci_dma_map_ops);
set_dma_offset(&pdev->dev, TILE_PCI_MEM_MAP_BASE_OFFSET);
pdev->dev.archdata.max_direct_dma_addr =
TILE_PCI_MAX_DIRECT_DMA_ADDRESS;
+ pdev->dev.coherent_dma_mask = TILE_PCI_MAX_DIRECT_DMA_ADDRESS;
}
DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_final);
@@ -1060,19 +1107,15 @@ void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
resource_size_t start;
resource_size_t end;
int trio_fd;
- int i, j;
+ int i;
start = phys_addr;
end = phys_addr + size - 1;
/*
- * In the following, each PCI controller's mem_resources[1]
- * represents its (non-prefetchable) PCI memory resource and
- * mem_resources[2] refers to its prefetchable PCI memory resource.
- * By searching phys_addr in each controller's mem_resources[], we can
+ * By searching phys_addr in each controller's mem_space, we can
* determine the controller that should accept the PCI memory access.
*/
-
for (i = 0; i < num_rc_controllers; i++) {
/*
* Skip controllers that are not properly initialized or
@@ -1081,25 +1124,18 @@ void __iomem *ioremap(resource_size_t phys_addr, unsigned long size)
if (pci_controllers[i].root_bus == NULL)
continue;
- for (j = 1; j < 3; j++) {
- bar_start =
- pci_controllers[i].mem_resources[j].start;
- bar_end =
- pci_controllers[i].mem_resources[j].end;
-
- if ((start >= bar_start) && (end <= bar_end)) {
+ bar_start = pci_controllers[i].mem_space.start;
+ bar_end = pci_controllers[i].mem_space.end;
- controller = &pci_controllers[i];
-
- goto got_it;
- }
+ if ((start >= bar_start) && (end <= bar_end)) {
+ controller = &pci_controllers[i];
+ break;
}
}
if (controller == NULL)
return NULL;
-got_it:
trio_fd = controller->trio->fd;
/* Convert the resource start to the bus address offset. */
@@ -1107,14 +1143,71 @@ got_it:
offset = HV_TRIO_PIO_OFFSET(controller->pio_mem_index) + start;
- /*
- * We need to keep the PCI bus address's in-page offset in the VA.
- */
+ /* We need to keep the PCI bus address's in-page offset in the VA. */
return iorpc_ioremap(trio_fd, offset, size) +
- (phys_addr & (PAGE_SIZE - 1));
+ (start & (PAGE_SIZE - 1));
}
EXPORT_SYMBOL(ioremap);
+#ifdef CONFIG_TILE_PCI_IO
+/* Map a PCI I/O address into VA space. */
+void __iomem *ioport_map(unsigned long port, unsigned int size)
+{
+ struct pci_controller *controller = NULL;
+ resource_size_t bar_start;
+ resource_size_t bar_end;
+ resource_size_t offset;
+ resource_size_t start;
+ resource_size_t end;
+ int trio_fd;
+ int i;
+
+ start = port;
+ end = port + size - 1;
+
+ /*
+ * By searching the port in each controller's io_space, we can
+ * determine the controller that should accept the PCI I/O access.
+ */
+ for (i = 0; i < num_rc_controllers; i++) {
+ /*
+ * Skip controllers that are not properly initialized or
+ * have down links.
+ */
+ if (pci_controllers[i].root_bus == NULL)
+ continue;
+
+ bar_start = pci_controllers[i].io_space.start;
+ bar_end = pci_controllers[i].io_space.end;
+
+ if ((start >= bar_start) && (end <= bar_end)) {
+ controller = &pci_controllers[i];
+ break;
+ }
+ }
+
+ if (controller == NULL)
+ return NULL;
+
+ trio_fd = controller->trio->fd;
+
+ /* Convert the resource start to the bus address offset. */
+ port -= controller->io_space.start;
+
+ offset = HV_TRIO_PIO_OFFSET(controller->pio_io_index) + port;
+
+ /* We need to keep the PCI bus address's in-page offset in the VA. */
+ return iorpc_ioremap(trio_fd, offset, size) + (port & (PAGE_SIZE - 1));
+}
+EXPORT_SYMBOL(ioport_map);
+
+void ioport_unmap(void __iomem *addr)
+{
+ iounmap(addr);
+}
+EXPORT_SYMBOL(ioport_unmap);
+#endif
+
void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
{
iounmap(addr);
@@ -1136,7 +1229,6 @@ EXPORT_SYMBOL(pci_iounmap);
* offset is in bytes, from the start of config space for the
* specified bus & device.
*/
-
static int tile_cfg_read(struct pci_bus *bus, unsigned int devfn, int offset,
int size, u32 *val)
{
@@ -1186,7 +1278,6 @@ static int tile_cfg_read(struct pci_bus *bus, unsigned int devfn, int offset,
* Accesses to the directly attached device have to be
* sent as type-0 configs.
*/
-
if (busnum == (controller->first_busno + 1)) {
/*
* There is only one device off of our built-in P2P bridge.
@@ -1208,9 +1299,8 @@ static int tile_cfg_read(struct pci_bus *bus, unsigned int devfn, int offset,
* Note that we don't set the mac field in cfg_addr because the
* mapping is per port.
*/
-
mmio_addr = trio_context->mmio_base_pio_cfg[controller->mac] +
- cfg_addr.word;
+ cfg_addr.word;
valid_device:
@@ -1314,7 +1404,6 @@ static int tile_cfg_write(struct pci_bus *bus, unsigned int devfn, int offset,
* Accesses to the directly attached device have to be
* sent as type-0 configs.
*/
-
if (busnum == (controller->first_busno + 1)) {
/*
* There is only one device off of our built-in P2P bridge.
@@ -1336,7 +1425,6 @@ static int tile_cfg_write(struct pci_bus *bus, unsigned int devfn, int offset,
* Note that we don't set the mac field in cfg_addr because the
* mapping is per port.
*/
-
mmio_addr = trio_context->mmio_base_pio_cfg[controller->mac] +
cfg_addr.word;
@@ -1374,11 +1462,8 @@ static struct pci_ops tile_cfg_ops = {
};
-/*
- * MSI support starts here.
- */
-static unsigned int
-tilegx_msi_startup(struct irq_data *d)
+/* MSI support starts here. */
+static unsigned int tilegx_msi_startup(struct irq_data *d)
{
if (d->msi_desc)
unmask_msi_irq(d);
@@ -1386,21 +1471,18 @@ tilegx_msi_startup(struct irq_data *d)
return 0;
}
-static void
-tilegx_msi_ack(struct irq_data *d)
+static void tilegx_msi_ack(struct irq_data *d)
{
__insn_mtspr(SPR_IPI_EVENT_RESET_K, 1UL << d->irq);
}
-static void
-tilegx_msi_mask(struct irq_data *d)
+static void tilegx_msi_mask(struct irq_data *d)
{
mask_msi_irq(d);
__insn_mtspr(SPR_IPI_MASK_SET_K, 1UL << d->irq);
}
-static void
-tilegx_msi_unmask(struct irq_data *d)
+static void tilegx_msi_unmask(struct irq_data *d)
{
__insn_mtspr(SPR_IPI_MASK_RESET_K, 1UL << d->irq);
unmask_msi_irq(d);
@@ -1457,32 +1539,55 @@ int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
trio_context = controller->trio;
/*
- * Allocate the Mem-Map that will accept the MSI write and
- * trigger the TILE-side interrupts.
+ * Allocate a scatter-queue that will accept the MSI write and
+ * trigger the TILE-side interrupts. We use the scatter-queue regions
+ * before the mem map regions, because the latter are needed by more
+ * applications.
*/
- mem_map = gxio_trio_alloc_memory_maps(trio_context, 1, 0, 0);
- if (mem_map < 0) {
- dev_printk(KERN_INFO, &pdev->dev,
- "%s Mem-Map alloc failure. "
- "Failed to initialize MSI interrupts. "
- "Falling back to legacy interrupts.\n",
- desc->msi_attrib.is_msix ? "MSI-X" : "MSI");
+ mem_map = gxio_trio_alloc_scatter_queues(trio_context, 1, 0, 0);
+ if (mem_map >= 0) {
+ TRIO_MAP_SQ_DOORBELL_FMT_t doorbell_template = {{
+ .pop = 0,
+ .doorbell = 1,
+ }};
+
+ mem_map += TRIO_NUM_MAP_MEM_REGIONS;
+ mem_map_base = MEM_MAP_INTR_REGIONS_BASE +
+ mem_map * MEM_MAP_INTR_REGION_SIZE;
+ mem_map_limit = mem_map_base + MEM_MAP_INTR_REGION_SIZE - 1;
+
+ msi_addr = mem_map_base + MEM_MAP_INTR_REGION_SIZE - 8;
+ msg.data = (unsigned int)doorbell_template.word;
+ } else {
+ /* SQ regions are out, allocate from map mem regions. */
+ mem_map = gxio_trio_alloc_memory_maps(trio_context, 1, 0, 0);
+ if (mem_map < 0) {
+ dev_printk(KERN_INFO, &pdev->dev,
+ "%s Mem-Map alloc failure. "
+ "Failed to initialize MSI interrupts. "
+ "Falling back to legacy interrupts.\n",
+ desc->msi_attrib.is_msix ? "MSI-X" : "MSI");
+ ret = -ENOMEM;
+ goto msi_mem_map_alloc_failure;
+ }
- ret = -ENOMEM;
- goto msi_mem_map_alloc_failure;
+ mem_map_base = MEM_MAP_INTR_REGIONS_BASE +
+ mem_map * MEM_MAP_INTR_REGION_SIZE;
+ mem_map_limit = mem_map_base + MEM_MAP_INTR_REGION_SIZE - 1;
+
+ msi_addr = mem_map_base + TRIO_MAP_MEM_REG_INT3 -
+ TRIO_MAP_MEM_REG_INT0;
+
+ msg.data = mem_map;
}
/* We try to distribute different IRQs to different tiles. */
cpu = tile_irq_cpu(irq);
/*
- * Now call up to the HV to configure the Mem-Map interrupt and
+ * Now call up to the HV to configure the MSI interrupt and
* set up the IPI binding.
*/
- mem_map_base = MEM_MAP_INTR_REGIONS_BASE +
- mem_map * MEM_MAP_INTR_REGION_SIZE;
- mem_map_limit = mem_map_base + MEM_MAP_INTR_REGION_SIZE - 1;
-
ret = gxio_trio_config_msi_intr(trio_context, cpu_x(cpu), cpu_y(cpu),
KERNEL_PL, irq, controller->mac,
mem_map, mem_map_base, mem_map_limit,
@@ -1495,13 +1600,9 @@ int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
irq_set_msi_desc(irq, desc);
- msi_addr = mem_map_base + TRIO_MAP_MEM_REG_INT3 - TRIO_MAP_MEM_REG_INT0;
-
msg.address_hi = msi_addr >> 32;
msg.address_lo = msi_addr & 0xffffffff;
- msg.data = mem_map;
-
write_msi_msg(irq, &msg);
irq_set_chip_and_handler(irq, &tilegx_msi_chip, handle_level_irq);
irq_set_handler_data(irq, controller);
diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c
index dafc447b5125..681100c59fda 100644
--- a/arch/tile/kernel/proc.c
+++ b/arch/tile/kernel/proc.c
@@ -113,7 +113,6 @@ arch_initcall(proc_tile_init);
* Support /proc/sys/tile directory
*/
-#ifndef __tilegx__ /* FIXME: GX: no support for unaligned access yet */
static ctl_table unaligned_subtable[] = {
{
.procname = "enabled",
@@ -160,4 +159,3 @@ static int __init proc_sys_tile_init(void)
}
arch_initcall(proc_sys_tile_init);
-#endif
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 8ac304484f98..16ed58948757 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -33,6 +33,7 @@
#include <asm/syscalls.h>
#include <asm/traps.h>
#include <asm/setup.h>
+#include <asm/uaccess.h>
#ifdef CONFIG_HARDWALL
#include <asm/hardwall.h>
#endif
@@ -74,19 +75,6 @@ void arch_release_thread_info(struct thread_info *info)
{
struct single_step_state *step_state = info->step_state;
-#ifdef CONFIG_HARDWALL
- /*
- * We free a thread_info from the context of the task that has
- * been scheduled next, so the original task is already dead.
- * Calling deactivate here just frees up the data structures.
- * If the task we're freeing held the last reference to a
- * hardwall fd, it would have been released prior to this point
- * anyway via exit_files(), and the hardwall_task.info pointers
- * would be NULL by now.
- */
- hardwall_deactivate_all(info->task);
-#endif
-
if (step_state) {
/*
@@ -160,6 +148,14 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
*/
task_thread_info(p)->step_state = NULL;
+#ifdef __tilegx__
+ /*
+ * Do not clone unalign jit fixup from the parent; each thread
+ * must allocate its own on demand.
+ */
+ task_thread_info(p)->unalign_jit_base = NULL;
+#endif
+
/*
* Copy the registers onto the kernel stack so the
* return-from-interrupt code will reload it into registers.
@@ -191,16 +187,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
memset(&p->thread.dma_async_tlb, 0, sizeof(struct async_tlb));
#endif
-#if CHIP_HAS_SN_PROC()
- /* Likewise, the new thread is not running static processor code. */
- p->thread.sn_proc_running = 0;
- memset(&p->thread.sn_async_tlb, 0, sizeof(struct async_tlb));
-#endif
-
-#if CHIP_HAS_PROC_STATUS_SPR()
/* New thread has its miscellaneous processor state bits clear. */
p->thread.proc_status = 0;
-#endif
#ifdef CONFIG_HARDWALL
/* New thread does not own any networks. */
@@ -218,19 +206,32 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
return 0;
}
+int set_unalign_ctl(struct task_struct *tsk, unsigned int val)
+{
+ task_thread_info(tsk)->align_ctl = val;
+ return 0;
+}
+
+int get_unalign_ctl(struct task_struct *tsk, unsigned long adr)
+{
+ return put_user(task_thread_info(tsk)->align_ctl,
+ (unsigned int __user *)adr);
+}
+
+static struct task_struct corrupt_current = { .comm = "<corrupt>" };
+
/*
* Return "current" if it looks plausible, or else a pointer to a dummy.
* This can be helpful if we are just trying to emit a clean panic.
*/
struct task_struct *validate_current(void)
{
- static struct task_struct corrupt = { .comm = "<corrupt>" };
struct task_struct *tsk = current;
if (unlikely((unsigned long)tsk < PAGE_OFFSET ||
(high_memory && (void *)tsk > high_memory) ||
((unsigned long)tsk & (__alignof__(*tsk) - 1)) != 0)) {
pr_err("Corrupt 'current' %p (sp %#lx)\n", tsk, stack_pointer);
- tsk = &corrupt;
+ tsk = &corrupt_current;
}
return tsk;
}
@@ -369,15 +370,11 @@ static void save_arch_state(struct thread_struct *t)
t->system_save[2] = __insn_mfspr(SPR_SYSTEM_SAVE_0_2);
t->system_save[3] = __insn_mfspr(SPR_SYSTEM_SAVE_0_3);
t->intctrl_0 = __insn_mfspr(SPR_INTCTRL_0_STATUS);
-#if CHIP_HAS_PROC_STATUS_SPR()
t->proc_status = __insn_mfspr(SPR_PROC_STATUS);
-#endif
#if !CHIP_HAS_FIXED_INTVEC_BASE()
t->interrupt_vector_base = __insn_mfspr(SPR_INTERRUPT_VECTOR_BASE_0);
#endif
-#if CHIP_HAS_TILE_RTF_HWM()
t->tile_rtf_hwm = __insn_mfspr(SPR_TILE_RTF_HWM);
-#endif
#if CHIP_HAS_DSTREAM_PF()
t->dstream_pf = __insn_mfspr(SPR_DSTREAM_PF);
#endif
@@ -398,15 +395,11 @@ static void restore_arch_state(const struct thread_struct *t)
__insn_mtspr(SPR_SYSTEM_SAVE_0_2, t->system_save[2]);
__insn_mtspr(SPR_SYSTEM_SAVE_0_3, t->system_save[3]);
__insn_mtspr(SPR_INTCTRL_0_STATUS, t->intctrl_0);
-#if CHIP_HAS_PROC_STATUS_SPR()
__insn_mtspr(SPR_PROC_STATUS, t->proc_status);
-#endif
#if !CHIP_HAS_FIXED_INTVEC_BASE()
__insn_mtspr(SPR_INTERRUPT_VECTOR_BASE_0, t->interrupt_vector_base);
#endif
-#if CHIP_HAS_TILE_RTF_HWM()
__insn_mtspr(SPR_TILE_RTF_HWM, t->tile_rtf_hwm);
-#endif
#if CHIP_HAS_DSTREAM_PF()
__insn_mtspr(SPR_DSTREAM_PF, t->dstream_pf);
#endif
@@ -415,26 +408,11 @@ static void restore_arch_state(const struct thread_struct *t)
void _prepare_arch_switch(struct task_struct *next)
{
-#if CHIP_HAS_SN_PROC()
- int snctl;
-#endif
#if CHIP_HAS_TILE_DMA()
struct tile_dma_state *dma = &current->thread.tile_dma_state;
if (dma->enabled)
save_tile_dma_state(dma);
#endif
-#if CHIP_HAS_SN_PROC()
- /*
- * Suspend the static network processor if it was running.
- * We do not suspend the fabric itself, just like we don't
- * try to suspend the UDN.
- */
- snctl = __insn_mfspr(SPR_SNCTL);
- current->thread.sn_proc_running =
- (snctl & SPR_SNCTL__FRZPROC_MASK) == 0;
- if (current->thread.sn_proc_running)
- __insn_mtspr(SPR_SNCTL, snctl | SPR_SNCTL__FRZPROC_MASK);
-#endif
}
@@ -462,17 +440,6 @@ struct task_struct *__sched _switch_to(struct task_struct *prev,
/* Restore other arch state. */
restore_arch_state(&next->thread);
-#if CHIP_HAS_SN_PROC()
- /*
- * Restart static network processor in the new process
- * if it was running before.
- */
- if (next->thread.sn_proc_running) {
- int snctl = __insn_mfspr(SPR_SNCTL);
- __insn_mtspr(SPR_SNCTL, snctl & ~SPR_SNCTL__FRZPROC_MASK);
- }
-#endif
-
#ifdef CONFIG_HARDWALL
/* Enable or disable access to the network registers appropriately. */
hardwall_switch_tasks(prev, next);
@@ -514,7 +481,7 @@ int do_work_pending(struct pt_regs *regs, u32 thread_info_flags)
schedule();
return 1;
}
-#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
+#if CHIP_HAS_TILE_DMA()
if (thread_info_flags & _TIF_ASYNC_TLB) {
do_async_page_fault(regs);
return 1;
@@ -564,7 +531,15 @@ void flush_thread(void)
*/
void exit_thread(void)
{
- /* Nothing */
+#ifdef CONFIG_HARDWALL
+ /*
+ * Remove the task from the list of tasks that are associated
+ * with any live hardwalls. (If the task that is exiting held
+ * the last reference to a hardwall fd, it would already have
+ * been released and deactivated at this point.)
+ */
+ hardwall_deactivate_all(current);
+#endif
}
void show_regs(struct pt_regs *regs)
@@ -573,23 +548,24 @@ void show_regs(struct pt_regs *regs)
int i;
pr_err("\n");
- show_regs_print_info(KERN_ERR);
+ if (tsk != &corrupt_current)
+ show_regs_print_info(KERN_ERR);
#ifdef __tilegx__
- for (i = 0; i < 51; i += 3)
+ for (i = 0; i < 17; i++)
pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT" r%-2d: "REGFMT"\n",
- i, regs->regs[i], i+1, regs->regs[i+1],
- i+2, regs->regs[i+2]);
- pr_err(" r51: "REGFMT" r52: "REGFMT" tp : "REGFMT"\n",
- regs->regs[51], regs->regs[52], regs->tp);
+ i, regs->regs[i], i+18, regs->regs[i+18],
+ i+36, regs->regs[i+36]);
+ pr_err(" r17: "REGFMT" r35: "REGFMT" tp : "REGFMT"\n",
+ regs->regs[17], regs->regs[35], regs->tp);
pr_err(" sp : "REGFMT" lr : "REGFMT"\n", regs->sp, regs->lr);
#else
- for (i = 0; i < 52; i += 4)
+ for (i = 0; i < 13; i++)
pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT
" r%-2d: "REGFMT" r%-2d: "REGFMT"\n",
- i, regs->regs[i], i+1, regs->regs[i+1],
- i+2, regs->regs[i+2], i+3, regs->regs[i+3]);
- pr_err(" r52: "REGFMT" tp : "REGFMT" sp : "REGFMT" lr : "REGFMT"\n",
- regs->regs[52], regs->tp, regs->sp, regs->lr);
+ i, regs->regs[i], i+14, regs->regs[i+14],
+ i+27, regs->regs[i+27], i+40, regs->regs[i+40]);
+ pr_err(" r13: "REGFMT" tp : "REGFMT" sp : "REGFMT" lr : "REGFMT"\n",
+ regs->regs[13], regs->tp, regs->sp, regs->lr);
#endif
pr_err(" pc : "REGFMT" ex1: %ld faultnum: %ld\n",
regs->pc, regs->ex1, regs->faultnum);
diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c
index 0f83ed4602b2..de98c6ddf136 100644
--- a/arch/tile/kernel/ptrace.c
+++ b/arch/tile/kernel/ptrace.c
@@ -265,6 +265,21 @@ int do_syscall_trace_enter(struct pt_regs *regs)
void do_syscall_trace_exit(struct pt_regs *regs)
{
+ long errno;
+
+ /*
+ * The standard tile calling convention returns the value (or negative
+ * errno) in r0, and zero (or positive errno) in r1.
+ * It saves a couple of cycles on the hot path to do this work in
+ * registers only as we return, rather than updating the in-memory
+ * struct ptregs.
+ */
+ errno = (long) regs->regs[0];
+ if (errno < 0 && errno > -4096)
+ regs->regs[1] = -errno;
+ else
+ regs->regs[1] = 0;
+
if (test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall_exit(regs, 0);
@@ -272,7 +287,7 @@ void do_syscall_trace_exit(struct pt_regs *regs)
trace_sys_exit(regs, regs->regs[0]);
}
-void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
+void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs)
{
struct siginfo info;
@@ -288,5 +303,5 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
/* Handle synthetic interrupt delivered only by the simulator. */
void __kprobes do_breakpoint(struct pt_regs* regs, int fault_num)
{
- send_sigtrap(current, regs, fault_num);
+ send_sigtrap(current, regs);
}
diff --git a/arch/tile/kernel/reboot.c b/arch/tile/kernel/reboot.c
index d1b5c913ae72..6c5d2c070a12 100644
--- a/arch/tile/kernel/reboot.c
+++ b/arch/tile/kernel/reboot.c
@@ -27,7 +27,6 @@
void machine_halt(void)
{
- warn_early_printk();
arch_local_irq_disable_all();
smp_send_stop();
hv_halt();
@@ -35,7 +34,6 @@ void machine_halt(void)
void machine_power_off(void)
{
- warn_early_printk();
arch_local_irq_disable_all();
smp_send_stop();
hv_power_off();
diff --git a/arch/tile/kernel/regs_32.S b/arch/tile/kernel/regs_32.S
index c12280c2d904..542cae17a93a 100644
--- a/arch/tile/kernel/regs_32.S
+++ b/arch/tile/kernel/regs_32.S
@@ -20,7 +20,7 @@
#include <asm/switch_to.h>
/*
- * See <asm/system.h>; called with prev and next task_struct pointers.
+ * See <asm/switch_to.h>; called with prev and next task_struct pointers.
* "prev" is returned in r0 for _switch_to and also for ret_from_fork.
*
* We want to save pc/sp in "prev", and get the new pc/sp from "next".
@@ -39,7 +39,7 @@
*/
#if CALLEE_SAVED_REGS_COUNT != 24
-# error Mismatch between <asm/system.h> and kernel/entry.S
+# error Mismatch between <asm/switch_to.h> and kernel/entry.S
#endif
#define FRAME_SIZE ((2 + CALLEE_SAVED_REGS_COUNT) * 4)
diff --git a/arch/tile/kernel/regs_64.S b/arch/tile/kernel/regs_64.S
index 0829fd01fa30..bbffcc6f340f 100644
--- a/arch/tile/kernel/regs_64.S
+++ b/arch/tile/kernel/regs_64.S
@@ -20,7 +20,7 @@
#include <asm/switch_to.h>
/*
- * See <asm/system.h>; called with prev and next task_struct pointers.
+ * See <asm/switch_to.h>; called with prev and next task_struct pointers.
* "prev" is returned in r0 for _switch_to and also for ret_from_fork.
*
* We want to save pc/sp in "prev", and get the new pc/sp from "next".
@@ -39,7 +39,7 @@
*/
#if CALLEE_SAVED_REGS_COUNT != 24
-# error Mismatch between <asm/system.h> and kernel/entry.S
+# error Mismatch between <asm/switch_to.h> and kernel/entry.S
#endif
#define FRAME_SIZE ((2 + CALLEE_SAVED_REGS_COUNT) * 8)
diff --git a/arch/tile/kernel/relocate_kernel_32.S b/arch/tile/kernel/relocate_kernel_32.S
index 010b418515f8..e44fbcf8cbd5 100644
--- a/arch/tile/kernel/relocate_kernel_32.S
+++ b/arch/tile/kernel/relocate_kernel_32.S
@@ -20,15 +20,6 @@
#include <asm/page.h>
#include <hv/hypervisor.h>
-#define ___hvb MEM_SV_INTRPT + HV_GLUE_START_CPA
-
-#define ___hv_dispatch(f) (___hvb + (HV_DISPATCH_ENTRY_SIZE * f))
-
-#define ___hv_console_putc ___hv_dispatch(HV_DISPATCH_CONSOLE_PUTC)
-#define ___hv_halt ___hv_dispatch(HV_DISPATCH_HALT)
-#define ___hv_reexec ___hv_dispatch(HV_DISPATCH_REEXEC)
-#define ___hv_flush_remote ___hv_dispatch(HV_DISPATCH_FLUSH_REMOTE)
-
#undef RELOCATE_NEW_KERNEL_VERBOSE
STD_ENTRY(relocate_new_kernel)
@@ -43,8 +34,8 @@ STD_ENTRY(relocate_new_kernel)
addi sp, sp, -8
/* we now have a stack (whether we need one or not) */
- moveli r40, lo16(___hv_console_putc)
- auli r40, r40, ha16(___hv_console_putc)
+ moveli r40, lo16(hv_console_putc)
+ auli r40, r40, ha16(hv_console_putc)
#ifdef RELOCATE_NEW_KERNEL_VERBOSE
moveli r0, 'r'
@@ -86,7 +77,6 @@ STD_ENTRY(relocate_new_kernel)
move r30, sp
addi sp, sp, -8
-#if CHIP_HAS_CBOX_HOME_MAP()
/*
* On TILEPro, we need to flush all tiles' caches, since we may
* have been doing hash-for-home caching there. Note that we
@@ -114,15 +104,14 @@ STD_ENTRY(relocate_new_kernel)
}
{
move r8, zero /* asids */
- moveli r20, lo16(___hv_flush_remote)
+ moveli r20, lo16(hv_flush_remote)
}
{
move r9, zero /* asidcount */
- auli r20, r20, ha16(___hv_flush_remote)
+ auli r20, r20, ha16(hv_flush_remote)
}
jalr r20
-#endif
/* r33 is destination pointer, default to zero */
@@ -175,8 +164,8 @@ STD_ENTRY(relocate_new_kernel)
move r0, r32
moveli r1, 0 /* arg to hv_reexec is 64 bits */
- moveli r41, lo16(___hv_reexec)
- auli r41, r41, ha16(___hv_reexec)
+ moveli r41, lo16(hv_reexec)
+ auli r41, r41, ha16(hv_reexec)
jalr r41
@@ -267,8 +256,8 @@ STD_ENTRY(relocate_new_kernel)
moveli r0, '\n'
jalr r40
.Lhalt:
- moveli r41, lo16(___hv_halt)
- auli r41, r41, ha16(___hv_halt)
+ moveli r41, lo16(hv_halt)
+ auli r41, r41, ha16(hv_halt)
jalr r41
STD_ENDPROC(relocate_new_kernel)
diff --git a/arch/tile/kernel/relocate_kernel_64.S b/arch/tile/kernel/relocate_kernel_64.S
index 1c09a4f5a4ea..d9d8cf6176e8 100644
--- a/arch/tile/kernel/relocate_kernel_64.S
+++ b/arch/tile/kernel/relocate_kernel_64.S
@@ -34,11 +34,11 @@ STD_ENTRY(relocate_new_kernel)
addi sp, sp, -8
/* we now have a stack (whether we need one or not) */
+#ifdef RELOCATE_NEW_KERNEL_VERBOSE
moveli r40, hw2_last(hv_console_putc)
shl16insli r40, r40, hw1(hv_console_putc)
shl16insli r40, r40, hw0(hv_console_putc)
-#ifdef RELOCATE_NEW_KERNEL_VERBOSE
moveli r0, 'r'
jalr r40
@@ -78,7 +78,6 @@ STD_ENTRY(relocate_new_kernel)
move r30, sp
addi sp, sp, -16
-#if CHIP_HAS_CBOX_HOME_MAP()
/*
* On TILE-GX, we need to flush all tiles' caches, since we may
* have been doing hash-for-home caching there. Note that we
@@ -116,7 +115,6 @@ STD_ENTRY(relocate_new_kernel)
shl16insli r20, r20, hw0(hv_flush_remote)
jalr r20
-#endif
/* r33 is destination pointer, default to zero */
@@ -176,10 +174,12 @@ STD_ENTRY(relocate_new_kernel)
/* we should not get here */
+#ifdef RELOCATE_NEW_KERNEL_VERBOSE
moveli r0, '?'
jalr r40
moveli r0, '\n'
jalr r40
+#endif
j .Lhalt
@@ -237,7 +237,9 @@ STD_ENTRY(relocate_new_kernel)
j .Lloop
-.Lerr: moveli r0, 'e'
+.Lerr:
+#ifdef RELOCATE_NEW_KERNEL_VERBOSE
+ moveli r0, 'e'
jalr r40
moveli r0, 'r'
jalr r40
@@ -245,6 +247,7 @@ STD_ENTRY(relocate_new_kernel)
jalr r40
moveli r0, '\n'
jalr r40
+#endif
.Lhalt:
moveli r41, hw2_last(hv_halt)
shl16insli r41, r41, hw1(hv_halt)
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index eceb8344280f..4c34caea9dd3 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -154,6 +154,65 @@ static int __init setup_maxnodemem(char *str)
}
early_param("maxnodemem", setup_maxnodemem);
+struct memmap_entry {
+ u64 addr; /* start of memory segment */
+ u64 size; /* size of memory segment */
+};
+static struct memmap_entry memmap_map[64];
+static int memmap_nr;
+
+static void add_memmap_region(u64 addr, u64 size)
+{
+ if (memmap_nr >= ARRAY_SIZE(memmap_map)) {
+ pr_err("Ooops! Too many entries in the memory map!\n");
+ return;
+ }
+ memmap_map[memmap_nr].addr = addr;
+ memmap_map[memmap_nr].size = size;
+ memmap_nr++;
+}
+
+static int __init setup_memmap(char *p)
+{
+ char *oldp;
+ u64 start_at, mem_size;
+
+ if (!p)
+ return -EINVAL;
+
+ if (!strncmp(p, "exactmap", 8)) {
+ pr_err("\"memmap=exactmap\" not valid on tile\n");
+ return 0;
+ }
+
+ oldp = p;
+ mem_size = memparse(p, &p);
+ if (p == oldp)
+ return -EINVAL;
+
+ if (*p == '@') {
+ pr_err("\"memmap=nn@ss\" (force RAM) invalid on tile\n");
+ } else if (*p == '#') {
+ pr_err("\"memmap=nn#ss\" (force ACPI data) invalid on tile\n");
+ } else if (*p == '$') {
+ start_at = memparse(p+1, &p);
+ add_memmap_region(start_at, mem_size);
+ } else {
+ if (mem_size == 0)
+ return -EINVAL;
+ maxmem_pfn = (mem_size >> HPAGE_SHIFT) <<
+ (HPAGE_SHIFT - PAGE_SHIFT);
+ }
+ return *p == '\0' ? 0 : -EINVAL;
+}
+early_param("memmap", setup_memmap);
+
+static int __init setup_mem(char *str)
+{
+ return setup_maxmem(str);
+}
+early_param("mem", setup_mem); /* compatibility with x86 */
+
static int __init setup_isolnodes(char *str)
{
char buf[MAX_NUMNODES * 5];
@@ -209,7 +268,7 @@ early_param("vmalloc", parse_vmalloc);
/*
* Determine for each controller where its lowmem is mapped and how much of
* it is mapped there. On controller zero, the first few megabytes are
- * already mapped in as code at MEM_SV_INTRPT, so in principle we could
+ * already mapped in as code at MEM_SV_START, so in principle we could
* start our data mappings higher up, but for now we don't bother, to avoid
* additional confusion.
*
@@ -614,11 +673,12 @@ static void __init setup_bootmem_allocator_node(int i)
/*
* Throw away any memory aliased by the PCI region.
*/
- if (pci_reserve_start_pfn < end && pci_reserve_end_pfn > start)
- reserve_bootmem(PFN_PHYS(pci_reserve_start_pfn),
- PFN_PHYS(pci_reserve_end_pfn -
- pci_reserve_start_pfn),
+ if (pci_reserve_start_pfn < end && pci_reserve_end_pfn > start) {
+ start = max(pci_reserve_start_pfn, start);
+ end = min(pci_reserve_end_pfn, end);
+ reserve_bootmem(PFN_PHYS(start), PFN_PHYS(end - start),
BOOTMEM_EXCLUSIVE);
+ }
#endif
}
@@ -628,6 +688,31 @@ static void __init setup_bootmem_allocator(void)
for (i = 0; i < MAX_NUMNODES; ++i)
setup_bootmem_allocator_node(i);
+ /* Reserve any memory excluded by "memmap" arguments. */
+ for (i = 0; i < memmap_nr; ++i) {
+ struct memmap_entry *m = &memmap_map[i];
+ reserve_bootmem(m->addr, m->size, 0);
+ }
+
+#ifdef CONFIG_BLK_DEV_INITRD
+ if (initrd_start) {
+ /* Make sure the initrd memory region is not modified. */
+ if (reserve_bootmem(initrd_start, initrd_end - initrd_start,
+ BOOTMEM_EXCLUSIVE)) {
+ pr_crit("The initrd memory region has been polluted. Disabling it.\n");
+ initrd_start = 0;
+ initrd_end = 0;
+ } else {
+ /*
+ * Translate initrd_start & initrd_end from PA to VA for
+ * future access.
+ */
+ initrd_start += PAGE_OFFSET;
+ initrd_end += PAGE_OFFSET;
+ }
+ }
+#endif
+
#ifdef CONFIG_KEXEC
if (crashk_res.start != crashk_res.end)
reserve_bootmem(crashk_res.start, resource_size(&crashk_res), 0);
@@ -961,9 +1046,6 @@ void setup_cpu(int boot)
arch_local_irq_unmask(INT_DMATLB_MISS);
arch_local_irq_unmask(INT_DMATLB_ACCESS);
#endif
-#if CHIP_HAS_SN_PROC()
- arch_local_irq_unmask(INT_SNITLB_MISS);
-#endif
#ifdef __tilegx__
arch_local_irq_unmask(INT_SINGLE_STEP_K);
#endif
@@ -978,10 +1060,6 @@ void setup_cpu(int boot)
/* Static network is not restricted. */
__insn_mtspr(SPR_MPL_SN_ACCESS_SET_0, 1);
#endif
-#if CHIP_HAS_SN_PROC()
- __insn_mtspr(SPR_MPL_SN_NOTIFY_SET_0, 1);
- __insn_mtspr(SPR_MPL_SN_CPL_SET_0, 1);
-#endif
/*
* Set the MPL for interrupt control 0 & 1 to the corresponding
@@ -1029,6 +1107,10 @@ static void __init load_hv_initrd(void)
int fd, rc;
void *initrd;
+ /* If initrd has already been set, skip initramfs file in hvfs. */
+ if (initrd_start)
+ return;
+
fd = hv_fs_findfile((HV_VirtAddr) initramfs_file);
if (fd == HV_ENOENT) {
if (set_initramfs_file) {
@@ -1067,6 +1149,25 @@ void __init free_initrd_mem(unsigned long begin, unsigned long end)
free_bootmem(__pa(begin), end - begin);
}
+static int __init setup_initrd(char *str)
+{
+ char *endp;
+ unsigned long initrd_size;
+
+ initrd_size = str ? simple_strtoul(str, &endp, 0) : 0;
+ if (initrd_size == 0 || *endp != '@')
+ return -EINVAL;
+
+ initrd_start = simple_strtoul(endp+1, &endp, 0);
+ if (initrd_start == 0)
+ return -EINVAL;
+
+ initrd_end = initrd_start + initrd_size;
+
+ return 0;
+}
+early_param("initrd", setup_initrd);
+
#else
static inline void load_hv_initrd(void) {}
#endif /* CONFIG_BLK_DEV_INITRD */
@@ -1134,7 +1235,7 @@ static void __init validate_va(void)
#ifndef __tilegx__ /* FIXME: GX: probably some validation relevant here */
/*
* Similarly, make sure we're only using allowed VAs.
- * We assume we can contiguously use MEM_USER_INTRPT .. MEM_HV_INTRPT,
+ * We assume we can contiguously use MEM_USER_INTRPT .. MEM_HV_START,
* and 0 .. KERNEL_HIGH_VADDR.
* In addition, make sure we CAN'T use the end of memory, since
* we use the last chunk of each pgd for the pgd_list.
@@ -1149,7 +1250,7 @@ static void __init validate_va(void)
if (range.size == 0)
break;
if (range.start <= MEM_USER_INTRPT &&
- range.start + range.size >= MEM_HV_INTRPT)
+ range.start + range.size >= MEM_HV_START)
user_kernel_ok = 1;
if (range.start == 0)
max_va = range.size;
@@ -1183,7 +1284,6 @@ static void __init validate_va(void)
struct cpumask __write_once cpu_lotar_map;
EXPORT_SYMBOL(cpu_lotar_map);
-#if CHIP_HAS_CBOX_HOME_MAP()
/*
* hash_for_home_map lists all the tiles that hash-for-home data
* will be cached on. Note that this may includes tiles that are not
@@ -1193,7 +1293,6 @@ EXPORT_SYMBOL(cpu_lotar_map);
*/
struct cpumask hash_for_home_map;
EXPORT_SYMBOL(hash_for_home_map);
-#endif
/*
* cpu_cacheable_map lists all the cpus whose caches the hypervisor can
@@ -1286,7 +1385,6 @@ static void __init setup_cpu_maps(void)
cpu_lotar_map = *cpu_possible_mask;
}
-#if CHIP_HAS_CBOX_HOME_MAP()
/* Retrieve set of CPUs used for hash-for-home caching */
rc = hv_inquire_tiles(HV_INQ_TILES_HFH_CACHE,
(HV_VirtAddr) hash_for_home_map.bits,
@@ -1294,9 +1392,6 @@ static void __init setup_cpu_maps(void)
if (rc < 0)
early_panic("hv_inquire_tiles(HFH_CACHE) failed: rc %d\n", rc);
cpumask_or(&cpu_cacheable_map, cpu_possible_mask, &hash_for_home_map);
-#else
- cpu_cacheable_map = *cpu_possible_mask;
-#endif
}
@@ -1492,7 +1587,7 @@ void __init setup_per_cpu_areas(void)
/* Update the vmalloc mapping and page home. */
unsigned long addr = (unsigned long)ptr + i;
- pte_t *ptep = virt_to_pte(NULL, addr);
+ pte_t *ptep = virt_to_kpte(addr);
pte_t pte = *ptep;
BUG_ON(pfn != pte_pfn(pte));
pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_TILE_L3);
@@ -1501,12 +1596,12 @@ void __init setup_per_cpu_areas(void)
/* Update the lowmem mapping for consistency. */
lowmem_va = (unsigned long)pfn_to_kaddr(pfn);
- ptep = virt_to_pte(NULL, lowmem_va);
+ ptep = virt_to_kpte(lowmem_va);
if (pte_huge(*ptep)) {
printk(KERN_DEBUG "early shatter of huge page"
" at %#lx\n", lowmem_va);
shatter_pmd((pmd_t *)ptep);
- ptep = virt_to_pte(NULL, lowmem_va);
+ ptep = virt_to_kpte(lowmem_va);
BUG_ON(pte_huge(*ptep));
}
BUG_ON(pfn != pte_pfn(*ptep));
@@ -1548,6 +1643,8 @@ insert_non_bus_resource(void)
{
struct resource *res =
kzalloc(sizeof(struct resource), GFP_ATOMIC);
+ if (!res)
+ return NULL;
res->name = "Non-Bus Physical Address Space";
res->start = (1ULL << 32);
res->end = -1LL;
@@ -1561,11 +1658,13 @@ insert_non_bus_resource(void)
#endif
static struct resource* __init
-insert_ram_resource(u64 start_pfn, u64 end_pfn)
+insert_ram_resource(u64 start_pfn, u64 end_pfn, bool reserved)
{
struct resource *res =
kzalloc(sizeof(struct resource), GFP_ATOMIC);
- res->name = "System RAM";
+ if (!res)
+ return NULL;
+ res->name = reserved ? "Reserved" : "System RAM";
res->start = start_pfn << PAGE_SHIFT;
res->end = (end_pfn << PAGE_SHIFT) - 1;
res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
@@ -1585,7 +1684,7 @@ insert_ram_resource(u64 start_pfn, u64 end_pfn)
static int __init request_standard_resources(void)
{
int i;
- enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET };
+ enum { CODE_DELTA = MEM_SV_START - PAGE_OFFSET };
#if defined(CONFIG_PCI) && !defined(__tilegx__)
insert_non_bus_resource();
@@ -1600,11 +1699,11 @@ static int __init request_standard_resources(void)
end_pfn > pci_reserve_start_pfn) {
if (end_pfn > pci_reserve_end_pfn)
insert_ram_resource(pci_reserve_end_pfn,
- end_pfn);
+ end_pfn, 0);
end_pfn = pci_reserve_start_pfn;
}
#endif
- insert_ram_resource(start_pfn, end_pfn);
+ insert_ram_resource(start_pfn, end_pfn, 0);
}
code_resource.start = __pa(_text - CODE_DELTA);
@@ -1615,6 +1714,13 @@ static int __init request_standard_resources(void)
insert_resource(&iomem_resource, &code_resource);
insert_resource(&iomem_resource, &data_resource);
+ /* Mark any "memmap" regions busy for the resource manager. */
+ for (i = 0; i < memmap_nr; ++i) {
+ struct memmap_entry *m = &memmap_map[i];
+ insert_ram_resource(PFN_DOWN(m->addr),
+ PFN_UP(m->addr + m->size - 1), 1);
+ }
+
#ifdef CONFIG_KEXEC
insert_resource(&iomem_resource, &crashk_res);
#endif
diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c
index 9531845bf661..2d1dbf38a9ab 100644
--- a/arch/tile/kernel/signal.c
+++ b/arch/tile/kernel/signal.c
@@ -33,6 +33,7 @@
#include <asm/ucontext.h>
#include <asm/sigframe.h>
#include <asm/syscalls.h>
+#include <asm/vdso.h>
#include <arch/interrupts.h>
#define DEBUG_SIG 0
@@ -190,7 +191,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
if (err)
goto give_sigsegv;
- restorer = VDSO_BASE;
+ restorer = VDSO_SYM(&__vdso_rt_sigreturn);
if (ka->sa.sa_flags & SA_RESTORER)
restorer = (unsigned long) ka->sa.sa_restorer;
diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c
index 27742e87e255..de07fa7d1315 100644
--- a/arch/tile/kernel/single_step.c
+++ b/arch/tile/kernel/single_step.c
@@ -12,41 +12,30 @@
* more details.
*
* A code-rewriter that enables instruction single-stepping.
- * Derived from iLib's single-stepping code.
*/
-#ifndef __tilegx__ /* Hardware support for single step unavailable. */
-
-/* These functions are only used on the TILE platform */
+#include <linux/smp.h>
+#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/thread_info.h>
#include <linux/uaccess.h>
#include <linux/mman.h>
#include <linux/types.h>
#include <linux/err.h>
+#include <linux/prctl.h>
#include <asm/cacheflush.h>
+#include <asm/traps.h>
+#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <arch/abi.h>
+#include <arch/spr_def.h>
#include <arch/opcode.h>
-#define signExtend17(val) sign_extend((val), 17)
-#define TILE_X1_MASK (0xffffffffULL << 31)
-
-int unaligned_printk;
-static int __init setup_unaligned_printk(char *str)
-{
- long val;
- if (strict_strtol(str, 0, &val) != 0)
- return 0;
- unaligned_printk = val;
- pr_info("Printk for each unaligned data accesses is %s\n",
- unaligned_printk ? "enabled" : "disabled");
- return 1;
-}
-__setup("unaligned_printk=", setup_unaligned_printk);
+#ifndef __tilegx__ /* Hardware support for single step unavailable. */
-unsigned int unaligned_fixup_count;
+#define signExtend17(val) sign_extend((val), 17)
+#define TILE_X1_MASK (0xffffffffULL << 31)
enum mem_op {
MEMOP_NONE,
@@ -56,12 +45,13 @@ enum mem_op {
MEMOP_STORE_POSTINCR
};
-static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, s32 offset)
+static inline tilepro_bundle_bits set_BrOff_X1(tilepro_bundle_bits n,
+ s32 offset)
{
- tile_bundle_bits result;
+ tilepro_bundle_bits result;
/* mask out the old offset */
- tile_bundle_bits mask = create_BrOff_X1(-1);
+ tilepro_bundle_bits mask = create_BrOff_X1(-1);
result = n & (~mask);
/* or in the new offset */
@@ -70,10 +60,11 @@ static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, s32 offset)
return result;
}
-static inline tile_bundle_bits move_X1(tile_bundle_bits n, int dest, int src)
+static inline tilepro_bundle_bits move_X1(tilepro_bundle_bits n, int dest,
+ int src)
{
- tile_bundle_bits result;
- tile_bundle_bits op;
+ tilepro_bundle_bits result;
+ tilepro_bundle_bits op;
result = n & (~TILE_X1_MASK);
@@ -87,13 +78,13 @@ static inline tile_bundle_bits move_X1(tile_bundle_bits n, int dest, int src)
return result;
}
-static inline tile_bundle_bits nop_X1(tile_bundle_bits n)
+static inline tilepro_bundle_bits nop_X1(tilepro_bundle_bits n)
{
return move_X1(n, TREG_ZERO, TREG_ZERO);
}
-static inline tile_bundle_bits addi_X1(
- tile_bundle_bits n, int dest, int src, int imm)
+static inline tilepro_bundle_bits addi_X1(
+ tilepro_bundle_bits n, int dest, int src, int imm)
{
n &= ~TILE_X1_MASK;
@@ -107,15 +98,26 @@ static inline tile_bundle_bits addi_X1(
return n;
}
-static tile_bundle_bits rewrite_load_store_unaligned(
+static tilepro_bundle_bits rewrite_load_store_unaligned(
struct single_step_state *state,
- tile_bundle_bits bundle,
+ tilepro_bundle_bits bundle,
struct pt_regs *regs,
enum mem_op mem_op,
int size, int sign_ext)
{
unsigned char __user *addr;
int val_reg, addr_reg, err, val;
+ int align_ctl;
+
+ align_ctl = unaligned_fixup;
+ switch (task_thread_info(current)->align_ctl) {
+ case PR_UNALIGN_NOPRINT:
+ align_ctl = 1;
+ break;
+ case PR_UNALIGN_SIGBUS:
+ align_ctl = 0;
+ break;
+ }
/* Get address and value registers */
if (bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK) {
@@ -160,7 +162,7 @@ static tile_bundle_bits rewrite_load_store_unaligned(
* tilepro hardware would be doing, if it could provide us with the
* actual bad address in an SPR, which it doesn't.
*/
- if (unaligned_fixup == 0) {
+ if (align_ctl == 0) {
siginfo_t info = {
.si_signo = SIGBUS,
.si_code = BUS_ADRALN,
@@ -209,14 +211,14 @@ static tile_bundle_bits rewrite_load_store_unaligned(
if (err) {
siginfo_t info = {
- .si_signo = SIGSEGV,
- .si_code = SEGV_MAPERR,
+ .si_signo = SIGBUS,
+ .si_code = BUS_ADRALN,
.si_addr = addr
};
- trace_unhandled_signal("segfault", regs,
- (unsigned long)addr, SIGSEGV);
+ trace_unhandled_signal("bad address for unaligned fixup", regs,
+ (unsigned long)addr, SIGBUS);
force_sig_info(info.si_signo, &info, current);
- return (tile_bundle_bits) 0;
+ return (tilepro_bundle_bits) 0;
}
if (unaligned_printk || unaligned_fixup_count == 0) {
@@ -285,7 +287,7 @@ void single_step_execve(void)
ti->step_state = NULL;
}
-/**
+/*
* single_step_once() - entry point when single stepping has been triggered.
* @regs: The machine register state
*
@@ -304,20 +306,31 @@ void single_step_execve(void)
*/
void single_step_once(struct pt_regs *regs)
{
- extern tile_bundle_bits __single_step_ill_insn;
- extern tile_bundle_bits __single_step_j_insn;
- extern tile_bundle_bits __single_step_addli_insn;
- extern tile_bundle_bits __single_step_auli_insn;
+ extern tilepro_bundle_bits __single_step_ill_insn;
+ extern tilepro_bundle_bits __single_step_j_insn;
+ extern tilepro_bundle_bits __single_step_addli_insn;
+ extern tilepro_bundle_bits __single_step_auli_insn;
struct thread_info *info = (void *)current_thread_info();
struct single_step_state *state = info->step_state;
int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP);
- tile_bundle_bits __user *buffer, *pc;
- tile_bundle_bits bundle;
+ tilepro_bundle_bits __user *buffer, *pc;
+ tilepro_bundle_bits bundle;
int temp_reg;
int target_reg = TREG_LR;
int err;
enum mem_op mem_op = MEMOP_NONE;
int size = 0, sign_ext = 0; /* happy compiler */
+ int align_ctl;
+
+ align_ctl = unaligned_fixup;
+ switch (task_thread_info(current)->align_ctl) {
+ case PR_UNALIGN_NOPRINT:
+ align_ctl = 1;
+ break;
+ case PR_UNALIGN_SIGBUS:
+ align_ctl = 0;
+ break;
+ }
asm(
" .pushsection .rodata.single_step\n"
@@ -390,7 +403,7 @@ void single_step_once(struct pt_regs *regs)
if (regs->faultnum == INT_SWINT_1)
regs->pc -= 8;
- pc = (tile_bundle_bits __user *)(regs->pc);
+ pc = (tilepro_bundle_bits __user *)(regs->pc);
if (get_user(bundle, pc) != 0) {
pr_err("Couldn't read instruction at %p trying to step\n", pc);
return;
@@ -533,7 +546,6 @@ void single_step_once(struct pt_regs *regs)
}
break;
-#if CHIP_HAS_WH64()
/* postincrement operations */
case IMM_0_OPCODE_X1:
switch (get_ImmOpcodeExtension_X1(bundle)) {
@@ -568,7 +580,6 @@ void single_step_once(struct pt_regs *regs)
break;
}
break;
-#endif /* CHIP_HAS_WH64() */
}
if (state->update) {
@@ -627,9 +638,9 @@ void single_step_once(struct pt_regs *regs)
/*
* Check if we need to rewrite an unaligned load/store.
- * Returning zero is a special value meaning we need to SIGSEGV.
+ * Returning zero is a special value meaning we generated a signal.
*/
- if (mem_op != MEMOP_NONE && unaligned_fixup >= 0) {
+ if (mem_op != MEMOP_NONE && align_ctl >= 0) {
bundle = rewrite_load_store_unaligned(state, bundle, regs,
mem_op, size, sign_ext);
if (bundle == 0)
@@ -668,9 +679,9 @@ void single_step_once(struct pt_regs *regs)
}
/* End with a jump back to the next instruction */
- delta = ((regs->pc + TILE_BUNDLE_SIZE_IN_BYTES) -
+ delta = ((regs->pc + TILEPRO_BUNDLE_SIZE_IN_BYTES) -
(unsigned long)buffer) >>
- TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES;
+ TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES;
bundle = __single_step_j_insn;
bundle |= create_JOffLong_X1(delta);
err |= __put_user(bundle, buffer++);
@@ -698,9 +709,6 @@ void single_step_once(struct pt_regs *regs)
}
#else
-#include <linux/smp.h>
-#include <linux/ptrace.h>
-#include <arch/spr_def.h>
static DEFINE_PER_CPU(unsigned long, ss_saved_pc);
@@ -743,10 +751,10 @@ void gx_singlestep_handle(struct pt_regs *regs, int fault_num)
} else if ((*ss_pc != regs->pc) ||
(!(control & SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK))) {
- ptrace_notify(SIGTRAP);
control |= SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK;
control |= SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK;
__insn_mtspr(SPR_SINGLE_STEP_CONTROL_K, control);
+ send_sigtrap(current, regs);
}
}
diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c
index cbc73a8b8fe1..01e8ab29f43a 100644
--- a/arch/tile/kernel/smp.c
+++ b/arch/tile/kernel/smp.c
@@ -20,8 +20,13 @@
#include <linux/irq.h>
#include <linux/module.h>
#include <asm/cacheflush.h>
+#include <asm/homecache.h>
-HV_Topology smp_topology __write_once;
+/*
+ * We write to width and height with a single store in head_NN.S,
+ * so make the variable aligned to "long".
+ */
+HV_Topology smp_topology __write_once __aligned(sizeof(long));
EXPORT_SYMBOL(smp_topology);
#if CHIP_HAS_IPI()
@@ -100,8 +105,8 @@ static void smp_start_cpu_interrupt(void)
/* Handler to stop the current cpu. */
static void smp_stop_cpu_interrupt(void)
{
- set_cpu_online(smp_processor_id(), 0);
arch_local_irq_disable_all();
+ set_cpu_online(smp_processor_id(), 0);
for (;;)
asm("nap; nop");
}
@@ -167,9 +172,16 @@ static void ipi_flush_icache_range(void *info)
void flush_icache_range(unsigned long start, unsigned long end)
{
struct ipi_flush flush = { start, end };
- preempt_disable();
- on_each_cpu(ipi_flush_icache_range, &flush, 1);
- preempt_enable();
+
+ /* If invoked with irqs disabled, we can not issue IPIs. */
+ if (irqs_disabled())
+ flush_remote(0, HV_FLUSH_EVICT_L1I, NULL, 0, 0, 0,
+ NULL, NULL, 0);
+ else {
+ preempt_disable();
+ on_each_cpu(ipi_flush_icache_range, &flush, 1);
+ preempt_enable();
+ }
}
diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c
index a535655b7089..732e9d138661 100644
--- a/arch/tile/kernel/smpboot.c
+++ b/arch/tile/kernel/smpboot.c
@@ -142,13 +142,15 @@ static struct cpumask cpu_started;
*/
static void start_secondary(void)
{
- int cpuid = smp_processor_id();
+ int cpuid;
+
+ preempt_disable();
+
+ cpuid = smp_processor_id();
/* Set our thread pointer appropriately. */
set_my_cpu_offset(__per_cpu_offset[cpuid]);
- preempt_disable();
-
/*
* In large machines even this will slow us down, since we
* will be contending for for the printk spinlock.
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c
index af8dfc9665f6..362284af3afd 100644
--- a/arch/tile/kernel/stack.c
+++ b/arch/tile/kernel/stack.c
@@ -29,6 +29,7 @@
#include <asm/switch_to.h>
#include <asm/sigframe.h>
#include <asm/stack.h>
+#include <asm/vdso.h>
#include <arch/abi.h>
#include <arch/interrupts.h>
@@ -102,9 +103,8 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
p->sp >= sp) {
if (kbt->verbose)
pr_err(" <%s while in kernel mode>\n", fault);
- } else if (EX1_PL(p->ex1) == USER_PL &&
- p->pc < PAGE_OFFSET &&
- p->sp < PAGE_OFFSET) {
+ } else if (user_mode(p) &&
+ p->sp < PAGE_OFFSET && p->sp != 0) {
if (kbt->verbose)
pr_err(" <%s while in user mode>\n", fault);
} else if (kbt->verbose) {
@@ -120,7 +120,7 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
/* Is the pc pointing to a sigreturn trampoline? */
static int is_sigreturn(unsigned long pc)
{
- return (pc == VDSO_BASE);
+ return current->mm && (pc == VDSO_SYM(&__vdso_rt_sigreturn));
}
/* Return a pt_regs pointer for a valid signal handler frame */
@@ -129,7 +129,7 @@ static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt,
{
BacktraceIterator *b = &kbt->it;
- if (b->pc == VDSO_BASE && b->sp < PAGE_OFFSET &&
+ if (is_sigreturn(b->pc) && b->sp < PAGE_OFFSET &&
b->sp % sizeof(long) == 0) {
int retval;
pagefault_disable();
@@ -195,21 +195,21 @@ static int KBacktraceIterator_next_item_inclusive(
*/
static void validate_stack(struct pt_regs *regs)
{
- int cpu = smp_processor_id();
+ int cpu = raw_smp_processor_id();
unsigned long ksp0 = get_current_ksp0();
- unsigned long ksp0_base = ksp0 - THREAD_SIZE;
+ unsigned long ksp0_base = ksp0 & -THREAD_SIZE;
unsigned long sp = stack_pointer;
if (EX1_PL(regs->ex1) == KERNEL_PL && regs->sp >= ksp0) {
- pr_err("WARNING: cpu %d: kernel stack page %#lx underrun!\n"
+ pr_err("WARNING: cpu %d: kernel stack %#lx..%#lx underrun!\n"
" sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n",
- cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr);
+ cpu, ksp0_base, ksp0, sp, regs->sp, regs->pc, regs->lr);
}
else if (sp < ksp0_base + sizeof(struct thread_info)) {
- pr_err("WARNING: cpu %d: kernel stack page %#lx overrun!\n"
+ pr_err("WARNING: cpu %d: kernel stack %#lx..%#lx overrun!\n"
" sp %#lx (%#lx in caller), caller pc %#lx, lr %#lx\n",
- cpu, ksp0_base, sp, regs->sp, regs->pc, regs->lr);
+ cpu, ksp0_base, ksp0, sp, regs->sp, regs->pc, regs->lr);
}
}
@@ -352,6 +352,26 @@ static void describe_addr(struct KBacktraceIterator *kbt,
}
/*
+ * Avoid possible crash recursion during backtrace. If it happens, it
+ * makes it easy to lose the actual root cause of the failure, so we
+ * put a simple guard on all the backtrace loops.
+ */
+static bool start_backtrace(void)
+{
+ if (current->thread.in_backtrace) {
+ pr_err("Backtrace requested while in backtrace!\n");
+ return false;
+ }
+ current->thread.in_backtrace = true;
+ return true;
+}
+
+static void end_backtrace(void)
+{
+ current->thread.in_backtrace = false;
+}
+
+/*
* This method wraps the backtracer's more generic support.
* It is only invoked from the architecture-specific code; show_stack()
* and dump_stack() (in entry.S) are architecture-independent entry points.
@@ -361,6 +381,8 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
int i;
int have_mmap_sem = 0;
+ if (!start_backtrace())
+ return;
if (headers) {
/*
* Add a blank line since if we are called from panic(),
@@ -371,7 +393,7 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
pr_err("Starting stack dump of tid %d, pid %d (%s)"
" on cpu %d at cycle %lld\n",
kbt->task->pid, kbt->task->tgid, kbt->task->comm,
- smp_processor_id(), get_cycles());
+ raw_smp_processor_id(), get_cycles());
}
kbt->verbose = 1;
i = 0;
@@ -402,6 +424,7 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
pr_err("Stack dump complete\n");
if (have_mmap_sem)
up_read(&kbt->task->mm->mmap_sem);
+ end_backtrace();
}
EXPORT_SYMBOL(tile_show_stack);
@@ -463,6 +486,8 @@ void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace)
int skip = trace->skip;
int i = 0;
+ if (!start_backtrace())
+ goto done;
if (task == NULL || task == current)
KBacktraceIterator_init_current(&kbt);
else
@@ -476,6 +501,8 @@ void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace)
break;
trace->entries[i++] = kbt.it.pc;
}
+ end_backtrace();
+done:
trace->nr_entries = i;
}
EXPORT_SYMBOL(save_stack_trace_tsk);
diff --git a/arch/tile/kernel/sys.c b/arch/tile/kernel/sys.c
index b881a7be24bd..38debe706061 100644
--- a/arch/tile/kernel/sys.c
+++ b/arch/tile/kernel/sys.c
@@ -38,8 +38,10 @@
SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, len,
unsigned long, flags)
{
+ /* DCACHE is not particularly effective if not bound to one cpu. */
if (flags & DCACHE)
- homecache_evict(cpumask_of(smp_processor_id()));
+ homecache_evict(cpumask_of(raw_smp_processor_id()));
+
if (flags & ICACHE)
flush_remote(0, HV_FLUSH_EVICT_L1I, mm_cpumask(current->mm),
0, 0, 0, NULL, NULL, 0);
diff --git a/arch/tile/kernel/sysfs.c b/arch/tile/kernel/sysfs.c
index e25b0a89c18f..a3ed12f8f83b 100644
--- a/arch/tile/kernel/sysfs.c
+++ b/arch/tile/kernel/sysfs.c
@@ -157,6 +157,67 @@ hvconfig_bin_read(struct file *filp, struct kobject *kobj,
return count;
}
+static ssize_t hv_stats_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page)
+{
+ int cpu = dev->id;
+ long lotar = HV_XY_TO_LOTAR(cpu_x(cpu), cpu_y(cpu));
+
+ ssize_t n = hv_confstr(HV_CONFSTR_HV_STATS,
+ (unsigned long)page, PAGE_SIZE - 1,
+ lotar, 0);
+ n = n < 0 ? 0 : min(n, (ssize_t)PAGE_SIZE - 1);
+ page[n] = '\0';
+ return n;
+}
+
+static ssize_t hv_stats_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *page,
+ size_t count)
+{
+ int cpu = dev->id;
+ long lotar = HV_XY_TO_LOTAR(cpu_x(cpu), cpu_y(cpu));
+
+ ssize_t n = hv_confstr(HV_CONFSTR_HV_STATS, 0, 0, lotar, 1);
+ return n < 0 ? n : count;
+}
+
+static DEVICE_ATTR(hv_stats, 0644, hv_stats_show, hv_stats_store);
+
+static int hv_stats_device_add(struct device *dev, struct subsys_interface *sif)
+{
+ int err, cpu = dev->id;
+
+ if (!cpu_online(cpu))
+ return 0;
+
+ err = sysfs_create_file(&dev->kobj, &dev_attr_hv_stats.attr);
+
+ return err;
+}
+
+static int hv_stats_device_remove(struct device *dev,
+ struct subsys_interface *sif)
+{
+ int cpu = dev->id;
+
+ if (!cpu_online(cpu))
+ return 0;
+
+ sysfs_remove_file(&dev->kobj, &dev_attr_hv_stats.attr);
+ return 0;
+}
+
+
+static struct subsys_interface hv_stats_interface = {
+ .name = "hv_stats",
+ .subsys = &cpu_subsys,
+ .add_dev = hv_stats_device_add,
+ .remove_dev = hv_stats_device_remove,
+};
+
static int __init create_sysfs_entries(void)
{
int err = 0;
@@ -188,6 +249,21 @@ static int __init create_sysfs_entries(void)
err = sysfs_create_bin_file(hypervisor_kobj, &hvconfig_bin);
}
+ if (!err) {
+ /*
+ * Don't bother adding the hv_stats files on each CPU if
+ * our hypervisor doesn't supply statistics.
+ */
+ int cpu = raw_smp_processor_id();
+ long lotar = HV_XY_TO_LOTAR(cpu_x(cpu), cpu_y(cpu));
+ char dummy;
+ ssize_t n = hv_confstr(HV_CONFSTR_HV_STATS,
+ (unsigned long) &dummy, 1,
+ lotar, 0);
+ if (n >= 0)
+ err = subsys_interface_register(&hv_stats_interface);
+ }
+
return err;
}
subsys_initcall(create_sysfs_entries);
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c
index 7c353d8c2da9..5d10642db63e 100644
--- a/arch/tile/kernel/time.c
+++ b/arch/tile/kernel/time.c
@@ -23,8 +23,10 @@
#include <linux/smp.h>
#include <linux/delay.h>
#include <linux/module.h>
+#include <linux/timekeeper_internal.h>
#include <asm/irq_regs.h>
#include <asm/traps.h>
+#include <asm/vdso.h>
#include <hv/hypervisor.h>
#include <arch/interrupts.h>
#include <arch/spr_def.h>
@@ -110,7 +112,6 @@ void __init time_init(void)
setup_tile_timer();
}
-
/*
* Define the tile timer clock event device. The timer is driven by
* the TILE_TIMER_CONTROL register, which consists of a 31-bit down
@@ -237,3 +238,37 @@ cycles_t ns2cycles(unsigned long nsecs)
struct clock_event_device *dev = &__raw_get_cpu_var(tile_timer);
return ((u64)nsecs * dev->mult) >> dev->shift;
}
+
+void update_vsyscall_tz(void)
+{
+ /* Userspace gettimeofday will spin while this value is odd. */
+ ++vdso_data->tz_update_count;
+ smp_wmb();
+ vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
+ vdso_data->tz_dsttime = sys_tz.tz_dsttime;
+ smp_wmb();
+ ++vdso_data->tz_update_count;
+}
+
+void update_vsyscall(struct timekeeper *tk)
+{
+ struct timespec wall_time = tk_xtime(tk);
+ struct timespec *wtm = &tk->wall_to_monotonic;
+ struct clocksource *clock = tk->clock;
+
+ if (clock != &cycle_counter_cs)
+ return;
+
+ /* Userspace gettimeofday will spin while this value is odd. */
+ ++vdso_data->tb_update_count;
+ smp_wmb();
+ vdso_data->xtime_tod_stamp = clock->cycle_last;
+ vdso_data->xtime_clock_sec = wall_time.tv_sec;
+ vdso_data->xtime_clock_nsec = wall_time.tv_nsec;
+ vdso_data->wtom_clock_sec = wtm->tv_sec;
+ vdso_data->wtom_clock_nsec = wtm->tv_nsec;
+ vdso_data->mult = clock->mult;
+ vdso_data->shift = clock->shift;
+ smp_wmb();
+ ++vdso_data->tb_update_count;
+}
diff --git a/arch/tile/kernel/tlb.c b/arch/tile/kernel/tlb.c
index 3fd54d5bbd4c..f23b53515671 100644
--- a/arch/tile/kernel/tlb.c
+++ b/arch/tile/kernel/tlb.c
@@ -91,8 +91,14 @@ void flush_tlb_all(void)
}
}
+/*
+ * Callers need to flush the L1I themselves if necessary, e.g. for
+ * kernel module unload. Otherwise we assume callers are not using
+ * executable pgprot_t's. Using EVICT_L1I means that dataplane cpus
+ * will get an unnecessary interrupt otherwise.
+ */
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
- flush_remote(0, HV_FLUSH_EVICT_L1I, cpu_online_mask,
+ flush_remote(0, 0, NULL,
start, end - start, PAGE_SIZE, cpu_online_mask, NULL, 0);
}
diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c
index 5b19a23c8908..6b603d556ca6 100644
--- a/arch/tile/kernel/traps.c
+++ b/arch/tile/kernel/traps.c
@@ -15,6 +15,7 @@
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/kprobes.h>
+#include <linux/kdebug.h>
#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/uaccess.h>
@@ -29,7 +30,7 @@
void __init trap_init(void)
{
- /* Nothing needed here since we link code at .intrpt1 */
+ /* Nothing needed here since we link code at .intrpt */
}
int unaligned_fixup = 1;
@@ -100,13 +101,7 @@ static int retry_gpv(unsigned int gpv_reason)
#endif /* CHIP_HAS_TILE_DMA() */
-#ifdef __tilegx__
-#define bundle_bits tilegx_bundle_bits
-#else
-#define bundle_bits tile_bundle_bits
-#endif
-
-extern bundle_bits bpt_code;
+extern tile_bundle_bits bpt_code;
asm(".pushsection .rodata.bpt_code,\"a\";"
".align 8;"
@@ -114,7 +109,7 @@ asm(".pushsection .rodata.bpt_code,\"a\";"
".size bpt_code,.-bpt_code;"
".popsection");
-static int special_ill(bundle_bits bundle, int *sigp, int *codep)
+static int special_ill(tile_bundle_bits bundle, int *sigp, int *codep)
{
int sig, code, maxcode;
@@ -214,24 +209,73 @@ static const char *const int_name[] = {
#endif
};
+static int do_bpt(struct pt_regs *regs)
+{
+ unsigned long bundle, bcode, bpt;
+
+ bundle = *(unsigned long *)instruction_pointer(regs);
+
+ /*
+ * bpt shoule be { bpt; nop }, which is 0x286a44ae51485000ULL.
+ * we encode the unused least significant bits for other purpose.
+ */
+ bpt = bundle & ~((1ULL << 12) - 1);
+ if (bpt != TILE_BPT_BUNDLE)
+ return 0;
+
+ bcode = bundle & ((1ULL << 12) - 1);
+ /*
+ * notify the kprobe handlers, if instruction is likely to
+ * pertain to them.
+ */
+ switch (bcode) {
+ /* breakpoint_insn */
+ case 0:
+ notify_die(DIE_BREAK, "debug", regs, bundle,
+ INT_ILL, SIGTRAP);
+ break;
+ /* compiled_bpt */
+ case DIE_COMPILED_BPT:
+ notify_die(DIE_COMPILED_BPT, "debug", regs, bundle,
+ INT_ILL, SIGTRAP);
+ break;
+ /* breakpoint2_insn */
+ case DIE_SSTEPBP:
+ notify_die(DIE_SSTEPBP, "single_step", regs, bundle,
+ INT_ILL, SIGTRAP);
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
void __kprobes do_trap(struct pt_regs *regs, int fault_num,
unsigned long reason)
{
siginfo_t info = { 0 };
int signo, code;
unsigned long address = 0;
- bundle_bits instr;
+ tile_bundle_bits instr;
+ int is_kernel = !user_mode(regs);
+
+ /* Handle breakpoints, etc. */
+ if (is_kernel && fault_num == INT_ILL && do_bpt(regs))
+ return;
- /* Re-enable interrupts. */
- local_irq_enable();
+ /* Re-enable interrupts, if they were previously enabled. */
+ if (!(regs->flags & PT_FLAGS_DISABLE_IRQ))
+ local_irq_enable();
/*
* If it hits in kernel mode and we can't fix it up, just exit the
* current process and hope for the best.
*/
- if (!user_mode(regs)) {
+ if (is_kernel) {
const char *name;
- if (fixup_exception(regs)) /* only UNALIGN_DATA in practice */
+ char buf[100];
+ if (fixup_exception(regs)) /* ILL_TRANS or UNALIGN_DATA */
return;
if (fault_num >= 0 &&
fault_num < sizeof(int_name)/sizeof(int_name[0]) &&
@@ -239,10 +283,16 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
name = int_name[fault_num];
else
name = "Unknown interrupt";
- pr_alert("Kernel took bad trap %d (%s) at PC %#lx\n",
- fault_num, name, regs->pc);
if (fault_num == INT_GPV)
- pr_alert("GPV_REASON is %#lx\n", reason);
+ snprintf(buf, sizeof(buf), "; GPV_REASON %#lx", reason);
+#ifdef __tilegx__
+ else if (fault_num == INT_ILL_TRANS)
+ snprintf(buf, sizeof(buf), "; address %#lx", reason);
+#endif
+ else
+ buf[0] = '\0';
+ pr_alert("Kernel took bad trap %d (%s) at PC %#lx%s\n",
+ fault_num, name, regs->pc, buf);
show_regs(regs);
do_exit(SIGKILL); /* FIXME: implement i386 die() */
return;
@@ -324,11 +374,8 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
fill_ra_stack();
signo = SIGSEGV;
+ address = reason;
code = SEGV_MAPERR;
- if (reason & SPR_ILL_TRANS_REASON__I_STREAM_VA_RMASK)
- address = regs->pc;
- else
- address = 0; /* FIXME: GX: single-step for address */
break;
}
#endif
diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c
new file mode 100644
index 000000000000..b425fb6a480d
--- /dev/null
+++ b/arch/tile/kernel/unaligned.c
@@ -0,0 +1,1609 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * A code-rewriter that handles unaligned exception.
+ */
+
+#include <linux/smp.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/thread_info.h>
+#include <linux/uaccess.h>
+#include <linux/mman.h>
+#include <linux/types.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/compat.h>
+#include <linux/prctl.h>
+#include <asm/cacheflush.h>
+#include <asm/traps.h>
+#include <asm/uaccess.h>
+#include <asm/unaligned.h>
+#include <arch/abi.h>
+#include <arch/spr_def.h>
+#include <arch/opcode.h>
+
+
+/*
+ * This file handles unaligned exception for tile-Gx. The tilepro's unaligned
+ * exception is supported out of single_step.c
+ */
+
+int unaligned_printk;
+
+static int __init setup_unaligned_printk(char *str)
+{
+ long val;
+ if (kstrtol(str, 0, &val) != 0)
+ return 0;
+ unaligned_printk = val;
+ pr_info("Printk for each unaligned data accesses is %s\n",
+ unaligned_printk ? "enabled" : "disabled");
+ return 1;
+}
+__setup("unaligned_printk=", setup_unaligned_printk);
+
+unsigned int unaligned_fixup_count;
+
+#ifdef __tilegx__
+
+/*
+ * Unalign data jit fixup code fragement. Reserved space is 128 bytes.
+ * The 1st 64-bit word saves fault PC address, 2nd word is the fault
+ * instruction bundle followed by 14 JIT bundles.
+ */
+
+struct unaligned_jit_fragment {
+ unsigned long pc;
+ tilegx_bundle_bits bundle;
+ tilegx_bundle_bits insn[14];
+};
+
+/*
+ * Check if a nop or fnop at bundle's pipeline X0.
+ */
+
+static bool is_bundle_x0_nop(tilegx_bundle_bits bundle)
+{
+ return (((get_UnaryOpcodeExtension_X0(bundle) ==
+ NOP_UNARY_OPCODE_X0) &&
+ (get_RRROpcodeExtension_X0(bundle) ==
+ UNARY_RRR_0_OPCODE_X0) &&
+ (get_Opcode_X0(bundle) ==
+ RRR_0_OPCODE_X0)) ||
+ ((get_UnaryOpcodeExtension_X0(bundle) ==
+ FNOP_UNARY_OPCODE_X0) &&
+ (get_RRROpcodeExtension_X0(bundle) ==
+ UNARY_RRR_0_OPCODE_X0) &&
+ (get_Opcode_X0(bundle) ==
+ RRR_0_OPCODE_X0)));
+}
+
+/*
+ * Check if nop or fnop at bundle's pipeline X1.
+ */
+
+static bool is_bundle_x1_nop(tilegx_bundle_bits bundle)
+{
+ return (((get_UnaryOpcodeExtension_X1(bundle) ==
+ NOP_UNARY_OPCODE_X1) &&
+ (get_RRROpcodeExtension_X1(bundle) ==
+ UNARY_RRR_0_OPCODE_X1) &&
+ (get_Opcode_X1(bundle) ==
+ RRR_0_OPCODE_X1)) ||
+ ((get_UnaryOpcodeExtension_X1(bundle) ==
+ FNOP_UNARY_OPCODE_X1) &&
+ (get_RRROpcodeExtension_X1(bundle) ==
+ UNARY_RRR_0_OPCODE_X1) &&
+ (get_Opcode_X1(bundle) ==
+ RRR_0_OPCODE_X1)));
+}
+
+/*
+ * Check if nop or fnop at bundle's Y0 pipeline.
+ */
+
+static bool is_bundle_y0_nop(tilegx_bundle_bits bundle)
+{
+ return (((get_UnaryOpcodeExtension_Y0(bundle) ==
+ NOP_UNARY_OPCODE_Y0) &&
+ (get_RRROpcodeExtension_Y0(bundle) ==
+ UNARY_RRR_1_OPCODE_Y0) &&
+ (get_Opcode_Y0(bundle) ==
+ RRR_1_OPCODE_Y0)) ||
+ ((get_UnaryOpcodeExtension_Y0(bundle) ==
+ FNOP_UNARY_OPCODE_Y0) &&
+ (get_RRROpcodeExtension_Y0(bundle) ==
+ UNARY_RRR_1_OPCODE_Y0) &&
+ (get_Opcode_Y0(bundle) ==
+ RRR_1_OPCODE_Y0)));
+}
+
+/*
+ * Check if nop or fnop at bundle's pipeline Y1.
+ */
+
+static bool is_bundle_y1_nop(tilegx_bundle_bits bundle)
+{
+ return (((get_UnaryOpcodeExtension_Y1(bundle) ==
+ NOP_UNARY_OPCODE_Y1) &&
+ (get_RRROpcodeExtension_Y1(bundle) ==
+ UNARY_RRR_1_OPCODE_Y1) &&
+ (get_Opcode_Y1(bundle) ==
+ RRR_1_OPCODE_Y1)) ||
+ ((get_UnaryOpcodeExtension_Y1(bundle) ==
+ FNOP_UNARY_OPCODE_Y1) &&
+ (get_RRROpcodeExtension_Y1(bundle) ==
+ UNARY_RRR_1_OPCODE_Y1) &&
+ (get_Opcode_Y1(bundle) ==
+ RRR_1_OPCODE_Y1)));
+}
+
+/*
+ * Test if a bundle's y0 and y1 pipelines are both nop or fnop.
+ */
+
+static bool is_y0_y1_nop(tilegx_bundle_bits bundle)
+{
+ return is_bundle_y0_nop(bundle) && is_bundle_y1_nop(bundle);
+}
+
+/*
+ * Test if a bundle's x0 and x1 pipelines are both nop or fnop.
+ */
+
+static bool is_x0_x1_nop(tilegx_bundle_bits bundle)
+{
+ return is_bundle_x0_nop(bundle) && is_bundle_x1_nop(bundle);
+}
+
+/*
+ * Find the destination, source registers of fault unalign access instruction
+ * at X1 or Y2. Also, allocate up to 3 scratch registers clob1, clob2 and
+ * clob3, which are guaranteed different from any register used in the fault
+ * bundle. r_alias is used to return if the other instructions other than the
+ * unalign load/store shares same register with ra, rb and rd.
+ */
+
+static void find_regs(tilegx_bundle_bits bundle, uint64_t *rd, uint64_t *ra,
+ uint64_t *rb, uint64_t *clob1, uint64_t *clob2,
+ uint64_t *clob3, bool *r_alias)
+{
+ int i;
+ uint64_t reg;
+ uint64_t reg_map = 0, alias_reg_map = 0, map;
+ bool alias;
+
+ *ra = -1;
+ *rb = -1;
+
+ if (rd)
+ *rd = -1;
+
+ *clob1 = -1;
+ *clob2 = -1;
+ *clob3 = -1;
+ alias = false;
+
+ /*
+ * Parse fault bundle, find potential used registers and mark
+ * corresponding bits in reg_map and alias_map. These 2 bit maps
+ * are used to find the scratch registers and determine if there
+ * is register alais.
+ */
+ if (bundle & TILEGX_BUNDLE_MODE_MASK) { /* Y Mode Bundle. */
+
+ reg = get_SrcA_Y2(bundle);
+ reg_map |= 1ULL << reg;
+ *ra = reg;
+ reg = get_SrcBDest_Y2(bundle);
+ reg_map |= 1ULL << reg;
+
+ if (rd) {
+ /* Load. */
+ *rd = reg;
+ alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
+ } else {
+ /* Store. */
+ *rb = reg;
+ alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
+ }
+
+ if (!is_bundle_y1_nop(bundle)) {
+ reg = get_SrcA_Y1(bundle);
+ reg_map |= (1ULL << reg);
+ map = (1ULL << reg);
+
+ reg = get_SrcB_Y1(bundle);
+ reg_map |= (1ULL << reg);
+ map |= (1ULL << reg);
+
+ reg = get_Dest_Y1(bundle);
+ reg_map |= (1ULL << reg);
+ map |= (1ULL << reg);
+
+ if (map & alias_reg_map)
+ alias = true;
+ }
+
+ if (!is_bundle_y0_nop(bundle)) {
+ reg = get_SrcA_Y0(bundle);
+ reg_map |= (1ULL << reg);
+ map = (1ULL << reg);
+
+ reg = get_SrcB_Y0(bundle);
+ reg_map |= (1ULL << reg);
+ map |= (1ULL << reg);
+
+ reg = get_Dest_Y0(bundle);
+ reg_map |= (1ULL << reg);
+ map |= (1ULL << reg);
+
+ if (map & alias_reg_map)
+ alias = true;
+ }
+ } else { /* X Mode Bundle. */
+
+ reg = get_SrcA_X1(bundle);
+ reg_map |= (1ULL << reg);
+ *ra = reg;
+ if (rd) {
+ /* Load. */
+ reg = get_Dest_X1(bundle);
+ reg_map |= (1ULL << reg);
+ *rd = reg;
+ alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
+ } else {
+ /* Store. */
+ reg = get_SrcB_X1(bundle);
+ reg_map |= (1ULL << reg);
+ *rb = reg;
+ alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
+ }
+
+ if (!is_bundle_x0_nop(bundle)) {
+ reg = get_SrcA_X0(bundle);
+ reg_map |= (1ULL << reg);
+ map = (1ULL << reg);
+
+ reg = get_SrcB_X0(bundle);
+ reg_map |= (1ULL << reg);
+ map |= (1ULL << reg);
+
+ reg = get_Dest_X0(bundle);
+ reg_map |= (1ULL << reg);
+ map |= (1ULL << reg);
+
+ if (map & alias_reg_map)
+ alias = true;
+ }
+ }
+
+ /*
+ * "alias" indicates if the unalign access registers have collision
+ * with others in the same bundle. We jsut simply test all register
+ * operands case (RRR), ignored the case with immidate. If a bundle
+ * has no register alias, we may do fixup in a simple or fast manner.
+ * So if an immidata field happens to hit with a register, we may end
+ * up fall back to the generic handling.
+ */
+
+ *r_alias = alias;
+
+ /* Flip bits on reg_map. */
+ reg_map ^= -1ULL;
+
+ /* Scan reg_map lower 54(TREG_SP) bits to find 3 set bits. */
+ for (i = 0; i < TREG_SP; i++) {
+ if (reg_map & (0x1ULL << i)) {
+ if (*clob1 == -1) {
+ *clob1 = i;
+ } else if (*clob2 == -1) {
+ *clob2 = i;
+ } else if (*clob3 == -1) {
+ *clob3 = i;
+ return;
+ }
+ }
+ }
+}
+
+/*
+ * Sanity check for register ra, rb, rd, clob1/2/3. Return true if any of them
+ * is unexpected.
+ */
+
+static bool check_regs(uint64_t rd, uint64_t ra, uint64_t rb,
+ uint64_t clob1, uint64_t clob2, uint64_t clob3)
+{
+ bool unexpected = false;
+ if ((ra >= 56) && (ra != TREG_ZERO))
+ unexpected = true;
+
+ if ((clob1 >= 56) || (clob2 >= 56) || (clob3 >= 56))
+ unexpected = true;
+
+ if (rd != -1) {
+ if ((rd >= 56) && (rd != TREG_ZERO))
+ unexpected = true;
+ } else {
+ if ((rb >= 56) && (rb != TREG_ZERO))
+ unexpected = true;
+ }
+ return unexpected;
+}
+
+
+#define GX_INSN_X0_MASK ((1ULL << 31) - 1)
+#define GX_INSN_X1_MASK (((1ULL << 31) - 1) << 31)
+#define GX_INSN_Y0_MASK ((0xFULL << 27) | (0xFFFFFULL))
+#define GX_INSN_Y1_MASK (GX_INSN_Y0_MASK << 31)
+#define GX_INSN_Y2_MASK ((0x7FULL << 51) | (0x7FULL << 20))
+
+#ifdef __LITTLE_ENDIAN
+#define GX_INSN_BSWAP(_bundle_) (_bundle_)
+#else
+#define GX_INSN_BSWAP(_bundle_) swab64(_bundle_)
+#endif /* __LITTLE_ENDIAN */
+
+/*
+ * __JIT_CODE(.) creates template bundles in .rodata.unalign_data section.
+ * The corresponding static function jix_x#_###(.) generates partial or
+ * whole bundle based on the template and given arguments.
+ */
+
+#define __JIT_CODE(_X_) \
+ asm (".pushsection .rodata.unalign_data, \"a\"\n" \
+ _X_"\n" \
+ ".popsection\n")
+
+__JIT_CODE("__unalign_jit_x1_mtspr: {mtspr 0, r0}");
+static tilegx_bundle_bits jit_x1_mtspr(int spr, int reg)
+{
+ extern tilegx_bundle_bits __unalign_jit_x1_mtspr;
+ return (GX_INSN_BSWAP(__unalign_jit_x1_mtspr) & GX_INSN_X1_MASK) |
+ create_MT_Imm14_X1(spr) | create_SrcA_X1(reg);
+}
+
+__JIT_CODE("__unalign_jit_x1_mfspr: {mfspr r0, 0}");
+static tilegx_bundle_bits jit_x1_mfspr(int reg, int spr)
+{
+ extern tilegx_bundle_bits __unalign_jit_x1_mfspr;
+ return (GX_INSN_BSWAP(__unalign_jit_x1_mfspr) & GX_INSN_X1_MASK) |
+ create_MF_Imm14_X1(spr) | create_Dest_X1(reg);
+}
+
+__JIT_CODE("__unalign_jit_x0_addi: {addi r0, r0, 0; iret}");
+static tilegx_bundle_bits jit_x0_addi(int rd, int ra, int imm8)
+{
+ extern tilegx_bundle_bits __unalign_jit_x0_addi;
+ return (GX_INSN_BSWAP(__unalign_jit_x0_addi) & GX_INSN_X0_MASK) |
+ create_Dest_X0(rd) | create_SrcA_X0(ra) |
+ create_Imm8_X0(imm8);
+}
+
+__JIT_CODE("__unalign_jit_x1_ldna: {ldna r0, r0}");
+static tilegx_bundle_bits jit_x1_ldna(int rd, int ra)
+{
+ extern tilegx_bundle_bits __unalign_jit_x1_ldna;
+ return (GX_INSN_BSWAP(__unalign_jit_x1_ldna) & GX_INSN_X1_MASK) |
+ create_Dest_X1(rd) | create_SrcA_X1(ra);
+}
+
+__JIT_CODE("__unalign_jit_x0_dblalign: {dblalign r0, r0 ,r0}");
+static tilegx_bundle_bits jit_x0_dblalign(int rd, int ra, int rb)
+{
+ extern tilegx_bundle_bits __unalign_jit_x0_dblalign;
+ return (GX_INSN_BSWAP(__unalign_jit_x0_dblalign) & GX_INSN_X0_MASK) |
+ create_Dest_X0(rd) | create_SrcA_X0(ra) |
+ create_SrcB_X0(rb);
+}
+
+__JIT_CODE("__unalign_jit_x1_iret: {iret}");
+static tilegx_bundle_bits jit_x1_iret(void)
+{
+ extern tilegx_bundle_bits __unalign_jit_x1_iret;
+ return GX_INSN_BSWAP(__unalign_jit_x1_iret) & GX_INSN_X1_MASK;
+}
+
+__JIT_CODE("__unalign_jit_x01_fnop: {fnop;fnop}");
+static tilegx_bundle_bits jit_x0_fnop(void)
+{
+ extern tilegx_bundle_bits __unalign_jit_x01_fnop;
+ return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X0_MASK;
+}
+
+static tilegx_bundle_bits jit_x1_fnop(void)
+{
+ extern tilegx_bundle_bits __unalign_jit_x01_fnop;
+ return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X1_MASK;
+}
+
+__JIT_CODE("__unalign_jit_y2_dummy: {fnop; fnop; ld zero, sp}");
+static tilegx_bundle_bits jit_y2_dummy(void)
+{
+ extern tilegx_bundle_bits __unalign_jit_y2_dummy;
+ return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y2_MASK;
+}
+
+static tilegx_bundle_bits jit_y1_fnop(void)
+{
+ extern tilegx_bundle_bits __unalign_jit_y2_dummy;
+ return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y1_MASK;
+}
+
+__JIT_CODE("__unalign_jit_x1_st1_add: {st1_add r1, r0, 0}");
+static tilegx_bundle_bits jit_x1_st1_add(int ra, int rb, int imm8)
+{
+ extern tilegx_bundle_bits __unalign_jit_x1_st1_add;
+ return (GX_INSN_BSWAP(__unalign_jit_x1_st1_add) &
+ (~create_SrcA_X1(-1)) &
+ GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
+ create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
+}
+
+__JIT_CODE("__unalign_jit_x1_st: {crc32_8 r1, r0, r0; st r0, r0}");
+static tilegx_bundle_bits jit_x1_st(int ra, int rb)
+{
+ extern tilegx_bundle_bits __unalign_jit_x1_st;
+ return (GX_INSN_BSWAP(__unalign_jit_x1_st) & GX_INSN_X1_MASK) |
+ create_SrcA_X1(ra) | create_SrcB_X1(rb);
+}
+
+__JIT_CODE("__unalign_jit_x1_st_add: {st_add r1, r0, 0}");
+static tilegx_bundle_bits jit_x1_st_add(int ra, int rb, int imm8)
+{
+ extern tilegx_bundle_bits __unalign_jit_x1_st_add;
+ return (GX_INSN_BSWAP(__unalign_jit_x1_st_add) &
+ (~create_SrcA_X1(-1)) &
+ GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
+ create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
+}
+
+__JIT_CODE("__unalign_jit_x1_ld: {crc32_8 r1, r0, r0; ld r0, r0}");
+static tilegx_bundle_bits jit_x1_ld(int rd, int ra)
+{
+ extern tilegx_bundle_bits __unalign_jit_x1_ld;
+ return (GX_INSN_BSWAP(__unalign_jit_x1_ld) & GX_INSN_X1_MASK) |
+ create_Dest_X1(rd) | create_SrcA_X1(ra);
+}
+
+__JIT_CODE("__unalign_jit_x1_ld_add: {ld_add r1, r0, 0}");
+static tilegx_bundle_bits jit_x1_ld_add(int rd, int ra, int imm8)
+{
+ extern tilegx_bundle_bits __unalign_jit_x1_ld_add;
+ return (GX_INSN_BSWAP(__unalign_jit_x1_ld_add) &
+ (~create_Dest_X1(-1)) &
+ GX_INSN_X1_MASK) | create_Dest_X1(rd) |
+ create_SrcA_X1(ra) | create_Imm8_X1(imm8);
+}
+
+__JIT_CODE("__unalign_jit_x0_bfexts: {bfexts r0, r0, 0, 0}");
+static tilegx_bundle_bits jit_x0_bfexts(int rd, int ra, int bfs, int bfe)
+{
+ extern tilegx_bundle_bits __unalign_jit_x0_bfexts;
+ return (GX_INSN_BSWAP(__unalign_jit_x0_bfexts) &
+ GX_INSN_X0_MASK) |
+ create_Dest_X0(rd) | create_SrcA_X0(ra) |
+ create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
+}
+
+__JIT_CODE("__unalign_jit_x0_bfextu: {bfextu r0, r0, 0, 0}");
+static tilegx_bundle_bits jit_x0_bfextu(int rd, int ra, int bfs, int bfe)
+{
+ extern tilegx_bundle_bits __unalign_jit_x0_bfextu;
+ return (GX_INSN_BSWAP(__unalign_jit_x0_bfextu) &
+ GX_INSN_X0_MASK) |
+ create_Dest_X0(rd) | create_SrcA_X0(ra) |
+ create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
+}
+
+__JIT_CODE("__unalign_jit_x1_addi: {bfextu r1, r1, 0, 0; addi r0, r0, 0}");
+static tilegx_bundle_bits jit_x1_addi(int rd, int ra, int imm8)
+{
+ extern tilegx_bundle_bits __unalign_jit_x1_addi;
+ return (GX_INSN_BSWAP(__unalign_jit_x1_addi) & GX_INSN_X1_MASK) |
+ create_Dest_X1(rd) | create_SrcA_X1(ra) |
+ create_Imm8_X1(imm8);
+}
+
+__JIT_CODE("__unalign_jit_x0_shrui: {shrui r0, r0, 0; iret}");
+static tilegx_bundle_bits jit_x0_shrui(int rd, int ra, int imm6)
+{
+ extern tilegx_bundle_bits __unalign_jit_x0_shrui;
+ return (GX_INSN_BSWAP(__unalign_jit_x0_shrui) &
+ GX_INSN_X0_MASK) |
+ create_Dest_X0(rd) | create_SrcA_X0(ra) |
+ create_ShAmt_X0(imm6);
+}
+
+__JIT_CODE("__unalign_jit_x0_rotli: {rotli r0, r0, 0; iret}");
+static tilegx_bundle_bits jit_x0_rotli(int rd, int ra, int imm6)
+{
+ extern tilegx_bundle_bits __unalign_jit_x0_rotli;
+ return (GX_INSN_BSWAP(__unalign_jit_x0_rotli) &
+ GX_INSN_X0_MASK) |
+ create_Dest_X0(rd) | create_SrcA_X0(ra) |
+ create_ShAmt_X0(imm6);
+}
+
+__JIT_CODE("__unalign_jit_x1_bnezt: {bnezt r0, __unalign_jit_x1_bnezt}");
+static tilegx_bundle_bits jit_x1_bnezt(int ra, int broff)
+{
+ extern tilegx_bundle_bits __unalign_jit_x1_bnezt;
+ return (GX_INSN_BSWAP(__unalign_jit_x1_bnezt) &
+ GX_INSN_X1_MASK) |
+ create_SrcA_X1(ra) | create_BrOff_X1(broff);
+}
+
+#undef __JIT_CODE
+
+/*
+ * This function generates unalign fixup JIT.
+ *
+ * We fist find unalign load/store instruction's destination, source
+ * reguisters: ra, rb and rd. and 3 scratch registers by calling
+ * find_regs(...). 3 scratch clobbers should not alias with any register
+ * used in the fault bundle. Then analyze the fault bundle to determine
+ * if it's a load or store, operand width, branch or address increment etc.
+ * At last generated JIT is copied into JIT code area in user space.
+ */
+
+static
+void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
+ int align_ctl)
+{
+ struct thread_info *info = current_thread_info();
+ struct unaligned_jit_fragment frag;
+ struct unaligned_jit_fragment *jit_code_area;
+ tilegx_bundle_bits bundle_2 = 0;
+ /* If bundle_2_enable = false, bundle_2 is fnop/nop operation. */
+ bool bundle_2_enable = true;
+ uint64_t ra, rb, rd = -1, clob1, clob2, clob3;
+ /*
+ * Indicate if the unalign access
+ * instruction's registers hit with
+ * others in the same bundle.
+ */
+ bool alias = false;
+ bool load_n_store = true;
+ bool load_store_signed = false;
+ unsigned int load_store_size = 8;
+ bool y1_br = false; /* True, for a branch in same bundle at Y1.*/
+ int y1_br_reg = 0;
+ /* True for link operation. i.e. jalr or lnk at Y1 */
+ bool y1_lr = false;
+ int y1_lr_reg = 0;
+ bool x1_add = false;/* True, for load/store ADD instruction at X1*/
+ int x1_add_imm8 = 0;
+ bool unexpected = false;
+ int n = 0, k;
+
+ jit_code_area =
+ (struct unaligned_jit_fragment *)(info->unalign_jit_base);
+
+ memset((void *)&frag, 0, sizeof(frag));
+
+ /* 0: X mode, Otherwise: Y mode. */
+ if (bundle & TILEGX_BUNDLE_MODE_MASK) {
+ unsigned int mod, opcode;
+
+ if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 &&
+ get_RRROpcodeExtension_Y1(bundle) ==
+ UNARY_RRR_1_OPCODE_Y1) {
+
+ opcode = get_UnaryOpcodeExtension_Y1(bundle);
+
+ /*
+ * Test "jalr", "jalrp", "jr", "jrp" instruction at Y1
+ * pipeline.
+ */
+ switch (opcode) {
+ case JALR_UNARY_OPCODE_Y1:
+ case JALRP_UNARY_OPCODE_Y1:
+ y1_lr = true;
+ y1_lr_reg = 55; /* Link register. */
+ /* FALLTHROUGH */
+ case JR_UNARY_OPCODE_Y1:
+ case JRP_UNARY_OPCODE_Y1:
+ y1_br = true;
+ y1_br_reg = get_SrcA_Y1(bundle);
+ break;
+ case LNK_UNARY_OPCODE_Y1:
+ /* "lnk" at Y1 pipeline. */
+ y1_lr = true;
+ y1_lr_reg = get_Dest_Y1(bundle);
+ break;
+ }
+ }
+
+ opcode = get_Opcode_Y2(bundle);
+ mod = get_Mode(bundle);
+
+ /*
+ * bundle_2 is bundle after making Y2 as a dummy operation
+ * - ld zero, sp
+ */
+ bundle_2 = (bundle & (~GX_INSN_Y2_MASK)) | jit_y2_dummy();
+
+ /* Make Y1 as fnop if Y1 is a branch or lnk operation. */
+ if (y1_br || y1_lr) {
+ bundle_2 &= ~(GX_INSN_Y1_MASK);
+ bundle_2 |= jit_y1_fnop();
+ }
+
+ if (is_y0_y1_nop(bundle_2))
+ bundle_2_enable = false;
+
+ if (mod == MODE_OPCODE_YC2) {
+ /* Store. */
+ load_n_store = false;
+ load_store_size = 1 << opcode;
+ load_store_signed = false;
+ find_regs(bundle, 0, &ra, &rb, &clob1, &clob2,
+ &clob3, &alias);
+ if (load_store_size > 8)
+ unexpected = true;
+ } else {
+ /* Load. */
+ load_n_store = true;
+ if (mod == MODE_OPCODE_YB2) {
+ switch (opcode) {
+ case LD_OPCODE_Y2:
+ load_store_signed = false;
+ load_store_size = 8;
+ break;
+ case LD4S_OPCODE_Y2:
+ load_store_signed = true;
+ load_store_size = 4;
+ break;
+ case LD4U_OPCODE_Y2:
+ load_store_signed = false;
+ load_store_size = 4;
+ break;
+ default:
+ unexpected = true;
+ }
+ } else if (mod == MODE_OPCODE_YA2) {
+ if (opcode == LD2S_OPCODE_Y2) {
+ load_store_signed = true;
+ load_store_size = 2;
+ } else if (opcode == LD2U_OPCODE_Y2) {
+ load_store_signed = false;
+ load_store_size = 2;
+ } else
+ unexpected = true;
+ } else
+ unexpected = true;
+ find_regs(bundle, &rd, &ra, &rb, &clob1, &clob2,
+ &clob3, &alias);
+ }
+ } else {
+ unsigned int opcode;
+
+ /* bundle_2 is bundle after making X1 as "fnop". */
+ bundle_2 = (bundle & (~GX_INSN_X1_MASK)) | jit_x1_fnop();
+
+ if (is_x0_x1_nop(bundle_2))
+ bundle_2_enable = false;
+
+ if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) {
+ opcode = get_UnaryOpcodeExtension_X1(bundle);
+
+ if (get_RRROpcodeExtension_X1(bundle) ==
+ UNARY_RRR_0_OPCODE_X1) {
+ load_n_store = true;
+ find_regs(bundle, &rd, &ra, &rb, &clob1,
+ &clob2, &clob3, &alias);
+
+ switch (opcode) {
+ case LD_UNARY_OPCODE_X1:
+ load_store_signed = false;
+ load_store_size = 8;
+ break;
+ case LD4S_UNARY_OPCODE_X1:
+ load_store_signed = true;
+ /* FALLTHROUGH */
+ case LD4U_UNARY_OPCODE_X1:
+ load_store_size = 4;
+ break;
+
+ case LD2S_UNARY_OPCODE_X1:
+ load_store_signed = true;
+ /* FALLTHROUGH */
+ case LD2U_UNARY_OPCODE_X1:
+ load_store_size = 2;
+ break;
+ default:
+ unexpected = true;
+ }
+ } else {
+ load_n_store = false;
+ load_store_signed = false;
+ find_regs(bundle, 0, &ra, &rb,
+ &clob1, &clob2, &clob3,
+ &alias);
+
+ opcode = get_RRROpcodeExtension_X1(bundle);
+ switch (opcode) {
+ case ST_RRR_0_OPCODE_X1:
+ load_store_size = 8;
+ break;
+ case ST4_RRR_0_OPCODE_X1:
+ load_store_size = 4;
+ break;
+ case ST2_RRR_0_OPCODE_X1:
+ load_store_size = 2;
+ break;
+ default:
+ unexpected = true;
+ }
+ }
+ } else if (get_Opcode_X1(bundle) == IMM8_OPCODE_X1) {
+ load_n_store = true;
+ opcode = get_Imm8OpcodeExtension_X1(bundle);
+ switch (opcode) {
+ case LD_ADD_IMM8_OPCODE_X1:
+ load_store_size = 8;
+ break;
+
+ case LD4S_ADD_IMM8_OPCODE_X1:
+ load_store_signed = true;
+ /* FALLTHROUGH */
+ case LD4U_ADD_IMM8_OPCODE_X1:
+ load_store_size = 4;
+ break;
+
+ case LD2S_ADD_IMM8_OPCODE_X1:
+ load_store_signed = true;
+ /* FALLTHROUGH */
+ case LD2U_ADD_IMM8_OPCODE_X1:
+ load_store_size = 2;
+ break;
+
+ case ST_ADD_IMM8_OPCODE_X1:
+ load_n_store = false;
+ load_store_size = 8;
+ break;
+ case ST4_ADD_IMM8_OPCODE_X1:
+ load_n_store = false;
+ load_store_size = 4;
+ break;
+ case ST2_ADD_IMM8_OPCODE_X1:
+ load_n_store = false;
+ load_store_size = 2;
+ break;
+ default:
+ unexpected = true;
+ }
+
+ if (!unexpected) {
+ x1_add = true;
+ if (load_n_store)
+ x1_add_imm8 = get_Imm8_X1(bundle);
+ else
+ x1_add_imm8 = get_Dest_Imm8_X1(bundle);
+ }
+
+ find_regs(bundle, load_n_store ? (&rd) : NULL,
+ &ra, &rb, &clob1, &clob2, &clob3, &alias);
+ } else
+ unexpected = true;
+ }
+
+ /*
+ * Some sanity check for register numbers extracted from fault bundle.
+ */
+ if (check_regs(rd, ra, rb, clob1, clob2, clob3) == true)
+ unexpected = true;
+
+ /* Give warning if register ra has an aligned address. */
+ if (!unexpected)
+ WARN_ON(!((load_store_size - 1) & (regs->regs[ra])));
+
+
+ /*
+ * Fault came from kernel space, here we only need take care of
+ * unaligned "get_user/put_user" macros defined in "uaccess.h".
+ * Basically, we will handle bundle like this:
+ * {ld/2u/4s rd, ra; movei rx, 0} or {st/2/4 ra, rb; movei rx, 0}
+ * (Refer to file "arch/tile/include/asm/uaccess.h" for details).
+ * For either load or store, byte-wise operation is performed by calling
+ * get_user() or put_user(). If the macro returns non-zero value,
+ * set the value to rx, otherwise set zero to rx. Finally make pc point
+ * to next bundle and return.
+ */
+
+ if (EX1_PL(regs->ex1) != USER_PL) {
+
+ unsigned long rx = 0;
+ unsigned long x = 0, ret = 0;
+
+ if (y1_br || y1_lr || x1_add ||
+ (load_store_signed !=
+ (load_n_store && load_store_size == 4))) {
+ /* No branch, link, wrong sign-ext or load/store add. */
+ unexpected = true;
+ } else if (!unexpected) {
+ if (bundle & TILEGX_BUNDLE_MODE_MASK) {
+ /*
+ * Fault bundle is Y mode.
+ * Check if the Y1 and Y0 is the form of
+ * { movei rx, 0; nop/fnop }, if yes,
+ * find the rx.
+ */
+
+ if ((get_Opcode_Y1(bundle) == ADDI_OPCODE_Y1)
+ && (get_SrcA_Y1(bundle) == TREG_ZERO) &&
+ (get_Imm8_Y1(bundle) == 0) &&
+ is_bundle_y0_nop(bundle)) {
+ rx = get_Dest_Y1(bundle);
+ } else if ((get_Opcode_Y0(bundle) ==
+ ADDI_OPCODE_Y0) &&
+ (get_SrcA_Y0(bundle) == TREG_ZERO) &&
+ (get_Imm8_Y0(bundle) == 0) &&
+ is_bundle_y1_nop(bundle)) {
+ rx = get_Dest_Y0(bundle);
+ } else {
+ unexpected = true;
+ }
+ } else {
+ /*
+ * Fault bundle is X mode.
+ * Check if the X0 is 'movei rx, 0',
+ * if yes, find the rx.
+ */
+
+ if ((get_Opcode_X0(bundle) == IMM8_OPCODE_X0)
+ && (get_Imm8OpcodeExtension_X0(bundle) ==
+ ADDI_IMM8_OPCODE_X0) &&
+ (get_SrcA_X0(bundle) == TREG_ZERO) &&
+ (get_Imm8_X0(bundle) == 0)) {
+ rx = get_Dest_X0(bundle);
+ } else {
+ unexpected = true;
+ }
+ }
+
+ /* rx should be less than 56. */
+ if (!unexpected && (rx >= 56))
+ unexpected = true;
+ }
+
+ if (!search_exception_tables(regs->pc)) {
+ /* No fixup in the exception tables for the pc. */
+ unexpected = true;
+ }
+
+ if (unexpected) {
+ /* Unexpected unalign kernel fault. */
+ struct task_struct *tsk = validate_current();
+
+ bust_spinlocks(1);
+
+ show_regs(regs);
+
+ if (unlikely(tsk->pid < 2)) {
+ panic("Kernel unalign fault running %s!",
+ tsk->pid ? "init" : "the idle task");
+ }
+#ifdef SUPPORT_DIE
+ die("Oops", regs);
+#endif
+ bust_spinlocks(1);
+
+ do_group_exit(SIGKILL);
+
+ } else {
+ unsigned long i, b = 0;
+ unsigned char *ptr =
+ (unsigned char *)regs->regs[ra];
+ if (load_n_store) {
+ /* handle get_user(x, ptr) */
+ for (i = 0; i < load_store_size; i++) {
+ ret = get_user(b, ptr++);
+ if (!ret) {
+ /* Success! update x. */
+#ifdef __LITTLE_ENDIAN
+ x |= (b << (8 * i));
+#else
+ x <<= 8;
+ x |= b;
+#endif /* __LITTLE_ENDIAN */
+ } else {
+ x = 0;
+ break;
+ }
+ }
+
+ /* Sign-extend 4-byte loads. */
+ if (load_store_size == 4)
+ x = (long)(int)x;
+
+ /* Set register rd. */
+ regs->regs[rd] = x;
+
+ /* Set register rx. */
+ regs->regs[rx] = ret;
+
+ /* Bump pc. */
+ regs->pc += 8;
+
+ } else {
+ /* Handle put_user(x, ptr) */
+ x = regs->regs[rb];
+#ifdef __LITTLE_ENDIAN
+ b = x;
+#else
+ /*
+ * Swap x in order to store x from low
+ * to high memory same as the
+ * little-endian case.
+ */
+ switch (load_store_size) {
+ case 8:
+ b = swab64(x);
+ break;
+ case 4:
+ b = swab32(x);
+ break;
+ case 2:
+ b = swab16(x);
+ break;
+ }
+#endif /* __LITTLE_ENDIAN */
+ for (i = 0; i < load_store_size; i++) {
+ ret = put_user(b, ptr++);
+ if (ret)
+ break;
+ /* Success! shift 1 byte. */
+ b >>= 8;
+ }
+ /* Set register rx. */
+ regs->regs[rx] = ret;
+
+ /* Bump pc. */
+ regs->pc += 8;
+ }
+ }
+
+ unaligned_fixup_count++;
+
+ if (unaligned_printk) {
+ pr_info("%s/%d. Unalign fixup for kernel access "
+ "to userspace %lx.",
+ current->comm, current->pid, regs->regs[ra]);
+ }
+
+ /* Done! Return to the exception handler. */
+ return;
+ }
+
+ if ((align_ctl == 0) || unexpected) {
+ siginfo_t info = {
+ .si_signo = SIGBUS,
+ .si_code = BUS_ADRALN,
+ .si_addr = (unsigned char __user *)0
+ };
+ if (unaligned_printk)
+ pr_info("Unalign bundle: unexp @%llx, %llx",
+ (unsigned long long)regs->pc,
+ (unsigned long long)bundle);
+
+ if (ra < 56) {
+ unsigned long uaa = (unsigned long)regs->regs[ra];
+ /* Set bus Address. */
+ info.si_addr = (unsigned char __user *)uaa;
+ }
+
+ unaligned_fixup_count++;
+
+ trace_unhandled_signal("unaligned fixup trap", regs,
+ (unsigned long)info.si_addr, SIGBUS);
+ force_sig_info(info.si_signo, &info, current);
+ return;
+ }
+
+#ifdef __LITTLE_ENDIAN
+#define UA_FIXUP_ADDR_DELTA 1
+#define UA_FIXUP_BFEXT_START(_B_) 0
+#define UA_FIXUP_BFEXT_END(_B_) (8 * (_B_) - 1)
+#else /* __BIG_ENDIAN */
+#define UA_FIXUP_ADDR_DELTA -1
+#define UA_FIXUP_BFEXT_START(_B_) (64 - 8 * (_B_))
+#define UA_FIXUP_BFEXT_END(_B_) 63
+#endif /* __LITTLE_ENDIAN */
+
+
+
+ if ((ra != rb) && (rd != TREG_SP) && !alias &&
+ !y1_br && !y1_lr && !x1_add) {
+ /*
+ * Simple case: ra != rb and no register alias found,
+ * and no branch or link. This will be the majority.
+ * We can do a little better for simplae case than the
+ * generic scheme below.
+ */
+ if (!load_n_store) {
+ /*
+ * Simple store: ra != rb, no need for scratch register.
+ * Just store and rotate to right bytewise.
+ */
+#ifdef __BIG_ENDIAN
+ frag.insn[n++] =
+ jit_x0_addi(ra, ra, load_store_size - 1) |
+ jit_x1_fnop();
+#endif /* __BIG_ENDIAN */
+ for (k = 0; k < load_store_size; k++) {
+ /* Store a byte. */
+ frag.insn[n++] =
+ jit_x0_rotli(rb, rb, 56) |
+ jit_x1_st1_add(ra, rb,
+ UA_FIXUP_ADDR_DELTA);
+ }
+#ifdef __BIG_ENDIAN
+ frag.insn[n] = jit_x1_addi(ra, ra, 1);
+#else
+ frag.insn[n] = jit_x1_addi(ra, ra,
+ -1 * load_store_size);
+#endif /* __LITTLE_ENDIAN */
+
+ if (load_store_size == 8) {
+ frag.insn[n] |= jit_x0_fnop();
+ } else if (load_store_size == 4) {
+ frag.insn[n] |= jit_x0_rotli(rb, rb, 32);
+ } else { /* = 2 */
+ frag.insn[n] |= jit_x0_rotli(rb, rb, 16);
+ }
+ n++;
+ if (bundle_2_enable)
+ frag.insn[n++] = bundle_2;
+ frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
+ } else {
+ if (rd == ra) {
+ /* Use two clobber registers: clob1/2. */
+ frag.insn[n++] =
+ jit_x0_addi(TREG_SP, TREG_SP, -16) |
+ jit_x1_fnop();
+ frag.insn[n++] =
+ jit_x0_addi(clob1, ra, 7) |
+ jit_x1_st_add(TREG_SP, clob1, -8);
+ frag.insn[n++] =
+ jit_x0_addi(clob2, ra, 0) |
+ jit_x1_st(TREG_SP, clob2);
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_ldna(rd, ra);
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_ldna(clob1, clob1);
+ /*
+ * Note: we must make sure that rd must not
+ * be sp. Recover clob1/2 from stack.
+ */
+ frag.insn[n++] =
+ jit_x0_dblalign(rd, clob1, clob2) |
+ jit_x1_ld_add(clob2, TREG_SP, 8);
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_ld_add(clob1, TREG_SP, 16);
+ } else {
+ /* Use one clobber register: clob1 only. */
+ frag.insn[n++] =
+ jit_x0_addi(TREG_SP, TREG_SP, -16) |
+ jit_x1_fnop();
+ frag.insn[n++] =
+ jit_x0_addi(clob1, ra, 7) |
+ jit_x1_st(TREG_SP, clob1);
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_ldna(rd, ra);
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_ldna(clob1, clob1);
+ /*
+ * Note: we must make sure that rd must not
+ * be sp. Recover clob1 from stack.
+ */
+ frag.insn[n++] =
+ jit_x0_dblalign(rd, clob1, ra) |
+ jit_x1_ld_add(clob1, TREG_SP, 16);
+ }
+
+ if (bundle_2_enable)
+ frag.insn[n++] = bundle_2;
+ /*
+ * For non 8-byte load, extract corresponding bytes and
+ * signed extension.
+ */
+ if (load_store_size == 4) {
+ if (load_store_signed)
+ frag.insn[n++] =
+ jit_x0_bfexts(
+ rd, rd,
+ UA_FIXUP_BFEXT_START(4),
+ UA_FIXUP_BFEXT_END(4)) |
+ jit_x1_fnop();
+ else
+ frag.insn[n++] =
+ jit_x0_bfextu(
+ rd, rd,
+ UA_FIXUP_BFEXT_START(4),
+ UA_FIXUP_BFEXT_END(4)) |
+ jit_x1_fnop();
+ } else if (load_store_size == 2) {
+ if (load_store_signed)
+ frag.insn[n++] =
+ jit_x0_bfexts(
+ rd, rd,
+ UA_FIXUP_BFEXT_START(2),
+ UA_FIXUP_BFEXT_END(2)) |
+ jit_x1_fnop();
+ else
+ frag.insn[n++] =
+ jit_x0_bfextu(
+ rd, rd,
+ UA_FIXUP_BFEXT_START(2),
+ UA_FIXUP_BFEXT_END(2)) |
+ jit_x1_fnop();
+ }
+
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_iret();
+ }
+ } else if (!load_n_store) {
+
+ /*
+ * Generic memory store cases: use 3 clobber registers.
+ *
+ * Alloc space for saveing clob2,1,3 on user's stack.
+ * register clob3 points to where clob2 saved, followed by
+ * clob1 and 3 from high to low memory.
+ */
+ frag.insn[n++] =
+ jit_x0_addi(TREG_SP, TREG_SP, -32) |
+ jit_x1_fnop();
+ frag.insn[n++] =
+ jit_x0_addi(clob3, TREG_SP, 16) |
+ jit_x1_st_add(TREG_SP, clob3, 8);
+#ifdef __LITTLE_ENDIAN
+ frag.insn[n++] =
+ jit_x0_addi(clob1, ra, 0) |
+ jit_x1_st_add(TREG_SP, clob1, 8);
+#else
+ frag.insn[n++] =
+ jit_x0_addi(clob1, ra, load_store_size - 1) |
+ jit_x1_st_add(TREG_SP, clob1, 8);
+#endif
+ if (load_store_size == 8) {
+ /*
+ * We save one byte a time, not for fast, but compact
+ * code. After each store, data source register shift
+ * right one byte. unchanged after 8 stores.
+ */
+ frag.insn[n++] =
+ jit_x0_addi(clob2, TREG_ZERO, 7) |
+ jit_x1_st_add(TREG_SP, clob2, 16);
+ frag.insn[n++] =
+ jit_x0_rotli(rb, rb, 56) |
+ jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
+ frag.insn[n++] =
+ jit_x0_addi(clob2, clob2, -1) |
+ jit_x1_bnezt(clob2, -1);
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_addi(clob2, y1_br_reg, 0);
+ } else if (load_store_size == 4) {
+ frag.insn[n++] =
+ jit_x0_addi(clob2, TREG_ZERO, 3) |
+ jit_x1_st_add(TREG_SP, clob2, 16);
+ frag.insn[n++] =
+ jit_x0_rotli(rb, rb, 56) |
+ jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
+ frag.insn[n++] =
+ jit_x0_addi(clob2, clob2, -1) |
+ jit_x1_bnezt(clob2, -1);
+ /*
+ * same as 8-byte case, but need shift another 4
+ * byte to recover rb for 4-byte store.
+ */
+ frag.insn[n++] = jit_x0_rotli(rb, rb, 32) |
+ jit_x1_addi(clob2, y1_br_reg, 0);
+ } else { /* =2 */
+ frag.insn[n++] =
+ jit_x0_addi(clob2, rb, 0) |
+ jit_x1_st_add(TREG_SP, clob2, 16);
+ for (k = 0; k < 2; k++) {
+ frag.insn[n++] =
+ jit_x0_shrui(rb, rb, 8) |
+ jit_x1_st1_add(clob1, rb,
+ UA_FIXUP_ADDR_DELTA);
+ }
+ frag.insn[n++] =
+ jit_x0_addi(rb, clob2, 0) |
+ jit_x1_addi(clob2, y1_br_reg, 0);
+ }
+
+ if (bundle_2_enable)
+ frag.insn[n++] = bundle_2;
+
+ if (y1_lr) {
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_mfspr(y1_lr_reg,
+ SPR_EX_CONTEXT_0_0);
+ }
+ if (y1_br) {
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
+ clob2);
+ }
+ if (x1_add) {
+ frag.insn[n++] =
+ jit_x0_addi(ra, ra, x1_add_imm8) |
+ jit_x1_ld_add(clob2, clob3, -8);
+ } else {
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_ld_add(clob2, clob3, -8);
+ }
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_ld_add(clob1, clob3, -8);
+ frag.insn[n++] = jit_x0_fnop() | jit_x1_ld(clob3, clob3);
+ frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
+
+ } else {
+ /*
+ * Generic memory load cases.
+ *
+ * Alloc space for saveing clob1,2,3 on user's stack.
+ * register clob3 points to where clob1 saved, followed
+ * by clob2 and 3 from high to low memory.
+ */
+
+ frag.insn[n++] =
+ jit_x0_addi(TREG_SP, TREG_SP, -32) |
+ jit_x1_fnop();
+ frag.insn[n++] =
+ jit_x0_addi(clob3, TREG_SP, 16) |
+ jit_x1_st_add(TREG_SP, clob3, 8);
+ frag.insn[n++] =
+ jit_x0_addi(clob2, ra, 0) |
+ jit_x1_st_add(TREG_SP, clob2, 8);
+
+ if (y1_br) {
+ frag.insn[n++] =
+ jit_x0_addi(clob1, y1_br_reg, 0) |
+ jit_x1_st_add(TREG_SP, clob1, 16);
+ } else {
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_st_add(TREG_SP, clob1, 16);
+ }
+
+ if (bundle_2_enable)
+ frag.insn[n++] = bundle_2;
+
+ if (y1_lr) {
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_mfspr(y1_lr_reg,
+ SPR_EX_CONTEXT_0_0);
+ }
+
+ if (y1_br) {
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
+ clob1);
+ }
+
+ frag.insn[n++] =
+ jit_x0_addi(clob1, clob2, 7) |
+ jit_x1_ldna(rd, clob2);
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_ldna(clob1, clob1);
+ frag.insn[n++] =
+ jit_x0_dblalign(rd, clob1, clob2) |
+ jit_x1_ld_add(clob1, clob3, -8);
+ if (x1_add) {
+ frag.insn[n++] =
+ jit_x0_addi(ra, ra, x1_add_imm8) |
+ jit_x1_ld_add(clob2, clob3, -8);
+ } else {
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_ld_add(clob2, clob3, -8);
+ }
+
+ frag.insn[n++] =
+ jit_x0_fnop() |
+ jit_x1_ld(clob3, clob3);
+
+ if (load_store_size == 4) {
+ if (load_store_signed)
+ frag.insn[n++] =
+ jit_x0_bfexts(
+ rd, rd,
+ UA_FIXUP_BFEXT_START(4),
+ UA_FIXUP_BFEXT_END(4)) |
+ jit_x1_fnop();
+ else
+ frag.insn[n++] =
+ jit_x0_bfextu(
+ rd, rd,
+ UA_FIXUP_BFEXT_START(4),
+ UA_FIXUP_BFEXT_END(4)) |
+ jit_x1_fnop();
+ } else if (load_store_size == 2) {
+ if (load_store_signed)
+ frag.insn[n++] =
+ jit_x0_bfexts(
+ rd, rd,
+ UA_FIXUP_BFEXT_START(2),
+ UA_FIXUP_BFEXT_END(2)) |
+ jit_x1_fnop();
+ else
+ frag.insn[n++] =
+ jit_x0_bfextu(
+ rd, rd,
+ UA_FIXUP_BFEXT_START(2),
+ UA_FIXUP_BFEXT_END(2)) |
+ jit_x1_fnop();
+ }
+
+ frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
+ }
+
+ /* Max JIT bundle count is 14. */
+ WARN_ON(n > 14);
+
+ if (!unexpected) {
+ int status = 0;
+ int idx = (regs->pc >> 3) &
+ ((1ULL << (PAGE_SHIFT - UNALIGN_JIT_SHIFT)) - 1);
+
+ frag.pc = regs->pc;
+ frag.bundle = bundle;
+
+ if (unaligned_printk) {
+ pr_info("%s/%d, Unalign fixup: pc=%lx "
+ "bundle=%lx %d %d %d %d %d %d %d %d.",
+ current->comm, current->pid,
+ (unsigned long)frag.pc,
+ (unsigned long)frag.bundle,
+ (int)alias, (int)rd, (int)ra,
+ (int)rb, (int)bundle_2_enable,
+ (int)y1_lr, (int)y1_br, (int)x1_add);
+
+ for (k = 0; k < n; k += 2)
+ pr_info("[%d] %016llx %016llx", k,
+ (unsigned long long)frag.insn[k],
+ (unsigned long long)frag.insn[k+1]);
+ }
+
+ /* Swap bundle byte order for big endian sys. */
+#ifdef __BIG_ENDIAN
+ frag.bundle = GX_INSN_BSWAP(frag.bundle);
+ for (k = 0; k < n; k++)
+ frag.insn[k] = GX_INSN_BSWAP(frag.insn[k]);
+#endif /* __BIG_ENDIAN */
+
+ status = copy_to_user((void __user *)&jit_code_area[idx],
+ &frag, sizeof(frag));
+ if (status) {
+ /* Fail to copy JIT into user land. send SIGSEGV. */
+ siginfo_t info = {
+ .si_signo = SIGSEGV,
+ .si_code = SEGV_MAPERR,
+ .si_addr = (void __user *)&jit_code_area[idx]
+ };
+
+ pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx",
+ current->pid, current->comm,
+ (unsigned long long)&jit_code_area[idx]);
+
+ trace_unhandled_signal("segfault in unalign fixup",
+ regs,
+ (unsigned long)info.si_addr,
+ SIGSEGV);
+ force_sig_info(info.si_signo, &info, current);
+ return;
+ }
+
+
+ /* Do a cheaper increment, not accurate. */
+ unaligned_fixup_count++;
+ __flush_icache_range((unsigned long)&jit_code_area[idx],
+ (unsigned long)&jit_code_area[idx] +
+ sizeof(frag));
+
+ /* Setup SPR_EX_CONTEXT_0_0/1 for returning to user program.*/
+ __insn_mtspr(SPR_EX_CONTEXT_0_0, regs->pc + 8);
+ __insn_mtspr(SPR_EX_CONTEXT_0_1, PL_ICS_EX1(USER_PL, 0));
+
+ /* Modify pc at the start of new JIT. */
+ regs->pc = (unsigned long)&jit_code_area[idx].insn[0];
+ /* Set ICS in SPR_EX_CONTEXT_K_1. */
+ regs->ex1 = PL_ICS_EX1(USER_PL, 1);
+ }
+}
+
+
+/*
+ * C function to generate unalign data JIT. Called from unalign data
+ * interrupt handler.
+ *
+ * First check if unalign fix is disabled or exception did not not come from
+ * user space or sp register points to unalign address, if true, generate a
+ * SIGBUS. Then map a page into user space as JIT area if it is not mapped
+ * yet. Genenerate JIT code by calling jit_bundle_gen(). After that return
+ * back to exception handler.
+ *
+ * The exception handler will "iret" to new generated JIT code after
+ * restoring caller saved registers. In theory, the JIT code will perform
+ * another "iret" to resume user's program.
+ */
+
+void do_unaligned(struct pt_regs *regs, int vecnum)
+{
+ tilegx_bundle_bits __user *pc;
+ tilegx_bundle_bits bundle;
+ struct thread_info *info = current_thread_info();
+ int align_ctl;
+
+ /* Checks the per-process unaligned JIT flags */
+ align_ctl = unaligned_fixup;
+ switch (task_thread_info(current)->align_ctl) {
+ case PR_UNALIGN_NOPRINT:
+ align_ctl = 1;
+ break;
+ case PR_UNALIGN_SIGBUS:
+ align_ctl = 0;
+ break;
+ }
+
+ /* Enable iterrupt in order to access user land. */
+ local_irq_enable();
+
+ /*
+ * The fault came from kernel space. Two choices:
+ * (a) unaligned_fixup < 1, we will first call get/put_user fixup
+ * to return -EFAULT. If no fixup, simply panic the kernel.
+ * (b) unaligned_fixup >=1, we will try to fix the unaligned access
+ * if it was triggered by get_user/put_user() macros. Panic the
+ * kernel if it is not fixable.
+ */
+
+ if (EX1_PL(regs->ex1) != USER_PL) {
+
+ if (align_ctl < 1) {
+ unaligned_fixup_count++;
+ /* If exception came from kernel, try fix it up. */
+ if (fixup_exception(regs)) {
+ if (unaligned_printk)
+ pr_info("Unalign fixup: %d %llx @%llx",
+ (int)unaligned_fixup,
+ (unsigned long long)regs->ex1,
+ (unsigned long long)regs->pc);
+ return;
+ }
+ /* Not fixable. Go panic. */
+ panic("Unalign exception in Kernel. pc=%lx",
+ regs->pc);
+ return;
+ } else {
+ /*
+ * Try to fix the exception. If we can't, panic the
+ * kernel.
+ */
+ bundle = GX_INSN_BSWAP(
+ *((tilegx_bundle_bits *)(regs->pc)));
+ jit_bundle_gen(regs, bundle, align_ctl);
+ return;
+ }
+ }
+
+ /*
+ * Fault came from user with ICS or stack is not aligned.
+ * If so, we will trigger SIGBUS.
+ */
+ if ((regs->sp & 0x7) || (regs->ex1) || (align_ctl < 0)) {
+ siginfo_t info = {
+ .si_signo = SIGBUS,
+ .si_code = BUS_ADRALN,
+ .si_addr = (unsigned char __user *)0
+ };
+
+ if (unaligned_printk)
+ pr_info("Unalign fixup: %d %llx @%llx",
+ (int)unaligned_fixup,
+ (unsigned long long)regs->ex1,
+ (unsigned long long)regs->pc);
+
+ unaligned_fixup_count++;
+
+ trace_unhandled_signal("unaligned fixup trap", regs, 0, SIGBUS);
+ force_sig_info(info.si_signo, &info, current);
+ return;
+ }
+
+
+ /* Read the bundle casued the exception! */
+ pc = (tilegx_bundle_bits __user *)(regs->pc);
+ if (get_user(bundle, pc) != 0) {
+ /* Probably never be here since pc is valid user address.*/
+ siginfo_t info = {
+ .si_signo = SIGSEGV,
+ .si_code = SEGV_MAPERR,
+ .si_addr = (void __user *)pc
+ };
+ pr_err("Couldn't read instruction at %p trying to step\n", pc);
+ trace_unhandled_signal("segfault in unalign fixup", regs,
+ (unsigned long)info.si_addr, SIGSEGV);
+ force_sig_info(info.si_signo, &info, current);
+ return;
+ }
+
+ if (!info->unalign_jit_base) {
+ void __user *user_page;
+
+ /*
+ * Allocate a page in userland.
+ * For 64-bit processes we try to place the mapping far
+ * from anything else that might be going on (specifically
+ * 64 GB below the top of the user address space). If it
+ * happens not to be possible to put it there, it's OK;
+ * the kernel will choose another location and we'll
+ * remember it for later.
+ */
+ if (is_compat_task())
+ user_page = NULL;
+ else
+ user_page = (void __user *)(TASK_SIZE - (1UL << 36)) +
+ (current->pid << PAGE_SHIFT);
+
+ user_page = (void __user *) vm_mmap(NULL,
+ (unsigned long)user_page,
+ PAGE_SIZE,
+ PROT_EXEC | PROT_READ |
+ PROT_WRITE,
+#ifdef CONFIG_HOMECACHE
+ MAP_CACHE_HOME_TASK |
+#endif
+ MAP_PRIVATE |
+ MAP_ANONYMOUS,
+ 0);
+
+ if (IS_ERR((void __force *)user_page)) {
+ pr_err("Out of kernel pages trying do_mmap.\n");
+ return;
+ }
+
+ /* Save the address in the thread_info struct */
+ info->unalign_jit_base = user_page;
+ if (unaligned_printk)
+ pr_info("Unalign bundle: %d:%d, allocate page @%llx",
+ raw_smp_processor_id(), current->pid,
+ (unsigned long long)user_page);
+ }
+
+ /* Generate unalign JIT */
+ jit_bundle_gen(regs, GX_INSN_BSWAP(bundle), align_ctl);
+}
+
+#endif /* __tilegx__ */
diff --git a/arch/tile/kernel/vdso.c b/arch/tile/kernel/vdso.c
new file mode 100644
index 000000000000..1533af24106e
--- /dev/null
+++ b/arch/tile/kernel/vdso.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/binfmts.h>
+#include <linux/compat.h>
+#include <linux/elf.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+
+#include <asm/vdso.h>
+#include <asm/mman.h>
+#include <asm/sections.h>
+
+#include <arch/sim.h>
+
+/* The alignment of the vDSO. */
+#define VDSO_ALIGNMENT PAGE_SIZE
+
+
+static unsigned int vdso_pages;
+static struct page **vdso_pagelist;
+
+#ifdef CONFIG_COMPAT
+static unsigned int vdso32_pages;
+static struct page **vdso32_pagelist;
+#endif
+static int vdso_ready;
+
+/*
+ * The vdso data page.
+ */
+static union {
+ struct vdso_data data;
+ u8 page[PAGE_SIZE];
+} vdso_data_store __page_aligned_data;
+
+struct vdso_data *vdso_data = &vdso_data_store.data;
+
+static unsigned int __read_mostly vdso_enabled = 1;
+
+static struct page **vdso_setup(void *vdso_kbase, unsigned int pages)
+{
+ int i;
+ struct page **pagelist;
+
+ pagelist = kzalloc(sizeof(struct page *) * (pages + 1), GFP_KERNEL);
+ BUG_ON(pagelist == NULL);
+ for (i = 0; i < pages - 1; i++) {
+ struct page *pg = virt_to_page(vdso_kbase + i*PAGE_SIZE);
+ ClearPageReserved(pg);
+ pagelist[i] = pg;
+ }
+ pagelist[pages - 1] = virt_to_page(vdso_data);
+ pagelist[pages] = NULL;
+
+ return pagelist;
+}
+
+static int __init vdso_init(void)
+{
+ int data_pages = sizeof(vdso_data_store) >> PAGE_SHIFT;
+
+ /*
+ * We can disable vDSO support generally, but we need to retain
+ * one page to support the two-bundle (16-byte) rt_sigreturn path.
+ */
+ if (!vdso_enabled) {
+ size_t offset = (unsigned long)&__vdso_rt_sigreturn;
+ static struct page *sigret_page;
+ sigret_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ BUG_ON(sigret_page == NULL);
+ vdso_pagelist = &sigret_page;
+ vdso_pages = 1;
+ BUG_ON(offset >= PAGE_SIZE);
+ memcpy(page_address(sigret_page) + offset,
+ vdso_start + offset, 16);
+#ifdef CONFIG_COMPAT
+ vdso32_pages = vdso_pages;
+ vdso32_pagelist = vdso_pagelist;
+#endif
+ vdso_ready = 1;
+ return 0;
+ }
+
+ vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
+ vdso_pages += data_pages;
+ vdso_pagelist = vdso_setup(vdso_start, vdso_pages);
+
+#ifdef CONFIG_COMPAT
+ vdso32_pages = (vdso32_end - vdso32_start) >> PAGE_SHIFT;
+ vdso32_pages += data_pages;
+ vdso32_pagelist = vdso_setup(vdso32_start, vdso32_pages);
+#endif
+
+ smp_wmb();
+ vdso_ready = 1;
+
+ return 0;
+}
+arch_initcall(vdso_init);
+
+const char *arch_vma_name(struct vm_area_struct *vma)
+{
+ if (vma->vm_mm && vma->vm_start == VDSO_BASE)
+ return "[vdso]";
+#ifndef __tilegx__
+ if (vma->vm_start == MEM_USER_INTRPT)
+ return "[intrpt]";
+#endif
+ return NULL;
+}
+
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
+{
+ return NULL;
+}
+
+int in_gate_area(struct mm_struct *mm, unsigned long address)
+{
+ return 0;
+}
+
+int in_gate_area_no_mm(unsigned long address)
+{
+ return 0;
+}
+
+int setup_vdso_pages(void)
+{
+ struct page **pagelist;
+ unsigned long pages;
+ struct mm_struct *mm = current->mm;
+ unsigned long vdso_base = 0;
+ int retval = 0;
+
+ if (!vdso_ready)
+ return 0;
+
+ mm->context.vdso_base = 0;
+
+ pagelist = vdso_pagelist;
+ pages = vdso_pages;
+#ifdef CONFIG_COMPAT
+ if (is_compat_task()) {
+ pagelist = vdso32_pagelist;
+ pages = vdso32_pages;
+ }
+#endif
+
+ /*
+ * vDSO has a problem and was disabled, just don't "enable" it for the
+ * process.
+ */
+ if (pages == 0)
+ return 0;
+
+ vdso_base = get_unmapped_area(NULL, vdso_base,
+ (pages << PAGE_SHIFT) +
+ ((VDSO_ALIGNMENT - 1) & PAGE_MASK),
+ 0, 0);
+ if (IS_ERR_VALUE(vdso_base)) {
+ retval = vdso_base;
+ return retval;
+ }
+
+ /* Add required alignment. */
+ vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT);
+
+ /*
+ * Put vDSO base into mm struct. We need to do this before calling
+ * install_special_mapping or the perf counter mmap tracking code
+ * will fail to recognise it as a vDSO (since arch_vma_name fails).
+ */
+ mm->context.vdso_base = vdso_base;
+
+ /*
+ * our vma flags don't have VM_WRITE so by default, the process isn't
+ * allowed to write those pages.
+ * gdb can break that with ptrace interface, and thus trigger COW on
+ * those pages but it's then your responsibility to never do that on
+ * the "data" page of the vDSO or you'll stop getting kernel updates
+ * and your nice userland gettimeofday will be totally dead.
+ * It's fine to use that for setting breakpoints in the vDSO code
+ * pages though
+ */
+ retval = install_special_mapping(mm, vdso_base,
+ pages << PAGE_SHIFT,
+ VM_READ|VM_EXEC |
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
+ pagelist);
+ if (retval)
+ mm->context.vdso_base = 0;
+
+ return retval;
+}
+
+static __init int vdso_func(char *s)
+{
+ return kstrtouint(s, 0, &vdso_enabled);
+}
+__setup("vdso=", vdso_func);
diff --git a/arch/tile/kernel/vdso/Makefile b/arch/tile/kernel/vdso/Makefile
new file mode 100644
index 000000000000..e2b7a2f4ee41
--- /dev/null
+++ b/arch/tile/kernel/vdso/Makefile
@@ -0,0 +1,118 @@
+# Symbols present in the vdso
+vdso-syms = rt_sigreturn gettimeofday
+
+# Files to link into the vdso
+obj-vdso = $(patsubst %, v%.o, $(vdso-syms))
+
+# Build rules
+targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds
+obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
+
+# vdso32 is only for tilegx -m32 compat task.
+VDSO32-$(CONFIG_COMPAT) := y
+
+obj-y += vdso.o
+obj-$(VDSO32-y) += vdso32.o
+extra-y += vdso.lds
+CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
+
+# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
+CFLAGS_REMOVE_vdso.o = -pg
+CFLAGS_REMOVE_vdso32.o = -pg
+CFLAGS_REMOVE_vrt_sigreturn.o = -pg
+CFLAGS_REMOVE_vrt_sigreturn32.o = -pg
+CFLAGS_REMOVE_vgettimeofday.o = -pg
+CFLAGS_REMOVE_vgettimeofday32.o = -pg
+
+ifdef CONFIG_FEEDBACK_COLLECT
+# vDSO code runs in userspace, not collecting feedback data.
+CFLAGS_REMOVE_vdso.o = -ffeedback-generate
+CFLAGS_REMOVE_vdso32.o = -ffeedback-generate
+CFLAGS_REMOVE_vrt_sigreturn.o = -ffeedback-generate
+CFLAGS_REMOVE_vrt_sigreturn32.o = -ffeedback-generate
+CFLAGS_REMOVE_vgettimeofday.o = -ffeedback-generate
+CFLAGS_REMOVE_vgettimeofday32.o = -ffeedback-generate
+endif
+
+# Disable gcov profiling for VDSO code
+GCOV_PROFILE := n
+
+# Force dependency
+$(obj)/vdso.o: $(obj)/vdso.so
+
+# link rule for the .so file, .lds has to be first
+SYSCFLAGS_vdso.so.dbg = $(c_flags)
+$(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso)
+ $(call if_changed,vdsold)
+
+
+# We also create a special relocatable object that should mirror the symbol
+# table and layout of the linked DSO. With ld -R we can then refer to
+# these symbols in the kernel code rather than hand-coded addresses.
+extra-y += vdso-syms.o
+$(obj)/built-in.o: $(obj)/vdso-syms.o
+$(obj)/built-in.o: ld_flags += -R $(obj)/vdso-syms.o
+
+SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+SYSCFLAGS_vdso_syms.o = -r
+$(obj)/vdso-syms.o: $(src)/vdso.lds $(obj)/vrt_sigreturn.o FORCE
+ $(call if_changed,vdsold)
+
+
+# strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+ $(call if_changed,objcopy)
+
+# actual build commands
+# The DSO images are built using a special linker script
+# Add -lgcc so tilepro gets static muldi3 and lshrdi3 definitions.
+# Make sure only to export the intended __vdso_xxx symbol offsets.
+quiet_cmd_vdsold = VDSOLD $@
+ cmd_vdsold = $(CC) $(KCFLAGS) -nostdlib $(SYSCFLAGS_$(@F)) \
+ -Wl,-T,$(filter-out FORCE,$^) -o $@.tmp -lgcc && \
+ $(CROSS_COMPILE)objcopy \
+ $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@
+
+# install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
+
+vdso.so: $(obj)/vdso.so.dbg
+ @mkdir -p $(MODLIB)/vdso
+ $(call cmd,vdso_install)
+
+vdso32.so: $(obj)/vdso32.so.dbg
+ $(call cmd,vdso_install)
+
+vdso_install: vdso.so
+vdso32_install: vdso32.so
+
+
+KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
+KBUILD_AFLAGS_32 += -m32 -s
+KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_32 += -m32 -fPIC -shared
+
+obj-vdso32 = $(patsubst %, v%32.o, $(vdso-syms))
+obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
+
+targets += $(obj-vdso32) vdso32.so vdso32.so.dbg
+
+$(obj-vdso32:%=%): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
+$(obj-vdso32:%=%): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
+
+$(obj)/vgettimeofday32.o: $(obj)/vgettimeofday.c
+ $(call if_changed,cc_o_c)
+
+$(obj)/vrt_sigreturn32.o: $(obj)/vrt_sigreturn.S
+ $(call if_changed,as_o_S)
+
+# Force dependency
+$(obj)/vdso32.o: $(obj)/vdso32.so
+
+SYSCFLAGS_vdso32.so.dbg = -m32 -shared -s -Wl,-soname=linux-vdso32.so.1 \
+ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+$(obj)/vdso32.so.dbg: $(src)/vdso.lds $(obj-vdso32)
+ $(call if_changed,vdsold)
diff --git a/arch/tile/kernel/vdso/vdso.S b/arch/tile/kernel/vdso/vdso.S
new file mode 100644
index 000000000000..3467adb41630
--- /dev/null
+++ b/arch/tile/kernel/vdso/vdso.S
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+ __PAGE_ALIGNED_DATA
+
+ .global vdso_start, vdso_end
+ .align PAGE_SIZE
+vdso_start:
+ .incbin "arch/tile/kernel/vdso/vdso.so"
+ .align PAGE_SIZE
+vdso_end:
+
+ .previous
diff --git a/arch/tile/kernel/vdso/vdso.lds.S b/arch/tile/kernel/vdso/vdso.lds.S
new file mode 100644
index 000000000000..041cd6c39c83
--- /dev/null
+++ b/arch/tile/kernel/vdso/vdso.lds.S
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#define VDSO_VERSION_STRING LINUX_2.6
+
+
+OUTPUT_ARCH(tile)
+
+/* The ELF entry point can be used to set the AT_SYSINFO value. */
+ENTRY(__vdso_rt_sigreturn);
+
+
+SECTIONS
+{
+ . = SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :text
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+ .dynamic : { *(.dynamic) } :text :dynamic
+
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+
+ /*
+ * This linker script is used both with -r and with -shared.
+ * For the layouts to match, we need to skip more than enough
+ * space for the dynamic symbol table et al. If this amount
+ * is insufficient, ld -shared will barf. Just increase it here.
+ */
+ . = 0x1000;
+ .text : { *(.text .text.*) } :text
+
+ .data : {
+ *(.got.plt) *(.got)
+ *(.data .data.* .gnu.linkonce.d.*)
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ }
+}
+
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+ text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ eh_frame_hdr PT_GNU_EH_FRAME;
+}
+
+
+/*
+ * This controls what userland symbols we export from the vDSO.
+ */
+VERSION
+{
+ VDSO_VERSION_STRING {
+ global:
+ __vdso_rt_sigreturn;
+ __vdso_gettimeofday;
+ gettimeofday;
+ local:*;
+ };
+}
diff --git a/arch/tile/kernel/vdso/vdso32.S b/arch/tile/kernel/vdso/vdso32.S
new file mode 100644
index 000000000000..1d1ac3257e11
--- /dev/null
+++ b/arch/tile/kernel/vdso/vdso32.S
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+ __PAGE_ALIGNED_DATA
+
+ .global vdso32_start, vdso32_end
+ .align PAGE_SIZE
+vdso32_start:
+ .incbin "arch/tile/kernel/vdso/vdso32.so"
+ .align PAGE_SIZE
+vdso32_end:
+
+ .previous
diff --git a/arch/tile/kernel/vdso/vgettimeofday.c b/arch/tile/kernel/vdso/vgettimeofday.c
new file mode 100644
index 000000000000..51ec8e46f5f9
--- /dev/null
+++ b/arch/tile/kernel/vdso/vgettimeofday.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#define VDSO_BUILD /* avoid some shift warnings for -m32 in <asm/page.h> */
+#include <linux/time.h>
+#include <asm/timex.h>
+#include <asm/vdso.h>
+
+#if CHIP_HAS_SPLIT_CYCLE()
+static inline cycles_t get_cycles_inline(void)
+{
+ unsigned int high = __insn_mfspr(SPR_CYCLE_HIGH);
+ unsigned int low = __insn_mfspr(SPR_CYCLE_LOW);
+ unsigned int high2 = __insn_mfspr(SPR_CYCLE_HIGH);
+
+ while (unlikely(high != high2)) {
+ low = __insn_mfspr(SPR_CYCLE_LOW);
+ high = high2;
+ high2 = __insn_mfspr(SPR_CYCLE_HIGH);
+ }
+
+ return (((cycles_t)high) << 32) | low;
+}
+#define get_cycles get_cycles_inline
+#endif
+
+/*
+ * Find out the vDSO data page address in the process address space.
+ */
+inline unsigned long get_datapage(void)
+{
+ unsigned long ret;
+
+ /* vdso data page located in the 2nd vDSO page. */
+ asm volatile ("lnk %0" : "=r"(ret));
+ ret &= ~(PAGE_SIZE - 1);
+ ret += PAGE_SIZE;
+
+ return ret;
+}
+
+int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ cycles_t cycles;
+ unsigned long count, sec, ns;
+ volatile struct vdso_data *vdso_data;
+
+ vdso_data = (struct vdso_data *)get_datapage();
+ /* The use of the timezone is obsolete, normally tz is NULL. */
+ if (unlikely(tz != NULL)) {
+ while (1) {
+ /* Spin until the update finish. */
+ count = vdso_data->tz_update_count;
+ if (count & 1)
+ continue;
+
+ tz->tz_minuteswest = vdso_data->tz_minuteswest;
+ tz->tz_dsttime = vdso_data->tz_dsttime;
+
+ /* Check whether updated, read again if so. */
+ if (count == vdso_data->tz_update_count)
+ break;
+ }
+ }
+
+ if (unlikely(tv == NULL))
+ return 0;
+
+ while (1) {
+ /* Spin until the update finish. */
+ count = vdso_data->tb_update_count;
+ if (count & 1)
+ continue;
+
+ cycles = (get_cycles() - vdso_data->xtime_tod_stamp);
+ ns = (cycles * vdso_data->mult) >> vdso_data->shift;
+ sec = vdso_data->xtime_clock_sec;
+ ns += vdso_data->xtime_clock_nsec;
+ if (ns >= NSEC_PER_SEC) {
+ ns -= NSEC_PER_SEC;
+ sec += 1;
+ }
+
+ /* Check whether updated, read again if so. */
+ if (count == vdso_data->tb_update_count)
+ break;
+ }
+
+ tv->tv_sec = sec;
+ tv->tv_usec = ns / 1000;
+
+ return 0;
+}
+
+int gettimeofday(struct timeval *tv, struct timezone *tz)
+ __attribute__((weak, alias("__vdso_gettimeofday")));
diff --git a/arch/tile/kernel/vdso/vrt_sigreturn.S b/arch/tile/kernel/vdso/vrt_sigreturn.S
new file mode 100644
index 000000000000..6326caf4a039
--- /dev/null
+++ b/arch/tile/kernel/vdso/vrt_sigreturn.S
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/linkage.h>
+#include <arch/abi.h>
+#include <asm/unistd.h>
+
+/*
+ * Note that libc has a copy of this function that it uses to compare
+ * against the PC when a stack backtrace ends, so if this code is
+ * changed, the libc implementation(s) should also be updated.
+ */
+ENTRY(__vdso_rt_sigreturn)
+ moveli TREG_SYSCALL_NR_NAME, __NR_rt_sigreturn
+ swint1
+ /* We don't use ENDPROC to avoid tagging this symbol as FUNC,
+ * which confuses the perf tool.
+ */
+ END(__vdso_rt_sigreturn)
diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S
index a13ed902afbb..f1819423ffc9 100644
--- a/arch/tile/kernel/vmlinux.lds.S
+++ b/arch/tile/kernel/vmlinux.lds.S
@@ -5,7 +5,7 @@
#include <hv/hypervisor.h>
/* Text loads starting from the supervisor interrupt vector address. */
-#define TEXT_OFFSET MEM_SV_INTRPT
+#define TEXT_OFFSET MEM_SV_START
OUTPUT_ARCH(tile)
ENTRY(_start)
@@ -13,7 +13,7 @@ jiffies = jiffies_64;
PHDRS
{
- intrpt1 PT_LOAD ;
+ intrpt PT_LOAD ;
text PT_LOAD ;
data PT_LOAD ;
}
@@ -24,14 +24,17 @@ SECTIONS
#define LOAD_OFFSET TEXT_OFFSET
/* Interrupt vectors */
- .intrpt1 (LOAD_OFFSET) : AT ( 0 ) /* put at the start of physical memory */
+ .intrpt (LOAD_OFFSET) : AT ( 0 ) /* put at the start of physical memory */
{
_text = .;
- *(.intrpt1)
- } :intrpt1 =0
+ *(.intrpt)
+ } :intrpt =0
/* Hypervisor call vectors */
- #include "hvglue.lds"
+ . = ALIGN(0x10000);
+ .hvglue : AT (ADDR(.hvglue) - LOAD_OFFSET) {
+ *(.hvglue)
+ } :NONE
/* Now the real code */
. = ALIGN(0x20000);
@@ -40,7 +43,11 @@ SECTIONS
HEAD_TEXT
SCHED_TEXT
LOCK_TEXT
+ KPROBES_TEXT
+ IRQENTRY_TEXT
__fix_text_end = .; /* tile-cpack won't rearrange before this */
+ ALIGN_FUNCTION();
+ *(.hottext*)
TEXT_TEXT
*(.text.*)
*(.coldtext*)
@@ -67,20 +74,8 @@ SECTIONS
__init_end = .;
_sdata = .; /* Start of data section */
-
RO_DATA_SECTION(PAGE_SIZE)
-
- /* initially writeable, then read-only */
- . = ALIGN(PAGE_SIZE);
- __w1data_begin = .;
- .w1data : AT(ADDR(.w1data) - LOAD_OFFSET) {
- VMLINUX_SYMBOL(__w1data_begin) = .;
- *(.w1data)
- VMLINUX_SYMBOL(__w1data_end) = .;
- }
-
RW_DATA_SECTION(L2_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
-
_edata = .;
EXCEPTION_TABLE(L2_CACHE_BYTES)
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile
index 985f59858234..c4211cbb2021 100644
--- a/arch/tile/lib/Makefile
+++ b/arch/tile/lib/Makefile
@@ -4,15 +4,15 @@
lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \
memmove.o memcpy_$(BITS).o memchr_$(BITS).o memset_$(BITS).o \
- strchr_$(BITS).o strlen_$(BITS).o
-
-ifeq ($(CONFIG_TILEGX),y)
-CFLAGS_REMOVE_memcpy_user_64.o = -fno-omit-frame-pointer
-lib-y += memcpy_user_64.o
-else
-lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o
-endif
+ strchr_$(BITS).o strlen_$(BITS).o strnlen_$(BITS).o
+lib-$(CONFIG_TILEGX) += memcpy_user_64.o
+lib-$(CONFIG_TILEPRO) += atomic_32.o atomic_asm_32.o
lib-$(CONFIG_SMP) += spinlock_$(BITS).o usercopy_$(BITS).o
obj-$(CONFIG_MODULES) += exports.o
+
+# The finv_buffer_remote() and copy_{to,from}_user() routines can't
+# have -pg added, since they both rely on being leaf functions.
+CFLAGS_REMOVE_cacheflush.o = -pg
+CFLAGS_REMOVE_memcpy_user_64.o = -pg
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c
index f5cada70c3c8..759efa337be8 100644
--- a/arch/tile/lib/atomic_32.c
+++ b/arch/tile/lib/atomic_32.c
@@ -20,50 +20,12 @@
#include <linux/atomic.h>
#include <arch/chip.h>
-/* See <asm/atomic_32.h> */
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
-
-/*
- * A block of memory containing locks for atomic ops. Each instance of this
- * struct will be homed on a different CPU.
- */
-struct atomic_locks_on_cpu {
- int lock[ATOMIC_HASH_L2_SIZE];
-} __attribute__((aligned(ATOMIC_HASH_L2_SIZE * 4)));
-
-static DEFINE_PER_CPU(struct atomic_locks_on_cpu, atomic_lock_pool);
-
-/* The locks we'll use until __init_atomic_per_cpu is called. */
-static struct atomic_locks_on_cpu __initdata initial_atomic_locks;
-
-/* Hash into this vector to get a pointer to lock for the given atomic. */
-struct atomic_locks_on_cpu *atomic_lock_ptr[ATOMIC_HASH_L1_SIZE]
- __write_once = {
- [0 ... ATOMIC_HASH_L1_SIZE-1] (&initial_atomic_locks)
-};
-
-#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
/* This page is remapped on startup to be hash-for-home. */
int atomic_locks[PAGE_SIZE / sizeof(int)] __page_aligned_bss;
-#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
int *__atomic_hashed_lock(volatile void *v)
{
/* NOTE: this code must match "sys_cmpxchg" in kernel/intvec_32.S */
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
- unsigned long i =
- (unsigned long) v & ((PAGE_SIZE-1) & -sizeof(long long));
- unsigned long n = __insn_crc32_32(0, i);
-
- /* Grab high bits for L1 index. */
- unsigned long l1_index = n >> ((sizeof(n) * 8) - ATOMIC_HASH_L1_SHIFT);
- /* Grab low bits for L2 index. */
- unsigned long l2_index = n & (ATOMIC_HASH_L2_SIZE - 1);
-
- return &atomic_lock_ptr[l1_index]->lock[l2_index];
-#else
/*
* Use bits [3, 3 + ATOMIC_HASH_SHIFT) as the lock index.
* Using mm works here because atomic_locks is page aligned.
@@ -72,26 +34,13 @@ int *__atomic_hashed_lock(volatile void *v)
(unsigned long)atomic_locks,
2, (ATOMIC_HASH_SHIFT + 2) - 1);
return (int *)ptr;
-#endif
}
#ifdef CONFIG_SMP
/* Return whether the passed pointer is a valid atomic lock pointer. */
static int is_atomic_lock(int *p)
{
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
- int i;
- for (i = 0; i < ATOMIC_HASH_L1_SIZE; ++i) {
-
- if (p >= &atomic_lock_ptr[i]->lock[0] &&
- p < &atomic_lock_ptr[i]->lock[ATOMIC_HASH_L2_SIZE]) {
- return 1;
- }
- }
- return 0;
-#else
return p >= &atomic_locks[0] && p < &atomic_locks[ATOMIC_HASH_SIZE];
-#endif
}
void __atomic_fault_unlock(int *irqlock_word)
@@ -110,33 +59,32 @@ static inline int *__atomic_setup(volatile void *v)
return __atomic_hashed_lock(v);
}
-int _atomic_xchg(atomic_t *v, int n)
+int _atomic_xchg(int *v, int n)
{
- return __atomic_xchg(&v->counter, __atomic_setup(v), n).val;
+ return __atomic_xchg(v, __atomic_setup(v), n).val;
}
EXPORT_SYMBOL(_atomic_xchg);
-int _atomic_xchg_add(atomic_t *v, int i)
+int _atomic_xchg_add(int *v, int i)
{
- return __atomic_xchg_add(&v->counter, __atomic_setup(v), i).val;
+ return __atomic_xchg_add(v, __atomic_setup(v), i).val;
}
EXPORT_SYMBOL(_atomic_xchg_add);
-int _atomic_xchg_add_unless(atomic_t *v, int a, int u)
+int _atomic_xchg_add_unless(int *v, int a, int u)
{
/*
* Note: argument order is switched here since it is easier
* to use the first argument consistently as the "old value"
* in the assembly, as is done for _atomic_cmpxchg().
*/
- return __atomic_xchg_add_unless(&v->counter, __atomic_setup(v), u, a)
- .val;
+ return __atomic_xchg_add_unless(v, __atomic_setup(v), u, a).val;
}
EXPORT_SYMBOL(_atomic_xchg_add_unless);
-int _atomic_cmpxchg(atomic_t *v, int o, int n)
+int _atomic_cmpxchg(int *v, int o, int n)
{
- return __atomic_cmpxchg(&v->counter, __atomic_setup(v), o, n).val;
+ return __atomic_cmpxchg(v, __atomic_setup(v), o, n).val;
}
EXPORT_SYMBOL(_atomic_cmpxchg);
@@ -159,33 +107,32 @@ unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask)
EXPORT_SYMBOL(_atomic_xor);
-u64 _atomic64_xchg(atomic64_t *v, u64 n)
+u64 _atomic64_xchg(u64 *v, u64 n)
{
- return __atomic64_xchg(&v->counter, __atomic_setup(v), n);
+ return __atomic64_xchg(v, __atomic_setup(v), n);
}
EXPORT_SYMBOL(_atomic64_xchg);
-u64 _atomic64_xchg_add(atomic64_t *v, u64 i)
+u64 _atomic64_xchg_add(u64 *v, u64 i)
{
- return __atomic64_xchg_add(&v->counter, __atomic_setup(v), i);
+ return __atomic64_xchg_add(v, __atomic_setup(v), i);
}
EXPORT_SYMBOL(_atomic64_xchg_add);
-u64 _atomic64_xchg_add_unless(atomic64_t *v, u64 a, u64 u)
+u64 _atomic64_xchg_add_unless(u64 *v, u64 a, u64 u)
{
/*
* Note: argument order is switched here since it is easier
* to use the first argument consistently as the "old value"
* in the assembly, as is done for _atomic_cmpxchg().
*/
- return __atomic64_xchg_add_unless(&v->counter, __atomic_setup(v),
- u, a);
+ return __atomic64_xchg_add_unless(v, __atomic_setup(v), u, a);
}
EXPORT_SYMBOL(_atomic64_xchg_add_unless);
-u64 _atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n)
+u64 _atomic64_cmpxchg(u64 *v, u64 o, u64 n)
{
- return __atomic64_cmpxchg(&v->counter, __atomic_setup(v), o, n);
+ return __atomic64_cmpxchg(v, __atomic_setup(v), o, n);
}
EXPORT_SYMBOL(_atomic64_cmpxchg);
@@ -208,54 +155,8 @@ struct __get_user __atomic_bad_address(int __user *addr)
}
-#if CHIP_HAS_CBOX_HOME_MAP()
-static int __init noatomichash(char *str)
-{
- pr_warning("noatomichash is deprecated.\n");
- return 1;
-}
-__setup("noatomichash", noatomichash);
-#endif
-
void __init __init_atomic_per_cpu(void)
{
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
-
- unsigned int i;
- int actual_cpu;
-
- /*
- * Before this is called from setup, we just have one lock for
- * all atomic objects/operations. Here we replace the
- * elements of atomic_lock_ptr so that they point at per_cpu
- * integers. This seemingly over-complex approach stems from
- * the fact that DEFINE_PER_CPU defines an entry for each cpu
- * in the grid, not each cpu from 0..ATOMIC_HASH_SIZE-1. But
- * for efficient hashing of atomics to their locks we want a
- * compile time constant power of 2 for the size of this
- * table, so we use ATOMIC_HASH_SIZE.
- *
- * Here we populate atomic_lock_ptr from the per cpu
- * atomic_lock_pool, interspersing by actual cpu so that
- * subsequent elements are homed on consecutive cpus.
- */
-
- actual_cpu = cpumask_first(cpu_possible_mask);
-
- for (i = 0; i < ATOMIC_HASH_L1_SIZE; ++i) {
- /*
- * Preincrement to slightly bias against using cpu 0,
- * which has plenty of stuff homed on it already.
- */
- actual_cpu = cpumask_next(actual_cpu, cpu_possible_mask);
- if (actual_cpu >= nr_cpu_ids)
- actual_cpu = cpumask_first(cpu_possible_mask);
-
- atomic_lock_ptr[i] = &per_cpu(atomic_lock_pool, actual_cpu);
- }
-
-#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
/* Validate power-of-two and "bigger than cpus" assumption */
BUILD_BUG_ON(ATOMIC_HASH_SIZE & (ATOMIC_HASH_SIZE-1));
BUG_ON(ATOMIC_HASH_SIZE < nr_cpu_ids);
@@ -279,6 +180,4 @@ void __init __init_atomic_per_cpu(void)
* That should not produce more indices than ATOMIC_HASH_SIZE.
*/
BUILD_BUG_ON((PAGE_SIZE >> 3) > ATOMIC_HASH_SIZE);
-
-#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
}
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S
index 30638042691d..6bda3132cd61 100644
--- a/arch/tile/lib/atomic_asm_32.S
+++ b/arch/tile/lib/atomic_asm_32.S
@@ -164,6 +164,7 @@ STD_ENTRY_SECTION(__atomic\name, .text.atomic)
STD_ENDPROC(__atomic\name)
.ifc \bitwidth,32
.pushsection __ex_table,"a"
+ .align 4
.word 1b, __atomic\name
.word 2b, __atomic\name
.word __atomic\name, __atomic_bad_address
diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c
index 8f8ad814b139..9c0ec22009a5 100644
--- a/arch/tile/lib/cacheflush.c
+++ b/arch/tile/lib/cacheflush.c
@@ -36,7 +36,8 @@ static inline void force_load(char *p)
* core (if "!hfh") or homed via hash-for-home (if "hfh"), waiting
* until the memory controller holds the flushed values.
*/
-void finv_buffer_remote(void *buffer, size_t size, int hfh)
+void __attribute__((optimize("omit-frame-pointer")))
+finv_buffer_remote(void *buffer, size_t size, int hfh)
{
char *p, *base;
size_t step_size, load_count;
@@ -147,18 +148,21 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
force_load(p);
/*
- * Repeat, but with inv's instead of loads, to get rid of the
+ * Repeat, but with finv's instead of loads, to get rid of the
* data we just loaded into our own cache and the old home L3.
- * No need to unroll since inv's don't target a register.
+ * No need to unroll since finv's don't target a register.
+ * The finv's are guaranteed not to actually flush the data in
+ * the buffer back to their home, since we just read it, so the
+ * lines are clean in cache; we will only invalidate those lines.
*/
p = (char *)buffer + size - 1;
- __insn_inv(p);
+ __insn_finv(p);
p -= step_size;
p = (char *)((unsigned long)p | (step_size - 1));
for (; p >= base; p -= step_size)
- __insn_inv(p);
+ __insn_finv(p);
- /* Wait for the load+inv's (and thus finvs) to have completed. */
+ /* Wait for these finv's (and thus the first finvs) to be done. */
__insn_mf();
#ifdef __tilegx__
diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c
index a93b02a25222..82733c87d67e 100644
--- a/arch/tile/lib/exports.c
+++ b/arch/tile/lib/exports.c
@@ -22,7 +22,6 @@ EXPORT_SYMBOL(strnlen_user_asm);
EXPORT_SYMBOL(strncpy_from_user_asm);
EXPORT_SYMBOL(clear_user_asm);
EXPORT_SYMBOL(flush_user_asm);
-EXPORT_SYMBOL(inv_user_asm);
EXPORT_SYMBOL(finv_user_asm);
/* arch/tile/kernel/entry.S */
@@ -34,6 +33,12 @@ EXPORT_SYMBOL(dump_stack);
/* arch/tile/kernel/head.S */
EXPORT_SYMBOL(empty_zero_page);
+#ifdef CONFIG_FUNCTION_TRACER
+/* arch/tile/kernel/mcount_64.S */
+#include <asm/ftrace.h>
+EXPORT_SYMBOL(__mcount);
+#endif /* CONFIG_FUNCTION_TRACER */
+
/* arch/tile/lib/, various memcpy files */
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(__copy_to_user_inatomic);
diff --git a/arch/tile/lib/memchr_64.c b/arch/tile/lib/memchr_64.c
index 6f867dbf7c56..f8196b3a950e 100644
--- a/arch/tile/lib/memchr_64.c
+++ b/arch/tile/lib/memchr_64.c
@@ -36,7 +36,7 @@ void *memchr(const void *s, int c, size_t n)
p = (const uint64_t *)(s_int & -8);
/* Create eight copies of the byte for which we are looking. */
- goal = 0x0101010101010101ULL * (uint8_t) c;
+ goal = copy_byte(c);
/* Read the first word, but munge it so that bytes before the array
* will not match goal.
diff --git a/arch/tile/lib/memcpy_32.S b/arch/tile/lib/memcpy_32.S
index 2a419a6122db..a2771ae5da53 100644
--- a/arch/tile/lib/memcpy_32.S
+++ b/arch/tile/lib/memcpy_32.S
@@ -22,14 +22,6 @@
#include <linux/linkage.h>
-/* On TILE64, we wrap these functions via arch/tile/lib/memcpy_tile64.c */
-#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
-#define memcpy __memcpy_asm
-#define __copy_to_user_inatomic __copy_to_user_inatomic_asm
-#define __copy_from_user_inatomic __copy_from_user_inatomic_asm
-#define __copy_from_user_zeroing __copy_from_user_zeroing_asm
-#endif
-
#define IS_MEMCPY 0
#define IS_COPY_FROM_USER 1
#define IS_COPY_FROM_USER_ZEROING 2
@@ -44,6 +36,7 @@
*/
#define EX \
.pushsection __ex_table, "a"; \
+ .align 4; \
.word 9f, memcpy_common_fixup; \
.popsection; \
9
@@ -158,12 +151,9 @@ EX: { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 }
{ addi r3, r1, 60; andi r9, r9, -64 }
-#if CHIP_HAS_WH64()
/* No need to prefetch dst, we'll just do the wh64
* right before we copy a line.
*/
-#endif
-
EX: { lw r5, r3; addi r3, r3, 64; movei r4, 1 }
/* Intentionally stall for a few cycles to leave L2 cache alone. */
{ bnzt zero, .; move r27, lr }
@@ -171,21 +161,6 @@ EX: { lw r6, r3; addi r3, r3, 64 }
/* Intentionally stall for a few cycles to leave L2 cache alone. */
{ bnzt zero, . }
EX: { lw r7, r3; addi r3, r3, 64 }
-#if !CHIP_HAS_WH64()
- /* Prefetch the dest */
- /* Intentionally stall for a few cycles to leave L2 cache alone. */
- { bnzt zero, . }
- /* Use a real load to cause a TLB miss if necessary. We aren't using
- * r28, so this should be fine.
- */
-EX: { lw r28, r9; addi r9, r9, 64 }
- /* Intentionally stall for a few cycles to leave L2 cache alone. */
- { bnzt zero, . }
- { prefetch r9; addi r9, r9, 64 }
- /* Intentionally stall for a few cycles to leave L2 cache alone. */
- { bnzt zero, . }
- { prefetch r9; addi r9, r9, 64 }
-#endif
/* Intentionally stall for a few cycles to leave L2 cache alone. */
{ bz zero, .Lbig_loop2 }
@@ -286,13 +261,8 @@ EX: { lw r7, r3; addi r3, r3, 64 }
/* Fill second L1D line. */
EX: { lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */
-#if CHIP_HAS_WH64()
/* Prepare destination line for writing. */
EX: { wh64 r9; addi r9, r9, 64 }
-#else
- /* Prefetch dest line */
- { prefetch r9; addi r9, r9, 64 }
-#endif
/* Load seven words that are L1D hits to cover wh64 L2 usage. */
/* Load the three remaining words from the last L1D line, which
@@ -330,16 +300,7 @@ EX: { lw r18, r1; addi r1, r1, 4 } /* r18 = WORD_8 */
EX: { sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */
EX: { sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */
EX: { sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */
-#if CHIP_HAS_WH64()
EX: { sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */
-#else
- /* Back up the r9 to a cache line we are already storing to
- * if it gets past the end of the dest vector. Strictly speaking,
- * we don't need to back up to the start of a cache line, but it's free
- * and tidy, so why not?
- */
-EX: { sw r0, r15; addi r0, r0, 4; andi r13, r0, -64 } /* store(WORD_3) */
-#endif
/* Store second L1D line. */
EX: { sw r0, r17; addi r0, r0, 4; mvz r9, r16, r13 }/* store(WORD_4) */
EX: { sw r0, r19; addi r0, r0, 4 } /* store(WORD_5) */
@@ -403,7 +364,6 @@ EX: { sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 }
.Ldest_is_word_aligned:
-#if CHIP_HAS_DWORD_ALIGN()
EX: { andi r8, r0, 63; lwadd_na r6, r1, 4}
{ slti_u r9, r2, 64; bz r8, .Ldest_is_L2_line_aligned }
@@ -511,26 +471,6 @@ EX: { swadd r0, r13, 4; addi r2, r2, -32 }
/* Move r1 back to the point where it corresponds to r0. */
{ addi r1, r1, -4 }
-#else /* !CHIP_HAS_DWORD_ALIGN() */
-
- /* Compute right/left shift counts and load initial source words. */
- { andi r5, r1, -4; andi r3, r1, 3 }
-EX: { lw r6, r5; addi r5, r5, 4; shli r3, r3, 3 }
-EX: { lw r7, r5; addi r5, r5, 4; sub r4, zero, r3 }
-
- /* Load and store one word at a time, using shifts and ORs
- * to correct for the misaligned src.
- */
-.Lcopy_unaligned_src_loop:
- { shr r6, r6, r3; shl r8, r7, r4 }
-EX: { lw r7, r5; or r8, r8, r6; move r6, r7 }
-EX: { sw r0, r8; addi r0, r0, 4; addi r2, r2, -4 }
- { addi r5, r5, 4; slti_u r8, r2, 8 }
- { bzt r8, .Lcopy_unaligned_src_loop; addi r1, r1, 4 }
-
- { bz r2, .Lcopy_unaligned_done }
-#endif /* !CHIP_HAS_DWORD_ALIGN() */
-
/* Fall through */
/*
@@ -614,5 +554,6 @@ memcpy_fixup_loop:
.size memcpy_common_fixup, . - memcpy_common_fixup
.section __ex_table,"a"
+ .align 4
.word .Lcfu, .Lcopy_from_user_fixup_zero_remainder
.word .Lctu, .Lcopy_to_user_fixup_done
diff --git a/arch/tile/lib/memcpy_64.c b/arch/tile/lib/memcpy_64.c
index c79b8e7c6828..4815354b8cd2 100644
--- a/arch/tile/lib/memcpy_64.c
+++ b/arch/tile/lib/memcpy_64.c
@@ -18,14 +18,17 @@
/* EXPORT_SYMBOL() is in arch/tile/lib/exports.c since this should be asm. */
/* Must be 8 bytes in size. */
-#define word_t uint64_t
+#define op_t uint64_t
-#if CHIP_L2_LINE_SIZE() != 64 && CHIP_L2_LINE_SIZE() != 128
-#error "Assumes 64 or 128 byte line size"
+/* Threshold value for when to enter the unrolled loops. */
+#define OP_T_THRES 16
+
+#if CHIP_L2_LINE_SIZE() != 64
+#error "Assumes 64 byte line size"
#endif
/* How many cache lines ahead should we prefetch? */
-#define PREFETCH_LINES_AHEAD 3
+#define PREFETCH_LINES_AHEAD 4
/*
* Provide "base versions" of load and store for the normal code path.
@@ -51,15 +54,16 @@ void *memcpy(void *__restrict dstv, const void *__restrict srcv, size_t n)
* macros to return a count of uncopied bytes due to mm fault.
*/
#define RETVAL 0
-int USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n)
+int __attribute__((optimize("omit-frame-pointer")))
+USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n)
#endif
{
char *__restrict dst1 = (char *)dstv;
const char *__restrict src1 = (const char *)srcv;
const char *__restrict src1_end;
const char *__restrict prefetch;
- word_t *__restrict dst8; /* 8-byte pointer to destination memory. */
- word_t final; /* Final bytes to write to trailing word, if any */
+ op_t *__restrict dst8; /* 8-byte pointer to destination memory. */
+ op_t final; /* Final bytes to write to trailing word, if any */
long i;
if (n < 16) {
@@ -79,104 +83,228 @@ int USERCOPY_FUNC(void *__restrict dstv, const void *__restrict srcv, size_t n)
for (i = 0; i < PREFETCH_LINES_AHEAD; i++) {
__insn_prefetch(prefetch);
prefetch += CHIP_L2_LINE_SIZE();
- prefetch = (prefetch > src1_end) ? prefetch : src1;
+ prefetch = (prefetch < src1_end) ? prefetch : src1;
}
/* Copy bytes until dst is word-aligned. */
- for (; (uintptr_t)dst1 & (sizeof(word_t) - 1); n--)
+ for (; (uintptr_t)dst1 & (sizeof(op_t) - 1); n--)
ST1(dst1++, LD1(src1++));
/* 8-byte pointer to destination memory. */
- dst8 = (word_t *)dst1;
-
- if (__builtin_expect((uintptr_t)src1 & (sizeof(word_t) - 1), 0)) {
- /*
- * Misaligned copy. Copy 8 bytes at a time, but don't
- * bother with other fanciness.
- *
- * TODO: Consider prefetching and using wh64 as well.
- */
-
- /* Create an aligned src8. */
- const word_t *__restrict src8 =
- (const word_t *)((uintptr_t)src1 & -sizeof(word_t));
- word_t b;
-
- word_t a = LD8(src8++);
- for (; n >= sizeof(word_t); n -= sizeof(word_t)) {
- b = LD8(src8++);
- a = __insn_dblalign(a, b, src1);
- ST8(dst8++, a);
- a = b;
+ dst8 = (op_t *)dst1;
+
+ if (__builtin_expect((uintptr_t)src1 & (sizeof(op_t) - 1), 0)) {
+ /* Unaligned copy. */
+
+ op_t tmp0 = 0, tmp1 = 0, tmp2, tmp3;
+ const op_t *src8 = (const op_t *) ((uintptr_t)src1 &
+ -sizeof(op_t));
+ const void *srci = (void *)src1;
+ int m;
+
+ m = (CHIP_L2_LINE_SIZE() << 2) -
+ (((uintptr_t)dst8) & ((CHIP_L2_LINE_SIZE() << 2) - 1));
+ m = (n < m) ? n : m;
+ m /= sizeof(op_t);
+
+ /* Copy until 'dst' is cache-line-aligned. */
+ n -= (sizeof(op_t) * m);
+
+ switch (m % 4) {
+ case 0:
+ if (__builtin_expect(!m, 0))
+ goto _M0;
+ tmp1 = LD8(src8++);
+ tmp2 = LD8(src8++);
+ goto _8B3;
+ case 2:
+ m += 2;
+ tmp3 = LD8(src8++);
+ tmp0 = LD8(src8++);
+ goto _8B1;
+ case 3:
+ m += 1;
+ tmp2 = LD8(src8++);
+ tmp3 = LD8(src8++);
+ goto _8B2;
+ case 1:
+ m--;
+ tmp0 = LD8(src8++);
+ tmp1 = LD8(src8++);
+ if (__builtin_expect(!m, 0))
+ goto _8B0;
+ }
+
+ do {
+ tmp2 = LD8(src8++);
+ tmp0 = __insn_dblalign(tmp0, tmp1, srci);
+ ST8(dst8++, tmp0);
+_8B3:
+ tmp3 = LD8(src8++);
+ tmp1 = __insn_dblalign(tmp1, tmp2, srci);
+ ST8(dst8++, tmp1);
+_8B2:
+ tmp0 = LD8(src8++);
+ tmp2 = __insn_dblalign(tmp2, tmp3, srci);
+ ST8(dst8++, tmp2);
+_8B1:
+ tmp1 = LD8(src8++);
+ tmp3 = __insn_dblalign(tmp3, tmp0, srci);
+ ST8(dst8++, tmp3);
+ m -= 4;
+ } while (m);
+
+_8B0:
+ tmp0 = __insn_dblalign(tmp0, tmp1, srci);
+ ST8(dst8++, tmp0);
+ src8--;
+
+_M0:
+ if (__builtin_expect(n >= CHIP_L2_LINE_SIZE(), 0)) {
+ op_t tmp4, tmp5, tmp6, tmp7, tmp8;
+
+ prefetch = ((const char *)src8) +
+ CHIP_L2_LINE_SIZE() * PREFETCH_LINES_AHEAD;
+
+ for (tmp0 = LD8(src8++); n >= CHIP_L2_LINE_SIZE();
+ n -= CHIP_L2_LINE_SIZE()) {
+ /* Prefetch and advance to next line to
+ prefetch, but don't go past the end. */
+ __insn_prefetch(prefetch);
+
+ /* Make sure prefetch got scheduled
+ earlier. */
+ __asm__ ("" : : : "memory");
+
+ prefetch += CHIP_L2_LINE_SIZE();
+ prefetch = (prefetch < src1_end) ? prefetch :
+ (const char *) src8;
+
+ tmp1 = LD8(src8++);
+ tmp2 = LD8(src8++);
+ tmp3 = LD8(src8++);
+ tmp4 = LD8(src8++);
+ tmp5 = LD8(src8++);
+ tmp6 = LD8(src8++);
+ tmp7 = LD8(src8++);
+ tmp8 = LD8(src8++);
+
+ tmp0 = __insn_dblalign(tmp0, tmp1, srci);
+ tmp1 = __insn_dblalign(tmp1, tmp2, srci);
+ tmp2 = __insn_dblalign(tmp2, tmp3, srci);
+ tmp3 = __insn_dblalign(tmp3, tmp4, srci);
+ tmp4 = __insn_dblalign(tmp4, tmp5, srci);
+ tmp5 = __insn_dblalign(tmp5, tmp6, srci);
+ tmp6 = __insn_dblalign(tmp6, tmp7, srci);
+ tmp7 = __insn_dblalign(tmp7, tmp8, srci);
+
+ __insn_wh64(dst8);
+
+ ST8(dst8++, tmp0);
+ ST8(dst8++, tmp1);
+ ST8(dst8++, tmp2);
+ ST8(dst8++, tmp3);
+ ST8(dst8++, tmp4);
+ ST8(dst8++, tmp5);
+ ST8(dst8++, tmp6);
+ ST8(dst8++, tmp7);
+
+ tmp0 = tmp8;
+ }
+ src8--;
+ }
+
+ /* Copy the rest 8-byte chunks. */
+ if (n >= sizeof(op_t)) {
+ tmp0 = LD8(src8++);
+ for (; n >= sizeof(op_t); n -= sizeof(op_t)) {
+ tmp1 = LD8(src8++);
+ tmp0 = __insn_dblalign(tmp0, tmp1, srci);
+ ST8(dst8++, tmp0);
+ tmp0 = tmp1;
+ }
+ src8--;
}
if (n == 0)
return RETVAL;
- b = ((const char *)src8 <= src1_end) ? *src8 : 0;
+ tmp0 = LD8(src8++);
+ tmp1 = ((const char *)src8 <= src1_end)
+ ? LD8((op_t *)src8) : 0;
+ final = __insn_dblalign(tmp0, tmp1, srci);
- /*
- * Final source bytes to write to trailing partial
- * word, if any.
- */
- final = __insn_dblalign(a, b, src1);
} else {
/* Aligned copy. */
- const word_t* __restrict src8 = (const word_t *)src1;
+ const op_t *__restrict src8 = (const op_t *)src1;
/* src8 and dst8 are both word-aligned. */
if (n >= CHIP_L2_LINE_SIZE()) {
/* Copy until 'dst' is cache-line-aligned. */
for (; (uintptr_t)dst8 & (CHIP_L2_LINE_SIZE() - 1);
- n -= sizeof(word_t))
+ n -= sizeof(op_t))
ST8(dst8++, LD8(src8++));
for (; n >= CHIP_L2_LINE_SIZE(); ) {
- __insn_wh64(dst8);
+ op_t tmp0, tmp1, tmp2, tmp3;
+ op_t tmp4, tmp5, tmp6, tmp7;
/*
* Prefetch and advance to next line
- * to prefetch, but don't go past the end
+ * to prefetch, but don't go past the
+ * end.
*/
__insn_prefetch(prefetch);
+
+ /* Make sure prefetch got scheduled
+ earlier. */
+ __asm__ ("" : : : "memory");
+
prefetch += CHIP_L2_LINE_SIZE();
- prefetch = (prefetch > src1_end) ? prefetch :
+ prefetch = (prefetch < src1_end) ? prefetch :
(const char *)src8;
/*
- * Copy an entire cache line. Manually
- * unrolled to avoid idiosyncracies of
- * compiler unrolling.
+ * Do all the loads before wh64. This
+ * is necessary if [src8, src8+7] and
+ * [dst8, dst8+7] share the same cache
+ * line and dst8 <= src8, as can be
+ * the case when called from memmove,
+ * or with code tested on x86 whose
+ * memcpy always works with forward
+ * copies.
*/
-#define COPY_WORD(offset) ({ ST8(dst8+offset, LD8(src8+offset)); n -= 8; })
- COPY_WORD(0);
- COPY_WORD(1);
- COPY_WORD(2);
- COPY_WORD(3);
- COPY_WORD(4);
- COPY_WORD(5);
- COPY_WORD(6);
- COPY_WORD(7);
-#if CHIP_L2_LINE_SIZE() == 128
- COPY_WORD(8);
- COPY_WORD(9);
- COPY_WORD(10);
- COPY_WORD(11);
- COPY_WORD(12);
- COPY_WORD(13);
- COPY_WORD(14);
- COPY_WORD(15);
-#elif CHIP_L2_LINE_SIZE() != 64
-# error Fix code that assumes particular L2 cache line sizes
-#endif
+ tmp0 = LD8(src8++);
+ tmp1 = LD8(src8++);
+ tmp2 = LD8(src8++);
+ tmp3 = LD8(src8++);
+ tmp4 = LD8(src8++);
+ tmp5 = LD8(src8++);
+ tmp6 = LD8(src8++);
+ tmp7 = LD8(src8++);
+
+ /* wh64 and wait for tmp7 load completion. */
+ __asm__ ("move %0, %0; wh64 %1\n"
+ : : "r"(tmp7), "r"(dst8));
- dst8 += CHIP_L2_LINE_SIZE() / sizeof(word_t);
- src8 += CHIP_L2_LINE_SIZE() / sizeof(word_t);
+ ST8(dst8++, tmp0);
+ ST8(dst8++, tmp1);
+ ST8(dst8++, tmp2);
+ ST8(dst8++, tmp3);
+ ST8(dst8++, tmp4);
+ ST8(dst8++, tmp5);
+ ST8(dst8++, tmp6);
+ ST8(dst8++, tmp7);
+
+ n -= CHIP_L2_LINE_SIZE();
}
+#if CHIP_L2_LINE_SIZE() != 64
+# error "Fix code that assumes particular L2 cache line size."
+#endif
}
- for (; n >= sizeof(word_t); n -= sizeof(word_t))
+ for (; n >= sizeof(op_t); n -= sizeof(op_t))
ST8(dst8++, LD8(src8++));
if (__builtin_expect(n == 0, 1))
diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c
deleted file mode 100644
index 3bc4b4e40d93..000000000000
--- a/arch/tile/lib/memcpy_tile64.c
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#include <linux/string.h>
-#include <linux/smp.h>
-#include <linux/module.h>
-#include <linux/uaccess.h>
-#include <asm/fixmap.h>
-#include <asm/kmap_types.h>
-#include <asm/tlbflush.h>
-#include <hv/hypervisor.h>
-#include <arch/chip.h>
-
-
-#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
-
-/* Defined in memcpy.S */
-extern unsigned long __memcpy_asm(void *to, const void *from, unsigned long n);
-extern unsigned long __copy_to_user_inatomic_asm(
- void __user *to, const void *from, unsigned long n);
-extern unsigned long __copy_from_user_inatomic_asm(
- void *to, const void __user *from, unsigned long n);
-extern unsigned long __copy_from_user_zeroing_asm(
- void *to, const void __user *from, unsigned long n);
-
-typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long);
-
-/* Size above which to consider TLB games for performance */
-#define LARGE_COPY_CUTOFF 2048
-
-/* Communicate to the simulator what we are trying to do. */
-#define sim_allow_multiple_caching(b) \
- __insn_mtspr(SPR_SIM_CONTROL, \
- SIM_CONTROL_ALLOW_MULTIPLE_CACHING | ((b) << _SIM_CONTROL_OPERATOR_BITS))
-
-/*
- * Copy memory by briefly enabling incoherent cacheline-at-a-time mode.
- *
- * We set up our own source and destination PTEs that we fully control.
- * This is the only way to guarantee that we don't race with another
- * thread that is modifying the PTE; we can't afford to try the
- * copy_{to,from}_user() technique of catching the interrupt, since
- * we must run with interrupts disabled to avoid the risk of some
- * other code seeing the incoherent data in our cache. (Recall that
- * our cache is indexed by PA, so even if the other code doesn't use
- * our kmap_atomic virtual addresses, they'll still hit in cache using
- * the normal VAs that aren't supposed to hit in cache.)
- */
-static void memcpy_multicache(void *dest, const void *source,
- pte_t dst_pte, pte_t src_pte, int len)
-{
- int idx;
- unsigned long flags, newsrc, newdst;
- pmd_t *pmdp;
- pte_t *ptep;
- int type0, type1;
- int cpu = get_cpu();
-
- /*
- * Disable interrupts so that we don't recurse into memcpy()
- * in an interrupt handler, nor accidentally reference
- * the PA of the source from an interrupt routine. Also
- * notify the simulator that we're playing games so we don't
- * generate spurious coherency warnings.
- */
- local_irq_save(flags);
- sim_allow_multiple_caching(1);
-
- /* Set up the new dest mapping */
- type0 = kmap_atomic_idx_push();
- idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0;
- newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1));
- pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst);
- ptep = pte_offset_kernel(pmdp, newdst);
- if (pte_val(*ptep) != pte_val(dst_pte)) {
- set_pte(ptep, dst_pte);
- local_flush_tlb_page(NULL, newdst, PAGE_SIZE);
- }
-
- /* Set up the new source mapping */
- type1 = kmap_atomic_idx_push();
- idx += (type0 - type1);
- src_pte = hv_pte_set_nc(src_pte);
- src_pte = hv_pte_clear_writable(src_pte); /* be paranoid */
- newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1));
- pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc);
- ptep = pte_offset_kernel(pmdp, newsrc);
- __set_pte(ptep, src_pte); /* set_pte() would be confused by this */
- local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
-
- /* Actually move the data. */
- __memcpy_asm((void *)newdst, (const void *)newsrc, len);
-
- /*
- * Remap the source as locally-cached and not OLOC'ed so that
- * we can inval without also invaling the remote cpu's cache.
- * This also avoids known errata with inv'ing cacheable oloc data.
- */
- src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3);
- src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */
- __set_pte(ptep, src_pte); /* set_pte() would be confused by this */
- local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
-
- /*
- * Do the actual invalidation, covering the full L2 cache line
- * at the end since __memcpy_asm() is somewhat aggressive.
- */
- __inv_buffer((void *)newsrc, len);
-
- /*
- * We're done: notify the simulator that all is back to normal,
- * and re-enable interrupts and pre-emption.
- */
- kmap_atomic_idx_pop();
- kmap_atomic_idx_pop();
- sim_allow_multiple_caching(0);
- local_irq_restore(flags);
- put_cpu();
-}
-
-/*
- * Identify large copies from remotely-cached memory, and copy them
- * via memcpy_multicache() if they look good, otherwise fall back
- * to the particular kind of copying passed as the memcpy_t function.
- */
-static unsigned long fast_copy(void *dest, const void *source, int len,
- memcpy_t func)
-{
- /*
- * Check if it's big enough to bother with. We may end up doing a
- * small copy via TLB manipulation if we're near a page boundary,
- * but presumably we'll make it up when we hit the second page.
- */
- while (len >= LARGE_COPY_CUTOFF) {
- int copy_size, bytes_left_on_page;
- pte_t *src_ptep, *dst_ptep;
- pte_t src_pte, dst_pte;
- struct page *src_page, *dst_page;
-
- /* Is the source page oloc'ed to a remote cpu? */
-retry_source:
- src_ptep = virt_to_pte(current->mm, (unsigned long)source);
- if (src_ptep == NULL)
- break;
- src_pte = *src_ptep;
- if (!hv_pte_get_present(src_pte) ||
- !hv_pte_get_readable(src_pte) ||
- hv_pte_get_mode(src_pte) != HV_PTE_MODE_CACHE_TILE_L3)
- break;
- if (get_remote_cache_cpu(src_pte) == smp_processor_id())
- break;
- src_page = pfn_to_page(pte_pfn(src_pte));
- get_page(src_page);
- if (pte_val(src_pte) != pte_val(*src_ptep)) {
- put_page(src_page);
- goto retry_source;
- }
- if (pte_huge(src_pte)) {
- /* Adjust the PTE to correspond to a small page */
- int pfn = pte_pfn(src_pte);
- pfn += (((unsigned long)source & (HPAGE_SIZE-1))
- >> PAGE_SHIFT);
- src_pte = pfn_pte(pfn, src_pte);
- src_pte = pte_mksmall(src_pte);
- }
-
- /* Is the destination page writable? */
-retry_dest:
- dst_ptep = virt_to_pte(current->mm, (unsigned long)dest);
- if (dst_ptep == NULL) {
- put_page(src_page);
- break;
- }
- dst_pte = *dst_ptep;
- if (!hv_pte_get_present(dst_pte) ||
- !hv_pte_get_writable(dst_pte)) {
- put_page(src_page);
- break;
- }
- dst_page = pfn_to_page(pte_pfn(dst_pte));
- if (dst_page == src_page) {
- /*
- * Source and dest are on the same page; this
- * potentially exposes us to incoherence if any
- * part of src and dest overlap on a cache line.
- * Just give up rather than trying to be precise.
- */
- put_page(src_page);
- break;
- }
- get_page(dst_page);
- if (pte_val(dst_pte) != pte_val(*dst_ptep)) {
- put_page(dst_page);
- goto retry_dest;
- }
- if (pte_huge(dst_pte)) {
- /* Adjust the PTE to correspond to a small page */
- int pfn = pte_pfn(dst_pte);
- pfn += (((unsigned long)dest & (HPAGE_SIZE-1))
- >> PAGE_SHIFT);
- dst_pte = pfn_pte(pfn, dst_pte);
- dst_pte = pte_mksmall(dst_pte);
- }
-
- /* All looks good: create a cachable PTE and copy from it */
- copy_size = len;
- bytes_left_on_page =
- PAGE_SIZE - (((int)source) & (PAGE_SIZE-1));
- if (copy_size > bytes_left_on_page)
- copy_size = bytes_left_on_page;
- bytes_left_on_page =
- PAGE_SIZE - (((int)dest) & (PAGE_SIZE-1));
- if (copy_size > bytes_left_on_page)
- copy_size = bytes_left_on_page;
- memcpy_multicache(dest, source, dst_pte, src_pte, copy_size);
-
- /* Release the pages */
- put_page(dst_page);
- put_page(src_page);
-
- /* Continue on the next page */
- dest += copy_size;
- source += copy_size;
- len -= copy_size;
- }
-
- return func(dest, source, len);
-}
-
-void *memcpy(void *to, const void *from, __kernel_size_t n)
-{
- if (n < LARGE_COPY_CUTOFF)
- return (void *)__memcpy_asm(to, from, n);
- else
- return (void *)fast_copy(to, from, n, __memcpy_asm);
-}
-
-unsigned long __copy_to_user_inatomic(void __user *to, const void *from,
- unsigned long n)
-{
- if (n < LARGE_COPY_CUTOFF)
- return __copy_to_user_inatomic_asm(to, from, n);
- else
- return fast_copy(to, from, n, __copy_to_user_inatomic_asm);
-}
-
-unsigned long __copy_from_user_inatomic(void *to, const void __user *from,
- unsigned long n)
-{
- if (n < LARGE_COPY_CUTOFF)
- return __copy_from_user_inatomic_asm(to, from, n);
- else
- return fast_copy(to, from, n, __copy_from_user_inatomic_asm);
-}
-
-unsigned long __copy_from_user_zeroing(void *to, const void __user *from,
- unsigned long n)
-{
- if (n < LARGE_COPY_CUTOFF)
- return __copy_from_user_zeroing_asm(to, from, n);
- else
- return fast_copy(to, from, n, __copy_from_user_zeroing_asm);
-}
-
-#endif /* !CHIP_HAS_COHERENT_LOCAL_CACHE() */
diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c
index 37440caa7370..88c7016492c4 100644
--- a/arch/tile/lib/memcpy_user_64.c
+++ b/arch/tile/lib/memcpy_user_64.c
@@ -31,6 +31,7 @@
".pushsection .coldtext.memcpy,\"ax\";" \
"2: { move r0, %2; jrp lr };" \
".section __ex_table,\"a\";" \
+ ".align 8;" \
".quad 1b, 2b;" \
".popsection" \
: "=m" (*(p)) : "r" (v), "r" (n)); \
@@ -43,6 +44,7 @@
".pushsection .coldtext.memcpy,\"ax\";" \
"2: { move r0, %2; jrp lr };" \
".section __ex_table,\"a\";" \
+ ".align 8;" \
".quad 1b, 2b;" \
".popsection" \
: "=r" (__v) : "m" (*(p)), "r" (n)); \
diff --git a/arch/tile/lib/memset_32.c b/arch/tile/lib/memset_32.c
index 57dbb3a5bff8..2042bfe6595f 100644
--- a/arch/tile/lib/memset_32.c
+++ b/arch/tile/lib/memset_32.c
@@ -12,13 +12,10 @@
* more details.
*/
-#include <arch/chip.h>
-
#include <linux/types.h>
#include <linux/string.h>
#include <linux/module.h>
-
-#undef memset
+#include <arch/chip.h>
void *memset(void *s, int c, size_t n)
{
@@ -26,11 +23,7 @@ void *memset(void *s, int c, size_t n)
int n32;
uint32_t v16, v32;
uint8_t *out8 = s;
-#if !CHIP_HAS_WH64()
- int ahead32;
-#else
int to_align32;
-#endif
/* Experimentation shows that a trivial tight loop is a win up until
* around a size of 20, where writing a word at a time starts to win.
@@ -61,21 +54,6 @@ void *memset(void *s, int c, size_t n)
return s;
}
-#if !CHIP_HAS_WH64()
- /* Use a spare issue slot to start prefetching the first cache
- * line early. This instruction is free as the store can be buried
- * in otherwise idle issue slots doing ALU ops.
- */
- __insn_prefetch(out8);
-
- /* We prefetch the end so that a short memset that spans two cache
- * lines gets some prefetching benefit. Again we believe this is free
- * to issue.
- */
- __insn_prefetch(&out8[n - 1]);
-#endif /* !CHIP_HAS_WH64() */
-
-
/* Align 'out8'. We know n >= 3 so this won't write past the end. */
while (((uintptr_t) out8 & 3) != 0) {
*out8++ = c;
@@ -96,90 +74,6 @@ void *memset(void *s, int c, size_t n)
/* This must be at least 8 or the following loop doesn't work. */
#define CACHE_LINE_SIZE_IN_WORDS (CHIP_L2_LINE_SIZE() / 4)
-#if !CHIP_HAS_WH64()
-
- ahead32 = CACHE_LINE_SIZE_IN_WORDS;
-
- /* We already prefetched the first and last cache lines, so
- * we only need to do more prefetching if we are storing
- * to more than two cache lines.
- */
- if (n32 > CACHE_LINE_SIZE_IN_WORDS * 2) {
- int i;
-
- /* Prefetch the next several cache lines.
- * This is the setup code for the software-pipelined
- * loop below.
- */
-#define MAX_PREFETCH 5
- ahead32 = n32 & -CACHE_LINE_SIZE_IN_WORDS;
- if (ahead32 > MAX_PREFETCH * CACHE_LINE_SIZE_IN_WORDS)
- ahead32 = MAX_PREFETCH * CACHE_LINE_SIZE_IN_WORDS;
-
- for (i = CACHE_LINE_SIZE_IN_WORDS;
- i < ahead32; i += CACHE_LINE_SIZE_IN_WORDS)
- __insn_prefetch(&out32[i]);
- }
-
- if (n32 > ahead32) {
- while (1) {
- int j;
-
- /* Prefetch by reading one word several cache lines
- * ahead. Since loads are non-blocking this will
- * cause the full cache line to be read while we are
- * finishing earlier cache lines. Using a store
- * here causes microarchitectural performance
- * problems where a victimizing store miss goes to
- * the head of the retry FIFO and locks the pipe for
- * a few cycles. So a few subsequent stores in this
- * loop go into the retry FIFO, and then later
- * stores see other stores to the same cache line
- * are already in the retry FIFO and themselves go
- * into the retry FIFO, filling it up and grinding
- * to a halt waiting for the original miss to be
- * satisfied.
- */
- __insn_prefetch(&out32[ahead32]);
-
-#if CACHE_LINE_SIZE_IN_WORDS % 4 != 0
-#error "Unhandled CACHE_LINE_SIZE_IN_WORDS"
-#endif
-
- n32 -= CACHE_LINE_SIZE_IN_WORDS;
-
- /* Save icache space by only partially unrolling
- * this loop.
- */
- for (j = CACHE_LINE_SIZE_IN_WORDS / 4; j > 0; j--) {
- *out32++ = v32;
- *out32++ = v32;
- *out32++ = v32;
- *out32++ = v32;
- }
-
- /* To save compiled code size, reuse this loop even
- * when we run out of prefetching to do by dropping
- * ahead32 down.
- */
- if (n32 <= ahead32) {
- /* Not even a full cache line left,
- * so stop now.
- */
- if (n32 < CACHE_LINE_SIZE_IN_WORDS)
- break;
-
- /* Choose a small enough value that we don't
- * prefetch past the end. There's no sense
- * in touching cache lines we don't have to.
- */
- ahead32 = CACHE_LINE_SIZE_IN_WORDS - 1;
- }
- }
- }
-
-#else /* CHIP_HAS_WH64() */
-
/* Determine how many words we need to emit before the 'out32'
* pointer becomes aligned modulo the cache line size.
*/
@@ -236,8 +130,6 @@ void *memset(void *s, int c, size_t n)
n32 &= CACHE_LINE_SIZE_IN_WORDS - 1;
}
-#endif /* CHIP_HAS_WH64() */
-
/* Now handle any leftover values. */
if (n32 != 0) {
do {
diff --git a/arch/tile/lib/memset_64.c b/arch/tile/lib/memset_64.c
index 3873085711d5..03ef69cd73de 100644
--- a/arch/tile/lib/memset_64.c
+++ b/arch/tile/lib/memset_64.c
@@ -12,13 +12,11 @@
* more details.
*/
-#include <arch/chip.h>
-
#include <linux/types.h>
#include <linux/string.h>
#include <linux/module.h>
-
-#undef memset
+#include <arch/chip.h>
+#include "string-endian.h"
void *memset(void *s, int c, size_t n)
{
@@ -70,8 +68,7 @@ void *memset(void *s, int c, size_t n)
n64 = n >> 3;
/* Tile input byte out to 64 bits. */
- /* KLUDGE */
- v64 = 0x0101010101010101ULL * (uint8_t)c;
+ v64 = copy_byte(c);
/* This must be at least 8 or the following loop doesn't work. */
#define CACHE_LINE_SIZE_IN_DOUBLEWORDS (CHIP_L2_LINE_SIZE() / 8)
diff --git a/arch/tile/lib/strchr_32.c b/arch/tile/lib/strchr_32.c
index c94e6f7ae7b5..841fe6963019 100644
--- a/arch/tile/lib/strchr_32.c
+++ b/arch/tile/lib/strchr_32.c
@@ -16,8 +16,6 @@
#include <linux/string.h>
#include <linux/module.h>
-#undef strchr
-
char *strchr(const char *s, int c)
{
int z, g;
diff --git a/arch/tile/lib/strchr_64.c b/arch/tile/lib/strchr_64.c
index f39f9dc422b0..fe6e31c06f8d 100644
--- a/arch/tile/lib/strchr_64.c
+++ b/arch/tile/lib/strchr_64.c
@@ -26,7 +26,7 @@ char *strchr(const char *s, int c)
const uint64_t *p = (const uint64_t *)(s_int & -8);
/* Create eight copies of the byte for which we are looking. */
- const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c;
+ const uint64_t goal = copy_byte(c);
/* Read the first aligned word, but force bytes before the string to
* match neither zero nor goal (we make sure the high bit of each
diff --git a/arch/tile/lib/string-endian.h b/arch/tile/lib/string-endian.h
index c0eed7ce69c3..2e49cbfe9371 100644
--- a/arch/tile/lib/string-endian.h
+++ b/arch/tile/lib/string-endian.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -31,3 +31,14 @@
#define CFZ(x) __insn_clz(x)
#define REVCZ(x) __insn_ctz(x)
#endif
+
+/*
+ * Create eight copies of the byte in a uint64_t. Byte Shuffle uses
+ * the bytes of srcB as the index into the dest vector to select a
+ * byte. With all indices of zero, the first byte is copied into all
+ * the other bytes.
+ */
+static inline uint64_t copy_byte(uint8_t byte)
+{
+ return __insn_shufflebytes(byte, 0, 0);
+}
diff --git a/arch/tile/lib/strlen_32.c b/arch/tile/lib/strlen_32.c
index 4974292a5534..f26f88e11e4a 100644
--- a/arch/tile/lib/strlen_32.c
+++ b/arch/tile/lib/strlen_32.c
@@ -16,8 +16,6 @@
#include <linux/string.h>
#include <linux/module.h>
-#undef strlen
-
size_t strlen(const char *s)
{
/* Get an aligned pointer. */
diff --git a/arch/tile/lib/strnlen_32.c b/arch/tile/lib/strnlen_32.c
new file mode 100644
index 000000000000..1434141d9e01
--- /dev/null
+++ b/arch/tile/lib/strnlen_32.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/module.h>
+
+size_t strnlen(const char *s, size_t count)
+{
+ /* Get an aligned pointer. */
+ const uintptr_t s_int = (uintptr_t) s;
+ const uint32_t *p = (const uint32_t *)(s_int & -4);
+ size_t bytes_read = sizeof(*p) - (s_int & (sizeof(*p) - 1));
+ size_t len;
+ uint32_t v, bits;
+
+ /* Avoid page fault risk by not reading any bytes when count is 0. */
+ if (count == 0)
+ return 0;
+
+ /* Read first word, but force bytes before the string to be nonzero. */
+ v = *p | ((1 << ((s_int << 3) & 31)) - 1);
+
+ while ((bits = __insn_seqb(v, 0)) == 0) {
+ if (bytes_read >= count) {
+ /* Read COUNT bytes and didn't find the terminator. */
+ return count;
+ }
+ v = *++p;
+ bytes_read += sizeof(v);
+ }
+
+ len = ((const char *) p) + (__insn_ctz(bits) >> 3) - s;
+ return (len < count ? len : count);
+}
+EXPORT_SYMBOL(strnlen);
diff --git a/arch/tile/lib/strnlen_64.c b/arch/tile/lib/strnlen_64.c
new file mode 100644
index 000000000000..2e8de6a5136f
--- /dev/null
+++ b/arch/tile/lib/strnlen_64.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include "string-endian.h"
+
+size_t strnlen(const char *s, size_t count)
+{
+ /* Get an aligned pointer. */
+ const uintptr_t s_int = (uintptr_t) s;
+ const uint64_t *p = (const uint64_t *)(s_int & -8);
+ size_t bytes_read = sizeof(*p) - (s_int & (sizeof(*p) - 1));
+ size_t len;
+ uint64_t v, bits;
+
+ /* Avoid page fault risk by not reading any bytes when count is 0. */
+ if (count == 0)
+ return 0;
+
+ /* Read and MASK the first word. */
+ v = *p | MASK(s_int);
+
+ while ((bits = __insn_v1cmpeqi(v, 0)) == 0) {
+ if (bytes_read >= count) {
+ /* Read COUNT bytes and didn't find the terminator. */
+ return count;
+ }
+ v = *++p;
+ bytes_read += sizeof(v);
+ }
+
+ len = ((const char *) p) + (CFZ(bits) >> 3) - s;
+ return (len < count ? len : count);
+}
+EXPORT_SYMBOL(strnlen);
diff --git a/arch/tile/lib/usercopy_32.S b/arch/tile/lib/usercopy_32.S
index b62d002af009..1bc162224638 100644
--- a/arch/tile/lib/usercopy_32.S
+++ b/arch/tile/lib/usercopy_32.S
@@ -36,6 +36,7 @@ strnlen_user_fault:
{ move r0, zero; jrp lr }
ENDPROC(strnlen_user_fault)
.section __ex_table,"a"
+ .align 4
.word 1b, strnlen_user_fault
.popsection
@@ -47,18 +48,20 @@ strnlen_user_fault:
*/
STD_ENTRY(strncpy_from_user_asm)
{ bz r2, 2f; move r3, r0 }
-1: { lb_u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
+1: { lb_u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
{ sb r0, r4; addi r0, r0, 1 }
- bz r2, 2f
- bnzt r4, 1b
- addi r0, r0, -1 /* don't count the trailing NUL */
-2: { sub r0, r0, r3; jrp lr }
+ bz r4, 2f
+ bnzt r2, 1b
+ { sub r0, r0, r3; jrp lr }
+2: addi r0, r0, -1 /* don't count the trailing NUL */
+ { sub r0, r0, r3; jrp lr }
STD_ENDPROC(strncpy_from_user_asm)
.pushsection .fixup,"ax"
strncpy_from_user_fault:
{ movei r0, -EFAULT; jrp lr }
ENDPROC(strncpy_from_user_fault)
.section __ex_table,"a"
+ .align 4
.word 1b, strncpy_from_user_fault
.popsection
@@ -77,6 +80,7 @@ STD_ENTRY(clear_user_asm)
bnzt r1, 1b
2: { move r0, r1; jrp lr }
.pushsection __ex_table,"a"
+ .align 4
.word 1b, 2b
.popsection
@@ -86,6 +90,7 @@ STD_ENTRY(clear_user_asm)
2: { move r0, r1; jrp lr }
STD_ENDPROC(clear_user_asm)
.pushsection __ex_table,"a"
+ .align 4
.word 1b, 2b
.popsection
@@ -105,25 +110,7 @@ STD_ENTRY(flush_user_asm)
2: { move r0, r1; jrp lr }
STD_ENDPROC(flush_user_asm)
.pushsection __ex_table,"a"
- .word 1b, 2b
- .popsection
-
-/*
- * inv_user_asm takes the user target address in r0 and the
- * number of bytes to invalidate in r1.
- * It returns the number of not inv'able bytes (hopefully zero) in r0.
- */
-STD_ENTRY(inv_user_asm)
- bz r1, 2f
- { movei r2, L2_CACHE_BYTES; add r1, r0, r1 }
- { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 }
- { and r0, r0, r2; and r1, r1, r2 }
- { sub r1, r1, r0 }
-1: { inv r0; addi r1, r1, -CHIP_INV_STRIDE() }
- { addi r0, r0, CHIP_INV_STRIDE(); bnzt r1, 1b }
-2: { move r0, r1; jrp lr }
- STD_ENDPROC(inv_user_asm)
- .pushsection __ex_table,"a"
+ .align 4
.word 1b, 2b
.popsection
@@ -143,5 +130,6 @@ STD_ENTRY(finv_user_asm)
2: { move r0, r1; jrp lr }
STD_ENDPROC(finv_user_asm)
.pushsection __ex_table,"a"
+ .align 4
.word 1b, 2b
.popsection
diff --git a/arch/tile/lib/usercopy_64.S b/arch/tile/lib/usercopy_64.S
index adb2dbbc70cd..b3b31a3306f8 100644
--- a/arch/tile/lib/usercopy_64.S
+++ b/arch/tile/lib/usercopy_64.S
@@ -36,6 +36,7 @@ strnlen_user_fault:
{ move r0, zero; jrp lr }
ENDPROC(strnlen_user_fault)
.section __ex_table,"a"
+ .align 8
.quad 1b, strnlen_user_fault
.popsection
@@ -47,18 +48,20 @@ strnlen_user_fault:
*/
STD_ENTRY(strncpy_from_user_asm)
{ beqz r2, 2f; move r3, r0 }
-1: { ld1u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
+1: { ld1u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
{ st1 r0, r4; addi r0, r0, 1 }
- beqz r2, 2f
- bnezt r4, 1b
- addi r0, r0, -1 /* don't count the trailing NUL */
-2: { sub r0, r0, r3; jrp lr }
+ beqz r4, 2f
+ bnezt r2, 1b
+ { sub r0, r0, r3; jrp lr }
+2: addi r0, r0, -1 /* don't count the trailing NUL */
+ { sub r0, r0, r3; jrp lr }
STD_ENDPROC(strncpy_from_user_asm)
.pushsection .fixup,"ax"
strncpy_from_user_fault:
{ movei r0, -EFAULT; jrp lr }
ENDPROC(strncpy_from_user_fault)
.section __ex_table,"a"
+ .align 8
.quad 1b, strncpy_from_user_fault
.popsection
@@ -77,6 +80,7 @@ STD_ENTRY(clear_user_asm)
bnezt r1, 1b
2: { move r0, r1; jrp lr }
.pushsection __ex_table,"a"
+ .align 8
.quad 1b, 2b
.popsection
@@ -86,6 +90,7 @@ STD_ENTRY(clear_user_asm)
2: { move r0, r1; jrp lr }
STD_ENDPROC(clear_user_asm)
.pushsection __ex_table,"a"
+ .align 8
.quad 1b, 2b
.popsection
@@ -105,25 +110,7 @@ STD_ENTRY(flush_user_asm)
2: { move r0, r1; jrp lr }
STD_ENDPROC(flush_user_asm)
.pushsection __ex_table,"a"
- .quad 1b, 2b
- .popsection
-
-/*
- * inv_user_asm takes the user target address in r0 and the
- * number of bytes to invalidate in r1.
- * It returns the number of not inv'able bytes (hopefully zero) in r0.
- */
-STD_ENTRY(inv_user_asm)
- beqz r1, 2f
- { movei r2, L2_CACHE_BYTES; add r1, r0, r1 }
- { sub r2, zero, r2; addi r1, r1, L2_CACHE_BYTES-1 }
- { and r0, r0, r2; and r1, r1, r2 }
- { sub r1, r1, r0 }
-1: { inv r0; addi r1, r1, -CHIP_INV_STRIDE() }
- { addi r0, r0, CHIP_INV_STRIDE(); bnezt r1, 1b }
-2: { move r0, r1; jrp lr }
- STD_ENDPROC(inv_user_asm)
- .pushsection __ex_table,"a"
+ .align 8
.quad 1b, 2b
.popsection
@@ -143,5 +130,6 @@ STD_ENTRY(finv_user_asm)
2: { move r0, r1; jrp lr }
STD_ENDPROC(finv_user_asm)
.pushsection __ex_table,"a"
+ .align 8
.quad 1b, 2b
.popsection
diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c
index 743c951c61b0..23f044e8a7ab 100644
--- a/arch/tile/mm/elf.c
+++ b/arch/tile/mm/elf.c
@@ -21,7 +21,8 @@
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
-#include <arch/sim_def.h>
+#include <asm/vdso.h>
+#include <arch/sim.h>
/* Notify a running simulator, if any, that an exec just occurred. */
static void sim_notify_exec(const char *binary_name)
@@ -38,21 +39,55 @@ static void sim_notify_exec(const char *binary_name)
static int notify_exec(struct mm_struct *mm)
{
- int retval = 0; /* failure */
-
- if (mm->exe_file) {
- char *buf = (char *) __get_free_page(GFP_KERNEL);
- if (buf) {
- char *path = d_path(&mm->exe_file->f_path,
- buf, PAGE_SIZE);
- if (!IS_ERR(path)) {
- sim_notify_exec(path);
- retval = 1;
- }
- free_page((unsigned long)buf);
+ char *buf, *path;
+ struct vm_area_struct *vma;
+
+ if (!sim_is_simulator())
+ return 1;
+
+ if (mm->exe_file == NULL)
+ return 0;
+
+ for (vma = current->mm->mmap; ; vma = vma->vm_next) {
+ if (vma == NULL)
+ return 0;
+ if (vma->vm_file == mm->exe_file)
+ break;
+ }
+
+ buf = (char *) __get_free_page(GFP_KERNEL);
+ if (buf == NULL)
+ return 0;
+
+ path = d_path(&mm->exe_file->f_path, buf, PAGE_SIZE);
+ if (IS_ERR(path)) {
+ free_page((unsigned long)buf);
+ return 0;
+ }
+
+ /*
+ * Notify simulator of an ET_DYN object so we know the load address.
+ * The somewhat cryptic overuse of SIM_CONTROL_DLOPEN allows us
+ * to be backward-compatible with older simulator releases.
+ */
+ if (vma->vm_start == (ELF_ET_DYN_BASE & PAGE_MASK)) {
+ char buf[64];
+ int i;
+
+ snprintf(buf, sizeof(buf), "0x%lx:@", vma->vm_start);
+ for (i = 0; ; ++i) {
+ char c = buf[i];
+ __insn_mtspr(SPR_SIM_CONTROL,
+ (SIM_CONTROL_DLOPEN
+ | (c << _SIM_CONTROL_OPERATOR_BITS)));
+ if (c == '\0')
+ break;
}
}
- return retval;
+
+ sim_notify_exec(path);
+ free_page((unsigned long)buf);
+ return 1;
}
/* Notify a running simulator, if any, that we loaded an interpreter. */
@@ -68,37 +103,10 @@ static void sim_notify_interp(unsigned long load_addr)
}
-/* Kernel address of page used to map read-only kernel data into userspace. */
-static void *vdso_page;
-
-/* One-entry array used for install_special_mapping. */
-static struct page *vdso_pages[1];
-
-static int __init vdso_setup(void)
-{
- vdso_page = (void *)get_zeroed_page(GFP_ATOMIC);
- memcpy(vdso_page, __rt_sigreturn, __rt_sigreturn_end - __rt_sigreturn);
- vdso_pages[0] = virt_to_page(vdso_page);
- return 0;
-}
-device_initcall(vdso_setup);
-
-const char *arch_vma_name(struct vm_area_struct *vma)
-{
- if (vma->vm_private_data == vdso_pages)
- return "[vdso]";
-#ifndef __tilegx__
- if (vma->vm_start == MEM_USER_INTRPT)
- return "[intrpt]";
-#endif
- return NULL;
-}
-
int arch_setup_additional_pages(struct linux_binprm *bprm,
int executable_stack)
{
struct mm_struct *mm = current->mm;
- unsigned long vdso_base;
int retval = 0;
down_write(&mm->mmap_sem);
@@ -111,14 +119,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
if (!notify_exec(mm))
sim_notify_exec(bprm->filename);
- /*
- * MAYWRITE to allow gdb to COW and set breakpoints
- */
- vdso_base = VDSO_BASE;
- retval = install_special_mapping(mm, vdso_base, PAGE_SIZE,
- VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- vdso_pages);
+ retval = setup_vdso_pages();
#ifndef __tilegx__
/*
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index f7f99f90cbe0..111d5a9b76f1 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -34,6 +34,7 @@
#include <linux/hugetlb.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
+#include <linux/kdebug.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
@@ -122,10 +123,9 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
pmd_k = pmd_offset(pud_k, address);
if (!pmd_present(*pmd_k))
return NULL;
- if (!pmd_present(*pmd)) {
+ if (!pmd_present(*pmd))
set_pmd(pmd, *pmd_k);
- arch_flush_lazy_mmu_mode();
- } else
+ else
BUG_ON(pmd_ptfn(*pmd) != pmd_ptfn(*pmd_k));
return pmd_k;
}
@@ -283,7 +283,7 @@ static int handle_page_fault(struct pt_regs *regs,
flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
(write ? FAULT_FLAG_WRITE : 0));
- is_kernel_mode = (EX1_PL(regs->ex1) != USER_PL);
+ is_kernel_mode = !user_mode(regs);
tsk = validate_current();
@@ -466,28 +466,15 @@ good_area:
}
}
-#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
- /*
- * If this was an asynchronous fault,
- * restart the appropriate engine.
- */
- switch (fault_num) {
#if CHIP_HAS_TILE_DMA()
+ /* If this was a DMA TLB fault, restart the DMA engine. */
+ switch (fault_num) {
case INT_DMATLB_MISS:
case INT_DMATLB_MISS_DWNCL:
case INT_DMATLB_ACCESS:
case INT_DMATLB_ACCESS_DWNCL:
__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK);
break;
-#endif
-#if CHIP_HAS_SN_PROC()
- case INT_SNITLB_MISS:
- case INT_SNITLB_MISS_DWNCL:
- __insn_mtspr(SPR_SNCTL,
- __insn_mfspr(SPR_SNCTL) &
- ~SPR_SNCTL__FRZPROC_MASK);
- break;
-#endif
}
#endif
@@ -722,8 +709,60 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
{
int is_page_fault;
+#ifdef CONFIG_KPROBES
+ /*
+ * This is to notify the fault handler of the kprobes. The
+ * exception code is redundant as it is also carried in REGS,
+ * but we pass it anyhow.
+ */
+ if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1,
+ regs->faultnum, SIGSEGV) == NOTIFY_STOP)
+ return;
+#endif
+
+#ifdef __tilegx__
+ /*
+ * We don't need early do_page_fault_ics() support, since unlike
+ * Pro we don't need to worry about unlocking the atomic locks.
+ * There is only one current case in GX where we touch any memory
+ * under ICS other than our own kernel stack, and we handle that
+ * here. (If we crash due to trying to touch our own stack,
+ * we're in too much trouble for C code to help out anyway.)
+ */
+ if (write & ~1) {
+ unsigned long pc = write & ~1;
+ if (pc >= (unsigned long) __start_unalign_asm_code &&
+ pc < (unsigned long) __end_unalign_asm_code) {
+ struct thread_info *ti = current_thread_info();
+ /*
+ * Our EX_CONTEXT is still what it was from the
+ * initial unalign exception, but now we've faulted
+ * on the JIT page. We would like to complete the
+ * page fault however is appropriate, and then retry
+ * the instruction that caused the unalign exception.
+ * Our state has been "corrupted" by setting the low
+ * bit in "sp", and stashing r0..r3 in the
+ * thread_info area, so we revert all of that, then
+ * continue as if this were a normal page fault.
+ */
+ regs->sp &= ~1UL;
+ regs->regs[0] = ti->unalign_jit_tmp[0];
+ regs->regs[1] = ti->unalign_jit_tmp[1];
+ regs->regs[2] = ti->unalign_jit_tmp[2];
+ regs->regs[3] = ti->unalign_jit_tmp[3];
+ write &= 1;
+ } else {
+ pr_alert("%s/%d: ICS set at page fault at %#lx: %#lx\n",
+ current->comm, current->pid, pc, address);
+ show_regs(regs);
+ do_group_exit(SIGKILL);
+ return;
+ }
+ }
+#else
/* This case should have been handled by do_page_fault_ics(). */
BUG_ON(write & ~1);
+#endif
#if CHIP_HAS_TILE_DMA()
/*
@@ -752,10 +791,6 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
case INT_DMATLB_MISS:
case INT_DMATLB_MISS_DWNCL:
#endif
-#if CHIP_HAS_SN_PROC()
- case INT_SNITLB_MISS:
- case INT_SNITLB_MISS_DWNCL:
-#endif
is_page_fault = 1;
break;
@@ -771,8 +806,8 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
panic("Bad fault number %d in do_page_fault", fault_num);
}
-#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
- if (EX1_PL(regs->ex1) != USER_PL) {
+#if CHIP_HAS_TILE_DMA()
+ if (!user_mode(regs)) {
struct async_tlb *async;
switch (fault_num) {
#if CHIP_HAS_TILE_DMA()
@@ -783,12 +818,6 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
async = &current->thread.dma_async_tlb;
break;
#endif
-#if CHIP_HAS_SN_PROC()
- case INT_SNITLB_MISS:
- case INT_SNITLB_MISS_DWNCL:
- async = &current->thread.sn_async_tlb;
- break;
-#endif
default:
async = NULL;
}
@@ -821,14 +850,22 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
}
-#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
+#if CHIP_HAS_TILE_DMA()
/*
- * Check an async_tlb structure to see if a deferred fault is waiting,
- * and if so pass it to the page-fault code.
+ * This routine effectively re-issues asynchronous page faults
+ * when we are returning to user space.
*/
-static void handle_async_page_fault(struct pt_regs *regs,
- struct async_tlb *async)
+void do_async_page_fault(struct pt_regs *regs)
{
+ struct async_tlb *async = &current->thread.dma_async_tlb;
+
+ /*
+ * Clear thread flag early. If we re-interrupt while processing
+ * code here, we will reset it and recall this routine before
+ * returning to user space.
+ */
+ clear_thread_flag(TIF_ASYNC_TLB);
+
if (async->fault_num) {
/*
* Clear async->fault_num before calling the page-fault
@@ -842,35 +879,15 @@ static void handle_async_page_fault(struct pt_regs *regs,
async->address, async->is_write);
}
}
-
-/*
- * This routine effectively re-issues asynchronous page faults
- * when we are returning to user space.
- */
-void do_async_page_fault(struct pt_regs *regs)
-{
- /*
- * Clear thread flag early. If we re-interrupt while processing
- * code here, we will reset it and recall this routine before
- * returning to user space.
- */
- clear_thread_flag(TIF_ASYNC_TLB);
-
-#if CHIP_HAS_TILE_DMA()
- handle_async_page_fault(regs, &current->thread.dma_async_tlb);
-#endif
-#if CHIP_HAS_SN_PROC()
- handle_async_page_fault(regs, &current->thread.sn_async_tlb);
-#endif
-}
-#endif /* CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() */
+#endif /* CHIP_HAS_TILE_DMA() */
void vmalloc_sync_all(void)
{
#ifdef __tilegx__
/* Currently all L1 kernel pmd's are static and shared. */
- BUG_ON(pgd_index(VMALLOC_END) != pgd_index(VMALLOC_START));
+ BUILD_BUG_ON(pgd_index(VMALLOC_END - PAGE_SIZE) !=
+ pgd_index(VMALLOC_START));
#else
/*
* Note that races in the updates of insync and start aren't
diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c
index 347d123b14be..0dc218294770 100644
--- a/arch/tile/mm/highmem.c
+++ b/arch/tile/mm/highmem.c
@@ -114,7 +114,6 @@ static void kmap_atomic_register(struct page *page, int type,
list_add(&amp->list, &amp_list);
set_pte(ptep, pteval);
- arch_flush_lazy_mmu_mode();
spin_unlock(&amp_lock);
homecache_kpte_unlock(flags);
@@ -259,7 +258,6 @@ void __kunmap_atomic(void *kvaddr)
BUG_ON(vaddr >= (unsigned long)high_memory);
}
- arch_flush_lazy_mmu_mode();
pagefault_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index 1ae911939a18..004ba568d93f 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -43,12 +43,9 @@
#include "migrate.h"
-#if CHIP_HAS_COHERENT_LOCAL_CACHE()
-
/*
* The noallocl2 option suppresses all use of the L2 cache to cache
- * locally from a remote home. There's no point in using it if we
- * don't have coherent local caching, though.
+ * locally from a remote home.
*/
static int __write_once noallocl2;
static int __init set_noallocl2(char *str)
@@ -58,12 +55,6 @@ static int __init set_noallocl2(char *str)
}
early_param("noallocl2", set_noallocl2);
-#else
-
-#define noallocl2 0
-
-#endif
-
/*
* Update the irq_stat for cpus that we are going to interrupt
@@ -172,7 +163,8 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control,
static void homecache_finv_page_va(void* va, int home)
{
- if (home == smp_processor_id()) {
+ int cpu = get_cpu();
+ if (home == cpu) {
finv_buffer_local(va, PAGE_SIZE);
} else if (home == PAGE_HOME_HASH) {
finv_buffer_remote(va, PAGE_SIZE, 1);
@@ -180,6 +172,7 @@ static void homecache_finv_page_va(void* va, int home)
BUG_ON(home < 0 || home >= NR_CPUS);
finv_buffer_remote(va, PAGE_SIZE, 0);
}
+ put_cpu();
}
void homecache_finv_map_page(struct page *page, int home)
@@ -198,7 +191,7 @@ void homecache_finv_map_page(struct page *page, int home)
#else
va = __fix_to_virt(FIX_HOMECACHE_BEGIN + smp_processor_id());
#endif
- ptep = virt_to_pte(NULL, (unsigned long)va);
+ ptep = virt_to_kpte(va);
pte = pfn_pte(page_to_pfn(page), PAGE_KERNEL);
__set_pte(ptep, pte_set_home(pte, home));
homecache_finv_page_va((void *)va, home);
@@ -263,10 +256,8 @@ static int pte_to_home(pte_t pte)
return PAGE_HOME_INCOHERENT;
case HV_PTE_MODE_UNCACHED:
return PAGE_HOME_UNCACHED;
-#if CHIP_HAS_CBOX_HOME_MAP()
case HV_PTE_MODE_CACHE_HASH_L3:
return PAGE_HOME_HASH;
-#endif
}
panic("Bad PTE %#llx\n", pte.val);
}
@@ -323,20 +314,16 @@ pte_t pte_set_home(pte_t pte, int home)
HV_PTE_MODE_CACHE_NO_L3);
}
} else
-#if CHIP_HAS_CBOX_HOME_MAP()
if (hash_default)
pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3);
else
-#endif
pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3);
pte = hv_pte_set_nc(pte);
break;
-#if CHIP_HAS_CBOX_HOME_MAP()
case PAGE_HOME_HASH:
pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3);
break;
-#endif
default:
BUG_ON(home < 0 || home >= NR_CPUS ||
@@ -346,7 +333,6 @@ pte_t pte_set_home(pte_t pte, int home)
break;
}
-#if CHIP_HAS_NC_AND_NOALLOC_BITS()
if (noallocl2)
pte = hv_pte_set_no_alloc_l2(pte);
@@ -355,7 +341,6 @@ pte_t pte_set_home(pte_t pte, int home)
hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_NO_L3) {
pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED);
}
-#endif
/* Checking this case here gives a better panic than from the hv. */
BUG_ON(hv_pte_get_mode(pte) == 0);
@@ -371,19 +356,13 @@ EXPORT_SYMBOL(pte_set_home);
* so they're not suitable for anything but infrequent use.
*/
-#if CHIP_HAS_CBOX_HOME_MAP()
-static inline int initial_page_home(void) { return PAGE_HOME_HASH; }
-#else
-static inline int initial_page_home(void) { return 0; }
-#endif
-
int page_home(struct page *page)
{
if (PageHighMem(page)) {
- return initial_page_home();
+ return PAGE_HOME_HASH;
} else {
unsigned long kva = (unsigned long)page_address(page);
- return pte_to_home(*virt_to_pte(NULL, kva));
+ return pte_to_home(*virt_to_kpte(kva));
}
}
EXPORT_SYMBOL(page_home);
@@ -402,7 +381,7 @@ void homecache_change_page_home(struct page *page, int order, int home)
NULL, 0);
for (i = 0; i < pages; ++i, kva += PAGE_SIZE) {
- pte_t *ptep = virt_to_pte(NULL, kva);
+ pte_t *ptep = virt_to_kpte(kva);
pte_t pteval = *ptep;
BUG_ON(!pte_present(pteval) || pte_huge(pteval));
__set_pte(ptep, pte_set_home(pteval, home));
@@ -436,7 +415,7 @@ struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask,
void __homecache_free_pages(struct page *page, unsigned int order)
{
if (put_page_testzero(page)) {
- homecache_change_page_home(page, order, initial_page_home());
+ homecache_change_page_home(page, order, PAGE_HOME_HASH);
if (order == 0) {
free_hot_cold_page(page, 0);
} else {
diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c
index 650ccff8378c..e514899e1100 100644
--- a/arch/tile/mm/hugetlbpage.c
+++ b/arch/tile/mm/hugetlbpage.c
@@ -49,38 +49,6 @@ int huge_shift[HUGE_SHIFT_ENTRIES] = {
#endif
};
-/*
- * This routine is a hybrid of pte_alloc_map() and pte_alloc_kernel().
- * It assumes that L2 PTEs are never in HIGHMEM (we don't support that).
- * It locks the user pagetable, and bumps up the mm->nr_ptes field,
- * but otherwise allocate the page table using the kernel versions.
- */
-static pte_t *pte_alloc_hugetlb(struct mm_struct *mm, pmd_t *pmd,
- unsigned long address)
-{
- pte_t *new;
-
- if (pmd_none(*pmd)) {
- new = pte_alloc_one_kernel(mm, address);
- if (!new)
- return NULL;
-
- smp_wmb(); /* See comment in __pte_alloc */
-
- spin_lock(&mm->page_table_lock);
- if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
- mm->nr_ptes++;
- pmd_populate_kernel(mm, pmd, new);
- new = NULL;
- } else
- VM_BUG_ON(pmd_trans_splitting(*pmd));
- spin_unlock(&mm->page_table_lock);
- if (new)
- pte_free_kernel(mm, new);
- }
-
- return pte_offset_kernel(pmd, address);
-}
#endif
pte_t *huge_pte_alloc(struct mm_struct *mm,
@@ -109,7 +77,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
else {
if (sz != PAGE_SIZE << huge_shift[HUGE_SHIFT_PAGE])
panic("Unexpected page size %#lx\n", sz);
- return pte_alloc_hugetlb(mm, pmd, addr);
+ return pte_alloc_map(mm, NULL, pmd, addr);
}
}
#else
@@ -144,14 +112,14 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
/* Get the top-level page table entry. */
pgd = (pgd_t *)get_pte((pte_t *)mm->pgd, pgd_index(addr), 0);
- if (!pgd_present(*pgd))
- return NULL;
/* We don't have four levels. */
pud = pud_offset(pgd, addr);
#ifndef __PAGETABLE_PUD_FOLDED
# error support fourth page table level
#endif
+ if (!pud_present(*pud))
+ return NULL;
/* Check for an L0 huge PTE, if we have three levels. */
#ifndef __PAGETABLE_PMD_FOLDED
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index e182958c707d..4e316deb92fd 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -106,10 +106,8 @@ pte_t *get_prealloc_pte(unsigned long pfn)
*/
static int initial_heap_home(void)
{
-#if CHIP_HAS_CBOX_HOME_MAP()
if (hash_default)
return PAGE_HOME_HASH;
-#endif
return smp_processor_id();
}
@@ -190,14 +188,11 @@ static void __init page_table_range_init(unsigned long start,
}
-#if CHIP_HAS_CBOX_HOME_MAP()
-
static int __initdata ktext_hash = 1; /* .text pages */
static int __initdata kdata_hash = 1; /* .data and .bss pages */
int __write_once hash_default = 1; /* kernel allocator pages */
EXPORT_SYMBOL(hash_default);
int __write_once kstack_hash = 1; /* if no homecaching, use h4h */
-#endif /* CHIP_HAS_CBOX_HOME_MAP */
/*
* CPUs to use to for striping the pages of kernel data. If hash-for-home
@@ -215,14 +210,12 @@ int __write_once kdata_huge; /* if no homecaching, small pages */
static pgprot_t __init construct_pgprot(pgprot_t prot, int home)
{
prot = pte_set_home(prot, home);
-#if CHIP_HAS_CBOX_HOME_MAP()
if (home == PAGE_HOME_IMMUTABLE) {
if (ktext_hash)
prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_HASH_L3);
else
prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_NO_L3);
}
-#endif
return prot;
}
@@ -234,22 +227,17 @@ static pgprot_t __init init_pgprot(ulong address)
{
int cpu;
unsigned long page;
- enum { CODE_DELTA = MEM_SV_INTRPT - PAGE_OFFSET };
+ enum { CODE_DELTA = MEM_SV_START - PAGE_OFFSET };
-#if CHIP_HAS_CBOX_HOME_MAP()
/* For kdata=huge, everything is just hash-for-home. */
if (kdata_huge)
return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH);
-#endif
/* We map the aliased pages of permanent text inaccessible. */
if (address < (ulong) _sinittext - CODE_DELTA)
return PAGE_NONE;
- /*
- * We map read-only data non-coherent for performance. We could
- * use neighborhood caching on TILE64, but it's not clear it's a win.
- */
+ /* We map read-only data non-coherent for performance. */
if ((address >= (ulong) __start_rodata &&
address < (ulong) __end_rodata) ||
address == (ulong) empty_zero_page) {
@@ -257,12 +245,10 @@ static pgprot_t __init init_pgprot(ulong address)
}
#ifndef __tilegx__
-#if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
/* Force the atomic_locks[] array page to be hash-for-home. */
if (address == (ulong) atomic_locks)
return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH);
#endif
-#endif
/*
* Everything else that isn't data or bss is heap, so mark it
@@ -280,19 +266,9 @@ static pgprot_t __init init_pgprot(ulong address)
if (address >= (ulong) _end || address < (ulong) _einitdata)
return construct_pgprot(PAGE_KERNEL, initial_heap_home());
-#if CHIP_HAS_CBOX_HOME_MAP()
/* Use hash-for-home if requested for data/bss. */
if (kdata_hash)
return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH);
-#endif
-
- /*
- * Make the w1data homed like heap to start with, to avoid
- * making it part of the page-striped data area when we're just
- * going to convert it to read-only soon anyway.
- */
- if (address >= (ulong)__w1data_begin && address < (ulong)__w1data_end)
- return construct_pgprot(PAGE_KERNEL, initial_heap_home());
/*
* Otherwise we just hand out consecutive cpus. To avoid
@@ -301,7 +277,7 @@ static pgprot_t __init init_pgprot(ulong address)
* the requested address, while walking cpu home around kdata_mask.
* This is typically no more than a dozen or so iterations.
*/
- page = (((ulong)__w1data_end) + PAGE_SIZE - 1) & PAGE_MASK;
+ page = (((ulong)__end_rodata) + PAGE_SIZE - 1) & PAGE_MASK;
BUG_ON(address < page || address >= (ulong)_end);
cpu = cpumask_first(&kdata_mask);
for (; page < address; page += PAGE_SIZE) {
@@ -311,11 +287,9 @@ static pgprot_t __init init_pgprot(ulong address)
if (page == (ulong)empty_zero_page)
continue;
#ifndef __tilegx__
-#if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
if (page == (ulong)atomic_locks)
continue;
#endif
-#endif
cpu = cpumask_next(cpu, &kdata_mask);
if (cpu == NR_CPUS)
cpu = cpumask_first(&kdata_mask);
@@ -358,7 +332,7 @@ static int __init setup_ktext(char *str)
ktext_arg_seen = 1;
- /* Default setting on Tile64: use a huge page */
+ /* Default setting: use a huge page */
if (strcmp(str, "huge") == 0)
pr_info("ktext: using one huge locally cached page\n");
@@ -404,10 +378,8 @@ static inline pgprot_t ktext_set_nocache(pgprot_t prot)
{
if (!ktext_nocache)
prot = hv_pte_set_nc(prot);
-#if CHIP_HAS_NC_AND_NOALLOC_BITS()
else
prot = hv_pte_set_no_alloc_l2(prot);
-#endif
return prot;
}
@@ -440,7 +412,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
struct cpumask kstripe_mask;
int rc, i;
-#if CHIP_HAS_CBOX_HOME_MAP()
if (ktext_arg_seen && ktext_hash) {
pr_warning("warning: \"ktext\" boot argument ignored"
" if \"kcache_hash\" sets up text hash-for-home\n");
@@ -457,7 +428,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
" kcache_hash=all or =allbutstack\n");
kdata_huge = 0;
}
-#endif
/*
* Set up a mask for cpus to use for kernel striping.
@@ -538,7 +508,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
}
}
- address = MEM_SV_INTRPT;
+ address = MEM_SV_START;
pmd = get_pmd(pgtables, address);
pfn = 0; /* code starts at PA 0 */
if (ktext_small) {
@@ -585,13 +555,11 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
} else {
pte_t pteval = pfn_pte(0, PAGE_KERNEL_EXEC);
pteval = pte_mkhuge(pteval);
-#if CHIP_HAS_CBOX_HOME_MAP()
if (ktext_hash) {
pteval = hv_pte_set_mode(pteval,
HV_PTE_MODE_CACHE_HASH_L3);
pteval = ktext_set_nocache(pteval);
} else
-#endif /* CHIP_HAS_CBOX_HOME_MAP() */
if (cpumask_weight(&ktext_mask) == 1) {
pteval = set_remote_cache_cpu(pteval,
cpumask_first(&ktext_mask));
@@ -777,10 +745,7 @@ void __init paging_init(void)
kernel_physical_mapping_init(pgd_base);
- /*
- * Fixed mappings, only the page table structure has to be
- * created - mappings will be set by set_fixmap():
- */
+ /* Fixed mappings, only the page table structure has to be created. */
page_table_range_init(fix_to_virt(__end_of_fixed_addresses - 1),
FIXADDR_TOP, pgd_base);
@@ -941,26 +906,6 @@ void __init pgtable_cache_init(void)
panic("pgtable_cache_init(): Cannot create pgd cache");
}
-#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
-/*
- * The __w1data area holds data that is only written during initialization,
- * and is read-only and thus freely cacheable thereafter. Fix the page
- * table entries that cover that region accordingly.
- */
-static void mark_w1data_ro(void)
-{
- /* Loop over page table entries */
- unsigned long addr = (unsigned long)__w1data_begin;
- BUG_ON((addr & (PAGE_SIZE-1)) != 0);
- for (; addr <= (unsigned long)__w1data_end - 1; addr += PAGE_SIZE) {
- unsigned long pfn = kaddr_to_pfn((void *)addr);
- pte_t *ptep = virt_to_pte(NULL, addr);
- BUG_ON(pte_huge(*ptep)); /* not relevant for kdata_huge */
- set_pte_at(&init_mm, addr, ptep, pfn_pte(pfn, PAGE_KERNEL_RO));
- }
-}
-#endif
-
#ifdef CONFIG_DEBUG_PAGEALLOC
static long __write_once initfree;
#else
@@ -1000,7 +945,7 @@ static void free_init_pages(char *what, unsigned long begin, unsigned long end)
*/
int pfn = kaddr_to_pfn((void *)addr);
struct page *page = pfn_to_page(pfn);
- pte_t *ptep = virt_to_pte(NULL, addr);
+ pte_t *ptep = virt_to_kpte(addr);
if (!initfree) {
/*
* If debugging page accesses then do not free
@@ -1024,15 +969,11 @@ static void free_init_pages(char *what, unsigned long begin, unsigned long end)
void free_initmem(void)
{
- const unsigned long text_delta = MEM_SV_INTRPT - PAGE_OFFSET;
+ const unsigned long text_delta = MEM_SV_START - PAGE_OFFSET;
/*
- * Evict the dirty initdata on the boot cpu, evict the w1data
- * wherever it's homed, and evict all the init code everywhere.
- * We are guaranteed that no one will touch the init pages any
- * more, and although other cpus may be touching the w1data,
- * we only actually change the caching on tile64, which won't
- * be keeping local copies in the other tiles' caches anyway.
+ * Evict the cache on all cores to avoid incoherence.
+ * We are guaranteed that no one will touch the init pages any more.
*/
homecache_evict(&cpu_cacheable_map);
@@ -1043,26 +984,11 @@ void free_initmem(void)
/*
* Free the pages mapped from 0xc0000000 that correspond to code
- * pages from MEM_SV_INTRPT that we won't use again after init.
+ * pages from MEM_SV_START that we won't use again after init.
*/
free_init_pages("unused kernel text",
(unsigned long)_sinittext - text_delta,
(unsigned long)_einittext - text_delta);
-
-#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
- /*
- * Upgrade the .w1data section to globally cached.
- * We don't do this on tilepro, since the cache architecture
- * pretty much makes it irrelevant, and in any case we end
- * up having racing issues with other tiles that may touch
- * the data after we flush the cache but before we update
- * the PTEs and flush the TLBs, causing sharer shootdowns
- * later. Even though this is to clean data, it seems like
- * an unnecessary complication.
- */
- mark_w1data_ro();
-#endif
-
/* Do a global TLB flush so everyone sees the changes. */
flush_tlb_all();
}
diff --git a/arch/tile/mm/migrate_32.S b/arch/tile/mm/migrate_32.S
index 5305814bf187..772085491bf9 100644
--- a/arch/tile/mm/migrate_32.S
+++ b/arch/tile/mm/migrate_32.S
@@ -136,7 +136,7 @@ STD_ENTRY(flush_and_install_context)
move r8, zero /* asids */
move r9, zero /* asidcount */
}
- jal hv_flush_remote
+ jal _hv_flush_remote
bnz r0, .Ldone
/* Now install the new page table. */
@@ -152,7 +152,7 @@ STD_ENTRY(flush_and_install_context)
move r4, r_asid
moveli r5, HV_CTX_DIRECTIO | CTX_PAGE_FLAG
}
- jal hv_install_context
+ jal _hv_install_context
bnz r0, .Ldone
/* Finally, flush the TLB. */
diff --git a/arch/tile/mm/migrate_64.S b/arch/tile/mm/migrate_64.S
index 1d15b10833d1..a49eee38f872 100644
--- a/arch/tile/mm/migrate_64.S
+++ b/arch/tile/mm/migrate_64.S
@@ -123,7 +123,7 @@ STD_ENTRY(flush_and_install_context)
}
{
move r8, zero /* asidcount */
- jal hv_flush_remote
+ jal _hv_flush_remote
}
bnez r0, 1f
@@ -136,7 +136,7 @@ STD_ENTRY(flush_and_install_context)
move r2, r_asid
moveli r3, HV_CTX_DIRECTIO | CTX_PAGE_FLAG
}
- jal hv_install_context
+ jal _hv_install_context
bnez r0, 1f
/* Finally, flush the TLB. */
diff --git a/arch/tile/mm/mmap.c b/arch/tile/mm/mmap.c
index d67d91ebf63e..851a94e6ae58 100644
--- a/arch/tile/mm/mmap.c
+++ b/arch/tile/mm/mmap.c
@@ -58,16 +58,36 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
#else
int is_32bit = 0;
#endif
+ unsigned long random_factor = 0UL;
+
+ /*
+ * 8 bits of randomness in 32bit mmaps, 24 address space bits
+ * 12 bits of randomness in 64bit mmaps, 28 address space bits
+ */
+ if (current->flags & PF_RANDOMIZE) {
+ if (is_32bit)
+ random_factor = get_random_int() % (1<<8);
+ else
+ random_factor = get_random_int() % (1<<12);
+
+ random_factor <<= PAGE_SHIFT;
+ }
/*
* Use standard layout if the expected stack growth is unlimited
* or we are running native 64 bits.
*/
- if (!is_32bit || rlimit(RLIMIT_STACK) == RLIM_INFINITY) {
- mm->mmap_base = TASK_UNMAPPED_BASE;
+ if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) {
+ mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area;
} else {
mm->mmap_base = mmap_base(mm);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+ unsigned long range_end = mm->brk + 0x02000000;
+ return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+}
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index dfd63ce87327..2deaddf3e01f 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -83,55 +83,6 @@ void show_mem(unsigned int filter)
}
}
-/*
- * Associate a virtual page frame with a given physical page frame
- * and protection flags for that frame.
- */
-static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
-
- pgd = swapper_pg_dir + pgd_index(vaddr);
- if (pgd_none(*pgd)) {
- BUG();
- return;
- }
- pud = pud_offset(pgd, vaddr);
- if (pud_none(*pud)) {
- BUG();
- return;
- }
- pmd = pmd_offset(pud, vaddr);
- if (pmd_none(*pmd)) {
- BUG();
- return;
- }
- pte = pte_offset_kernel(pmd, vaddr);
- /* <pfn,flags> stored as-is, to permit clearing entries */
- set_pte(pte, pfn_pte(pfn, flags));
-
- /*
- * It's enough to flush this one mapping.
- * This appears conservative since it is only called
- * from __set_fixmap.
- */
- local_flush_tlb_page(NULL, vaddr, PAGE_SIZE);
-}
-
-void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
-{
- unsigned long address = __fix_to_virt(idx);
-
- if (idx >= __end_of_fixed_addresses) {
- BUG();
- return;
- }
- set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
-}
-
/**
* shatter_huge_page() - ensure a given address is mapped by a small page.
*
@@ -374,6 +325,17 @@ void ptep_set_wrprotect(struct mm_struct *mm,
#endif
+/*
+ * Return a pointer to the PTE that corresponds to the given
+ * address in the given page table. A NULL page table just uses
+ * the standard kernel page table; the preferred API in this case
+ * is virt_to_kpte().
+ *
+ * The returned pointer can point to a huge page in other levels
+ * of the page table than the bottom, if the huge page is present
+ * in the page table. For bottom-level PTEs, the returned pointer
+ * can point to a PTE that is either present or not.
+ */
pte_t *virt_to_pte(struct mm_struct* mm, unsigned long addr)
{
pgd_t *pgd;
@@ -387,13 +349,23 @@ pte_t *virt_to_pte(struct mm_struct* mm, unsigned long addr)
pud = pud_offset(pgd, addr);
if (!pud_present(*pud))
return NULL;
+ if (pud_huge_page(*pud))
+ return (pte_t *)pud;
pmd = pmd_offset(pud, addr);
- if (pmd_huge_page(*pmd))
- return (pte_t *)pmd;
if (!pmd_present(*pmd))
return NULL;
+ if (pmd_huge_page(*pmd))
+ return (pte_t *)pmd;
return pte_offset_kernel(pmd, addr);
}
+EXPORT_SYMBOL(virt_to_pte);
+
+pte_t *virt_to_kpte(unsigned long kaddr)
+{
+ BUG_ON(kaddr < PAGE_OFFSET);
+ return virt_to_pte(NULL, kaddr);
+}
+EXPORT_SYMBOL(virt_to_kpte);
pgprot_t set_remote_cache_cpu(pgprot_t prot, int cpu)
{
@@ -568,7 +540,7 @@ void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
addr = area->addr;
if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
phys_addr, pgprot)) {
- remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
+ free_vm_area(area);
return NULL;
}
return (__force void __iomem *) (offset + (char *)addr);