author	Linux Build Service Account <lnxbuild@localhost>	2017-02-23 21:35:10 -0800
committer	Gerrit - the friendly Code Review server <code-review@localhost>	2017-02-23 21:35:09 -0800
commit	ef49b42e4abb3f8c6f8a81d794332f078aeaea2a (patch)
tree	243313f0f3e6d11f44502481547443d2e024be23
parent	fb98e68c1c8cb2c66827e10ae7c8049ad52638b2 (diff)
parent	9f8cd5dfb437c689b563c6e9a2d3d4316655ab28 (diff)
Merge "drm/msm: Get and enable the IOMMU clocks"
-rw-r--r--	drivers/gpu/drm/msm/Makefile	7
-rw-r--r--	drivers/gpu/drm/msm/adreno/a3xx_gpu.c	14
-rw-r--r--	drivers/gpu/drm/msm/adreno/a4xx_gpu.c	14
-rw-r--r--	drivers/gpu/drm/msm/adreno/a5xx.xml.h	147
-rw-r--r--	drivers/gpu/drm/msm/adreno/a5xx_gpu.c	403
-rw-r--r--	drivers/gpu/drm/msm/adreno/a5xx_gpu.h	123
-rw-r--r--	drivers/gpu/drm/msm/adreno/a5xx_power.c	6
-rw-r--r--	drivers/gpu/drm/msm/adreno/a5xx_preempt.c	383
-rw-r--r--	drivers/gpu/drm/msm/adreno/a5xx_snapshot.c	796
-rw-r--r--	drivers/gpu/drm/msm/adreno/adreno_gpu.c	305
-rw-r--r--	drivers/gpu/drm/msm/adreno/adreno_gpu.h	50
-rw-r--r--	drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c	2
-rw-r--r--	drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c	5
-rw-r--r--	drivers/gpu/drm/msm/msm_drv.c	128
-rw-r--r--	drivers/gpu/drm/msm/msm_drv.h	49
-rw-r--r--	drivers/gpu/drm/msm/msm_gem.c	6
-rw-r--r--	drivers/gpu/drm/msm/msm_gem.h	7
-rw-r--r--	drivers/gpu/drm/msm/msm_gem_submit.c	24
-rw-r--r--	drivers/gpu/drm/msm/msm_gem_vma.c	69
-rw-r--r--	drivers/gpu/drm/msm/msm_gpu.c	280
-rw-r--r--	drivers/gpu/drm/msm/msm_gpu.h	71
-rw-r--r--	drivers/gpu/drm/msm/msm_iommu.c	193
-rw-r--r--	drivers/gpu/drm/msm/msm_iommu.h	37
-rw-r--r--	drivers/gpu/drm/msm/msm_mmu.h	3
-rw-r--r--	drivers/gpu/drm/msm/msm_ringbuffer.c	13
-rw-r--r--	drivers/gpu/drm/msm/msm_ringbuffer.h	19
-rw-r--r--	drivers/gpu/drm/msm/msm_smmu.c	2
-rw-r--r--	drivers/gpu/drm/msm/msm_snapshot.c	105
-rw-r--r--	drivers/gpu/drm/msm/msm_snapshot.h	85
-rw-r--r--	drivers/gpu/drm/msm/msm_snapshot_api.h	121
-rw-r--r--	drivers/gpu/drm/msm/sde/sde_kms.c	7
-rw-r--r--	drivers/iommu/arm-smmu.c	29
-rw-r--r--	drivers/iommu/io-pgtable-arm.c	158
-rw-r--r--	drivers/iommu/io-pgtable.h	1
-rw-r--r--	include/linux/iommu.h	1
-rw-r--r--	include/uapi/drm/msm_drm.h	19
36 files changed, 3262 insertions, 420 deletions
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index 9f035b10875b..b77fdd098471 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -54,7 +54,9 @@ msm_drm-y += adreno/adreno_device.o \
adreno/a3xx_gpu.o \
adreno/a4xx_gpu.o \
adreno/a5xx_gpu.o \
- adreno/a5xx_power.o
+ adreno/a5xx_power.o \
+ adreno/a5xx_preempt.o \
+ adreno/a5xx_snapshot.o
endif
msm_drm-$(CONFIG_DRM_MSM_MDP4) += mdp/mdp4/mdp4_crtc.o \
@@ -130,6 +132,7 @@ msm_drm-$(CONFIG_DRM_MSM) += \
msm_perf.o \
msm_rd.o \
msm_ringbuffer.o \
- msm_prop.o
+ msm_prop.o \
+ msm_snapshot.o
obj-$(CONFIG_DRM_MSM) += msm_drm.o
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index 5a061ad6225a..c4f886fd6037 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -40,10 +40,11 @@
extern bool hang_debug;
static void a3xx_dump(struct msm_gpu *gpu);
+static bool a3xx_idle(struct msm_gpu *gpu);
static bool a3xx_me_init(struct msm_gpu *gpu)
{
- struct msm_ringbuffer *ring = gpu->rb;
+ struct msm_ringbuffer *ring = gpu->rb[0];
OUT_PKT3(ring, CP_ME_INIT, 17);
OUT_RING(ring, 0x000003f7);
@@ -64,8 +65,8 @@ static bool a3xx_me_init(struct msm_gpu *gpu)
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
- gpu->funcs->flush(gpu);
- return gpu->funcs->idle(gpu);
+ gpu->funcs->flush(gpu, ring);
+ return a3xx_idle(gpu);
}
static int a3xx_hw_init(struct msm_gpu *gpu)
@@ -331,7 +332,7 @@ static void a3xx_destroy(struct msm_gpu *gpu)
static bool a3xx_idle(struct msm_gpu *gpu)
{
/* wait for ringbuffer to drain: */
- if (!adreno_idle(gpu))
+ if (!adreno_idle(gpu, gpu->rb[0]))
return false;
/* then wait for GPU to finish: */
@@ -439,9 +440,10 @@ static const struct adreno_gpu_funcs funcs = {
.pm_resume = msm_gpu_pm_resume,
.recover = a3xx_recover,
.last_fence = adreno_last_fence,
+ .submitted_fence = adreno_submitted_fence,
.submit = adreno_submit,
.flush = adreno_flush,
- .idle = a3xx_idle,
+ .active_ring = adreno_active_ring,
.irq = a3xx_irq,
.destroy = a3xx_destroy,
#ifdef CONFIG_DEBUG_FS
@@ -489,7 +491,7 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
adreno_gpu->registers = a3xx_registers;
adreno_gpu->reg_offsets = a3xx_register_offsets;
- ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs);
+ ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
if (ret)
goto fail;
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
index 47c9b22b0801..534a7c3fbdca 100644
--- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
@@ -31,6 +31,7 @@
extern bool hang_debug;
static void a4xx_dump(struct msm_gpu *gpu);
+static bool a4xx_idle(struct msm_gpu *gpu);
/*
* a4xx_enable_hwcg() - Program the clock control registers
@@ -115,7 +116,7 @@ static void a4xx_enable_hwcg(struct msm_gpu *gpu)
static bool a4xx_me_init(struct msm_gpu *gpu)
{
- struct msm_ringbuffer *ring = gpu->rb;
+ struct msm_ringbuffer *ring = gpu->rb[0];
OUT_PKT3(ring, CP_ME_INIT, 17);
OUT_RING(ring, 0x000003f7);
@@ -136,8 +137,8 @@ static bool a4xx_me_init(struct msm_gpu *gpu)
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
- gpu->funcs->flush(gpu);
- return gpu->funcs->idle(gpu);
+ gpu->funcs->flush(gpu, ring);
+ return a4xx_idle(gpu);
}
static int a4xx_hw_init(struct msm_gpu *gpu)
@@ -329,7 +330,7 @@ static void a4xx_destroy(struct msm_gpu *gpu)
static bool a4xx_idle(struct msm_gpu *gpu)
{
/* wait for ringbuffer to drain: */
- if (!adreno_idle(gpu))
+ if (!adreno_idle(gpu, gpu->rb[0]))
return false;
/* then wait for GPU to finish: */
@@ -522,9 +523,10 @@ static const struct adreno_gpu_funcs funcs = {
.pm_resume = a4xx_pm_resume,
.recover = a4xx_recover,
.last_fence = adreno_last_fence,
+ .submitted_fence = adreno_submitted_fence,
.submit = adreno_submit,
.flush = adreno_flush,
- .idle = a4xx_idle,
+ .active_ring = adreno_active_ring,
.irq = a4xx_irq,
.destroy = a4xx_destroy,
#ifdef CONFIG_DEBUG_FS
@@ -566,7 +568,7 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
adreno_gpu->registers = a4xx_registers;
adreno_gpu->reg_offsets = a4xx_register_offsets;
- ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs);
+ ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
if (ret)
goto fail;
diff --git a/drivers/gpu/drm/msm/adreno/a5xx.xml.h b/drivers/gpu/drm/msm/adreno/a5xx.xml.h
index bfee2fd83462..56dad2217289 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx.xml.h
+++ b/drivers/gpu/drm/msm/adreno/a5xx.xml.h
@@ -155,6 +155,114 @@ enum a5xx_depth_format {
DEPTH5_32 = 4,
};
+enum a5xx_debugbus {
+ A5XX_RBBM_DBGBUS_CP = 1,
+ A5XX_RBBM_DBGBUS_RBBM = 2,
+ A5XX_RBBM_DBGBUS_VBIF = 3,
+ A5XX_RBBM_DBGBUS_HLSQ = 4,
+ A5XX_RBBM_DBGBUS_UCHE = 5,
+ A5XX_RBBM_DBGBUS_DPM = 6,
+ A5XX_RBBM_DBGBUS_TESS = 7,
+ A5XX_RBBM_DBGBUS_PC = 8,
+ A5XX_RBBM_DBGBUS_VFDP = 9,
+ A5XX_RBBM_DBGBUS_VPC = 10,
+ A5XX_RBBM_DBGBUS_TSE = 11,
+ A5XX_RBBM_DBGBUS_RAS = 12,
+ A5XX_RBBM_DBGBUS_VSC = 13,
+ A5XX_RBBM_DBGBUS_COM = 14,
+ A5XX_RBBM_DBGBUS_DCOM = 15,
+ A5XX_RBBM_DBGBUS_LRZ = 16,
+ A5XX_RBBM_DBGBUS_A2D_DSP = 17,
+ A5XX_RBBM_DBGBUS_CCUFCHE = 18,
+ A5XX_RBBM_DBGBUS_GPMU = 19,
+ A5XX_RBBM_DBGBUS_RBP = 20,
+ A5XX_RBBM_DBGBUS_HM = 21,
+ A5XX_RBBM_DBGBUS_RBBM_CFG = 22,
+ A5XX_RBBM_DBGBUS_VBIF_CX = 23,
+ A5XX_RBBM_DBGBUS_GPC = 29,
+ A5XX_RBBM_DBGBUS_LARC = 30,
+ A5XX_RBBM_DBGBUS_HLSQ_SPTP = 31,
+ A5XX_RBBM_DBGBUS_RB_0 = 32,
+ A5XX_RBBM_DBGBUS_RB_1 = 33,
+ A5XX_RBBM_DBGBUS_RB_2 = 34,
+ A5XX_RBBM_DBGBUS_RB_3 = 35,
+ A5XX_RBBM_DBGBUS_CCU_0 = 40,
+ A5XX_RBBM_DBGBUS_CCU_1 = 41,
+ A5XX_RBBM_DBGBUS_CCU_2 = 42,
+ A5XX_RBBM_DBGBUS_CCU_3 = 43,
+ A5XX_RBBM_DBGBUS_A2D_RAS_0 = 48,
+ A5XX_RBBM_DBGBUS_A2D_RAS_1 = 49,
+ A5XX_RBBM_DBGBUS_A2D_RAS_2 = 50,
+ A5XX_RBBM_DBGBUS_A2D_RAS_3 = 51,
+ A5XX_RBBM_DBGBUS_VFD_0 = 56,
+ A5XX_RBBM_DBGBUS_VFD_1 = 57,
+ A5XX_RBBM_DBGBUS_VFD_2 = 58,
+ A5XX_RBBM_DBGBUS_VFD_3 = 59,
+ A5XX_RBBM_DBGBUS_SP_0 = 64,
+ A5XX_RBBM_DBGBUS_SP_1 = 65,
+ A5XX_RBBM_DBGBUS_SP_2 = 66,
+ A5XX_RBBM_DBGBUS_SP_3 = 67,
+ A5XX_RBBM_DBGBUS_TPL1_0 = 72,
+ A5XX_RBBM_DBGBUS_TPL1_1 = 73,
+ A5XX_RBBM_DBGBUS_TPL1_2 = 74,
+ A5XX_RBBM_DBGBUS_TPL1_3 = 75,
+};
+
+enum a5xx_shader_blocks {
+ A5XX_TP_W_MEMOBJ = 1,
+ A5XX_TP_W_SAMPLER = 2,
+ A5XX_TP_W_MIPMAP_BASE = 3,
+ A5XX_TP_W_MEMOBJ_TAG = 4,
+ A5XX_TP_W_SAMPLER_TAG = 5,
+ A5XX_TP_S_3D_MEMOBJ = 6,
+ A5XX_TP_S_3D_SAMPLER = 7,
+ A5XX_TP_S_3D_MEMOBJ_TAG = 8,
+ A5XX_TP_S_3D_SAMPLER_TAG = 9,
+ A5XX_TP_S_CS_MEMOBJ = 10,
+ A5XX_TP_S_CS_SAMPLER = 11,
+ A5XX_TP_S_CS_MEMOBJ_TAG = 12,
+ A5XX_TP_S_CS_SAMPLER_TAG = 13,
+ A5XX_SP_W_INSTR = 14,
+ A5XX_SP_W_CONST = 15,
+ A5XX_SP_W_UAV_SIZE = 16,
+ A5XX_SP_W_CB_SIZE = 17,
+ A5XX_SP_W_UAV_BASE = 18,
+ A5XX_SP_W_CB_BASE = 19,
+ A5XX_SP_W_INST_TAG = 20,
+ A5XX_SP_W_STATE = 21,
+ A5XX_SP_S_3D_INSTR = 22,
+ A5XX_SP_S_3D_CONST = 23,
+ A5XX_SP_S_3D_CB_BASE = 24,
+ A5XX_SP_S_3D_CB_SIZE = 25,
+ A5XX_SP_S_3D_UAV_BASE = 26,
+ A5XX_SP_S_3D_UAV_SIZE = 27,
+ A5XX_SP_S_CS_INSTR = 28,
+ A5XX_SP_S_CS_CONST = 29,
+ A5XX_SP_S_CS_CB_BASE = 30,
+ A5XX_SP_S_CS_CB_SIZE = 31,
+ A5XX_SP_S_CS_UAV_BASE = 32,
+ A5XX_SP_S_CS_UAV_SIZE = 33,
+ A5XX_SP_S_3D_INSTR_DIRTY = 34,
+ A5XX_SP_S_3D_CONST_DIRTY = 35,
+ A5XX_SP_S_3D_CB_BASE_DIRTY = 36,
+ A5XX_SP_S_3D_CB_SIZE_DIRTY = 37,
+ A5XX_SP_S_3D_UAV_BASE_DIRTY = 38,
+ A5XX_SP_S_3D_UAV_SIZE_DIRTY = 39,
+ A5XX_SP_S_CS_INSTR_DIRTY = 40,
+ A5XX_SP_S_CS_CONST_DIRTY = 41,
+ A5XX_SP_S_CS_CB_BASE_DIRTY = 42,
+ A5XX_SP_S_CS_CB_SIZE_DIRTY = 43,
+ A5XX_SP_S_CS_UAV_BASE_DIRTY = 44,
+ A5XX_SP_S_CS_UAV_SIZE_DIRTY = 45,
+ A5XX_HLSQ_ICB = 46,
+ A5XX_HLSQ_ICB_DIRTY = 47,
+ A5XX_HLSQ_ICB_CB_BASE_DIRTY = 48,
+ A5XX_SP_POWER_RESTORE_RAM = 64,
+ A5XX_SP_POWER_RESTORE_RAM_TAG = 65,
+ A5XX_TP_POWER_RESTORE_RAM = 66,
+ A5XX_TP_POWER_RESTORE_RAM_TAG = 67,
+};
+
enum a5xx_tex_filter {
A5XX_TEX_NEAREST = 0,
A5XX_TEX_LINEAR = 1,
@@ -396,6 +504,18 @@ static inline uint32_t A5XX_CP_PROTECT_REG_MASK_LEN(uint32_t val)
#define REG_A5XX_CP_POWERCTR_CP_SEL_3 0x00000bbd
#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_A 0x00000004
+#define A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_INDEX__MASK 0x000000ff
+#define A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_INDEX__SHIFT 0
+static inline uint32_t A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_INDEX(uint32_t val)
+{
+ return ((val) << A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_INDEX__SHIFT) & A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_INDEX__MASK;
+}
+#define A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_BLK_SEL__MASK 0x0000ff00
+#define A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_BLK_SEL__SHIFT 8
+static inline uint32_t A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_BLK_SEL(uint32_t val)
+{
+ return ((val) << A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_BLK_SEL__SHIFT) & A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_BLK_SEL__MASK;
+}
#define REG_A5XX_RBBM_CFG_DBGBUS_SEL_B 0x00000005
@@ -406,6 +526,12 @@ static inline uint32_t A5XX_CP_PROTECT_REG_MASK_LEN(uint32_t val)
#define REG_A5XX_RBBM_CFG_DBGBUS_CNTLT 0x00000008
#define REG_A5XX_RBBM_CFG_DBGBUS_CNTLM 0x00000009
+#define A5XX_RBBM_CFG_DBGBUS_CNTLM_ENABLE__MASK 0x0f000000
+#define A5XX_RBBM_CFG_DBGBUS_CNTLM_ENABLE__SHIFT 24
+static inline uint32_t A5XX_RBBM_CFG_DBGBUS_CNTLM_ENABLE(uint32_t val)
+{
+ return ((val) << A5XX_RBBM_CFG_DBGBUS_CNTLM_ENABLE__SHIFT) & A5XX_RBBM_CFG_DBGBUS_CNTLM_ENABLE__MASK;
+}
#define REG_A5XX_RBBM_CFG_DEBBUS_CTLTM_ENABLE_SHIFT 0x00000018
@@ -1413,6 +1539,12 @@ static inline uint32_t A5XX_VSC_BIN_SIZE_Y(uint32_t val)
#define REG_A5XX_HLSQ_SPTP_RDSEL 0x00000f08
#define REG_A5XX_HLSQ_DBG_READ_SEL 0x0000bc00
+#define A5XX_HLSQ_DBG_READ_SEL_STATETYPE__MASK 0x0000ff00
+#define A5XX_HLSQ_DBG_READ_SEL_STATETYPE__SHIFT 8
+static inline uint32_t A5XX_HLSQ_DBG_READ_SEL_STATETYPE(uint32_t val)
+{
+ return ((val) << A5XX_HLSQ_DBG_READ_SEL_STATETYPE__SHIFT) & A5XX_HLSQ_DBG_READ_SEL_STATETYPE__MASK;
+}
#define REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE 0x0000a000
@@ -1583,6 +1715,8 @@ static inline uint32_t A5XX_VSC_BIN_SIZE_Y(uint32_t val)
#define REG_A5XX_VBIF_VERSION 0x00003000
#define REG_A5XX_VBIF_CLKON 0x00003001
+#define A5XX_VBIF_CLKON_FORCE_ON 0x00000001
+#define A5XX_VBIF_CLKON_FORCE_ON_TESTBUS 0x00000002
#define REG_A5XX_VBIF_ABIT_SORT 0x00003028
@@ -1601,14 +1735,27 @@ static inline uint32_t A5XX_VSC_BIN_SIZE_Y(uint32_t val)
#define REG_A5XX_VBIF_XIN_HALT_CTRL1 0x00003081
#define REG_A5XX_VBIF_TEST_BUS_OUT_CTRL 0x00003084
+#define A5XX_VBIF_TEST_BUS_OUT_CTRL_TEST_BUS_CTRL_EN 0x00000001
#define REG_A5XX_VBIF_TEST_BUS1_CTRL0 0x00003085
#define REG_A5XX_VBIF_TEST_BUS1_CTRL1 0x00003086
+#define A5XX_VBIF_TEST_BUS1_CTRL1_TEST_BUS1_DATA_SEL__MASK 0x0000000f
+#define A5XX_VBIF_TEST_BUS1_CTRL1_TEST_BUS1_DATA_SEL__SHIFT 0
+static inline uint32_t A5XX_VBIF_TEST_BUS1_CTRL1_TEST_BUS1_DATA_SEL(uint32_t val)
+{
+ return ((val) << A5XX_VBIF_TEST_BUS1_CTRL1_TEST_BUS1_DATA_SEL__SHIFT) & A5XX_VBIF_TEST_BUS1_CTRL1_TEST_BUS1_DATA_SEL__MASK;
+}
#define REG_A5XX_VBIF_TEST_BUS2_CTRL0 0x00003087
#define REG_A5XX_VBIF_TEST_BUS2_CTRL1 0x00003088
+#define A5XX_VBIF_TEST_BUS2_CTRL1_TEST_BUS2_DATA_SEL__MASK 0x0000001f
+#define A5XX_VBIF_TEST_BUS2_CTRL1_TEST_BUS2_DATA_SEL__SHIFT 0
+static inline uint32_t A5XX_VBIF_TEST_BUS2_CTRL1_TEST_BUS2_DATA_SEL(uint32_t val)
+{
+ return ((val) << A5XX_VBIF_TEST_BUS2_CTRL1_TEST_BUS2_DATA_SEL__SHIFT) & A5XX_VBIF_TEST_BUS2_CTRL1_TEST_BUS2_DATA_SEL__MASK;
+}
#define REG_A5XX_VBIF_TEST_BUS_OUT 0x0000308c
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 8bc3c7bee3fb..a49a7b247547 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -12,26 +12,132 @@
*/
#include "msm_gem.h"
+#include "msm_iommu.h"
#include "a5xx_gpu.h"
-extern bool hang_debug;
-static void a5xx_dump(struct msm_gpu *gpu);
+static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+ uint32_t wptr;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ring->lock, flags);
+
+ /* Copy the shadow to the actual register */
+ ring->cur = ring->next;
+
+ /* Make sure to wrap wptr if we need to */
+ wptr = get_wptr(ring);
+
+ spin_unlock_irqrestore(&ring->lock, flags);
+
+ /* Make sure everything is posted before making a decision */
+ mb();
+
+ /* Update HW if this is the current ring and we are not in preempt */
+ if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
+ gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
+}
-static int a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
- struct msm_file_private *ctx)
+static void a5xx_set_pagetable(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
+ struct msm_gem_address_space *aspace)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
- struct msm_drm_private *priv = gpu->dev->dev_private;
- struct msm_ringbuffer *ring = gpu->rb;
+ struct msm_mmu *mmu = aspace->mmu;
+ struct msm_iommu *iommu = to_msm_iommu(mmu);
+
+ if (!iommu->ttbr0)
+ return;
+
+ /* Turn off protected mode */
+ OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
+ OUT_RING(ring, 0);
+
+ /* Turn on APRIV mode to access critical regions */
+ OUT_PKT4(ring, REG_A5XX_CP_CNTL, 1);
+ OUT_RING(ring, 1);
+
+ /* Make sure the ME is synchronized before starting the update */
+ OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
+
+ /* Execute the table update */
+ OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 3);
+ OUT_RING(ring, lower_32_bits(iommu->ttbr0));
+ OUT_RING(ring, upper_32_bits(iommu->ttbr0));
+ OUT_RING(ring, iommu->contextidr);
+
+ /*
+ * Write the new TTBR0 to the preemption records - this will be used to
+ * reload the pagetable if the current ring gets preempted out.
+ */
+ OUT_PKT7(ring, CP_MEM_WRITE, 4);
+ OUT_RING(ring, lower_32_bits(rbmemptr(adreno_gpu, ring->id, ttbr0)));
+ OUT_RING(ring, upper_32_bits(rbmemptr(adreno_gpu, ring->id, ttbr0)));
+ OUT_RING(ring, lower_32_bits(iommu->ttbr0));
+ OUT_RING(ring, upper_32_bits(iommu->ttbr0));
+
+ /* Also write the current contextidr (ASID) */
+ OUT_PKT7(ring, CP_MEM_WRITE, 3);
+ OUT_RING(ring, lower_32_bits(rbmemptr(adreno_gpu, ring->id,
+ contextidr)));
+ OUT_RING(ring, upper_32_bits(rbmemptr(adreno_gpu, ring->id,
+ contextidr)));
+ OUT_RING(ring, iommu->contextidr);
+
+ /* Invalidate the draw state so we start off fresh */
+ OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
+ OUT_RING(ring, 0x40000);
+ OUT_RING(ring, 1);
+ OUT_RING(ring, 0);
+
+ /* Turn off APRIV */
+ OUT_PKT4(ring, REG_A5XX_CP_CNTL, 1);
+ OUT_RING(ring, 0);
+
+ /* Turn protected mode back on */
+ OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
+ OUT_RING(ring, 1);
+}
+
+static int a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+ struct msm_ringbuffer *ring = gpu->rb[submit->ring];
unsigned int i, ibs = 0;
+ a5xx_set_pagetable(gpu, ring, submit->aspace);
+
+ OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
+ OUT_RING(ring, 0x02);
+
+ /* Turn off protected mode to write to special registers */
+ OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
+ OUT_RING(ring, 0);
+
+ /* Set the save preemption record for the ring/command */
+ OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
+ OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring]));
+ OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring]));
+
+ /* Turn back on protected mode */
+ OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
+ OUT_RING(ring, 1);
+
+ /* Enable local preemption for fine-grained preemption */
+ OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
+ OUT_RING(ring, 0x02);
+
+ /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
+ OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
+ OUT_RING(ring, 0x02);
+
+ /* Submit the commands */
for (i = 0; i < submit->nr_cmds; i++) {
switch (submit->cmd[i].type) {
case MSM_SUBMIT_CMD_IB_TARGET_BUF:
break;
- case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
- if (priv->lastctx == ctx)
- break;
case MSM_SUBMIT_CMD_BUF:
OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
@@ -42,16 +148,55 @@ static int a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
}
}
+ /*
+ * Write the render mode to NULL (0) to indicate to the CP that the IBs
+ * are done rendering - otherwise a lucky preemption would start
+ * replaying from the last checkpoint
+ */
+ OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+ OUT_RING(ring, 0);
+
+ /* Turn off IB level preemptions */
+ OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
+ OUT_RING(ring, 0x01);
+
+ /* Write the fence to the scratch register */
OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
OUT_RING(ring, submit->fence);
+ /*
+ * Execute a CACHE_FLUSH_TS event. This will ensure that the
+ * timestamp is written to the memory and then triggers the interrupt
+ */
OUT_PKT7(ring, CP_EVENT_WRITE, 4);
OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
- OUT_RING(ring, lower_32_bits(rbmemptr(adreno_gpu, fence)));
- OUT_RING(ring, upper_32_bits(rbmemptr(adreno_gpu, fence)));
+
+ OUT_RING(ring, lower_32_bits(rbmemptr(adreno_gpu, ring->id, fence)));
+ OUT_RING(ring, upper_32_bits(rbmemptr(adreno_gpu, ring->id, fence)));
OUT_RING(ring, submit->fence);
- gpu->funcs->flush(gpu);
+ /* Yield the floor on command completion */
+ OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
+ /*
+ * If dword[2:1] are non-zero, they specify an address for the CP to
+ * write the value of dword[3] to on preemption complete. Write 0 to
+ * skip the write
+ */
+ OUT_RING(ring, 0x00);
+ OUT_RING(ring, 0x00);
+ /* Data value - not used if the address above is 0 */
+ OUT_RING(ring, 0x01);
+ /* Set bit 0 to trigger an interrupt on preempt complete */
+ OUT_RING(ring, 0x01);
+
+ a5xx_flush(gpu, ring);
+
+ /* Check to see if we need to start preemption */
+ a5xx_preempt_trigger(gpu);
return 0;
}
@@ -154,28 +299,31 @@ static const struct {
{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
};
-static void a5xx_enable_hwcg(struct msm_gpu *gpu)
+void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
unsigned int i;
for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
- gpu_write(gpu, a5xx_hwcg[i].offset, a5xx_hwcg[i].value);
+ gpu_write(gpu, a5xx_hwcg[i].offset,
+ state ? a5xx_hwcg[i].value : 0);
/* There are a few additional registers just for A540 */
if (adreno_is_a540(adreno_gpu)) {
- gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, 0x770);
- gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, 0x004);
+ gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU,
+ state ? 0x770 : 0);
+ gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU,
+ state ? 0x004 : 0);
}
- gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xAAA8AA00);
- gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, 0x182);
+ gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
+ gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
}
static int a5xx_me_init(struct msm_gpu *gpu)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
- struct msm_ringbuffer *ring = gpu->rb;
+ struct msm_ringbuffer *ring = gpu->rb[0];
OUT_PKT7(ring, CP_ME_INIT, 8);
@@ -206,11 +354,54 @@ static int a5xx_me_init(struct msm_gpu *gpu)
OUT_RING(ring, 0x00000000);
OUT_RING(ring, 0x00000000);
- gpu->funcs->flush(gpu);
+ gpu->funcs->flush(gpu, ring);
+ return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
+}
+
+static int a5xx_preempt_start(struct msm_gpu *gpu)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+ struct msm_ringbuffer *ring = gpu->rb[0];
+
+ if (gpu->nr_rings == 1)
+ return 0;
+
+ /* Turn off protected mode to write to special registers */
+ OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
+ OUT_RING(ring, 0);
+
+ /* Set the save preemption record for the ring/command */
+ OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
+ OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
+ OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
+
+ /* Turn back on protected mode */
+ OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
+ OUT_RING(ring, 1);
- return gpu->funcs->idle(gpu) ? 0 : -EINVAL;
+ OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
+ OUT_RING(ring, 0x00);
+
+ OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
+ OUT_RING(ring, 0x01);
+
+ OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
+ OUT_RING(ring, 0x01);
+
+ /* Yield the floor on command completion */
+ OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
+ OUT_RING(ring, 0x00);
+ OUT_RING(ring, 0x00);
+ OUT_RING(ring, 0x01);
+ OUT_RING(ring, 0x01);
+
+ gpu->funcs->flush(gpu, ring);
+
+ return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}
+
static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu,
const struct firmware *fw, u64 *iova)
{
@@ -354,6 +545,7 @@ static void a5xx_zap_shader_init(struct msm_gpu *gpu)
A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
+ A5XX_RBBM_INT_0_MASK_CP_SW | \
A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
@@ -445,7 +637,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
/* Enable HWCG */
- a5xx_enable_hwcg(gpu);
+ a5xx_set_hwcg(gpu, true);
gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
@@ -516,6 +708,20 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
+ /* Put the GPU into 64 bit by default */
+ gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
+ gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
+
/* Load the GPMU firmware before starting the HW init */
a5xx_gpmu_ucode_init(gpu);
@@ -523,6 +729,8 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
if (ret)
return ret;
+ a5xx_preempt_hw_init(gpu);
+
ret = a5xx_ucode_init(gpu);
if (ret)
return ret;
@@ -545,11 +753,11 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
* ticking correctly
*/
if (adreno_is_a530(adreno_gpu)) {
- OUT_PKT7(gpu->rb, CP_EVENT_WRITE, 1);
- OUT_RING(gpu->rb, 0x0F);
+ OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
+ OUT_RING(gpu->rb[0], 0x0F);
- gpu->funcs->flush(gpu);
- if (!gpu->funcs->idle(gpu))
+ gpu->funcs->flush(gpu, gpu->rb[0]);
+ if (!a5xx_idle(gpu, gpu->rb[0]))
return -EINVAL;
}
@@ -562,13 +770,13 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
* cause a XPU violation.
*/
if (test_bit(A5XX_ZAP_SHADER_LOADED, &a5xx_gpu->flags)) {
- struct msm_ringbuffer *ring = gpu->rb;
+ struct msm_ringbuffer *ring = gpu->rb[0];
OUT_PKT7(ring, CP_SET_SECURE_MODE, 1);
OUT_RING(ring, 0x00000000);
- gpu->funcs->flush(gpu);
- if (!gpu->funcs->idle(gpu))
+ gpu->funcs->flush(gpu, gpu->rb[0]);
+ if (!a5xx_idle(gpu, gpu->rb[0]))
return -EINVAL;
} else {
/* Print a warning so if we die, we know why */
@@ -577,6 +785,9 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
}
+ /* Last step - yield the ringbuffer */
+ a5xx_preempt_start(gpu);
+
pm_qos_update_request(&gpu->pm_qos_req_dma, 501);
return 0;
@@ -586,8 +797,7 @@ static void a5xx_recover(struct msm_gpu *gpu)
{
adreno_dump_info(gpu);
- if (hang_debug)
- a5xx_dump(gpu);
+ msm_gpu_snapshot(gpu, gpu->snapshot);
/* Reset the GPU so it can work again */
gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
@@ -604,6 +814,8 @@ static void a5xx_destroy(struct msm_gpu *gpu)
DBG("%s", gpu->name);
+ a5xx_preempt_fini(gpu);
+
if (a5xx_gpu->pm4_bo) {
if (a5xx_gpu->pm4_iova)
msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
@@ -639,16 +851,27 @@ static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
}
-static bool a5xx_idle(struct msm_gpu *gpu)
+bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+
+ if (ring != a5xx_gpu->cur_ring) {
+ WARN(1, "Tried to idle a non-current ringbuffer\n");
+ return false;
+ }
+
/* wait for CP to drain ringbuffer: */
- if (!adreno_idle(gpu))
+ if (!adreno_idle(gpu, ring))
return false;
if (spin_until(_a5xx_check_idle(gpu))) {
- DRM_ERROR("%s: timeout waiting for GPU to idle: status %8.8X irq %8.8X\n",
- gpu->name,
+ DRM_ERROR(
+ "%s: timeout waiting for GPU RB %d to idle: status %8.8X rptr/wptr: %4.4X/%4.4X irq %8.8X\n",
+ gpu->name, ring->id,
gpu_read(gpu, REG_A5XX_RBBM_STATUS),
+ gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
+ gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS));
return false;
@@ -768,6 +991,20 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
{
struct drm_device *dev = gpu->dev;
struct msm_drm_private *priv = dev->dev_private;
+ struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
+
+ dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
+ ring ? ring->id : -1, adreno_submitted_fence(gpu, ring),
+ gpu_read(gpu, REG_A5XX_RBBM_STATUS),
+ gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
+ gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
+ gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
+ gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
+ gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
+ gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
+
+ /* Turn off the hangcheck timer to keep it from bothering us */
+ del_timer(&gpu->hangcheck_timer);
queue_work(priv->wq, &gpu->recover_work);
}
@@ -810,6 +1047,9 @@ static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS)
msm_gpu_retire(gpu);
+ if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
+ a5xx_preempt_irq(gpu);
+
return IRQ_HANDLED;
}
@@ -825,43 +1065,49 @@ static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
};
static const u32 a5xx_registers[] = {
- 0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
- 0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
- 0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
- 0x04E0, 0x0533, 0x0540, 0x0555, 0xF400, 0xF400, 0xF800, 0xF807,
- 0x0800, 0x081A, 0x081F, 0x0841, 0x0860, 0x0860, 0x0880, 0x08A0,
- 0x0B00, 0x0B12, 0x0B15, 0x0B28, 0x0B78, 0x0B7F, 0x0BB0, 0x0BBD,
- 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53, 0x0C60, 0x0C61, 0x0C80, 0x0C82,
- 0x0C84, 0x0C85, 0x0C90, 0x0C98, 0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2,
- 0x2180, 0x2185, 0x2580, 0x2585, 0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7,
- 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8, 0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8,
- 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E, 0x2100, 0x211E, 0x2140, 0x2145,
- 0x2500, 0x251E, 0x2540, 0x2545, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
- 0x0D30, 0x0D30, 0x20C0, 0x20C0, 0x24C0, 0x24C0, 0x0E40, 0x0E43,
- 0x0E4A, 0x0E4A, 0x0E50, 0x0E57, 0x0E60, 0x0E7C, 0x0E80, 0x0E8E,
- 0x0E90, 0x0E96, 0x0EA0, 0x0EA8, 0x0EB0, 0x0EB2, 0xE140, 0xE147,
- 0xE150, 0xE187, 0xE1A0, 0xE1A9, 0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7,
- 0xE1D0, 0xE1D1, 0xE200, 0xE201, 0xE210, 0xE21C, 0xE240, 0xE268,
- 0xE000, 0xE006, 0xE010, 0xE09A, 0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB,
- 0xE100, 0xE105, 0xE380, 0xE38F, 0xE3B0, 0xE3B0, 0xE400, 0xE405,
- 0xE408, 0xE4E9, 0xE4F0, 0xE4F0, 0xE280, 0xE280, 0xE282, 0xE2A3,
- 0xE2A5, 0xE2C2, 0xE940, 0xE947, 0xE950, 0xE987, 0xE9A0, 0xE9A9,
- 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7, 0xE9D0, 0xE9D1, 0xEA00, 0xEA01,
- 0xEA10, 0xEA1C, 0xEA40, 0xEA68, 0xE800, 0xE806, 0xE810, 0xE89A,
- 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB, 0xE900, 0xE905, 0xEB80, 0xEB8F,
- 0xEBB0, 0xEBB0, 0xEC00, 0xEC05, 0xEC08, 0xECE9, 0xECF0, 0xECF0,
- 0xEA80, 0xEA80, 0xEA82, 0xEAA3, 0xEAA5, 0xEAC2, 0xA800, 0xA8FF,
- 0xAC60, 0xAC60, 0xB000, 0xB97F, 0xB9A0, 0xB9BF,
+ 0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002b,
+ 0x002e, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
+ 0x0097, 0x00bb, 0x03a0, 0x0464, 0x0469, 0x046f, 0x04d2, 0x04d3,
+ 0x04e0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081a, 0x081f, 0x0841,
+ 0x0860, 0x0860, 0x0880, 0x08a0, 0x0b00, 0x0b12, 0x0b14, 0x0b28,
+ 0x0b78, 0x0b7f, 0x0bb0, 0x0bbd, 0x0bc0, 0x0bc6, 0x0bd0, 0x0c53,
+ 0x0c60, 0x0c61, 0x0c80, 0x0c82, 0x0c84, 0x0c85, 0x0c90, 0x0c9b,
+ 0x0ca0, 0x0ca0, 0x0cb0, 0x0cb2, 0x0cc1, 0x0cc1, 0x0cc4, 0x0cc7,
+ 0x0ccc, 0x0ccc, 0x0cd0, 0x0cdb, 0x0ce0, 0x0ce5, 0x0ce8, 0x0ce8,
+ 0x0cec, 0x0cf1, 0x0cfb, 0x0d0e, 0x0d10, 0x0d17, 0x0d20, 0x0d23,
+ 0x0d30, 0x0d30, 0x0e40, 0x0e43, 0x0e4a, 0x0e4a, 0x0e50, 0x0e57,
+ 0x0e60, 0x0e7c, 0x0e80, 0x0e8e, 0x0e90, 0x0e96, 0x0ea0, 0x0eab,
+ 0x0eb0, 0x0eb2, 0x2100, 0x211e, 0x2140, 0x2145, 0x2180, 0x2185,
+ 0x2500, 0x251e, 0x2540, 0x2545, 0x2580, 0x2585, 0x3000, 0x3014,
+ 0x3018, 0x302c, 0x3030, 0x3030, 0x3034, 0x3036, 0x303c, 0x303d,
+ 0x3040, 0x3040, 0x3042, 0x3042, 0x3049, 0x3049, 0x3058, 0x3058,
+ 0x305a, 0x3061, 0x3064, 0x3068, 0x306c, 0x306d, 0x3080, 0x3088,
+ 0x308b, 0x308c, 0x3090, 0x3094, 0x3098, 0x3098, 0x309c, 0x309c,
+ 0x3124, 0x3124, 0x340c, 0x340c, 0x3410, 0x3410, 0x3800, 0x3801,
+ 0xa800, 0xa800, 0xa820, 0xa828, 0xa840, 0xa87d, 0xa880, 0xa88d,
+ 0xa890, 0xa8a3, 0xa8a8, 0xa8aa, 0xa8c0, 0xa8c3, 0xa8c6, 0xa8ca,
+ 0xa8cc, 0xa8cf, 0xa8d1, 0xa8d8, 0xa8dc, 0xa8dc, 0xa8e0, 0xa8f5,
+ 0xac00, 0xac06, 0xac40, 0xac47, 0xac60, 0xac62, 0xac80, 0xac82,
+ 0xb800, 0xb808, 0xb80c, 0xb812, 0xb814, 0xb817, 0xb900, 0xb904,
+ 0xb906, 0xb90a, 0xb90c, 0xb90f, 0xb920, 0xb924, 0xb926, 0xb92a,
+ 0xb92c, 0xb92f, 0xb940, 0xb944, 0xb946, 0xb94a, 0xb94c, 0xb94f,
+ 0xb960, 0xb964, 0xb966, 0xb96a, 0xb96c, 0xb96f, 0xb980, 0xb984,
+ 0xb986, 0xb98a, 0xb98c, 0xb98f, 0xb9a0, 0xb9b0, 0xb9b8, 0xb9ba,
+ 0xd200, 0xd23f, 0xe000, 0xe006, 0xe010, 0xe09a, 0xe0a0, 0xe0a4,
+ 0xe0aa, 0xe0eb, 0xe100, 0xe105, 0xe140, 0xe147, 0xe150, 0xe187,
+ 0xe1a0, 0xe1a9, 0xe1b0, 0xe1b6, 0xe1c0, 0xe1c7, 0xe1d0, 0xe1d1,
+ 0xe200, 0xe201, 0xe210, 0xe21c, 0xe240, 0xe268, 0xe280, 0xe280,
+ 0xe282, 0xe2a3, 0xe2a5, 0xe2c2, 0xe380, 0xe38f, 0xe3b0, 0xe3b0,
+ 0xe400, 0xe405, 0xe408, 0xe4e9, 0xe4f0, 0xe4f0, 0xe800, 0xe806,
+ 0xe810, 0xe89a, 0xe8a0, 0xe8a4, 0xe8aa, 0xe8eb, 0xe900, 0xe905,
+ 0xe940, 0xe947, 0xe950, 0xe987, 0xe9a0, 0xe9a9, 0xe9b0, 0xe9b6,
+ 0xe9c0, 0xe9c7, 0xe9d0, 0xe9d1, 0xea00, 0xea01, 0xea10, 0xea1c,
+ 0xea40, 0xea68, 0xea80, 0xea80, 0xea82, 0xeaa3, 0xeaa5, 0xeac2,
+ 0xeb80, 0xeb8f, 0xebb0, 0xebb0, 0xec00, 0xec05, 0xec08, 0xece9,
+ 0xecf0, 0xecf0, 0xf400, 0xf400, 0xf800, 0xf807,
~0
};
-static void a5xx_dump(struct msm_gpu *gpu)
-{
- dev_info(gpu->dev->dev, "status: %08x\n",
- gpu_read(gpu, REG_A5XX_RBBM_STATUS));
- adreno_dump(gpu);
-}
-
static int a5xx_pm_resume(struct msm_gpu *gpu)
{
int ret;
@@ -943,6 +1189,14 @@ static void a5xx_show(struct msm_gpu *gpu, struct seq_file *m)
}
#endif
+static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+
+ return a5xx_gpu->cur_ring;
+}
+
static const struct adreno_gpu_funcs funcs = {
.base = {
.get_param = adreno_get_param,
@@ -951,14 +1205,16 @@ static const struct adreno_gpu_funcs funcs = {
.pm_resume = a5xx_pm_resume,
.recover = a5xx_recover,
.last_fence = adreno_last_fence,
+ .submitted_fence = adreno_submitted_fence,
.submit = a5xx_submit,
- .flush = adreno_flush,
- .idle = a5xx_idle,
+ .flush = a5xx_flush,
+ .active_ring = a5xx_active_ring,
.irq = a5xx_irq,
.destroy = a5xx_destroy,
#ifdef CONFIG_DEBUG_FS
.show = a5xx_show,
#endif
+ .snapshot = a5xx_snapshot,
},
.get_timestamp = a5xx_get_timestamp,
};
@@ -1073,11 +1329,14 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
/* Check the efuses for some configuration */
a5xx_efuses_read(pdev, adreno_gpu);
- ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs);
+ ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
if (ret) {
a5xx_destroy(&(a5xx_gpu->base.base));
return ERR_PTR(ret);
}
+ /* Set up the preemption specific bits and pieces for each ringbuffer */
+ a5xx_preempt_init(gpu);
+
return gpu;
}
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
index e82f54063877..3de14fe42a1b 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2016 The Linux Foundation. All rights reserved.
+/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -42,10 +42,115 @@ struct a5xx_gpu {
uint32_t gpmu_dwords;
uint32_t lm_leakage;
+
+ struct msm_ringbuffer *cur_ring;
+ struct msm_ringbuffer *next_ring;
+
+ struct drm_gem_object *preempt_bo[MSM_GPU_MAX_RINGS];
+ struct a5xx_preempt_record *preempt[MSM_GPU_MAX_RINGS];
+ uint64_t preempt_iova[MSM_GPU_MAX_RINGS];
+
+ atomic_t preempt_state;
+ struct timer_list preempt_timer;
+
+ struct a5xx_smmu_info *smmu_info;
+ struct drm_gem_object *smmu_info_bo;
+ uint64_t smmu_info_iova;
};
#define to_a5xx_gpu(x) container_of(x, struct a5xx_gpu, base)
+/*
+ * In order to do lockless preemption we use a simple state machine to progress
+ * through the process.
+ *
+ * PREEMPT_NONE - no preemption in progress. Next state: START.
+ * PREEMPT_START - the trigger is evaluating if preemption is possible. Next
+ * states: TRIGGERED, NONE
+ * PREEMPT_TRIGGERED - a preemption has been executed on the hardware. Next
+ * states: FAULTED, PENDING
+ * PREEMPT_FAULTED - a preemption timed out (never completed). This will
+ * trigger recovery. Next state: N/A
+ * PREEMPT_PENDING - the preemption complete interrupt fired - the callback is
+ * checking the success of the operation. Next states: FAULTED, NONE
+ */
+
+enum preempt_state {
+ PREEMPT_NONE = 0,
+ PREEMPT_START,
+ PREEMPT_TRIGGERED,
+ PREEMPT_FAULTED,
+ PREEMPT_PENDING,
+};
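+
+/*
+ * A minimal sketch of how a transition is claimed (the real helpers live in
+ * a5xx_preempt.c): an atomic cmpxchg makes sure that two racing cores cannot
+ * both win the same transition, for example moving NONE to START on the
+ * trigger path:
+ *
+ * if (atomic_cmpxchg(&a5xx_gpu->preempt_state,
+ * PREEMPT_NONE, PREEMPT_START) != PREEMPT_NONE)
+ * return; (a preemption is already in flight)
+ */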
+
+/*
+ * struct a5xx_preempt_record is a shared buffer between the microcode and the
+ * CPU to store the state for preemption. The record itself is much larger
+ * (64k) but most of that is used by the CP for storage.
+ *
+ * There is a preemption record assigned per ringbuffer. When the CPU triggers a
+ * preemption, it fills out the record with the useful information (wptr, ring
+ * base, etc) and the microcode uses that information to set up the CP following
+ * the preemption. When a ring is switched out, the CP will save the ringbuffer
+ * state back to the record. In this way, once the records are properly set up
+ * the CPU can quickly switch back and forth between ringbuffers by only
+ * updating a few registers (often only the wptr).
+ *
+ * These are the CPU aware registers in the record:
+ * @magic: Must always be 0x27C4BAFC
+ * @info: Type of the record - written 0 by the CPU, updated by the CP
+ * @data: Data field from SET_RENDER_MODE or a checkpoint. Written and used by
+ * the CP
+ * @cntl: Value of RB_CNTL written by CPU, saved/restored by CP
+ * @rptr: Value of RB_RPTR written by CPU, saved/restored by CP
+ * @wptr: Value of RB_WPTR written by CPU, saved/restored by CP
+ * @rptr_addr: Value of RB_RPTR_ADDR written by CPU, saved/restored by CP
+ * @rbase: Value of RB_BASE written by CPU, saved/restored by CP
+ * @counter: GPU address of the storage area for the performance counters
+ */
+struct a5xx_preempt_record {
+ uint32_t magic;
+ uint32_t info;
+ uint32_t data;
+ uint32_t cntl;
+ uint32_t rptr;
+ uint32_t wptr;
+ uint64_t rptr_addr;
+ uint64_t rbase;
+ uint64_t counter;
+};
+
+/* Magic identifier for the preemption record */
+#define A5XX_PREEMPT_RECORD_MAGIC 0x27C4BAFCUL
+
+/*
+ * Even though the structure above is only a few bytes, we need a full 64k to
+ * store the entire preemption record from the CP
+ */
+#define A5XX_PREEMPT_RECORD_SIZE (64 * 1024)
+
+/*
+ * The preemption counter block is a storage area for the value of the
+ * preemption counters that are saved immediately before context switch. We
+ * append it to the end of the allocation for the preemption record.
+ */
+#define A5XX_PREEMPT_COUNTER_SIZE (16 * 4)
+
+/*
+ * This is a global structure that the preemption code uses to switch in the
+ * pagetable for the incoming process - the code switches in whatever
+ * pagetable the new ring needs after preempting to it.
+ */
+struct a5xx_smmu_info {
+ uint32_t magic;
+ uint32_t _pad4;
+ uint64_t ttbr0;
+ uint32_t asid;
+ uint32_t contextidr;
+};
+
+#define A5XX_SMMU_INFO_MAGIC 0x3618CDA3UL
+
int a5xx_power_init(struct msm_gpu *gpu);
void a5xx_gpmu_ucode_init(struct msm_gpu *gpu);
@@ -62,5 +167,21 @@ static inline int spin_usecs(struct msm_gpu *gpu, uint32_t usecs,
return -ETIMEDOUT;
}
+void a5xx_set_hwcg(struct msm_gpu *gpu, bool state);
+bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
+
+void a5xx_preempt_init(struct msm_gpu *gpu);
+void a5xx_preempt_hw_init(struct msm_gpu *gpu);
+void a5xx_preempt_trigger(struct msm_gpu *gpu);
+void a5xx_preempt_irq(struct msm_gpu *gpu);
+void a5xx_preempt_fini(struct msm_gpu *gpu);
+
+int a5xx_snapshot(struct msm_gpu *gpu, struct msm_snapshot *snapshot);
+
+/* Return true if we are in a preempt state */
+static inline bool a5xx_in_preempt(struct a5xx_gpu *a5xx_gpu)
+{
+ return atomic_read(&a5xx_gpu->preempt_state) != PREEMPT_NONE;
+}
#endif /* __A5XX_GPU_H__ */
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c
index 18bca141b425..2040b18f731f 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_power.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c
@@ -229,7 +229,7 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
- struct msm_ringbuffer *ring = gpu->rb;
+ struct msm_ringbuffer *ring = gpu->rb[0];
if (!a5xx_gpu->gpmu_dwords)
return 0;
@@ -248,9 +248,9 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu)
OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
OUT_RING(ring, 1);
- gpu->funcs->flush(gpu);
+ gpu->funcs->flush(gpu, ring);
- if (!gpu->funcs->idle(gpu)) {
+ if (!a5xx_idle(gpu, ring)) {
DRM_ERROR("%s: Unable to load GPMU firmware. GPMU will not be active\n",
gpu->name);
return -EINVAL;
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
new file mode 100644
index 000000000000..648494c75abc
--- /dev/null
+++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
@@ -0,0 +1,383 @@
+/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "msm_gem.h"
+#include "msm_iommu.h"
+#include "a5xx_gpu.h"
+
+static void *alloc_kernel_bo(struct drm_device *drm, struct msm_gpu *gpu,
+ size_t size, uint32_t flags, struct drm_gem_object **bo,
+ u64 *iova)
+{
+ struct drm_gem_object *_bo;
+ u64 _iova;
+ void *ptr;
+ int ret;
+
+ mutex_lock(&drm->struct_mutex);
+ _bo = msm_gem_new(drm, size, flags);
+ mutex_unlock(&drm->struct_mutex);
+
+ if (IS_ERR(_bo))
+ return _bo;
+
+ ret = msm_gem_get_iova(_bo, gpu->aspace, &_iova);
+ if (ret)
+ goto out;
+
+ ptr = msm_gem_vaddr(_bo);
+ if (!ptr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (bo)
+ *bo = _bo;
+ if (iova)
+ *iova = _iova;
+
+ return ptr;
+out:
+ drm_gem_object_unreference_unlocked(_bo);
+ return ERR_PTR(ret);
+}
+
+/*
+ * Try to transition the preemption state from old to new. Return
+ * true on success or false if the original state wasn't 'old'
+ */
+static inline bool try_preempt_state(struct a5xx_gpu *a5xx_gpu,
+ enum preempt_state old, enum preempt_state new)
+{
+ enum preempt_state cur = atomic_cmpxchg(&a5xx_gpu->preempt_state,
+ old, new);
+
+ return (cur == old);
+}
+
+/*
+ * Force the preemption state to the specified state. This is used in cases
+ * where the current state is known and won't change
+ */
+static inline void set_preempt_state(struct a5xx_gpu *gpu,
+ enum preempt_state new)
+{
+ /*
+ * preempt_state may be read by other cores trying to trigger a
+ * preemption or in the interrupt handler so barriers are needed
+ * before...
+ */
+ smp_mb__before_atomic();
+ atomic_set(&gpu->preempt_state, new);
+ /* ... and after */
+ smp_mb__after_atomic();
+}
+
+/* Write the most recent wptr for the given ring into the hardware */
+static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+ unsigned long flags;
+ uint32_t wptr;
+
+ if (!ring)
+ return;
+
+ spin_lock_irqsave(&ring->lock, flags);
+ wptr = get_wptr(ring);
+ spin_unlock_irqrestore(&ring->lock, flags);
+
+ gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
+}
+
+/* Return the highest priority ringbuffer with something in it */
+static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ unsigned long flags;
+ int i;
+
+ for (i = gpu->nr_rings - 1; i >= 0; i--) {
+ bool empty;
+ struct msm_ringbuffer *ring = gpu->rb[i];
+
+ spin_lock_irqsave(&ring->lock, flags);
+ empty = (get_wptr(ring) == adreno_gpu->memptrs->rptr[ring->id]);
+ spin_unlock_irqrestore(&ring->lock, flags);
+
+ if (!empty)
+ return ring;
+ }
+
+ return NULL;
+}
+
+static void a5xx_preempt_timer(unsigned long data)
+{
+ struct a5xx_gpu *a5xx_gpu = (struct a5xx_gpu *) data;
+ struct msm_gpu *gpu = &a5xx_gpu->base.base;
+ struct drm_device *dev = gpu->dev;
+ struct msm_drm_private *priv = dev->dev_private;
+
+ if (!try_preempt_state(a5xx_gpu, PREEMPT_TRIGGERED, PREEMPT_FAULTED))
+ return;
+
+ dev_err(dev->dev, "%s: preemption timed out\n", gpu->name);
+ queue_work(priv->wq, &gpu->recover_work);
+}
+
+/* Try to trigger a preemption switch */
+void a5xx_preempt_trigger(struct msm_gpu *gpu)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+ unsigned long flags;
+ struct msm_ringbuffer *ring;
+
+ if (gpu->nr_rings == 1)
+ return;
+
+ /*
+ * Try to start preemption by moving from NONE to START. If
+ * unsuccessful, a preemption is already in flight
+ */
+ if (!try_preempt_state(a5xx_gpu, PREEMPT_NONE, PREEMPT_START))
+ return;
+
+ /* Get the next ring to preempt to */
+ ring = get_next_ring(gpu);
+
+ /*
+ * If no ring is populated or the highest priority ring is the current
+ * one, do nothing except update the wptr to the latest and greatest
+ */
+ if (!ring || (a5xx_gpu->cur_ring == ring)) {
+ update_wptr(gpu, ring);
+
+ /* Set the state back to NONE */
+ set_preempt_state(a5xx_gpu, PREEMPT_NONE);
+ return;
+ }
+
+ /* Make sure the wptr doesn't update while we're in motion */
+ spin_lock_irqsave(&ring->lock, flags);
+ a5xx_gpu->preempt[ring->id]->wptr = get_wptr(ring);
+ spin_unlock_irqrestore(&ring->lock, flags);
+
+ /* Do read barrier to make sure we have updated pagetable info */
+ rmb();
+
+ /* Set the SMMU info for the preemption */
+ if (a5xx_gpu->smmu_info) {
+ a5xx_gpu->smmu_info->ttbr0 =
+ adreno_gpu->memptrs->ttbr0[ring->id];
+ a5xx_gpu->smmu_info->contextidr =
+ adreno_gpu->memptrs->contextidr[ring->id];
+ }
+
+ /* Set the address of the incoming preemption record */
+ gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO,
+ REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI,
+ a5xx_gpu->preempt_iova[ring->id]);
+
+ a5xx_gpu->next_ring = ring;
+
+ /* Start a timer to catch a stuck preemption */
+ mod_timer(&a5xx_gpu->preempt_timer, jiffies + msecs_to_jiffies(10000));
+
+ /* Set the preemption state to triggered */
+ set_preempt_state(a5xx_gpu, PREEMPT_TRIGGERED);
+
+ /* Make sure everything is written before hitting the button */
+ wmb();
+
+ /* And actually start the preemption */
+ gpu_write(gpu, REG_A5XX_CP_CONTEXT_SWITCH_CNTL, 1);
+}
+
+void a5xx_preempt_irq(struct msm_gpu *gpu)
+{
+ uint32_t status;
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+ struct drm_device *dev = gpu->dev;
+ struct msm_drm_private *priv = dev->dev_private;
+
+ if (!try_preempt_state(a5xx_gpu, PREEMPT_TRIGGERED, PREEMPT_PENDING))
+ return;
+
+ /* Delete the preemption watchdog timer */
+ del_timer(&a5xx_gpu->preempt_timer);
+
+ /*
+ * The hardware should be setting CP_CONTEXT_SWITCH_CNTL to zero before
+ * firing the interrupt, but there is a non-zero chance of a hardware
+ * condition or a software race that could set it again before we have a
+ * chance to finish. If that happens, log and go for recovery
+ */
+ status = gpu_read(gpu, REG_A5XX_CP_CONTEXT_SWITCH_CNTL);
+ if (unlikely(status)) {
+ set_preempt_state(a5xx_gpu, PREEMPT_FAULTED);
+ dev_err(dev->dev, "%s: Preemption failed to complete\n",
+ gpu->name);
+ queue_work(priv->wq, &gpu->recover_work);
+ return;
+ }
+
+ a5xx_gpu->cur_ring = a5xx_gpu->next_ring;
+ a5xx_gpu->next_ring = NULL;
+
+ update_wptr(gpu, a5xx_gpu->cur_ring);
+
+ set_preempt_state(a5xx_gpu, PREEMPT_NONE);
+}
+
+void a5xx_preempt_hw_init(struct msm_gpu *gpu)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+ struct msm_ringbuffer *ring;
+ int i;
+
+ if (gpu->nr_rings > 1) {
+ /* Clear the preemption records */
+ FOR_EACH_RING(gpu, ring, i) {
+ if (ring) {
+ a5xx_gpu->preempt[ring->id]->wptr = 0;
+ a5xx_gpu->preempt[ring->id]->rptr = 0;
+ a5xx_gpu->preempt[ring->id]->rbase = ring->iova;
+ }
+ }
+ }
+
+ /* Tell the CP where to find the smmu_info buffer */
+ gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
+ REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
+ a5xx_gpu->smmu_info_iova);
+
+ /* Reset the preemption state */
+ set_preempt_state(a5xx_gpu, PREEMPT_NONE);
+
+ /* Always come up on rb 0 */
+ a5xx_gpu->cur_ring = gpu->rb[0];
+}
+
+static int preempt_init_ring(struct a5xx_gpu *a5xx_gpu,
+ struct msm_ringbuffer *ring)
+{
+ struct adreno_gpu *adreno_gpu = &a5xx_gpu->base;
+ struct msm_gpu *gpu = &adreno_gpu->base;
+ struct a5xx_preempt_record *ptr;
+ struct drm_gem_object *bo;
+ u64 iova;
+
+ ptr = alloc_kernel_bo(gpu->dev, gpu,
+ A5XX_PREEMPT_RECORD_SIZE + A5XX_PREEMPT_COUNTER_SIZE,
+ MSM_BO_UNCACHED | MSM_BO_PRIVILEGED,
+ &bo, &iova);
+
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
+
+ a5xx_gpu->preempt_bo[ring->id] = bo;
+ a5xx_gpu->preempt_iova[ring->id] = iova;
+ a5xx_gpu->preempt[ring->id] = ptr;
+
+ /* Set up the defaults on the preemption record */
+
+ ptr->magic = A5XX_PREEMPT_RECORD_MAGIC;
+ ptr->info = 0;
+ ptr->data = 0;
+ ptr->cntl = MSM_GPU_RB_CNTL_DEFAULT;
+ ptr->rptr_addr = rbmemptr(adreno_gpu, ring->id, rptr);
+ ptr->counter = iova + A5XX_PREEMPT_RECORD_SIZE;
+
+ return 0;
+}
+
+void a5xx_preempt_fini(struct msm_gpu *gpu)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+ struct msm_ringbuffer *ring;
+ int i;
+
+ FOR_EACH_RING(gpu, ring, i) {
+ if (!ring || !a5xx_gpu->preempt_bo[i])
+ continue;
+
+ if (a5xx_gpu->preempt_iova[i])
+ msm_gem_put_iova(a5xx_gpu->preempt_bo[i], gpu->aspace);
+
+ drm_gem_object_unreference_unlocked(a5xx_gpu->preempt_bo[i]);
+
+ a5xx_gpu->preempt_bo[i] = NULL;
+ }
+
+ if (a5xx_gpu->smmu_info_bo) {
+ if (a5xx_gpu->smmu_info_iova)
+ msm_gem_put_iova(a5xx_gpu->smmu_info_bo, gpu->aspace);
+ drm_gem_object_unreference_unlocked(a5xx_gpu->smmu_info_bo);
+ a5xx_gpu->smmu_info_bo = NULL;
+ }
+}
+
+void a5xx_preempt_init(struct msm_gpu *gpu)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+ struct msm_ringbuffer *ring;
+ struct a5xx_smmu_info *ptr;
+ struct drm_gem_object *bo;
+ uint64_t iova;
+ int i;
+
+ /* No preemption if we only have one ring */
+ if (gpu->nr_rings <= 1)
+ return;
+
+ FOR_EACH_RING(gpu, ring, i) {
+ if (!ring)
+ continue;
+
+ if (preempt_init_ring(a5xx_gpu, ring))
+ goto fail;
+ }
+
+ if (msm_iommu_allow_dynamic(gpu->aspace->mmu)) {
+ ptr = alloc_kernel_bo(gpu->dev, gpu,
+ sizeof(struct a5xx_smmu_info),
+ MSM_BO_UNCACHED | MSM_BO_PRIVILEGED,
+ &bo, &iova);
+
+ if (IS_ERR(ptr))
+ goto fail;
+
+ ptr->magic = A5XX_SMMU_INFO_MAGIC;
+
+ a5xx_gpu->smmu_info_bo = bo;
+ a5xx_gpu->smmu_info_iova = iova;
+ a5xx_gpu->smmu_info = ptr;
+ }
+
+ setup_timer(&a5xx_gpu->preempt_timer, a5xx_preempt_timer,
+ (unsigned long) a5xx_gpu);
+
+ return;
+fail:
+ /*
+ * On any failure our adventure is over. Clean up and
+ * set nr_rings to 1 to force preemption off
+ */
+ a5xx_preempt_fini(gpu);
+ gpu->nr_rings = 1;
+}
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_snapshot.c b/drivers/gpu/drm/msm/adreno/a5xx_snapshot.c
new file mode 100644
index 000000000000..5a2edb0ea518
--- /dev/null
+++ b/drivers/gpu/drm/msm/adreno/a5xx_snapshot.c
@@ -0,0 +1,796 @@
+/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "msm_gpu.h"
+#include "msm_gem.h"
+#include "a5xx_gpu.h"
+#include "msm_snapshot_api.h"
+
+#define A5XX_NR_SHADER_BANKS 4
+
+/*
+ * These are a list of the registers that need to be read through the HLSQ
+ * aperture through the crashdumper. These are not nominally accessible from
+ * the CPU on a secure platform.
+ */
+static const struct {
+ u32 type;
+ u32 regoffset;
+ u32 count;
+} a5xx_hlsq_aperture_regs[] = {
+ { 0x35, 0xE00, 0x32 }, /* HLSQ non-context */
+ { 0x31, 0x2080, 0x1 }, /* HLSQ 2D context 0 */
+ { 0x33, 0x2480, 0x1 }, /* HLSQ 2D context 1 */
+ { 0x32, 0xE780, 0x62 }, /* HLSQ 3D context 0 */
+ { 0x34, 0xEF80, 0x62 }, /* HLSQ 3D context 1 */
+ { 0x3f, 0x0EC0, 0x40 }, /* SP non-context */
+ { 0x3d, 0x2040, 0x1 }, /* SP 2D context 0 */
+ { 0x3b, 0x2440, 0x1 }, /* SP 2D context 1 */
+ { 0x3e, 0xE580, 0x180 }, /* SP 3D context 0 */
+ { 0x3c, 0xED80, 0x180 }, /* SP 3D context 1 */
+ { 0x3a, 0x0F00, 0x1c }, /* TP non-context */
+ { 0x38, 0x2000, 0xa }, /* TP 2D context 0 */
+ { 0x36, 0x2400, 0xa }, /* TP 2D context 1 */
+ { 0x39, 0xE700, 0x80 }, /* TP 3D context 0 */
+ { 0x37, 0xEF00, 0x80 }, /* TP 3D context 1 */
+};
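+
+/*
+ * A minimal sketch, for illustration only, of how one of these ranges could
+ * be read directly on a non-secure device: select the state type through
+ * HLSQ_DBG_READ_SEL and read the values back through the AHB read aperture.
+ * The snapshot code below uses the crashdumper instead, and this helper name
+ * is hypothetical.
+ */
+static u32 __maybe_unused a5xx_hlsq_aperture_read(struct msm_gpu *gpu,
+ u32 statetype, u32 offset)
+{
+ gpu_write(gpu, REG_A5XX_HLSQ_DBG_READ_SEL,
+ A5XX_HLSQ_DBG_READ_SEL_STATETYPE(statetype));
+
+ return gpu_read(gpu, REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE + offset);
+}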
+
+/*
+ * The debugbus registers contain device state that presumably makes
+ * sense to the hardware designers. 'count' is the number of indexes to read,
+ * each index value is 64 bits
+ */
+static const struct {
+ enum a5xx_debugbus id;
+ u32 count;
+} a5xx_debugbus_blocks[] = {
+ { A5XX_RBBM_DBGBUS_CP, 0x100, },
+ { A5XX_RBBM_DBGBUS_RBBM, 0x100, },
+ { A5XX_RBBM_DBGBUS_HLSQ, 0x100, },
+ { A5XX_RBBM_DBGBUS_UCHE, 0x100, },
+ { A5XX_RBBM_DBGBUS_DPM, 0x100, },
+ { A5XX_RBBM_DBGBUS_TESS, 0x100, },
+ { A5XX_RBBM_DBGBUS_PC, 0x100, },
+ { A5XX_RBBM_DBGBUS_VFDP, 0x100, },
+ { A5XX_RBBM_DBGBUS_VPC, 0x100, },
+ { A5XX_RBBM_DBGBUS_TSE, 0x100, },
+ { A5XX_RBBM_DBGBUS_RAS, 0x100, },
+ { A5XX_RBBM_DBGBUS_VSC, 0x100, },
+ { A5XX_RBBM_DBGBUS_COM, 0x100, },
+ { A5XX_RBBM_DBGBUS_DCOM, 0x100, },
+ { A5XX_RBBM_DBGBUS_LRZ, 0x100, },
+ { A5XX_RBBM_DBGBUS_A2D_DSP, 0x100, },
+ { A5XX_RBBM_DBGBUS_CCUFCHE, 0x100, },
+ { A5XX_RBBM_DBGBUS_GPMU, 0x100, },
+ { A5XX_RBBM_DBGBUS_RBP, 0x100, },
+ { A5XX_RBBM_DBGBUS_HM, 0x100, },
+ { A5XX_RBBM_DBGBUS_RBBM_CFG, 0x100, },
+ { A5XX_RBBM_DBGBUS_VBIF_CX, 0x100, },
+ { A5XX_RBBM_DBGBUS_GPC, 0x100, },
+ { A5XX_RBBM_DBGBUS_LARC, 0x100, },
+ { A5XX_RBBM_DBGBUS_HLSQ_SPTP, 0x100, },
+ { A5XX_RBBM_DBGBUS_RB_0, 0x100, },
+ { A5XX_RBBM_DBGBUS_RB_1, 0x100, },
+ { A5XX_RBBM_DBGBUS_RB_2, 0x100, },
+ { A5XX_RBBM_DBGBUS_RB_3, 0x100, },
+ { A5XX_RBBM_DBGBUS_CCU_0, 0x100, },
+ { A5XX_RBBM_DBGBUS_CCU_1, 0x100, },
+ { A5XX_RBBM_DBGBUS_CCU_2, 0x100, },
+ { A5XX_RBBM_DBGBUS_CCU_3, 0x100, },
+ { A5XX_RBBM_DBGBUS_A2D_RAS_0, 0x100, },
+ { A5XX_RBBM_DBGBUS_A2D_RAS_1, 0x100, },
+ { A5XX_RBBM_DBGBUS_A2D_RAS_2, 0x100, },
+ { A5XX_RBBM_DBGBUS_A2D_RAS_3, 0x100, },
+ { A5XX_RBBM_DBGBUS_VFD_0, 0x100, },
+ { A5XX_RBBM_DBGBUS_VFD_1, 0x100, },
+ { A5XX_RBBM_DBGBUS_VFD_2, 0x100, },
+ { A5XX_RBBM_DBGBUS_VFD_3, 0x100, },
+ { A5XX_RBBM_DBGBUS_SP_0, 0x100, },
+ { A5XX_RBBM_DBGBUS_SP_1, 0x100, },
+ { A5XX_RBBM_DBGBUS_SP_2, 0x100, },
+ { A5XX_RBBM_DBGBUS_SP_3, 0x100, },
+ { A5XX_RBBM_DBGBUS_TPL1_0, 0x100, },
+ { A5XX_RBBM_DBGBUS_TPL1_1, 0x100, },
+ { A5XX_RBBM_DBGBUS_TPL1_2, 0x100, },
+ { A5XX_RBBM_DBGBUS_TPL1_3, 0x100, },
+};
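+
+/*
+ * A minimal sketch of how a debugbus block/index pair is selected, using the
+ * PING fields defined in a5xx.xml.h. This assumes SEL_B shares the SEL_A
+ * layout; the remaining select registers and the 64 bit readback itself are
+ * omitted, and the helper name is hypothetical.
+ */
+static void __maybe_unused a5xx_debugbus_select(struct msm_gpu *gpu,
+ u32 block, u32 index)
+{
+ u32 sel = A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block) |
+ A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_INDEX(index);
+
+ gpu_write(gpu, REG_A5XX_RBBM_CFG_DBGBUS_SEL_A, sel);
+ gpu_write(gpu, REG_A5XX_RBBM_CFG_DBGBUS_SEL_B, sel);
+}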
+
+/*
+ * The shader blocks are read from the HLSQ aperture - each one has its own
+ * identifier for the aperture read
+ */
+static const struct {
+ enum a5xx_shader_blocks id;
+ u32 size;
+} a5xx_shader_blocks[] = {
+ {A5XX_TP_W_MEMOBJ, 0x200},
+ {A5XX_TP_W_MIPMAP_BASE, 0x3C0},
+ {A5XX_TP_W_SAMPLER_TAG, 0x40},
+ {A5XX_TP_S_3D_SAMPLER, 0x80},
+ {A5XX_TP_S_3D_SAMPLER_TAG, 0x20},
+ {A5XX_TP_S_CS_SAMPLER, 0x40},
+ {A5XX_TP_S_CS_SAMPLER_TAG, 0x10},
+ {A5XX_SP_W_CONST, 0x800},
+ {A5XX_SP_W_CB_SIZE, 0x30},
+ {A5XX_SP_W_CB_BASE, 0xF0},
+ {A5XX_SP_W_STATE, 0x1},
+ {A5XX_SP_S_3D_CONST, 0x800},
+ {A5XX_SP_S_3D_CB_SIZE, 0x28},
+ {A5XX_SP_S_3D_UAV_SIZE, 0x80},
+ {A5XX_SP_S_CS_CONST, 0x400},
+ {A5XX_SP_S_CS_CB_SIZE, 0x8},
+ {A5XX_SP_S_CS_UAV_SIZE, 0x80},
+ {A5XX_SP_S_3D_CONST_DIRTY, 0x12},
+ {A5XX_SP_S_3D_CB_SIZE_DIRTY, 0x1},
+ {A5XX_SP_S_3D_UAV_SIZE_DIRTY, 0x2},
+ {A5XX_SP_S_CS_CONST_DIRTY, 0xA},
+ {A5XX_SP_S_CS_CB_SIZE_DIRTY, 0x1},
+ {A5XX_SP_S_CS_UAV_SIZE_DIRTY, 0x2},
+ {A5XX_HLSQ_ICB_DIRTY, 0xB},
+ {A5XX_SP_POWER_RESTORE_RAM_TAG, 0xA},
+ {A5XX_TP_POWER_RESTORE_RAM_TAG, 0xA},
+ {A5XX_TP_W_SAMPLER, 0x80},
+ {A5XX_TP_W_MEMOBJ_TAG, 0x40},
+ {A5XX_TP_S_3D_MEMOBJ, 0x200},
+ {A5XX_TP_S_3D_MEMOBJ_TAG, 0x20},
+ {A5XX_TP_S_CS_MEMOBJ, 0x100},
+ {A5XX_TP_S_CS_MEMOBJ_TAG, 0x10},
+ {A5XX_SP_W_INSTR, 0x800},
+ {A5XX_SP_W_UAV_SIZE, 0x80},
+ {A5XX_SP_W_UAV_BASE, 0x80},
+ {A5XX_SP_W_INST_TAG, 0x40},
+ {A5XX_SP_S_3D_INSTR, 0x800},
+ {A5XX_SP_S_3D_CB_BASE, 0xC8},
+ {A5XX_SP_S_3D_UAV_BASE, 0x80},
+ {A5XX_SP_S_CS_INSTR, 0x400},
+ {A5XX_SP_S_CS_CB_BASE, 0x28},
+ {A5XX_SP_S_CS_UAV_BASE, 0x80},
+ {A5XX_SP_S_3D_INSTR_DIRTY, 0x1},
+ {A5XX_SP_S_3D_CB_BASE_DIRTY, 0x5},
+ {A5XX_SP_S_3D_UAV_BASE_DIRTY, 0x2},
+ {A5XX_SP_S_CS_INSTR_DIRTY, 0x1},
+ {A5XX_SP_S_CS_CB_BASE_DIRTY, 0x1},
+ {A5XX_SP_S_CS_UAV_BASE_DIRTY, 0x2},
+ {A5XX_HLSQ_ICB, 0x200},
+ {A5XX_HLSQ_ICB_CB_BASE_DIRTY, 0x4},
+ {A5XX_SP_POWER_RESTORE_RAM, 0x140},
+ {A5XX_TP_POWER_RESTORE_RAM, 0x40},
+};
+
+/*
+ * The A5XX architecture has a built-in engine to asynchronously dump
+ * registers from the GPU. It is used to accelerate the copy of hundreds
+ * (thousands) of registers and as a safe way to access registers that might
+ * have secure data in them (if the GPU is in secure mode, the crashdumper
+ * returns bogus values for those registers). On a fully secured device the
+ * CPU is blocked from accessing those registers directly, so the crashdumper
+ * is the only way to access context registers and the shader banks for debug
+ * purposes.
+ *
+ * The downside of the crashdumper is that it requires GPU accessible memory
+ * (so the VBIF, the bus and the SMMU need to be up and working) and enough
+ * memory to hold both the crashdumper script and the data being dumped, so
+ * there is a balancing act between the work needed to set up the crashdumper
+ * and the value we get out of it.
+ */
+
+/*
+ * The crashdumper uses a pseudo-script format to read and write registers.
+ * Each operation is two 64-bit values.
+ *
+ * READ:
+ * [qword 0] [63:00] - The absolute IOVA address target for the register value
+ * [qword 1] [63:44] - the dword address of the register offset to read
+ *           [15:00] - Number of dwords to read at once
+ *
+ * WRITE:
+ * [qword 0] [31:00] - the 32 bit value to write to the register
+ * [qword 1] [63:44] - the dword address of the register offset to write
+ *           [21:21] - set to 1 to mark the operation as a write
+ *           [15:00] - Number of dwords to write (usually 1)
+ *
+ * The script is terminated by a pair of zero quadwords.
+ */
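+/*
+ * As a worked illustration (hypothetical values, not part of the driver):
+ * a READ of 4 dwords from dword register offset 0x800 into the data region
+ * at IOVA 0x80001000 would be encoded as
+ *
+ *   ptr[0] = 0x80001000;
+ *   ptr[1] = ((u64) 0x800 << 44) | 4;
+ *
+ * and a WRITE of the value 1 to the same offset would be encoded as
+ *
+ *   ptr[0] = 1;
+ *   ptr[1] = ((u64) 0x800 << 44) | (1 << 21) | 1;
+ */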
+struct crashdump {
+ struct drm_gem_object *bo;
+ void *ptr;
+ u64 iova;
+ u32 index;
+};
+
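+/*
+ * The crashdump BO is split into two fixed regions: the first 256K holds the
+ * pseudo-script built with the helpers below and the remaining 768K holds the
+ * data that the READ operations target
+ */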
+#define CRASHDUMP_BO_SIZE (SZ_1M)
+#define CRASHDUMP_SCRIPT_SIZE (256 * SZ_1K)
+#define CRASHDUMP_DATA_SIZE (CRASHDUMP_BO_SIZE - CRASHDUMP_SCRIPT_SIZE)
+
+static int crashdump_init(struct msm_gpu *gpu, struct crashdump *crashdump)
+{
+ struct drm_device *drm = gpu->dev;
+ int ret = -ENOMEM;
+
+ crashdump->bo = msm_gem_new(drm, CRASHDUMP_BO_SIZE, MSM_BO_UNCACHED);
+ if (IS_ERR(crashdump->bo)) {
+ ret = PTR_ERR(crashdump->bo);
+ crashdump->bo = NULL;
+ return ret;
+ }
+
+ crashdump->ptr = msm_gem_vaddr_locked(crashdump->bo);
+ if (!crashdump->ptr)
+ goto out;
+
+ ret = msm_gem_get_iova_locked(crashdump->bo, gpu->aspace,
+ &crashdump->iova);
+
+out:
+ if (ret) {
+ drm_gem_object_unreference(crashdump->bo);
+ crashdump->bo = NULL;
+ }
+
+ return ret;
+}
+
+static int crashdump_run(struct msm_gpu *gpu, struct crashdump *crashdump)
+{
+ if (!crashdump->ptr || !crashdump->index)
+ return -EINVAL;
+
+ gpu_write(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
+ lower_32_bits(crashdump->iova));
+ gpu_write(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_HI,
+ upper_32_bits(crashdump->iova));
+
+ gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
+
+ return spin_until(gpu_read(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL) & 0x04);
+}
+
+static void crashdump_destroy(struct msm_gpu *gpu, struct crashdump *crashdump)
+{
+ if (!crashdump->bo)
+ return;
+
+ if (crashdump->iova)
+ msm_gem_put_iova(crashdump->bo, gpu->aspace);
+
+ drm_gem_object_unreference(crashdump->bo);
+
+ memset(crashdump, 0, sizeof(*crashdump));
+}
+
+static inline void CRASHDUMP_SCRIPT_WRITE(struct crashdump *crashdump,
+ u32 reg, u32 val)
+{
+ u64 *ptr = crashdump->ptr + crashdump->index;
+
+ if (WARN_ON(crashdump->index + (2 * sizeof(u64))
+ >= CRASHDUMP_SCRIPT_SIZE))
+ return;
+
+ /* This is the value to write */
+ ptr[0] = (u64) val;
+
+ /*
+ * This triggers a write to the specified register. 1 is the size of
+ * the write in dwords
+ */
+ ptr[1] = (((u64) reg) << 44) | (1 << 21) | 1;
+
+ crashdump->index += 2 * sizeof(u64);
+}
+
+static inline void CRASHDUMP_SCRIPT_READ(struct crashdump *crashdump,
+ u32 reg, u32 count, u32 offset)
+{
+ u64 *ptr = crashdump->ptr + crashdump->index;
+
+ if (WARN_ON(crashdump->index + (2 * sizeof(u64))
+ >= CRASHDUMP_SCRIPT_SIZE))
+ return;
+
+ if (WARN_ON(offset + (count * sizeof(u32)) >= CRASHDUMP_DATA_SIZE))
+ return;
+
+ ptr[0] = (u64) crashdump->iova + CRASHDUMP_SCRIPT_SIZE + offset;
+ ptr[1] = (((u64) reg) << 44) | count;
+
+ crashdump->index += 2 * sizeof(u64);
+}
+
+static inline void *CRASHDUMP_DATA_PTR(struct crashdump *crashdump, u32 offset)
+{
+ if (WARN_ON(!crashdump->ptr || offset >= CRASHDUMP_DATA_SIZE))
+ return NULL;
+
+ return crashdump->ptr + CRASHDUMP_SCRIPT_SIZE + offset;
+}
+
+static inline u32 CRASHDUMP_DATA_READ(struct crashdump *crashdump, u32 offset)
+{
+ return *((u32 *) CRASHDUMP_DATA_PTR(crashdump, offset));
+}
+
+static inline void CRASHDUMP_RESET(struct crashdump *crashdump)
+{
+ crashdump->index = 0;
+}
+
+static inline void CRASHDUMP_END(struct crashdump *crashdump)
+{
+ u64 *ptr = crashdump->ptr + crashdump->index;
+
+ if (WARN_ON((crashdump->index + (2 * sizeof(u64)))
+ >= CRASHDUMP_SCRIPT_SIZE))
+ return;
+
+ ptr[0] = 0;
+ ptr[1] = 0;
+
+ crashdump->index += 2 * sizeof(u64);
+}
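+
+/*
+ * A minimal usage sketch of the helpers above (REG_EXAMPLE is a hypothetical
+ * register offset): build a script that reads one register into the start of
+ * the data region, run it, and read the result back:
+ *
+ *   CRASHDUMP_RESET(&cd);
+ *   CRASHDUMP_SCRIPT_READ(&cd, REG_EXAMPLE, 1, 0);
+ *   CRASHDUMP_END(&cd);
+ *
+ *   if (!crashdump_run(gpu, &cd))
+ *       val = CRASHDUMP_DATA_READ(&cd, 0);
+ */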
+
+static u32 _crashdump_read_hlsq_aperture(struct crashdump *crashdump,
+ u32 offset, u32 statetype, u32 bank,
+ u32 count)
+{
+ CRASHDUMP_SCRIPT_WRITE(crashdump, REG_A5XX_HLSQ_DBG_READ_SEL,
+ A5XX_HLSQ_DBG_READ_SEL_STATETYPE(statetype) | bank);
+
+ CRASHDUMP_SCRIPT_READ(crashdump, REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE,
+ count, offset);
+
+ return count * sizeof(u32);
+}
+
+static u32 _copy_registers(struct msm_snapshot *snapshot,
+ struct crashdump *crashdump, u32 reg, u32 count,
+ u32 offset)
+{
+ int i;
+ u32 *ptr = (u32 *) (crashdump->ptr + CRASHDUMP_SCRIPT_SIZE + offset);
+ /*
+ * Write the offset of the first register of the group and the number of
+ * registers in the group
+ */
+ SNAPSHOT_WRITE_U32(snapshot, ((count << 16) | reg));
+
+ /* Followed by each register value in the group */
+ for (i = 0; i < count; i++)
+ SNAPSHOT_WRITE_U32(snapshot, ptr[i]);
+
+ return count * sizeof(u32);
+}
+
+/*
+ * Return the number of registers in each register group from the
+ * adreno_gpu->registers table
+ */
+static inline u32 REG_COUNT(const unsigned int *ptr)
+{
+ return (ptr[1] - ptr[0]) + 1;
+}
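+
+/*
+ * The table is a flat list of {start, end} register offset pairs terminated
+ * by ~0. As a hypothetical example:
+ *
+ *   static const unsigned int regs[] = {
+ *       0x0000, 0x0002,
+ *       0x0010, 0x0010,
+ *       ~0,
+ *   };
+ *
+ * REG_COUNT(&regs[0]) is 3 (0x0000 through 0x0002) and REG_COUNT(&regs[2])
+ * is 1
+ */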
+
+/*
+ * Capture what registers we can from the CPU in case the crashdumper is
+ * unavailable or broken. This will omit the SP, TP and HLSQ registers, but
+ * you'll get everything else, which is better than nothing
+ */
+static void a5xx_snapshot_registers_cpu(struct msm_gpu *gpu,
+ struct msm_snapshot *snapshot)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct msm_snapshot_regs header;
+ u32 regcount = 0, groups = 0;
+ int i;
+
+ /*
+ * Before we write the section we need to figure out how big our data
+ * section will be
+ */
+ for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) {
+ regcount += REG_COUNT(&(adreno_gpu->registers[i]));
+ groups++;
+ }
+
+ header.count = groups;
+
+ /*
+ * We need one dword for each group and then one dword for each register
+ * value in that group
+ */
+ if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_REGS_V2,
+ regcount + groups))
+ return;
+
+ for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) {
+ u32 count = REG_COUNT(&(adreno_gpu->registers[i]));
+ u32 reg = adreno_gpu->registers[i];
+ int j;
+
+ /* Write the offset and count for the group */
+ SNAPSHOT_WRITE_U32(snapshot, (count << 16) | reg);
+
+ /* Write each value in the group */
+ for (j = 0; j < count; j++)
+ SNAPSHOT_WRITE_U32(snapshot, gpu_read(gpu, reg++));
+ }
+}
+
+static void a5xx_snapshot_registers(struct msm_gpu *gpu,
+ struct msm_snapshot *snapshot)
+{
+ struct msm_snapshot_regs header;
+ struct crashdump *crashdump = snapshot->priv;
+ u32 offset = 0, regcount = 0, groups = 0;
+ int i;
+
+ /*
+ * First snapshot all the registers that we can from the CPU. Do this
+ * because the crashdumper has a tendency to "taint" the value of some
+ * of the registers (because the GPU implements the crashdumper) so we
+ * only want to use the crash dump facility if we have to
+ */
+ a5xx_snapshot_registers_cpu(gpu, snapshot);
+
+ if (!crashdump)
+ return;
+
+ CRASHDUMP_RESET(crashdump);
+
+ /* HLSQ and context registers behind the aperture */
+ for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
+ u32 count = a5xx_hlsq_aperture_regs[i].count;
+
+ offset += _crashdump_read_hlsq_aperture(crashdump, offset,
+ a5xx_hlsq_aperture_regs[i].type, 0, count);
+ regcount += count;
+
+ groups++;
+ }
+
+ CRASHDUMP_END(crashdump);
+
+ if (crashdump_run(gpu, crashdump))
+ return;
+
+ header.count = groups;
+
+ /*
+ * The size of the data will be one dword for each "group" of registers,
+ * and then one dword for each of the registers in that group
+ */
+ if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_REGS_V2,
+ groups + regcount))
+ return;
+
+ /* Copy the registers to the snapshot */
+ for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
+ offset += _copy_registers(snapshot, crashdump,
+ a5xx_hlsq_aperture_regs[i].regoffset,
+ a5xx_hlsq_aperture_regs[i].count, offset);
+}
+
+static void _a5xx_snapshot_shader_bank(struct msm_snapshot *snapshot,
+ struct crashdump *crashdump, u32 block, u32 bank,
+ u32 size, u32 offset)
+{
+ void *src;
+
+ struct msm_snapshot_shader header = {
+ .type = block,
+ .index = bank,
+ .size = size,
+ };
+
+ if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_SHADER, size))
+ return;
+
+ src = CRASHDUMP_DATA_PTR(crashdump, offset);
+
+ if (src)
+ SNAPSHOT_MEMCPY(snapshot, src, size * sizeof(u32));
+}
+
+static void a5xx_snapshot_shader_memory(struct msm_gpu *gpu,
+ struct msm_snapshot *snapshot)
+{
+ struct crashdump *crashdump = snapshot->priv;
+ u32 offset = 0;
+ int i;
+
+ /* We can only get shader memory through the crashdump */
+ if (!crashdump)
+ return;
+
+ CRASHDUMP_RESET(crashdump);
+
+ /* For each shader block */
+ for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) {
+ int j;
+
+ /* For each block, dump 4 banks */
+ for (j = 0; j < A5XX_NR_SHADER_BANKS; j++)
+ offset += _crashdump_read_hlsq_aperture(crashdump,
+ offset, a5xx_shader_blocks[i].id, j,
+ a5xx_shader_blocks[i].size);
+ }
+
+ CRASHDUMP_END(crashdump);
+
+ /* If the crashdump fails we can't get shader memory any other way */
+ if (crashdump_run(gpu, crashdump))
+ return;
+
+ /* Each bank of each shader gets its own snapshot section */
+ for (offset = 0, i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) {
+ int j;
+
+ for (j = 0; j < A5XX_NR_SHADER_BANKS; j++) {
+ _a5xx_snapshot_shader_bank(snapshot, crashdump,
+ a5xx_shader_blocks[i].id, j,
+ a5xx_shader_blocks[i].size, offset);
+ offset += a5xx_shader_blocks[i].size * sizeof(u32);
+ }
+ }
+}
+
+#define A5XX_NUM_AXI_ARB_BLOCKS 2
+#define A5XX_NUM_XIN_BLOCKS 4
+#define VBIF_DATA_SIZE ((16 * A5XX_NUM_AXI_ARB_BLOCKS) + \
+ (18 * A5XX_NUM_XIN_BLOCKS) + (12 * A5XX_NUM_XIN_BLOCKS))
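+/*
+ * With 2 AXI ARB blocks and 4 XIN blocks this works out to (16 * 2) +
+ * (18 * 4) + (12 * 4) = 152 dwords, matching the three capture loops below
+ */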
+
+static void a5xx_snapshot_debugbus_vbif(struct msm_gpu *gpu,
+ struct msm_snapshot *snapshot)
+{
+ int i;
+ struct msm_snapshot_debugbus header = {
+ .id = A5XX_RBBM_DBGBUS_VBIF,
+ .count = VBIF_DATA_SIZE,
+ };
+
+ if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_DEBUGBUS,
+ VBIF_DATA_SIZE))
+ return;
+
+ gpu_rmw(gpu, REG_A5XX_VBIF_CLKON, A5XX_VBIF_CLKON_FORCE_ON_TESTBUS,
+ A5XX_VBIF_CLKON_FORCE_ON_TESTBUS);
+
+ gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS1_CTRL0, 0);
+ gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS_OUT_CTRL,
+ A5XX_VBIF_TEST_BUS_OUT_CTRL_TEST_BUS_CTRL_EN);
+
+ for (i = 0; i < A5XX_NUM_AXI_ARB_BLOCKS; i++) {
+ int j;
+
+ gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS2_CTRL0, 1 << (i + 16));
+ for (j = 0; j < 16; j++) {
+ gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS2_CTRL1,
+ A5XX_VBIF_TEST_BUS2_CTRL1_TEST_BUS2_DATA_SEL(j));
+ SNAPSHOT_WRITE_U32(snapshot, gpu_read(gpu,
+ REG_A5XX_VBIF_TEST_BUS_OUT));
+ }
+ }
+
+ for (i = 0; i < A5XX_NUM_XIN_BLOCKS; i++) {
+ int j;
+
+ gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS2_CTRL0, 1 << i);
+ for (j = 0; j < 18; j++) {
+ gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS2_CTRL1,
+ A5XX_VBIF_TEST_BUS2_CTRL1_TEST_BUS2_DATA_SEL(j));
+ SNAPSHOT_WRITE_U32(snapshot,
+ gpu_read(gpu, REG_A5XX_VBIF_TEST_BUS_OUT));
+ }
+ }
+
+ for (i = 0; i < A5XX_NUM_XIN_BLOCKS; i++) {
+ int j;
+
+ gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS1_CTRL0, 1 << i);
+ for (j = 0; j < 12; j++) {
+ gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS1_CTRL1,
+ A5XX_VBIF_TEST_BUS1_CTRL1_TEST_BUS1_DATA_SEL(j));
+ SNAPSHOT_WRITE_U32(snapshot, gpu_read(gpu,
+ REG_A5XX_VBIF_TEST_BUS_OUT));
+ }
+ }
+}
+
+static void a5xx_snapshot_debugbus_block(struct msm_gpu *gpu,
+ struct msm_snapshot *snapshot, u32 block, u32 count)
+{
+ int i;
+ struct msm_snapshot_debugbus header = {
+ .id = block,
+ .count = count * 2, /* Each value is 2 dwords */
+ };
+
+ if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_DEBUGBUS,
+ (count * 2)))
+ return;
+
+ for (i = 0; i < count; i++) {
+ u32 reg = A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_INDEX(i) |
+ A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
+
+ gpu_write(gpu, REG_A5XX_RBBM_CFG_DBGBUS_SEL_A, reg);
+ gpu_write(gpu, REG_A5XX_RBBM_CFG_DBGBUS_SEL_B, reg);
+ gpu_write(gpu, REG_A5XX_RBBM_CFG_DBGBUS_SEL_C, reg);
+ gpu_write(gpu, REG_A5XX_RBBM_CFG_DBGBUS_SEL_D, reg);
+
+ /* Each debugbus entry is a quad word */
+ SNAPSHOT_WRITE_U32(snapshot, gpu_read(gpu,
+ REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2));
+ SNAPSHOT_WRITE_U32(snapshot,
+ gpu_read(gpu, REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1));
+ }
+}
+
+static void a5xx_snapshot_debugbus(struct msm_gpu *gpu,
+ struct msm_snapshot *snapshot)
+{
+ int i;
+
+ gpu_write(gpu, REG_A5XX_RBBM_CFG_DBGBUS_CNTLM,
+ A5XX_RBBM_CFG_DBGBUS_CNTLM_ENABLE(0xF));
+
+ for (i = 0; i < ARRAY_SIZE(a5xx_debugbus_blocks); i++)
+ a5xx_snapshot_debugbus_block(gpu, snapshot,
+ a5xx_debugbus_blocks[i].id,
+ a5xx_debugbus_blocks[i].count);
+
+ /* VBIF is special and not in a good way */
+ a5xx_snapshot_debugbus_vbif(gpu, snapshot);
+}
+
+static void a5xx_snapshot_cp_merciu(struct msm_gpu *gpu,
+ struct msm_snapshot *snapshot)
+{
+ unsigned int i;
+ struct msm_snapshot_debug header = {
+ .type = SNAPSHOT_DEBUG_CP_MERCIU,
+ .size = 64 << 1, /* Data size is 2 dwords per entry */
+ };
+
+ if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_DEBUG, 64 << 1))
+ return;
+
+ gpu_write(gpu, REG_A5XX_CP_MERCIU_DBG_ADDR, 0);
+ for (i = 0; i < 64; i++) {
+ SNAPSHOT_WRITE_U32(snapshot,
+ gpu_read(gpu, REG_A5XX_CP_MERCIU_DBG_DATA_1));
+ SNAPSHOT_WRITE_U32(snapshot,
+ gpu_read(gpu, REG_A5XX_CP_MERCIU_DBG_DATA_2));
+ }
+}
+
+static void a5xx_snapshot_cp_roq(struct msm_gpu *gpu,
+ struct msm_snapshot *snapshot)
+{
+ int i;
+ struct msm_snapshot_debug header = {
+ .type = SNAPSHOT_DEBUG_CP_ROQ,
+ .size = 512,
+ };
+
+ if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_DEBUG, 512))
+ return;
+
+ gpu_write(gpu, REG_A5XX_CP_ROQ_DBG_ADDR, 0);
+ for (i = 0; i < 512; i++)
+ SNAPSHOT_WRITE_U32(snapshot,
+ gpu_read(gpu, REG_A5XX_CP_ROQ_DBG_DATA));
+}
+
+static void a5xx_snapshot_cp_meq(struct msm_gpu *gpu,
+ struct msm_snapshot *snapshot)
+{
+ int i;
+ struct msm_snapshot_debug header = {
+ .type = SNAPSHOT_DEBUG_CP_MEQ,
+ .size = 64,
+ };
+
+ if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_DEBUG, 64))
+ return;
+
+ gpu_write(gpu, REG_A5XX_CP_MEQ_DBG_ADDR, 0);
+ for (i = 0; i < 64; i++)
+ SNAPSHOT_WRITE_U32(snapshot,
+ gpu_read(gpu, REG_A5XX_CP_MEQ_DBG_DATA));
+}
+
+static void a5xx_snapshot_indexed_registers(struct msm_gpu *gpu,
+ struct msm_snapshot *snapshot, u32 addr, u32 data,
+ u32 count)
+{
+ unsigned int i;
+ struct msm_snapshot_indexed_regs header = {
+ .index_reg = addr,
+ .data_reg = data,
+ .start = 0,
+ .count = count,
+ };
+
+ if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_INDEXED_REGS,
+ count))
+ return;
+
+ for (i = 0; i < count; i++) {
+ gpu_write(gpu, addr, i);
+ SNAPSHOT_WRITE_U32(snapshot, gpu_read(gpu, data));
+ }
+}
+
+int a5xx_snapshot(struct msm_gpu *gpu, struct msm_snapshot *snapshot)
+{
+ struct crashdump crashdump = { 0 };
+
+ if (!crashdump_init(gpu, &crashdump))
+ snapshot->priv = &crashdump;
+
+ /* To accurately read all registers, disable hardware clock gating */
+ a5xx_set_hwcg(gpu, false);
+
+ /* Kick it up to the generic level */
+ adreno_snapshot(gpu, snapshot);
+
+ /* Read the GPU registers */
+ a5xx_snapshot_registers(gpu, snapshot);
+
+ /* Read the shader memory banks */
+ a5xx_snapshot_shader_memory(gpu, snapshot);
+
+ /* Read the debugbus registers */
+ a5xx_snapshot_debugbus(gpu, snapshot);
+
+ /* PFP data */
+ a5xx_snapshot_indexed_registers(gpu, snapshot,
+ REG_A5XX_CP_PFP_STAT_ADDR, REG_A5XX_CP_PFP_STAT_DATA, 36);
+
+ /* ME data */
+ a5xx_snapshot_indexed_registers(gpu, snapshot,
+ REG_A5XX_CP_ME_STAT_ADDR, REG_A5XX_CP_ME_STAT_DATA, 29);
+
+ /* DRAW_STATE data */
+ a5xx_snapshot_indexed_registers(gpu, snapshot,
+ REG_A5XX_CP_DRAW_STATE_ADDR, REG_A5XX_CP_DRAW_STATE_DATA,
+ 256);
+
+ /* ME cache */
+ a5xx_snapshot_indexed_registers(gpu, snapshot,
+ REG_A5XX_CP_ME_UCODE_DBG_ADDR, REG_A5XX_CP_ME_UCODE_DBG_DATA,
+ 0x53F);
+
+ /* PFP cache */
+ a5xx_snapshot_indexed_registers(gpu, snapshot,
+ REG_A5XX_CP_PFP_UCODE_DBG_ADDR, REG_A5XX_CP_PFP_UCODE_DBG_DATA,
+ 0x53F);
+
+ /* ME queue */
+ a5xx_snapshot_cp_meq(gpu, snapshot);
+
+ /* CP ROQ */
+ a5xx_snapshot_cp_roq(gpu, snapshot);
+
+ /* CP MERCIU */
+ a5xx_snapshot_cp_merciu(gpu, snapshot);
+
+ crashdump_destroy(gpu, &crashdump);
+ snapshot->priv = NULL;
+
+ /* Re-enable HWCG */
+ a5xx_set_hwcg(gpu, true);
+ return 0;
+}
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index e67c63c9a3ac..f1883825354e 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -2,7 +2,7 @@
* Copyright (C) 2013 Red Hat
* Author: Rob Clark <robdclark@gmail.com>
*
- * Copyright (c) 2014,2016 The Linux Foundation. All rights reserved.
+ * Copyright (c) 2014,2016-2017 The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
@@ -17,12 +17,12 @@
* this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/utsname.h>
#include "adreno_gpu.h"
+#include "msm_snapshot.h"
#include "msm_gem.h"
#include "msm_mmu.h"
-#define RB_SIZE SZ_32K
-#define RB_BLKSIZE 32
int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
{
@@ -35,6 +35,9 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
case MSM_PARAM_GMEM_SIZE:
*value = adreno_gpu->gmem;
return 0;
+ case MSM_PARAM_GMEM_BASE:
+ *value = 0x100000;
+ return 0;
case MSM_PARAM_CHIP_ID:
*value = adreno_gpu->rev.patchid |
(adreno_gpu->rev.minor << 8) |
@@ -57,70 +60,106 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
int adreno_hw_init(struct msm_gpu *gpu)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
- int ret;
+ int i;
DBG("%s", gpu->name);
- ret = msm_gem_get_iova(gpu->rb->bo, gpu->aspace, &gpu->rb_iova);
- if (ret) {
- gpu->rb_iova = 0;
- dev_err(gpu->dev->dev, "could not map ringbuffer: %d\n", ret);
- return ret;
+ for (i = 0; i < gpu->nr_rings; i++) {
+ int ret = msm_gem_get_iova(gpu->rb[i]->bo, gpu->aspace,
+ &gpu->rb[i]->iova);
+ if (ret) {
+ gpu->rb[i]->iova = 0;
+ dev_err(gpu->dev->dev,
+ "could not map ringbuffer %d: %d\n", i, ret);
+ return ret;
+ }
}
- /* Setup REG_CP_RB_CNTL: */
+ /*
+ * Setup REG_CP_RB_CNTL. The same value is used across targets (with
+ * the exception of A430, which disables the RPTR shadow) - the
+ * calculation for the ringbuffer size and block size is moved to
+ * msm_gpu.h for the pre-processor to deal with, and the A430 variant
+ * is ORed in here
+ */
adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL,
- /* size is log2(quad-words): */
- AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) |
- AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) |
- (adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0));
+ MSM_GPU_RB_CNTL_DEFAULT |
+ (adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0));
- /* Setup ringbuffer address */
+ /* Setup ringbuffer address - use ringbuffer[0] for GPU init */
adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_BASE,
- REG_ADRENO_CP_RB_BASE_HI, gpu->rb_iova);
+ REG_ADRENO_CP_RB_BASE_HI, gpu->rb[0]->iova);
adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR,
- REG_ADRENO_CP_RB_RPTR_ADDR_HI, rbmemptr(adreno_gpu, rptr));
+ REG_ADRENO_CP_RB_RPTR_ADDR_HI, rbmemptr(adreno_gpu, 0, rptr));
return 0;
}
-static uint32_t get_wptr(struct msm_ringbuffer *ring)
+/* Use this helper to read rptr, since a430 doesn't update rptr in memory */
+static uint32_t get_rptr(struct adreno_gpu *adreno_gpu,
+ struct msm_ringbuffer *ring)
{
- return ring->cur - ring->start;
+ if (adreno_is_a430(adreno_gpu)) {
+ /*
+ * If index is anything but 0 this will probably break horribly,
+ * but I think that we have enough infrastructure in place to
+ * ensure that it won't be. If not then this is why your
+ * a430 stopped working.
+ */
+ return adreno_gpu->memptrs->rptr[ring->id] = adreno_gpu_read(
+ adreno_gpu, REG_ADRENO_CP_RB_RPTR);
+ } else
+ return adreno_gpu->memptrs->rptr[ring->id];
}
-/* Use this helper to read rptr, since a430 doesn't update rptr in memory */
-static uint32_t get_rptr(struct adreno_gpu *adreno_gpu)
+struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu)
{
- if (adreno_is_a430(adreno_gpu))
- return adreno_gpu->memptrs->rptr = adreno_gpu_read(
- adreno_gpu, REG_ADRENO_CP_RB_RPTR);
- else
- return adreno_gpu->memptrs->rptr;
+ return gpu->rb[0];
}
-uint32_t adreno_last_fence(struct msm_gpu *gpu)
+uint32_t adreno_submitted_fence(struct msm_gpu *gpu,
+ struct msm_ringbuffer *ring)
+{
+ if (!ring)
+ return 0;
+
+ return ring->submitted_fence;
+}
+
+uint32_t adreno_last_fence(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
- return adreno_gpu->memptrs->fence;
+
+ if (!ring)
+ return 0;
+
+ return adreno_gpu->memptrs->fence[ring->id];
}
void adreno_recover(struct msm_gpu *gpu)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct drm_device *dev = gpu->dev;
- int ret;
+ struct msm_ringbuffer *ring;
+ int ret, i;
gpu->funcs->pm_suspend(gpu);
- /* reset ringbuffer: */
- gpu->rb->cur = gpu->rb->start;
+ /* reset ringbuffer(s): */
- /* reset completed fence seqno, just discard anything pending: */
- adreno_gpu->memptrs->fence = gpu->submitted_fence;
- adreno_gpu->memptrs->rptr = 0;
- adreno_gpu->memptrs->wptr = 0;
+ FOR_EACH_RING(gpu, ring, i) {
+ if (!ring)
+ continue;
+
+ /* No need for a lock here, nobody else is peeking in */
+ ring->cur = ring->start;
+ ring->next = ring->start;
+
+ /* reset completed fence seqno, discard anything pending: */
+ adreno_gpu->memptrs->fence[ring->id] =
+ adreno_submitted_fence(gpu, ring);
+ adreno_gpu->memptrs->rptr[ring->id] = 0;
+ }
gpu->funcs->pm_resume(gpu);
@@ -133,12 +172,10 @@ void adreno_recover(struct msm_gpu *gpu)
enable_irq(gpu->irq);
}
-int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
- struct msm_file_private *ctx)
+int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
- struct msm_drm_private *priv = gpu->dev->dev_private;
- struct msm_ringbuffer *ring = gpu->rb;
+ struct msm_ringbuffer *ring = gpu->rb[submit->ring];
unsigned i, ibs = 0;
for (i = 0; i < submit->nr_cmds; i++) {
@@ -147,8 +184,6 @@ int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
/* ignore IB-targets */
break;
case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
- /* ignore if there has not been a ctx switch: */
- if (priv->lastctx == ctx)
break;
case MSM_SUBMIT_CMD_BUF:
OUT_PKT3(ring, adreno_is_a430(adreno_gpu) ?
@@ -184,7 +219,7 @@ int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
OUT_PKT3(ring, CP_EVENT_WRITE, 3);
OUT_RING(ring, CACHE_FLUSH_TS);
- OUT_RING(ring, rbmemptr(adreno_gpu, fence));
+ OUT_RING(ring, rbmemptr(adreno_gpu, ring->id, fence));
OUT_RING(ring, submit->fence);
/* we could maybe be clever and only CP_COND_EXEC the interrupt: */
@@ -211,22 +246,25 @@ int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
}
#endif
- gpu->funcs->flush(gpu);
+ gpu->funcs->flush(gpu, ring);
return 0;
}
-void adreno_flush(struct msm_gpu *gpu)
+void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
uint32_t wptr;
+ /* Copy the shadow to the actual register */
+ ring->cur = ring->next;
+
/*
* Mask the wptr value that we calculate to fit in the HW range. This is
* to account for the possibility that the last command fit exactly into
* the ringbuffer and rb->next hasn't wrapped to zero yet
*/
- wptr = get_wptr(gpu->rb) & ((gpu->rb->size / 4) - 1);
+ wptr = get_wptr(ring);
/* ensure writes to ringbuffer have hit system memory: */
mb();
@@ -234,17 +272,19 @@ void adreno_flush(struct msm_gpu *gpu)
adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_WPTR, wptr);
}
-bool adreno_idle(struct msm_gpu *gpu)
+bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
- uint32_t wptr = get_wptr(gpu->rb);
+ uint32_t wptr = get_wptr(ring);
/* wait for CP to drain ringbuffer: */
- if (!spin_until(get_rptr(adreno_gpu) == wptr))
+ if (!spin_until(get_rptr(adreno_gpu, ring) == wptr))
return true;
/* TODO maybe we need to reset GPU here to recover from hang? */
- DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name);
+ DRM_ERROR("%s: timeout waiting to drain ringbuffer %d rptr/wptr = %X/%X\n",
+ gpu->name, ring->id, get_rptr(adreno_gpu, ring), wptr);
+
return false;
}
@@ -252,6 +292,7 @@ bool adreno_idle(struct msm_gpu *gpu)
void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct msm_ringbuffer *ring;
int i;
seq_printf(m, "revision: %d (%d.%d.%d.%d)\n",
@@ -259,11 +300,18 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
adreno_gpu->rev.major, adreno_gpu->rev.minor,
adreno_gpu->rev.patchid);
- seq_printf(m, "fence: %d/%d\n", adreno_gpu->memptrs->fence,
- gpu->submitted_fence);
- seq_printf(m, "rptr: %d\n", get_rptr(adreno_gpu));
- seq_printf(m, "wptr: %d\n", adreno_gpu->memptrs->wptr);
- seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb));
+ FOR_EACH_RING(gpu, ring, i) {
+ if (!ring)
+ continue;
+
+ seq_printf(m, "rb %d: fence: %d/%d\n", i,
+ adreno_last_fence(gpu, ring),
+ adreno_submitted_fence(gpu, ring));
+
+ seq_printf(m, " rptr: %d\n",
+ get_rptr(adreno_gpu, ring));
+ seq_printf(m, "rb wptr: %d\n", get_wptr(ring));
+ }
gpu->funcs->pm_resume(gpu);
@@ -292,22 +340,29 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
*/
void adreno_dump_info(struct msm_gpu *gpu)
{
+ struct drm_device *dev = gpu->dev;
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct msm_ringbuffer *ring;
int i;
- printk("revision: %d (%d.%d.%d.%d)\n",
+ dev_err(dev->dev, "revision: %d (%d.%d.%d.%d)\n",
adreno_gpu->info->revn, adreno_gpu->rev.core,
adreno_gpu->rev.major, adreno_gpu->rev.minor,
adreno_gpu->rev.patchid);
- printk("fence: %d/%d\n", adreno_gpu->memptrs->fence,
- gpu->submitted_fence);
- printk("rptr: %d\n", get_rptr(adreno_gpu));
- printk("wptr: %d\n", adreno_gpu->memptrs->wptr);
- printk("rb wptr: %d\n", get_wptr(gpu->rb));
+ FOR_EACH_RING(gpu, ring, i) {
+ if (!ring)
+ continue;
+
+ dev_err(dev->dev, " ring %d: fence %d/%d rptr/wptr %x/%x\n", i,
+ adreno_last_fence(gpu, ring),
+ adreno_submitted_fence(gpu, ring),
+ get_rptr(adreno_gpu, ring),
+ get_wptr(ring));
+ }
for (i = 0; i < 8; i++) {
- printk("CP_SCRATCH_REG%d: %u\n", i,
+ pr_err("CP_SCRATCH_REG%d: %u\n", i,
gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
}
}
@@ -332,19 +387,21 @@ void adreno_dump(struct msm_gpu *gpu)
}
}
-static uint32_t ring_freewords(struct msm_gpu *gpu)
+static uint32_t ring_freewords(struct msm_ringbuffer *ring)
{
- struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
- uint32_t size = gpu->rb->size / 4;
- uint32_t wptr = get_wptr(gpu->rb);
- uint32_t rptr = get_rptr(adreno_gpu);
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(ring->gpu);
+ uint32_t size = MSM_GPU_RINGBUFFER_SZ >> 2;
+ /* Use ring->next to calculate free size */
+ uint32_t wptr = ring->next - ring->start;
+ uint32_t rptr = get_rptr(adreno_gpu, ring);
return (rptr + (size - 1) - wptr) % size;
}
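+
+/*
+ * Note that one slot is always kept in reserve so that a completely full
+ * ring can be told apart from an empty one: with rptr == wptr the formula
+ * above yields size - 1 free words, never size
+ */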
-void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords)
+void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords)
{
- if (spin_until(ring_freewords(gpu) >= ndwords))
- DRM_ERROR("%s: timeout waiting for ringbuffer space\n", gpu->name);
+ if (spin_until(ring_freewords(ring) >= ndwords))
+ DRM_ERROR("%s: timeout waiting for space in ringbuffer %d\n",
+ ring->gpu->name, ring->id);
}
static const char *iommu_ports[] = {
@@ -465,9 +522,11 @@ static int adreno_of_parse(struct platform_device *pdev, struct msm_gpu *gpu)
}
int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
- struct adreno_gpu *adreno_gpu, const struct adreno_gpu_funcs *funcs)
+ struct adreno_gpu *adreno_gpu,
+ const struct adreno_gpu_funcs *funcs, int nr_rings)
{
struct adreno_platform_config *config = pdev->dev.platform_data;
+ struct msm_gpu_config adreno_gpu_config = { 0 };
struct msm_gpu *gpu = &adreno_gpu->base;
struct msm_mmu *mmu;
int ret;
@@ -481,9 +540,26 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
/* Get the rest of the target configuration from the device tree */
adreno_of_parse(pdev, gpu);
+ adreno_gpu_config.ioname = "kgsl_3d0_reg_memory";
+ adreno_gpu_config.irqname = "kgsl_3d0_irq";
+ adreno_gpu_config.nr_rings = nr_rings;
+
+ if (adreno_gpu->revn >= 500) {
+ /* 5XX targets use a 64 bit region */
+ adreno_gpu_config.va_start = 0x800000000;
+ adreno_gpu_config.va_end = 0x8ffffffff;
+ } else {
+ adreno_gpu_config.va_start = 0x300000;
+ adreno_gpu_config.va_end = 0xffffffff;
+ }
+
ret = msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base,
- adreno_gpu->info->name, "kgsl_3d0_reg_memory", "kgsl_3d0_irq",
- RB_SIZE);
+ adreno_gpu->info->name, &adreno_gpu_config);
if (ret)
return ret;
@@ -542,7 +618,7 @@ void adreno_gpu_cleanup(struct adreno_gpu *gpu)
if (gpu->memptrs_bo) {
if (gpu->memptrs_iova)
- msm_gem_put_iova(gpu->memptrs_bo, gpu->base.aspace);
+ msm_gem_put_iova(gpu->memptrs_bo, aspace);
drm_gem_object_unreference_unlocked(gpu->memptrs_bo);
}
release_firmware(gpu->pm4);
@@ -551,8 +627,85 @@ void adreno_gpu_cleanup(struct adreno_gpu *gpu)
msm_gpu_cleanup(&gpu->base);
if (aspace) {
- aspace->mmu->funcs->detach(aspace->mmu,
- iommu_ports, ARRAY_SIZE(iommu_ports));
- msm_gem_address_space_destroy(aspace);
+ aspace->mmu->funcs->detach(aspace->mmu);
+ msm_gem_address_space_put(aspace);
}
}
+
+static void adreno_snapshot_os(struct msm_gpu *gpu,
+ struct msm_snapshot *snapshot)
+{
+ struct msm_snapshot_linux header;
+
+ memset(&header, 0, sizeof(header));
+
+ header.osid = SNAPSHOT_OS_LINUX_V3;
+ strlcpy(header.release, utsname()->release, sizeof(header.release));
+ strlcpy(header.version, utsname()->version, sizeof(header.version));
+
+ header.seconds = get_seconds();
+ header.ctxtcount = 0;
+
+ SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_OS, 0);
+}
+
+static void adreno_snapshot_ringbuffer(struct msm_gpu *gpu,
+ struct msm_snapshot *snapshot, struct msm_ringbuffer *ring)
+{
+ struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+ struct msm_snapshot_ringbuffer header;
+ unsigned int i, end = 0;
+ unsigned int *data = ring->start;
+
+ memset(&header, 0, sizeof(header));
+
+ /*
+ * We only want to copy the active contents of each ring, so find the
+ * last valid entry in the ringbuffer
+ */
+ for (i = 0; i < MSM_GPU_RINGBUFFER_SZ >> 2; i++) {
+ if (data[i])
+ end = i;
+ }
+
+ /* The dump always starts at 0 */
+ header.start = 0;
+ header.end = end;
+
+ /* This is the number of dwords being dumped */
+ header.count = end + 1;
+
+ /* This is the size of the actual ringbuffer */
+ header.rbsize = MSM_GPU_RINGBUFFER_SZ >> 2;
+
+ header.id = ring->id;
+ header.gpuaddr = ring->iova;
+ header.rptr = get_rptr(adreno_gpu, ring);
+ header.wptr = get_wptr(ring);
+ header.timestamp_queued = adreno_submitted_fence(gpu, ring);
+ header.timestamp_retired = adreno_last_fence(gpu, ring);
+
+ /* Write the header even if the ringbuffer data is empty */
+ if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_RB_V2,
+ header.count))
+ return;
+
+ SNAPSHOT_MEMCPY(snapshot, ring->start, header.count * sizeof(u32));
+}
+
+static void adreno_snapshot_ringbuffers(struct msm_gpu *gpu,
+ struct msm_snapshot *snapshot)
+{
+ struct msm_ringbuffer *ring;
+ int i;
+
+ /* Write a new section for each ringbuffer */
+ FOR_EACH_RING(gpu, ring, i)
+ adreno_snapshot_ringbuffer(gpu, snapshot, ring);
+}
+
+void adreno_snapshot(struct msm_gpu *gpu, struct msm_snapshot *snapshot)
+{
+ adreno_snapshot_os(gpu, snapshot);
+ adreno_snapshot_ringbuffers(gpu, snapshot);
+}
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index 9bcf4bb705bd..30461115281c 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -2,7 +2,7 @@
* Copyright (C) 2013 Red Hat
* Author: Rob Clark <robdclark@gmail.com>
*
- * Copyright (c) 2014,2016 The Linux Foundation. All rights reserved.
+ * Copyright (c) 2014,2016-2017 The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
@@ -83,13 +83,20 @@ struct adreno_info {
const struct adreno_info *adreno_info(struct adreno_rev rev);
-#define rbmemptr(adreno_gpu, member) \
+#define _sizeof(member) \
+ sizeof(((struct adreno_rbmemptrs *) 0)->member[0])
+
+#define _base(adreno_gpu, member) \
((adreno_gpu)->memptrs_iova + offsetof(struct adreno_rbmemptrs, member))
+#define rbmemptr(adreno_gpu, index, member) \
+ (_base((adreno_gpu), member) + ((index) * _sizeof(member)))
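+
+/*
+ * For example, rbmemptr(adreno_gpu, 2, fence) resolves to the IOVA of
+ * memptrs->fence[2]: the base of the fence array plus 2 * sizeof(uint32_t)
+ */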
+
struct adreno_rbmemptrs {
- volatile uint32_t rptr;
- volatile uint32_t wptr;
- volatile uint32_t fence;
+ volatile uint32_t rptr[MSM_GPU_MAX_RINGS];
+ volatile uint32_t fence[MSM_GPU_MAX_RINGS];
+ volatile uint64_t ttbr0[MSM_GPU_MAX_RINGS];
+ volatile unsigned int contextidr[MSM_GPU_MAX_RINGS];
};
struct adreno_gpu {
@@ -206,30 +213,34 @@ static inline int adreno_is_a540(struct adreno_gpu *gpu)
int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value);
int adreno_hw_init(struct msm_gpu *gpu);
-uint32_t adreno_last_fence(struct msm_gpu *gpu);
+uint32_t adreno_last_fence(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
+uint32_t adreno_submitted_fence(struct msm_gpu *gpu,
+ struct msm_ringbuffer *ring);
void adreno_recover(struct msm_gpu *gpu);
-int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
- struct msm_file_private *ctx);
-void adreno_flush(struct msm_gpu *gpu);
-bool adreno_idle(struct msm_gpu *gpu);
+int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit);
+void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
+bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
#ifdef CONFIG_DEBUG_FS
void adreno_show(struct msm_gpu *gpu, struct seq_file *m);
#endif
void adreno_dump_info(struct msm_gpu *gpu);
void adreno_dump(struct msm_gpu *gpu);
-void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords);
+void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords);
+struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu);
int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
- struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs);
+ struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs,
+ int nr_rings);
void adreno_gpu_cleanup(struct adreno_gpu *gpu);
+void adreno_snapshot(struct msm_gpu *gpu, struct msm_snapshot *snapshot);
/* ringbuffer helpers (the parts that are adreno specific) */
static inline void
OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
- adreno_wait_ring(ring->gpu, cnt+1);
+ adreno_wait_ring(ring, cnt+1);
OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF));
}
@@ -237,14 +248,14 @@ OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
static inline void
OUT_PKT2(struct msm_ringbuffer *ring)
{
- adreno_wait_ring(ring->gpu, 1);
+ adreno_wait_ring(ring, 1);
OUT_RING(ring, CP_TYPE2_PKT);
}
static inline void
OUT_PKT3(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
{
- adreno_wait_ring(ring->gpu, cnt+1);
+ adreno_wait_ring(ring, cnt+1);
OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8));
}
@@ -266,14 +277,14 @@ static inline u32 PM4_PARITY(u32 val)
static inline void
OUT_PKT4(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
- adreno_wait_ring(ring->gpu, cnt + 1);
+ adreno_wait_ring(ring, cnt + 1);
OUT_RING(ring, PKT4(regindx, cnt));
}
static inline void
OUT_PKT7(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
{
- adreno_wait_ring(ring->gpu, cnt + 1);
+ adreno_wait_ring(ring, cnt + 1);
OUT_RING(ring, CP_TYPE7_PKT | (cnt << 0) | (PM4_PARITY(cnt) << 15) |
((opcode & 0x7F) << 16) | (PM4_PARITY(opcode) << 23));
}
@@ -328,6 +339,11 @@ static inline void adreno_gpu_write64(struct adreno_gpu *gpu,
adreno_gpu_write(gpu, hi, upper_32_bits(data));
}
+static inline uint32_t get_wptr(struct msm_ringbuffer *ring)
+{
+ return (ring->cur - ring->start) % (MSM_GPU_RINGBUFFER_SZ >> 2);
+}
+
/*
* Given a register and a count, return a value to program into
* REG_CP_PROTECT_REG(n) - this will block both reads and writes for _len
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
index da9d58eccc3a..b6cddee0cf34 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
@@ -204,7 +204,7 @@ static void mdp4_destroy(struct msm_kms *kms)
if (aspace) {
aspace->mmu->funcs->detach(aspace->mmu,
iommu_ports, ARRAY_SIZE(iommu_ports));
- msm_gem_address_space_destroy(aspace);
+ msm_gem_address_space_put(aspace);
}
kfree(mdp4_kms);
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
index ede681e12a68..e4e69ebd116e 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
@@ -136,9 +136,8 @@ static void mdp5_destroy(struct msm_kms *kms)
mdp5_irq_domain_fini(mdp5_kms);
if (aspace) {
- aspace->mmu->funcs->detach(aspace->mmu,
- iommu_ports, ARRAY_SIZE(iommu_ports));
- msm_gem_address_space_destroy(aspace);
+ aspace->mmu->funcs->detach(aspace->mmu);
+ msm_gem_address_space_put(aspace);
}
if (mdp5_kms->ctlm)
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 9163b90981a2..0231ac3f269f 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -23,6 +23,8 @@
#include "sde_wb.h"
#define TEARDOWN_DEADLOCK_RETRY_MAX 5
+#include "msm_gem.h"
+#include "msm_mmu.h"
static void msm_fb_output_poll_changed(struct drm_device *dev)
{
@@ -552,29 +554,65 @@ static void load_gpu(struct drm_device *dev)
}
#endif
-static int msm_open(struct drm_device *dev, struct drm_file *file)
+static struct msm_file_private *setup_pagetable(struct msm_drm_private *priv)
{
struct msm_file_private *ctx;
+ if (!priv || !priv->gpu)
+ return NULL;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
+
+ ctx->aspace = msm_gem_address_space_create_instance(
+ priv->gpu->aspace->mmu, "gpu", 0x100000000, 0x1ffffffff);
+
+ if (IS_ERR(ctx->aspace)) {
+ int ret = PTR_ERR(ctx->aspace);
+
+ /*
+ * If dynamic domains are not supported, everybody uses the
+ * same pagetable
+ */
+ if (ret != -EOPNOTSUPP) {
+ kfree(ctx);
+ return ERR_PTR(ret);
+ }
+
+ ctx->aspace = priv->gpu->aspace;
+ }
+
+ ctx->aspace->mmu->funcs->attach(ctx->aspace->mmu, NULL, 0);
+ return ctx;
+}
+
+static int msm_open(struct drm_device *dev, struct drm_file *file)
+{
+ struct msm_file_private *ctx = NULL;
+ struct msm_drm_private *priv;
+ struct msm_kms *kms;
+
+ if (!dev || !dev->dev_private)
+ return -ENODEV;
+
+ priv = dev->dev_private;
/* For now, load gpu on open.. to avoid the requirement of having
* firmware in the initrd.
*/
load_gpu(dev);
- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
- if (!ctx)
- return -ENOMEM;
+ ctx = setup_pagetable(priv);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
file->driver_priv = ctx;
- if (dev && dev->dev_private) {
- struct msm_drm_private *priv = dev->dev_private;
- struct msm_kms *kms;
+ kms = priv->kms;
+
+ if (kms && kms->funcs && kms->funcs->postopen)
+ kms->funcs->postopen(kms, file);
- kms = priv->kms;
- if (kms && kms->funcs && kms->funcs->postopen)
- kms->funcs->postopen(kms, file);
- }
return 0;
}
@@ -597,8 +635,10 @@ static void msm_postclose(struct drm_device *dev, struct drm_file *file)
kms->funcs->postclose(kms, file);
mutex_lock(&dev->struct_mutex);
- if (ctx == priv->lastctx)
- priv->lastctx = NULL;
+ if (ctx && ctx->aspace && ctx->aspace != priv->gpu->aspace) {
+ ctx->aspace->mmu->funcs->detach(ctx->aspace->mmu);
+ msm_gem_address_space_put(ctx->aspace);
+ }
mutex_unlock(&dev->struct_mutex);
kfree(ctx);
@@ -797,6 +837,13 @@ static int msm_gpu_show(struct drm_device *dev, struct seq_file *m)
return 0;
}
+static int msm_snapshot_show(struct drm_device *dev, struct seq_file *m)
+{
+ struct msm_drm_private *priv = dev->dev_private;
+
+ return msm_snapshot_write(priv->gpu, m);
+}
+
static int msm_gem_show(struct drm_device *dev, struct seq_file *m)
{
struct msm_drm_private *priv = dev->dev_private;
@@ -861,11 +908,22 @@ static int show_locked(struct seq_file *m, void *arg)
return ret;
}
+static int show_unlocked(struct seq_file *m, void *arg)
+{
+ struct drm_info_node *node = (struct drm_info_node *) m->private;
+ struct drm_device *dev = node->minor->dev;
+ int (*show)(struct drm_device *dev, struct seq_file *m) =
+ node->info_ent->data;
+
+ return show(dev, m);
+}
+
static struct drm_info_list msm_debugfs_list[] = {
{"gpu", show_locked, 0, msm_gpu_show},
{"gem", show_locked, 0, msm_gem_show},
{ "mm", show_locked, 0, msm_mm_show },
{ "fb", show_locked, 0, msm_fb_show },
+ { "snapshot", show_unlocked, 0, msm_snapshot_show },
};
static int late_init_minor(struct drm_minor *minor)
@@ -939,14 +997,23 @@ int msm_wait_fence(struct drm_device *dev, uint32_t fence,
ktime_t *timeout , bool interruptible)
{
struct msm_drm_private *priv = dev->dev_private;
+ struct msm_gpu *gpu = priv->gpu;
+ int index = FENCE_RING(fence);
+ uint32_t submitted;
int ret;
- if (!priv->gpu)
+ if (!gpu)
return -ENXIO;
- if (fence > priv->gpu->submitted_fence) {
+ if (index >= MSM_GPU_MAX_RINGS || index >= gpu->nr_rings ||
+ !gpu->rb[index])
+ return -EINVAL;
+
+ submitted = gpu->funcs->submitted_fence(gpu, gpu->rb[index]);
+
+ if (fence > submitted) {
DRM_ERROR("waiting on invalid fence: %u (of %u)\n",
- fence, priv->gpu->submitted_fence);
+ fence, submitted);
return -EINVAL;
}
@@ -976,7 +1043,7 @@ int msm_wait_fence(struct drm_device *dev, uint32_t fence,
if (ret == 0) {
DBG("timeout waiting for fence: %u (completed: %u)",
- fence, priv->completed_fence);
+ fence, priv->completed_fence[index]);
ret = -ETIMEDOUT;
} else if (ret != -ERESTARTSYS) {
ret = 0;
@@ -990,12 +1057,13 @@ int msm_queue_fence_cb(struct drm_device *dev,
struct msm_fence_cb *cb, uint32_t fence)
{
struct msm_drm_private *priv = dev->dev_private;
+ int index = FENCE_RING(fence);
int ret = 0;
mutex_lock(&dev->struct_mutex);
if (!list_empty(&cb->work.entry)) {
ret = -EINVAL;
- } else if (fence > priv->completed_fence) {
+ } else if (fence > priv->completed_fence[index]) {
cb->fence = fence;
list_add_tail(&cb->work.entry, &priv->fence_cbs);
} else {
@@ -1010,21 +1078,21 @@ int msm_queue_fence_cb(struct drm_device *dev,
void msm_update_fence(struct drm_device *dev, uint32_t fence)
{
struct msm_drm_private *priv = dev->dev_private;
+ struct msm_fence_cb *cb, *tmp;
+ int index = FENCE_RING(fence);
- mutex_lock(&dev->struct_mutex);
- priv->completed_fence = max(fence, priv->completed_fence);
-
- while (!list_empty(&priv->fence_cbs)) {
- struct msm_fence_cb *cb;
-
- cb = list_first_entry(&priv->fence_cbs,
- struct msm_fence_cb, work.entry);
+ if (index >= MSM_GPU_MAX_RINGS)
+ return;
- if (cb->fence > priv->completed_fence)
- break;
+ mutex_lock(&dev->struct_mutex);
+ priv->completed_fence[index] = max(fence, priv->completed_fence[index]);
- list_del_init(&cb->work.entry);
- queue_work(priv->wq, &cb->work);
+ list_for_each_entry_safe(cb, tmp, &priv->fence_cbs, work.entry) {
+ if (COMPARE_FENCE_LTE(cb->fence,
+ priv->completed_fence[index])) {
+ list_del_init(&cb->work.entry);
+ queue_work(priv->wq, &cb->work);
+ }
}
mutex_unlock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 406b40f0f8de..0a06c1cb27a4 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -74,11 +74,7 @@ struct msm_gem_vma;
#define MAX_CONNECTORS 8
struct msm_file_private {
- /* currently we don't do anything useful with this.. but when
- * per-context address spaces are supported we'd keep track of
- * the context's page-tables here.
- */
- int dummy;
+ struct msm_gem_address_space *aspace;
};
enum msm_mdp_plane_property {
@@ -250,6 +246,8 @@ struct msm_drm_commit {
struct kthread_worker worker;
};
+#define MSM_GPU_MAX_RINGS 4
+
struct msm_drm_private {
struct msm_kms *kms;
@@ -276,11 +274,12 @@ struct msm_drm_private {
/* when we have more than one 'msm_gpu' these need to be an array: */
struct msm_gpu *gpu;
- struct msm_file_private *lastctx;
struct drm_fb_helper *fbdev;
- uint32_t next_fence, completed_fence;
+ uint32_t next_fence[MSM_GPU_MAX_RINGS];
+ uint32_t completed_fence[MSM_GPU_MAX_RINGS];
+
wait_queue_head_t fence_event;
struct msm_rd_state *rd;
@@ -351,6 +350,31 @@ struct msm_format {
uint32_t pixel_format;
};
+/*
+ * Some GPU targets can support multiple ringbuffers and preempt between them.
+ * In order to do this without massive API changes we steal the top two bits
+ * of the fence and use them to identify the ringbuffer (0x00000001 for
+ * ring 0, 0x40000001 for ring 1, 0x80000001 for ring 2, etc). If you are
+ * going to do a fence comparison you have to make sure you are only comparing
+ * against fences from the same ring, but since fences within a ringbuffer are
+ * still contiguous you can still use straight comparisons (i.e. 0x40000001 is
+ * older than 0x40000002). Mathematically there will be 0x3FFFFFFF timestamps
+ * per ring, or ~103 days at 120 interrupts per second (two interrupts per
+ * frame at 60 FPS).
+ */
+#define FENCE_RING(_fence) ((_fence >> 30) & 3)
+#define FENCE(_ring, _fence) ((((_ring) & 3) << 30) | ((_fence) & 0x3FFFFFFF))
+
+static inline bool COMPARE_FENCE_LTE(uint32_t a, uint32_t b)
+{
+ return ((FENCE_RING(a) == FENCE_RING(b)) && a <= b);
+}
+
+static inline bool COMPARE_FENCE_LT(uint32_t a, uint32_t b)
+{
+ return ((FENCE_RING(a) == FENCE_RING(b)) && a < b);
+}
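+
+/*
+ * For example, FENCE(1, 5) encodes to 0x40000005 and FENCE_RING(0x40000005)
+ * recovers ring 1. COMPARE_FENCE_LT(0x40000001, 0x40000005) is true, while
+ * comparing 0x40000005 against any ring 0 fence is always false
+ */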
+
/* callback from wq once fence has passed: */
struct msm_fence_cb {
struct work_struct work;
@@ -379,13 +403,17 @@ void msm_gem_unmap_vma(struct msm_gem_address_space *aspace,
void *priv);
int msm_gem_map_vma(struct msm_gem_address_space *aspace,
struct msm_gem_vma *vma, struct sg_table *sgt,
- void *priv);
-void msm_gem_address_space_destroy(struct msm_gem_address_space *aspace);
+ void *priv, unsigned int flags);
+
+void msm_gem_address_space_put(struct msm_gem_address_space *aspace);
/* For GPU and legacy display */
struct msm_gem_address_space *
msm_gem_address_space_create(struct device *dev, struct iommu_domain *domain,
const char *name);
+struct msm_gem_address_space *
+msm_gem_address_space_create_instance(struct msm_mmu *parent, const char *name,
+ uint64_t start, uint64_t end);
/* For SDE display */
struct msm_gem_address_space *
@@ -529,7 +557,8 @@ u32 msm_readl(const void __iomem *addr);
static inline bool fence_completed(struct drm_device *dev, uint32_t fence)
{
struct msm_drm_private *priv = dev->dev_private;
- return priv->completed_fence >= fence;
+
+ return priv->completed_fence[FENCE_RING(fence)] >= fence;
}
static inline int align_pitch(int width, int bpp)
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index f42c7bff18fb..63128d11767e 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -330,6 +330,10 @@ static struct msm_gem_vma *obj_get_domain(struct drm_gem_object *obj,
return NULL;
}
+#ifndef IOMMU_PRIV
+#define IOMMU_PRIV 0
+#endif
+
/* should be called under struct_mutex.. although it can be called
* from atomic context without struct_mutex to acquire an extra
* iova ref if you know one is already held.
@@ -369,7 +373,7 @@ int msm_gem_get_iova_locked(struct drm_gem_object *obj,
}
ret = msm_gem_map_vma(aspace, domain, msm_obj->sgt,
- get_dmabuf_ptr(obj));
+ get_dmabuf_ptr(obj), msm_obj->flags);
}
if (!ret)
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index d9c3977434d9..ac46c473791f 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -18,6 +18,7 @@
#ifndef __MSM_GEM_H__
#define __MSM_GEM_H__
+#include <linux/kref.h>
#include <linux/reservation.h>
#include "msm_drv.h"
@@ -26,7 +27,7 @@
struct msm_gem_aspace_ops {
int (*map)(struct msm_gem_address_space *, struct msm_gem_vma *,
- struct sg_table *sgt, void *priv);
+ struct sg_table *sgt, void *priv, unsigned int flags);
void (*unmap)(struct msm_gem_address_space *, struct msm_gem_vma *,
struct sg_table *sgt, void *priv);
@@ -38,6 +39,7 @@ struct msm_gem_address_space {
const char *name;
struct msm_mmu *mmu;
const struct msm_gem_aspace_ops *ops;
+ struct kref kref;
};
struct msm_gem_vma {
@@ -116,11 +118,12 @@ static inline uint32_t msm_gem_fence(struct msm_gem_object *msm_obj,
*/
struct msm_gem_submit {
struct drm_device *dev;
- struct msm_gpu *gpu;
+ struct msm_gem_address_space *aspace;
struct list_head node; /* node in gpu submit_list */
struct list_head bo_list;
struct ww_acquire_ctx ticket;
uint32_t fence;
+ int ring;
bool valid;
unsigned int nr_cmds;
unsigned int nr_bos;
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index f7b5e30b41eb..0566cefaae81 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -34,7 +34,7 @@ static inline void __user *to_user_ptr(u64 address)
}
static struct msm_gem_submit *submit_create(struct drm_device *dev,
- struct msm_gpu *gpu, int nr)
+ struct msm_gem_address_space *aspace, int nr)
{
struct msm_gem_submit *submit;
int sz = sizeof(*submit) + (nr * sizeof(submit->bos[0]));
@@ -42,7 +42,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
submit = kmalloc(sz, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
if (submit) {
submit->dev = dev;
- submit->gpu = gpu;
+ submit->aspace = aspace;
/* initially, until copy_from_user() and bo lookup succeeds: */
submit->nr_bos = 0;
@@ -142,7 +142,7 @@ static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i)
struct msm_gem_object *msm_obj = submit->bos[i].obj;
if (submit->bos[i].flags & BO_PINNED)
- msm_gem_put_iova(&msm_obj->base, submit->gpu->aspace);
+ msm_gem_put_iova(&msm_obj->base, submit->aspace);
if (submit->bos[i].flags & BO_LOCKED)
ww_mutex_unlock(&msm_obj->resv->lock);
@@ -181,7 +181,7 @@ retry:
/* if locking succeeded, pin bo: */
ret = msm_gem_get_iova_locked(&msm_obj->base,
- submit->gpu->aspace, &iova);
+ submit->aspace, &iova);
/* this would break the logic in the fail path.. there is no
* reason for this to happen, but just to be on the safe side
@@ -349,7 +349,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
/* for now, we just have 3d pipe.. eventually this would need to
* be more clever to dispatch to appropriate gpu module:
*/
- if (args->pipe != MSM_PIPE_3D0)
+ if (MSM_PIPE_ID(args->flags) != MSM_PIPE_3D0)
return -EINVAL;
gpu = priv->gpu;
@@ -361,7 +361,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
mutex_lock(&dev->struct_mutex);
- submit = submit_create(dev, gpu, args->nr_bos);
+ submit = submit_create(dev, ctx->aspace, args->nr_bos);
if (!submit) {
ret = -ENOMEM;
goto out;
@@ -412,8 +412,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
goto out;
}
- if ((submit_cmd.size + submit_cmd.submit_offset) >=
- msm_obj->base.size) {
+ if (!(submit_cmd.size) ||
+ ((submit_cmd.size + submit_cmd.submit_offset) >
+ msm_obj->base.size)) {
DRM_ERROR("invalid cmdstream size: %u\n", submit_cmd.size);
ret = -EINVAL;
goto out;
@@ -435,7 +436,12 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
submit->nr_cmds = i;
- ret = msm_gpu_submit(gpu, submit, ctx);
+ /* Clamp the user submitted ring to the range of available rings */
+ submit->ring = clamp_t(uint32_t,
+ (args->flags & MSM_SUBMIT_RING_MASK) >> MSM_SUBMIT_RING_SHIFT,
+ 0, gpu->nr_rings - 1);
+
+ ret = msm_gpu_submit(gpu, submit);
args->fence = submit->fence;
diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c
index 53e70263e03c..7ca96831a9b3 100644
--- a/drivers/gpu/drm/msm/msm_gem_vma.c
+++ b/drivers/gpu/drm/msm/msm_gem_vma.c
@@ -19,6 +19,24 @@
#include "msm_gem.h"
#include "msm_mmu.h"
+static void
+msm_gem_address_space_destroy(struct kref *kref)
+{
+ struct msm_gem_address_space *aspace = container_of(kref,
+ struct msm_gem_address_space, kref);
+
+ if (aspace->ops->destroy)
+ aspace->ops->destroy(aspace);
+
+ kfree(aspace);
+}
+
+void msm_gem_address_space_put(struct msm_gem_address_space *aspace)
+{
+ if (aspace)
+ kref_put(&aspace->kref, msm_gem_address_space_destroy);
+}
+
/* SDE address space operations */
static void smmu_aspace_unmap_vma(struct msm_gem_address_space *aspace,
struct msm_gem_vma *vma, struct sg_table *sgt,
@@ -34,12 +52,14 @@ static void smmu_aspace_unmap_vma(struct msm_gem_address_space *aspace,
DMA_BIDIRECTIONAL);
vma->iova = 0;
+
+ msm_gem_address_space_put(aspace);
}
static int smmu_aspace_map_vma(struct msm_gem_address_space *aspace,
struct msm_gem_vma *vma, struct sg_table *sgt,
- void *priv)
+ void *priv, unsigned int flags)
{
struct dma_buf *buf = priv;
int ret;
@@ -54,6 +74,9 @@ static int smmu_aspace_map_vma(struct msm_gem_address_space *aspace,
if (!ret)
vma->iova = sg_dma_address(sgt->sgl);
+ /* Get a reference to the aspace to keep it around */
+ kref_get(&aspace->kref);
+
return ret;
}
@@ -79,6 +102,8 @@ msm_gem_smmu_address_space_create(struct device *dev, struct msm_mmu *mmu,
aspace->mmu = mmu;
aspace->ops = &smmu_aspace_ops;
+ kref_init(&aspace->kref);
+
return aspace;
}
@@ -104,16 +129,25 @@ static void iommu_aspace_unmap_vma(struct msm_gem_address_space *aspace,
drm_mm_remove_node(&vma->node);
vma->iova = 0;
+
+ msm_gem_address_space_put(aspace);
}
static int iommu_aspace_map_vma(struct msm_gem_address_space *aspace,
- struct msm_gem_vma *vma, struct sg_table *sgt,
- void *priv)
+ struct msm_gem_vma *vma, struct sg_table *sgt, void *priv,
+ unsigned int flags)
{
struct msm_iommu_aspace *local = to_iommu_aspace(aspace);
size_t size = 0;
struct scatterlist *sg;
- int ret = 0, i;
+ int ret, i;
+ int iommu_flags = IOMMU_READ;
+
+ if (!(flags & MSM_BO_GPU_READONLY))
+ iommu_flags |= IOMMU_WRITE;
+
+ if (flags & MSM_BO_PRIVILEGED)
+ iommu_flags |= IOMMU_PRIV;
if (WARN_ON(drm_mm_node_allocated(&vma->node)))
return 0;
@@ -129,8 +163,11 @@ static int iommu_aspace_map_vma(struct msm_gem_address_space *aspace,
vma->iova = vma->node.start << PAGE_SHIFT;
if (aspace->mmu)
- ret = aspace->mmu->funcs->map(aspace->mmu, vma->iova,
- sgt, IOMMU_READ | IOMMU_WRITE);
+ ret = aspace->mmu->funcs->map(aspace->mmu, vma->iova, sgt,
+ iommu_flags);
+
+ /* Get a reference to the aspace to keep it around */
+ kref_get(&aspace->kref);
return ret;
}
@@ -169,15 +206,17 @@ msm_gem_address_space_new(struct msm_mmu *mmu, const char *name,
local->base.mmu = mmu;
local->base.ops = &msm_iommu_aspace_ops;
+ kref_init(&local->base.kref);
+
return &local->base;
}
int msm_gem_map_vma(struct msm_gem_address_space *aspace,
struct msm_gem_vma *vma, struct sg_table *sgt,
- void *priv)
+ void *priv, unsigned int flags)
{
if (aspace && aspace->ops->map)
- return aspace->ops->map(aspace, vma, sgt, priv);
+ return aspace->ops->map(aspace, vma, sgt, priv, flags);
return -EINVAL;
}
@@ -203,11 +242,15 @@ msm_gem_address_space_create(struct device *dev, struct iommu_domain *domain,
domain->geometry.aperture_end);
}
-void
-msm_gem_address_space_destroy(struct msm_gem_address_space *aspace)
+/* Create a new dynamic instance */
+struct msm_gem_address_space *
+msm_gem_address_space_create_instance(struct msm_mmu *parent, const char *name,
+ uint64_t start, uint64_t end)
{
- if (aspace && aspace->ops->destroy)
- aspace->ops->destroy(aspace);
+ struct msm_mmu *child = msm_iommu_new_dynamic(parent);
- kfree(aspace);
+ if (IS_ERR(child))
+ return (struct msm_gem_address_space *) child;
+
+ return msm_gem_address_space_new(child, name, start, end);
}
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 08ecc089611f..3fb480f41fde 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -93,17 +93,17 @@ static int enable_clk(struct msm_gpu *gpu)
uint32_t rate = gpu->gpufreq[gpu->active_level];
int i;
- clk_set_rate(gpu->grp_clks[0], rate);
+ if (gpu->core_clk)
+ clk_set_rate(gpu->core_clk, rate);
- if (gpu->grp_clks[3])
- clk_set_rate(gpu->grp_clks[3], 19200000);
+ if (gpu->rbbmtimer_clk)
+ clk_set_rate(gpu->rbbmtimer_clk, 19200000);
- /* NOTE: kgsl_pwrctrl_clk() ignores grp_clks[0].. */
- for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--)
+ for (i = gpu->nr_clocks - 1; i >= 0; i--)
if (gpu->grp_clks[i])
clk_prepare(gpu->grp_clks[i]);
- for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--)
+ for (i = gpu->nr_clocks - 1; i >= 0; i--)
if (gpu->grp_clks[i])
clk_enable(gpu->grp_clks[i]);
@@ -115,16 +115,20 @@ static int disable_clk(struct msm_gpu *gpu)
uint32_t rate = gpu->gpufreq[gpu->nr_pwrlevels - 1];
int i;
- /* NOTE: kgsl_pwrctrl_clk() ignores grp_clks[0].. */
- for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--)
+ for (i = gpu->nr_clocks - 1; i >= 0; i--)
if (gpu->grp_clks[i])
clk_disable(gpu->grp_clks[i]);
- for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--)
+ for (i = gpu->nr_clocks - 1; i >= 0; i--)
if (gpu->grp_clks[i])
clk_unprepare(gpu->grp_clks[i]);
- clk_set_rate(gpu->grp_clks[0], rate);
+ if (gpu->core_clk)
+ clk_set_rate(gpu->core_clk, rate);
+
+ if (gpu->rbbmtimer_clk)
+ clk_set_rate(gpu->rbbmtimer_clk, 0);
+
return 0;
}
@@ -273,17 +277,35 @@ static void recover_worker(struct work_struct *work)
mutex_lock(&dev->struct_mutex);
if (msm_gpu_active(gpu)) {
struct msm_gem_submit *submit;
- uint32_t fence = gpu->funcs->last_fence(gpu);
-
- /* retire completed submits, plus the one that hung: */
- retire_submits(gpu, fence + 1);
+ struct msm_ringbuffer *ring;
+ int i;
inactive_cancel(gpu);
+
+ FOR_EACH_RING(gpu, ring, i) {
+ uint32_t fence;
+
+ if (!ring)
+ continue;
+
+ fence = gpu->funcs->last_fence(gpu, ring);
+
+ /*
+ * Retire the faulting command on the active ring and
+ * make sure the other rings are cleaned up
+ */
+ if (ring == gpu->funcs->active_ring(gpu))
+ retire_submits(gpu, fence + 1);
+ else
+ retire_submits(gpu, fence);
+ }
+
+ /* Recover the GPU */
gpu->funcs->recover(gpu);
- /* replay the remaining submits after the one that hung: */
+ /* replay the remaining submits for all rings: */
list_for_each_entry(submit, &gpu->submit_list, node) {
- gpu->funcs->submit(gpu, submit, NULL);
+ gpu->funcs->submit(gpu, submit);
}
}
mutex_unlock(&dev->struct_mutex);
@@ -303,25 +325,28 @@ static void hangcheck_handler(unsigned long data)
struct msm_gpu *gpu = (struct msm_gpu *)data;
struct drm_device *dev = gpu->dev;
struct msm_drm_private *priv = dev->dev_private;
- uint32_t fence = gpu->funcs->last_fence(gpu);
+ struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
+ uint32_t fence = gpu->funcs->last_fence(gpu, ring);
+ uint32_t submitted = gpu->funcs->submitted_fence(gpu, ring);
- if (fence != gpu->hangcheck_fence) {
+ if (fence != gpu->hangcheck_fence[ring->id]) {
/* some progress has been made.. ya! */
- gpu->hangcheck_fence = fence;
- } else if (fence < gpu->submitted_fence) {
+ gpu->hangcheck_fence[ring->id] = fence;
+ } else if (fence < submitted) {
/* no progress and not done.. hung! */
- gpu->hangcheck_fence = fence;
- dev_err(dev->dev, "%s: hangcheck detected gpu lockup!\n",
- gpu->name);
+ gpu->hangcheck_fence[ring->id] = fence;
+ dev_err(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
+ gpu->name, ring->id);
dev_err(dev->dev, "%s: completed fence: %u\n",
gpu->name, fence);
dev_err(dev->dev, "%s: submitted fence: %u\n",
- gpu->name, gpu->submitted_fence);
+ gpu->name, submitted);
+
queue_work(priv->wq, &gpu->recover_work);
}
/* if still more pending work, reset the hangcheck timer: */
- if (gpu->submitted_fence > gpu->hangcheck_fence)
+ if (submitted > gpu->hangcheck_fence[ring->id])
hangcheck_timer_reset(gpu);
/* workaround for missing irq: */
@@ -430,54 +455,66 @@ out:
static void retire_submits(struct msm_gpu *gpu, uint32_t fence)
{
struct drm_device *dev = gpu->dev;
+ struct msm_gem_submit *submit, *tmp;
WARN_ON(!mutex_is_locked(&dev->struct_mutex));
- while (!list_empty(&gpu->submit_list)) {
- struct msm_gem_submit *submit;
-
- submit = list_first_entry(&gpu->submit_list,
- struct msm_gem_submit, node);
+ /*
+ * Find and retire all the submits in the same ring that are older than
+ * or equal to 'fence'
+ */
- if (submit->fence <= fence) {
+ list_for_each_entry_safe(submit, tmp, &gpu->submit_list, node) {
+ if (COMPARE_FENCE_LTE(submit->fence, fence)) {
list_del(&submit->node);
kfree(submit);
- } else {
- break;
}
}
}
-static void retire_worker(struct work_struct *work)
+static bool _fence_signaled(struct msm_gem_object *obj, uint32_t fence)
{
- struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
- struct drm_device *dev = gpu->dev;
- uint32_t fence = gpu->funcs->last_fence(gpu);
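+	/*
+	 * The ring is encoded in the upper bits of the fence (see the
+	 * FENCE() packing in msm_gpu_submit()); the low 30 bits hold the
+	 * per-ring sequence number, so a non-zero value here means a write
+	 * fence was assigned to this object
+	 */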
+ if (obj->write_fence & 0x3FFFFFFF)
+ return COMPARE_FENCE_LTE(obj->write_fence, fence);
- msm_update_fence(gpu->dev, fence);
+ return COMPARE_FENCE_LTE(obj->read_fence, fence);
+}
- mutex_lock(&dev->struct_mutex);
+static void _retire_ring(struct msm_gpu *gpu, uint32_t fence)
+{
+ struct msm_gem_object *obj, *tmp;
retire_submits(gpu, fence);
- while (!list_empty(&gpu->active_list)) {
- struct msm_gem_object *obj;
-
- obj = list_first_entry(&gpu->active_list,
- struct msm_gem_object, mm_list);
-
- if ((obj->read_fence <= fence) &&
- (obj->write_fence <= fence)) {
- /* move to inactive: */
+ list_for_each_entry_safe(obj, tmp, &gpu->active_list, mm_list) {
+ if (_fence_signaled(obj, fence)) {
msm_gem_move_to_inactive(&obj->base);
msm_gem_put_iova(&obj->base, gpu->aspace);
drm_gem_object_unreference(&obj->base);
- } else {
- break;
}
}
+}
- mutex_unlock(&dev->struct_mutex);
+static void retire_worker(struct work_struct *work)
+{
+ struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
+ struct drm_device *dev = gpu->dev;
+ struct msm_ringbuffer *ring;
+ int i;
+
+ FOR_EACH_RING(gpu, ring, i) {
+ uint32_t fence;
+
+ if (!ring)
+ continue;
+
+ fence = gpu->funcs->last_fence(gpu, ring);
+ msm_update_fence(gpu->dev, fence);
+
+ mutex_lock(&dev->struct_mutex);
+ _retire_ring(gpu, fence);
+ mutex_unlock(&dev->struct_mutex);
+ }
if (!msm_gpu_active(gpu))
inactive_start(gpu);
@@ -492,18 +529,16 @@ void msm_gpu_retire(struct msm_gpu *gpu)
}
/* add bo's to gpu's ring, and kick gpu: */
-int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
- struct msm_file_private *ctx)
+int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
struct drm_device *dev = gpu->dev;
struct msm_drm_private *priv = dev->dev_private;
+ struct msm_ringbuffer *ring = gpu->rb[submit->ring];
int i, ret;
WARN_ON(!mutex_is_locked(&dev->struct_mutex));
- submit->fence = ++priv->next_fence;
-
- gpu->submitted_fence = submit->fence;
+ submit->fence = FENCE(submit->ring, ++priv->next_fence[submit->ring]);
inactive_cancel(gpu);
@@ -511,7 +546,7 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
msm_rd_dump_submit(submit);
- gpu->submitted_fence = submit->fence;
+ ring->submitted_fence = submit->fence;
update_sw_cntrs(gpu);
@@ -529,7 +564,7 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
/* ring takes a reference to the bo and iova: */
drm_gem_object_reference(&msm_obj->base);
msm_gem_get_iova_locked(&msm_obj->base,
- submit->gpu->aspace, &iova);
+ submit->aspace, &iova);
}
if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
@@ -539,8 +574,7 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
}
- ret = gpu->funcs->submit(gpu, submit, ctx);
- priv->lastctx = ctx;
+ ret = gpu->funcs->submit(gpu, submit);
hangcheck_timer_reset(gpu);
@@ -557,17 +591,54 @@ static irqreturn_t irq_handler(int irq, void *data)
return gpu->funcs->irq(gpu);
}
-static const char *clk_names[] = {
- "src_clk", "core_clk", "iface_clk", "rbbmtimer_clk",
- "mem_clk", "mem_iface_clk", "alt_mem_iface_clk",
-};
+static struct clk *get_clock(struct device *dev, const char *name)
+{
+ struct clk *clk = devm_clk_get(dev, name);
+
+ DBG("clks[%s]: %p", name, clk);
+
+ return IS_ERR(clk) ? NULL : clk;
+}
+
+static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu)
+{
+ struct device *dev = &pdev->dev;
+ struct property *prop;
+ const char *name;
+ int i = 0;
+
+ gpu->nr_clocks = of_property_count_strings(dev->of_node, "clock-names");
+ if (gpu->nr_clocks < 1) {
+ gpu->nr_clocks = 0;
+ return 0;
+ }
+
+	gpu->grp_clks = devm_kcalloc(dev, gpu->nr_clocks, sizeof(struct clk *),
+		GFP_KERNEL);
+ if (!gpu->grp_clks)
+ return -ENOMEM;
+
+ of_property_for_each_string(dev->of_node, "clock-names", prop, name) {
+ gpu->grp_clks[i] = get_clock(dev, name);
+
+ /* Remember the key clocks that we need to control later */
+ if (!strcmp(name, "core_clk"))
+ gpu->core_clk = gpu->grp_clks[i];
+ else if (!strcmp(name, "rbbmtimer_clk"))
+ gpu->rbbmtimer_clk = gpu->grp_clks[i];
+
+ ++i;
+ }
+
+ return 0;
+}
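+
+/*
+ * Example (illustrative) device-tree fragment this parser would consume;
+ * the clock specifiers are placeholders:
+ *
+ *	clocks = <&clock_gfx CORE_CLK>, <&clock_gfx RBBMTIMER_CLK>;
+ *	clock-names = "core_clk", "rbbmtimer_clk";
+ */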
int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
- const char *name, const char *ioname, const char *irqname, int ringsz)
+ const char *name, struct msm_gpu_config *config)
{
struct iommu_domain *iommu;
- int i, ret;
+ int i, ret, nr_rings;
if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);
@@ -591,17 +662,16 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
spin_lock_init(&gpu->perf_lock);
- BUG_ON(ARRAY_SIZE(clk_names) != ARRAY_SIZE(gpu->grp_clks));
/* Map registers: */
- gpu->mmio = msm_ioremap(pdev, ioname, name);
+ gpu->mmio = msm_ioremap(pdev, config->ioname, name);
if (IS_ERR(gpu->mmio)) {
ret = PTR_ERR(gpu->mmio);
goto fail;
}
/* Get Interrupt: */
- gpu->irq = platform_get_irq_byname(pdev, irqname);
+ gpu->irq = platform_get_irq_byname(pdev, config->irqname);
if (gpu->irq < 0) {
ret = gpu->irq;
dev_err(drm->dev, "failed to get irq: %d\n", ret);
@@ -615,15 +685,9 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
goto fail;
}
- /* Acquire clocks: */
- for (i = 0; i < ARRAY_SIZE(clk_names); i++) {
- gpu->grp_clks[i] = devm_clk_get(&pdev->dev, clk_names[i]);
- DBG("grp_clks[%s]: %p", clk_names[i], gpu->grp_clks[i]);
- if (IS_ERR(gpu->grp_clks[i]))
- gpu->grp_clks[i] = NULL;
- }
-
- gpu->grp_clks[0] = gpu->grp_clks[1];
+ ret = get_clocks(pdev, gpu);
+ if (ret)
+ goto fail;
gpu->ebi1_clk = devm_clk_get(&pdev->dev, "bus_clk");
DBG("ebi1_clk: %p", gpu->ebi1_clk);
@@ -648,8 +712,8 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
iommu = iommu_domain_alloc(&platform_bus_type);
if (iommu) {
/* TODO 32b vs 64b address space.. */
- iommu->geometry.aperture_start = 0x1000;
- iommu->geometry.aperture_end = 0xffffffff;
+ iommu->geometry.aperture_start = config->va_start;
+ iommu->geometry.aperture_end = config->va_end;
dev_info(drm->dev, "%s: using IOMMU\n", name);
gpu->aspace = msm_gem_address_space_create(&pdev->dev,
@@ -666,17 +730,30 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
dev_info(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
}
- /* Create ringbuffer: */
- mutex_lock(&drm->struct_mutex);
- gpu->rb = msm_ringbuffer_new(gpu, ringsz);
- mutex_unlock(&drm->struct_mutex);
- if (IS_ERR(gpu->rb)) {
- ret = PTR_ERR(gpu->rb);
- gpu->rb = NULL;
- dev_err(drm->dev, "could not create ringbuffer: %d\n", ret);
- goto fail;
+ nr_rings = config->nr_rings;
+
+ if (nr_rings > ARRAY_SIZE(gpu->rb)) {
+		WARN(1, "Only creating %zu ringbuffers\n", ARRAY_SIZE(gpu->rb));
+ nr_rings = ARRAY_SIZE(gpu->rb);
}
+ /* Create ringbuffer(s): */
+ for (i = 0; i < nr_rings; i++) {
+ mutex_lock(&drm->struct_mutex);
+ gpu->rb[i] = msm_ringbuffer_new(gpu, i);
+ mutex_unlock(&drm->struct_mutex);
+
+ if (IS_ERR(gpu->rb[i])) {
+ ret = PTR_ERR(gpu->rb[i]);
+ gpu->rb[i] = NULL;
+ dev_err(drm->dev,
+ "could not create ringbuffer %d: %d\n", i, ret);
+ goto fail;
+ }
+ }
+
+ gpu->nr_rings = nr_rings;
+
#ifdef CONFIG_SMP
gpu->pm_qos_req_dma.type = PM_QOS_REQ_AFFINE_IRQ;
gpu->pm_qos_req_dma.irq = gpu->irq;
@@ -687,23 +764,40 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
bs_init(gpu);
+ gpu->snapshot = msm_snapshot_new(gpu);
+ if (IS_ERR(gpu->snapshot))
+ gpu->snapshot = NULL;
+
return 0;
fail:
+ for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
+ if (gpu->rb[i])
+ msm_ringbuffer_destroy(gpu->rb[i]);
+ }
+
return ret;
}
void msm_gpu_cleanup(struct msm_gpu *gpu)
{
+ int i;
+
DBG("%s", gpu->name);
WARN_ON(!list_empty(&gpu->active_list));
bs_fini(gpu);
- if (gpu->rb) {
- if (gpu->rb_iova)
- msm_gem_put_iova(gpu->rb->bo, gpu->aspace);
- msm_ringbuffer_destroy(gpu->rb);
+ for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
+ if (!gpu->rb[i])
+ continue;
+
+ if (gpu->rb[i]->iova)
+ msm_gem_put_iova(gpu->rb[i]->bo, gpu->aspace);
+
+ msm_ringbuffer_destroy(gpu->rb[i]);
}
+
+ msm_snapshot_destroy(gpu, gpu->snapshot);
}
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index c6f1d3bd36e9..06dfaabbfcfe 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -24,10 +24,19 @@
#include "msm_drv.h"
#include "msm_ringbuffer.h"
+#include "msm_snapshot.h"
struct msm_gem_submit;
struct msm_gpu_perfcntr;
+struct msm_gpu_config {
+ const char *ioname;
+ const char *irqname;
+ int nr_rings;
+ uint64_t va_start;
+ uint64_t va_end;
+};
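+
+/*
+ * Illustrative example of how a target might fill this in (the values
+ * below are placeholders, not taken from a real target):
+ *
+ *	static const struct msm_gpu_config demo_config = {
+ *		.ioname = "kgsl_3d0_reg_memory",
+ *		.irqname = "kgsl_3d0_irq",
+ *		.nr_rings = 4,
+ *		.va_start = SZ_16M,
+ *		.va_end = 0xffffffffULL,
+ *	};
+ */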
+
/* So far, with hardware that I've seen to date, we can have:
* + zero, one, or two z180 2d cores
* + a3xx or a2xx 3d core, which share a common CP (the firmware
@@ -47,18 +56,21 @@ struct msm_gpu_funcs {
int (*hw_init)(struct msm_gpu *gpu);
int (*pm_suspend)(struct msm_gpu *gpu);
int (*pm_resume)(struct msm_gpu *gpu);
- int (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit,
- struct msm_file_private *ctx);
- void (*flush)(struct msm_gpu *gpu);
- bool (*idle)(struct msm_gpu *gpu);
+ int (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit);
+ void (*flush)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
irqreturn_t (*irq)(struct msm_gpu *irq);
- uint32_t (*last_fence)(struct msm_gpu *gpu);
+ uint32_t (*last_fence)(struct msm_gpu *gpu,
+ struct msm_ringbuffer *ring);
+ uint32_t (*submitted_fence)(struct msm_gpu *gpu,
+ struct msm_ringbuffer *ring);
+ struct msm_ringbuffer *(*active_ring)(struct msm_gpu *gpu);
void (*recover)(struct msm_gpu *gpu);
void (*destroy)(struct msm_gpu *gpu);
#ifdef CONFIG_DEBUG_FS
/* show GPU status in debugfs: */
void (*show)(struct msm_gpu *gpu, struct seq_file *m);
#endif
+ int (*snapshot)(struct msm_gpu *gpu, struct msm_snapshot *snapshot);
};
struct msm_gpu {
@@ -78,14 +90,12 @@ struct msm_gpu {
const struct msm_gpu_perfcntr *perfcntrs;
uint32_t num_perfcntrs;
- struct msm_ringbuffer *rb;
- uint64_t rb_iova;
+ struct msm_ringbuffer *rb[MSM_GPU_MAX_RINGS];
+ int nr_rings;
/* list of GEM active objects: */
struct list_head active_list;
- uint32_t submitted_fence;
-
/* is gpu powered/active? */
int active_cnt;
bool inactive;
@@ -100,7 +110,9 @@ struct msm_gpu {
/* Power Control: */
struct regulator *gpu_reg, *gpu_cx;
- struct clk *ebi1_clk, *grp_clks[7];
+ struct clk **grp_clks;
+ struct clk *ebi1_clk, *core_clk, *rbbmtimer_clk;
+ int nr_clocks;
uint32_t gpufreq[10];
uint32_t busfreq[10];
@@ -123,15 +135,44 @@ struct msm_gpu {
#define DRM_MSM_HANGCHECK_PERIOD 500 /* in ms */
#define DRM_MSM_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_MSM_HANGCHECK_PERIOD)
struct timer_list hangcheck_timer;
- uint32_t hangcheck_fence;
+ uint32_t hangcheck_fence[MSM_GPU_MAX_RINGS];
struct work_struct recover_work;
struct list_head submit_list;
+
+ struct msm_snapshot *snapshot;
};
+/* It turns out that all targets use the same ringbuffer size. */
+#define MSM_GPU_RINGBUFFER_SZ SZ_32K
+#define MSM_GPU_RINGBUFFER_BLKSIZE 32
+
+#define MSM_GPU_RB_CNTL_DEFAULT \
+ (AXXX_CP_RB_CNTL_BUFSZ(ilog2(MSM_GPU_RINGBUFFER_SZ / 8)) | \
+ AXXX_CP_RB_CNTL_BLKSZ(ilog2(MSM_GPU_RINGBUFFER_BLKSIZE / 8)))
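+
+/*
+ * For the sizes above this works out to BUFSZ = ilog2(SZ_32K / 8) = 12
+ * and BLKSZ = ilog2(32 / 8) = 2
+ */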
+
+static inline struct msm_ringbuffer *__get_ring(struct msm_gpu *gpu, int index)
+{
+ return (index < ARRAY_SIZE(gpu->rb) ? gpu->rb[index] : NULL);
+}
+
+#define FOR_EACH_RING(gpu, ring, index) \
+ for (index = 0, ring = (gpu)->rb[0]; \
+ index < (gpu)->nr_rings && index < ARRAY_SIZE((gpu)->rb); \
+ index++, ring = __get_ring(gpu, index))
+
static inline bool msm_gpu_active(struct msm_gpu *gpu)
{
- return gpu->submitted_fence > gpu->funcs->last_fence(gpu);
+ struct msm_ringbuffer *ring;
+ int i;
+
+ FOR_EACH_RING(gpu, ring, i) {
+ if (gpu->funcs->submitted_fence(gpu, ring) >
+ gpu->funcs->last_fence(gpu, ring))
+ return true;
+ }
+
+ return false;
}
/* Perf-Counters:
@@ -205,12 +246,12 @@ int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs);
void msm_gpu_retire(struct msm_gpu *gpu);
-int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
- struct msm_file_private *ctx);
+int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit);
int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
- const char *name, const char *ioname, const char *irqname, int ringsz);
+ const char *name, struct msm_gpu_config *config);
+
void msm_gpu_cleanup(struct msm_gpu *gpu);
struct msm_gpu *adreno_load_gpu(struct drm_device *dev);
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index b4a5015ff922..3c16222b8890 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -17,13 +17,7 @@
#include <linux/of_platform.h>
#include "msm_drv.h"
-#include "msm_mmu.h"
-
-struct msm_iommu {
- struct msm_mmu base;
- struct iommu_domain *domain;
-};
-#define to_msm_iommu(x) container_of(x, struct msm_iommu, base)
+#include "msm_iommu.h"
static int msm_fault_handler(struct iommu_domain *iommu, struct device *dev,
unsigned long iova, int flags, void *arg)
@@ -32,9 +26,50 @@ static int msm_fault_handler(struct iommu_domain *iommu, struct device *dev,
return 0;
}
-static int msm_iommu_attach(struct msm_mmu *mmu, const char **names, int cnt)
+/*
+ * Get and enable the IOMMU clocks, and keep them enabled for the entire
+ * duration so that the pagetable can be safely switched from the GPU
+ */
+static void _get_iommu_clocks(struct msm_mmu *mmu, struct platform_device *pdev)
{
struct msm_iommu *iommu = to_msm_iommu(mmu);
+ struct device *dev;
+ struct property *prop;
+ const char *name;
+ int i = 0;
+
+ if (WARN_ON(!pdev))
+ return;
+
+ dev = &pdev->dev;
+
+ iommu->nr_clocks =
+ of_property_count_strings(dev->of_node, "clock-names");
+
+ if (iommu->nr_clocks < 0) {
+ iommu->nr_clocks = 0;
+ return;
+ }
+
+ if (WARN_ON(iommu->nr_clocks > ARRAY_SIZE(iommu->clocks)))
+ iommu->nr_clocks = ARRAY_SIZE(iommu->clocks);
+
+ of_property_for_each_string(dev->of_node, "clock-names", prop, name) {
+ if (i == iommu->nr_clocks)
+ break;
+
+		iommu->clocks[i] = clk_get(dev, name);
+		if (IS_ERR(iommu->clocks[i]))
+			iommu->clocks[i] = NULL;
+		else
+			clk_prepare_enable(iommu->clocks[i]);
+
+ i++;
+ }
+}
+
+static int _attach_iommu_device(struct msm_mmu *mmu,
+ struct iommu_domain *domain, const char **names, int cnt)
+{
int i;
	/* See if there is an iommus member in the current device. If not, look
@@ -42,7 +77,7 @@ static int msm_iommu_attach(struct msm_mmu *mmu, const char **names, int cnt)
*/
if (of_find_property(mmu->dev->of_node, "iommus", NULL))
- return iommu_attach_device(iommu->domain, mmu->dev);
+ return iommu_attach_device(domain, mmu->dev);
/* Look through the list of names for a target */
for (i = 0; i < cnt; i++) {
@@ -64,8 +99,12 @@ static int msm_iommu_attach(struct msm_mmu *mmu, const char **names, int cnt)
if (!pdev)
continue;
+ _get_iommu_clocks(mmu,
+ of_find_device_by_node(node->parent));
+
mmu->dev = &pdev->dev;
- return iommu_attach_device(iommu->domain, mmu->dev);
+
+ return iommu_attach_device(domain, mmu->dev);
}
}
@@ -73,7 +112,80 @@ static int msm_iommu_attach(struct msm_mmu *mmu, const char **names, int cnt)
return -ENODEV;
}
-static void msm_iommu_detach(struct msm_mmu *mmu, const char **names, int cnt)
+static int msm_iommu_attach(struct msm_mmu *mmu, const char **names, int cnt)
+{
+ struct msm_iommu *iommu = to_msm_iommu(mmu);
+ int val = 1, ret;
+
+ /* Hope springs eternal */
+ iommu->allow_dynamic = true;
+
+ /* per-instance pagetables need TTBR1 support in the IOMMU driver */
+ ret = iommu_domain_set_attr(iommu->domain,
+ DOMAIN_ATTR_ENABLE_TTBR1, &val);
+ if (ret)
+ iommu->allow_dynamic = false;
+
+ /* Attach the device to the domain */
+ ret = _attach_iommu_device(mmu, iommu->domain, names, cnt);
+ if (ret)
+ return ret;
+
+ /*
+ * Get the context bank for the base domain; this will be shared with
+ * the children.
+ */
+ iommu->cb = -1;
+ if (iommu_domain_get_attr(iommu->domain, DOMAIN_ATTR_CONTEXT_BANK,
+ &iommu->cb))
+ iommu->allow_dynamic = false;
+
+ return 0;
+}
+
+static int msm_iommu_attach_dynamic(struct msm_mmu *mmu, const char **names,
+ int cnt)
+{
+ static unsigned int procid;
+ struct msm_iommu *iommu = to_msm_iommu(mmu);
+ int ret;
+ unsigned int id;
+
+ /* Assign a unique procid for the domain to cut down on TLB churn */
+ id = ++procid;
+
+ iommu_domain_set_attr(iommu->domain, DOMAIN_ATTR_PROCID, &id);
+
+ ret = iommu_attach_device(iommu->domain, mmu->dev);
+ if (ret)
+ return ret;
+
+ /*
+ * Get the TTBR0 and the CONTEXTIDR - these will be used by the GPU to
+ * switch the pagetable on its own.
+ */
+ iommu_domain_get_attr(iommu->domain, DOMAIN_ATTR_TTBR0,
+ &iommu->ttbr0);
+ iommu_domain_get_attr(iommu->domain, DOMAIN_ATTR_CONTEXTIDR,
+ &iommu->contextidr);
+
+ return 0;
+}
+
+static void msm_iommu_detach(struct msm_mmu *mmu)
+{
+ struct msm_iommu *iommu = to_msm_iommu(mmu);
+ int i;
+
+ iommu_detach_device(iommu->domain, mmu->dev);
+
+ for (i = 0; i < iommu->nr_clocks; i++) {
+ if (iommu->clocks[i])
+ clk_disable(iommu->clocks[i]);
+ }
+}
+
+static void msm_iommu_detach_dynamic(struct msm_mmu *mmu)
{
struct msm_iommu *iommu = to_msm_iommu(mmu);
iommu_detach_device(iommu->domain, mmu->dev);
@@ -160,7 +272,16 @@ static const struct msm_mmu_funcs funcs = {
.destroy = msm_iommu_destroy,
};
-struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain)
+static const struct msm_mmu_funcs dynamic_funcs = {
+ .attach = msm_iommu_attach_dynamic,
+ .detach = msm_iommu_detach_dynamic,
+ .map = msm_iommu_map,
+ .unmap = msm_iommu_unmap,
+ .destroy = msm_iommu_destroy,
+};
+
+struct msm_mmu *_msm_iommu_new(struct device *dev, struct iommu_domain *domain,
+ const struct msm_mmu_funcs *funcs)
{
struct msm_iommu *iommu;
@@ -169,8 +290,54 @@ struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain)
return ERR_PTR(-ENOMEM);
iommu->domain = domain;
- msm_mmu_init(&iommu->base, dev, &funcs);
+ msm_mmu_init(&iommu->base, dev, funcs);
iommu_set_fault_handler(domain, msm_fault_handler, dev);
return &iommu->base;
}
+struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain)
+{
+ return _msm_iommu_new(dev, domain, &funcs);
+}
+
+/*
+ * Given a base domain that is attached to an IOMMU device, try to create a
+ * dynamic domain that is also attached to the same device but allocates a new
+ * pagetable. This is used to allow multiple pagetables to be attached to the
+ * same device.
+ */
+struct msm_mmu *msm_iommu_new_dynamic(struct msm_mmu *base)
+{
+ struct msm_iommu *base_iommu = to_msm_iommu(base);
+ struct iommu_domain *domain;
+ struct msm_mmu *mmu;
+ int ret, val = 1;
+
+ /* Don't continue if the base domain didn't have the support we need */
+	if (!base || !base_iommu->allow_dynamic)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ domain = iommu_domain_alloc(&platform_bus_type);
+ if (!domain)
+ return ERR_PTR(-ENODEV);
+
+ mmu = _msm_iommu_new(base->dev, domain, &dynamic_funcs);
+
+ if (IS_ERR(mmu)) {
+ if (domain)
+ iommu_domain_free(domain);
+ return mmu;
+ }
+
+ ret = iommu_domain_set_attr(domain, DOMAIN_ATTR_DYNAMIC, &val);
+ if (ret) {
+ msm_iommu_destroy(mmu);
+ return ERR_PTR(ret);
+ }
+
+ /* Set the context bank to match the base domain */
+ iommu_domain_set_attr(domain, DOMAIN_ATTR_CONTEXT_BANK,
+ &base_iommu->cb);
+
+ return mmu;
+}
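+
+/*
+ * Minimal usage sketch (assumes 'base' is the GPU's already-attached MMU;
+ * error handling elided; the dynamic attach op ignores names/cnt):
+ *
+ *	struct msm_mmu *dyn = msm_iommu_new_dynamic(base);
+ *
+ *	if (!IS_ERR(dyn))
+ *		dyn->funcs->attach(dyn, NULL, 0);
+ */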
diff --git a/drivers/gpu/drm/msm/msm_iommu.h b/drivers/gpu/drm/msm/msm_iommu.h
new file mode 100644
index 000000000000..d005cfb9758f
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_iommu.h
@@ -0,0 +1,37 @@
+/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _MSM_IOMMU_H_
+#define _MSM_IOMMU_H_
+
+#include "msm_mmu.h"
+
+struct msm_iommu {
+ struct msm_mmu base;
+ struct iommu_domain *domain;
+ int cb;
+ phys_addr_t ttbr0;
+ uint32_t contextidr;
+ bool allow_dynamic;
+
+ struct clk *clocks[5];
+ int nr_clocks;
+};
+#define to_msm_iommu(x) container_of(x, struct msm_iommu, base)
+
+static inline bool msm_iommu_allow_dynamic(struct msm_mmu *mmu)
+{
+ struct msm_iommu *iommu = to_msm_iommu(mmu);
+
+ return iommu->allow_dynamic;
+}
+#endif
diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h
index 8464b9e04b63..501f12bef00d 100644
--- a/drivers/gpu/drm/msm/msm_mmu.h
+++ b/drivers/gpu/drm/msm/msm_mmu.h
@@ -32,7 +32,7 @@ enum msm_mmu_domain_type {
struct msm_mmu_funcs {
int (*attach)(struct msm_mmu *mmu, const char **names, int cnt);
- void (*detach)(struct msm_mmu *mmu, const char **names, int cnt);
+ void (*detach)(struct msm_mmu *mmu);
int (*map)(struct msm_mmu *mmu, uint64_t iova, struct sg_table *sgt,
int prot);
int (*unmap)(struct msm_mmu *mmu, uint64_t iova, struct sg_table *sgt);
@@ -62,5 +62,6 @@ static inline void msm_mmu_init(struct msm_mmu *mmu, struct device *dev,
struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain);
struct msm_mmu *msm_smmu_new(struct device *dev,
enum msm_mmu_domain_type domain);
+struct msm_mmu *msm_iommu_new_dynamic(struct msm_mmu *orig);
#endif /* __MSM_MMU_H__ */
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 1f14b908b221..14a16c4578d9 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -18,12 +18,13 @@
#include "msm_ringbuffer.h"
#include "msm_gpu.h"
-struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size)
+struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id)
{
struct msm_ringbuffer *ring;
int ret;
- size = ALIGN(size, 4); /* size should be dword aligned */
+	/* We assume everywhere that MSM_GPU_RINGBUFFER_SZ is a power of 2 */
+ BUILD_BUG_ON(!is_power_of_2(MSM_GPU_RINGBUFFER_SZ));
ring = kzalloc(sizeof(*ring), GFP_KERNEL);
if (!ring) {
@@ -32,7 +33,8 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size)
}
ring->gpu = gpu;
- ring->bo = msm_gem_new(gpu->dev, size, MSM_BO_WC);
+ ring->id = id;
+ ring->bo = msm_gem_new(gpu->dev, MSM_GPU_RINGBUFFER_SZ, MSM_BO_WC);
if (IS_ERR(ring->bo)) {
ret = PTR_ERR(ring->bo);
ring->bo = NULL;
@@ -40,10 +42,11 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size)
}
ring->start = msm_gem_vaddr_locked(ring->bo);
- ring->end = ring->start + (size / 4);
+ ring->end = ring->start + (MSM_GPU_RINGBUFFER_SZ >> 2);
+ ring->next = ring->start;
ring->cur = ring->start;
- ring->size = size;
+ spin_lock_init(&ring->lock);
return ring;
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h
index 6e0e1049fa4f..1e84905073bf 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.h
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.h
@@ -22,12 +22,15 @@
struct msm_ringbuffer {
struct msm_gpu *gpu;
- int size;
+ int id;
struct drm_gem_object *bo;
- uint32_t *start, *end, *cur;
+ uint32_t *start, *end, *cur, *next;
+ uint64_t iova;
+ uint32_t submitted_fence;
+ spinlock_t lock;
};
-struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size);
+struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id);
void msm_ringbuffer_destroy(struct msm_ringbuffer *ring);
/* ringbuffer helpers (the parts that are same for a3xx/a2xx/z180..) */
@@ -35,9 +38,13 @@ void msm_ringbuffer_destroy(struct msm_ringbuffer *ring);
static inline void
OUT_RING(struct msm_ringbuffer *ring, uint32_t data)
{
- if (ring->cur == ring->end)
- ring->cur = ring->start;
- *(ring->cur++) = data;
+ /*
+ * ring->next points to the current command being written - it won't be
+ * committed as ring->cur until the flush
+ */
+ if (ring->next == ring->end)
+ ring->next = ring->start;
+ *(ring->next++) = data;
}
#endif /* __MSM_RINGBUFFER_H__ */
diff --git a/drivers/gpu/drm/msm/msm_smmu.c b/drivers/gpu/drm/msm/msm_smmu.c
index 500e8a5e6247..c99f51e09700 100644
--- a/drivers/gpu/drm/msm/msm_smmu.c
+++ b/drivers/gpu/drm/msm/msm_smmu.c
@@ -86,7 +86,7 @@ static int msm_smmu_attach(struct msm_mmu *mmu, const char **names, int cnt)
return 0;
}
-static void msm_smmu_detach(struct msm_mmu *mmu, const char **names, int cnt)
+static void msm_smmu_detach(struct msm_mmu *mmu)
{
struct msm_smmu *smmu = to_msm_smmu(mmu);
struct msm_smmu_client *client = msm_smmu_to_client(smmu);
diff --git a/drivers/gpu/drm/msm/msm_snapshot.c b/drivers/gpu/drm/msm/msm_snapshot.c
new file mode 100644
index 000000000000..30f3e5c64ebd
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_snapshot.c
@@ -0,0 +1,105 @@
+/* Copyright (c) 2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "msm_gpu.h"
+#include "msm_gem.h"
+#include "msm_snapshot_api.h"
+
+void msm_snapshot_destroy(struct msm_gpu *gpu, struct msm_snapshot *snapshot)
+{
+ struct drm_device *dev = gpu->dev;
+ struct msm_drm_private *priv = dev->dev_private;
+ struct platform_device *pdev = priv->gpu_pdev;
+
+ if (!snapshot)
+ return;
+
+ dma_free_coherent(&pdev->dev, SZ_1M, snapshot->ptr,
+ snapshot->physaddr);
+
+ kfree(snapshot);
+}
+
+struct msm_snapshot *msm_snapshot_new(struct msm_gpu *gpu)
+{
+ struct drm_device *dev = gpu->dev;
+ struct msm_drm_private *priv = dev->dev_private;
+ struct platform_device *pdev = priv->gpu_pdev;
+ struct msm_snapshot *snapshot;
+
+ snapshot = kzalloc(sizeof(*snapshot), GFP_KERNEL);
+ if (!snapshot)
+ return ERR_PTR(-ENOMEM);
+
+ snapshot->ptr = dma_alloc_coherent(&pdev->dev, SZ_1M,
+ &snapshot->physaddr, GFP_KERNEL);
+
+ if (!snapshot->ptr) {
+ kfree(snapshot);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ seq_buf_init(&snapshot->buf, snapshot->ptr, SZ_1M);
+
+ return snapshot;
+}
+
+int msm_gpu_snapshot(struct msm_gpu *gpu, struct msm_snapshot *snapshot)
+{
+ int ret;
+ struct msm_snapshot_header header;
+ uint64_t val;
+
+ if (!snapshot)
+ return -ENOMEM;
+
+ /*
+ * For now, blow away the snapshot and take a new one - the most
+ * interesting hang is the last one we saw
+ */
+ seq_buf_init(&snapshot->buf, snapshot->ptr, SZ_1M);
+
+ header.magic = SNAPSHOT_MAGIC;
+ gpu->funcs->get_param(gpu, MSM_PARAM_GPU_ID, &val);
+ header.gpuid = lower_32_bits(val);
+
+ gpu->funcs->get_param(gpu, MSM_PARAM_CHIP_ID, &val);
+ header.chipid = lower_32_bits(val);
+
+ seq_buf_putmem(&snapshot->buf, &header, sizeof(header));
+
+ ret = gpu->funcs->snapshot(gpu, snapshot);
+
+ if (!ret) {
+ struct msm_snapshot_section_header end;
+
+ end.magic = SNAPSHOT_SECTION_MAGIC;
+ end.id = SNAPSHOT_SECTION_END;
+ end.size = sizeof(end);
+
+ seq_buf_putmem(&snapshot->buf, &end, sizeof(end));
+
+		dev_info(gpu->dev->dev, "GPU snapshot created [%pa (%u bytes)]\n",
+			&snapshot->physaddr, seq_buf_used(&snapshot->buf));
+ }
+
+ return ret;
+}
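+
+/*
+ * The resulting snapshot buffer is laid out as:
+ *
+ *	[msm_snapshot_header][section][section]...[SNAPSHOT_SECTION_END]
+ *
+ * where each section starts with a msm_snapshot_section_header
+ */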
+
+int msm_snapshot_write(struct msm_gpu *gpu, struct seq_file *m)
+{
+ if (gpu && gpu->snapshot)
+ seq_write(m, gpu->snapshot->ptr,
+ seq_buf_used(&gpu->snapshot->buf));
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/msm/msm_snapshot.h b/drivers/gpu/drm/msm/msm_snapshot.h
new file mode 100644
index 000000000000..247e1358c885
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_snapshot.h
@@ -0,0 +1,85 @@
+/* Copyright (c) 2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef MSM_SNAPSHOT_H_
+#define MSM_SNAPSHOT_H_
+
+#include <linux/string.h>
+#include <linux/seq_buf.h>
+#include "msm_snapshot_api.h"
+
+struct msm_snapshot {
+ void *ptr;
+ struct seq_buf buf;
+ phys_addr_t physaddr;
+ uint32_t index;
+ uint32_t remain;
+ unsigned long timestamp;
+ void *priv;
+};
+
+/* Write a uint32_t value to the next position in the snapshot buffer */
+static inline void SNAPSHOT_WRITE_U32(struct msm_snapshot *snapshot,
+ uint32_t value)
+{
+ seq_buf_putmem(&snapshot->buf, &value, sizeof(value));
+}
+
+/* Copy a block of memory to the next position in the snapshot buffer */
+static inline void SNAPSHOT_MEMCPY(struct msm_snapshot *snapshot, void *src,
+ uint32_t size)
+{
+ if (size)
+ seq_buf_putmem(&snapshot->buf, src, size);
+}
+
+static inline bool _snapshot_header(struct msm_snapshot *snapshot,
+ struct msm_snapshot_section_header *header,
+ u32 headsz, u32 datasz, u32 id)
+{
+ u32 size = headsz + datasz;
+
+ if (seq_buf_buffer_left(&snapshot->buf) <= size)
+ return false;
+
+ /* Write the section header */
+ header->magic = SNAPSHOT_SECTION_MAGIC;
+ header->id = id;
+ header->size = headsz + datasz;
+
+	/* Copy the header into the snapshot buffer */
+ seq_buf_putmem(&snapshot->buf, header, headsz);
+
+ /* The caller will fill in the data from here */
+ return true;
+}
+
+/* SNAPSHOT_HEADER
+ * _snapshot: pointer to struct msm_snapshot
+ * _header: Local variable containing the sub-section header
+ * _id: Section ID to write
+ * _dwords: Size of the data section (in dwords)
+ */
+#define SNAPSHOT_HEADER(_snapshot, _header, _id, _dwords) \
+	_snapshot_header((_snapshot), \
+		(struct msm_snapshot_section_header *) &(_header), \
+		sizeof(_header), (_dwords) << 2, (_id))
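+
+/*
+ * Usage sketch (illustrative) from a target's snapshot callback, emitting
+ * 'count' register dwords; the header fields other than the section header
+ * itself must be filled in before the call:
+ *
+ *	struct msm_snapshot_regs header = { .count = count };
+ *
+ *	if (SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_REGS_V2,
+ *			count))
+ *		(write 'count' dwords with SNAPSHOT_WRITE_U32())
+ */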
+
+struct msm_gpu;
+
+struct msm_snapshot *msm_snapshot_new(struct msm_gpu *gpu);
+void msm_snapshot_destroy(struct msm_gpu *gpu, struct msm_snapshot *snapshot);
+int msm_gpu_snapshot(struct msm_gpu *gpu, struct msm_snapshot *snapshot);
+int msm_snapshot_write(struct msm_gpu *gpu, struct seq_file *m);
+
+#endif
diff --git a/drivers/gpu/drm/msm/msm_snapshot_api.h b/drivers/gpu/drm/msm/msm_snapshot_api.h
new file mode 100644
index 000000000000..9f0adb9ee784
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_snapshot_api.h
@@ -0,0 +1,121 @@
+/* Copyright (c) 2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef MSM_SNAPSHOT_API_H_
+#define MSM_SNAPSHOT_API_H_
+
+#include <linux/types.h>
+
+/* High word is the magic, low word is the snapshot header version */
+#define SNAPSHOT_MAGIC 0x504D0002
+
+struct msm_snapshot_header {
+ __u32 magic;
+ __u32 gpuid;
+ __u32 chipid;
+} __packed;
+
+#define SNAPSHOT_SECTION_MAGIC 0xABCD
+
+struct msm_snapshot_section_header {
+ __u16 magic;
+ __u16 id;
+ __u32 size;
+} __packed;
+
+/* Section identifiers */
+#define SNAPSHOT_SECTION_OS 0x0101
+#define SNAPSHOT_SECTION_REGS_V2 0x0202
+#define SNAPSHOT_SECTION_RB_V2 0x0302
+#define SNAPSHOT_SECTION_IB_V2 0x0402
+#define SNAPSHOT_SECTION_INDEXED_REGS 0x0501
+#define SNAPSHOT_SECTION_DEBUG 0x0901
+#define SNAPSHOT_SECTION_DEBUGBUS 0x0A01
+#define SNAPSHOT_SECTION_GPU_OBJECT_V2 0x0B02
+#define SNAPSHOT_SECTION_MEMLIST_V2 0x0E02
+#define SNAPSHOT_SECTION_SHADER 0x1201
+#define SNAPSHOT_SECTION_END 0xFFFF
+
+#define SNAPSHOT_OS_LINUX_V3 0x00000202
+
+struct msm_snapshot_linux {
+ struct msm_snapshot_section_header header;
+ int osid;
+ __u32 seconds;
+ __u32 power_flags;
+ __u32 power_level;
+ __u32 power_interval_timeout;
+ __u32 grpclk;
+ __u32 busclk;
+ __u64 ptbase;
+ __u32 pid;
+ __u32 current_context;
+ __u32 ctxtcount;
+ unsigned char release[32];
+ unsigned char version[32];
+ unsigned char comm[16];
+} __packed;
+
+struct msm_snapshot_ringbuffer {
+ struct msm_snapshot_section_header header;
+ int start;
+ int end;
+ int rbsize;
+ int wptr;
+ int rptr;
+ int count;
+ __u32 timestamp_queued;
+ __u32 timestamp_retired;
+ __u64 gpuaddr;
+ __u32 id;
+} __packed;
+
+struct msm_snapshot_regs {
+ struct msm_snapshot_section_header header;
+ __u32 count;
+} __packed;
+
+struct msm_snapshot_indexed_regs {
+ struct msm_snapshot_section_header header;
+ __u32 index_reg;
+ __u32 data_reg;
+ __u32 start;
+ __u32 count;
+} __packed;
+
+#define SNAPSHOT_DEBUG_CP_MEQ 7
+#define SNAPSHOT_DEBUG_CP_PM4_RAM 8
+#define SNAPSHOT_DEBUG_CP_PFP_RAM 9
+#define SNAPSHOT_DEBUG_CP_ROQ 10
+#define SNAPSHOT_DEBUG_SHADER_MEMORY 11
+#define SNAPSHOT_DEBUG_CP_MERCIU 12
+
+struct msm_snapshot_debug {
+ struct msm_snapshot_section_header header;
+ __u32 type;
+ __u32 size;
+} __packed;
+
+struct msm_snapshot_debugbus {
+ struct msm_snapshot_section_header header;
+ __u32 id;
+ __u32 count;
+} __packed;
+
+struct msm_snapshot_shader {
+ struct msm_snapshot_section_header header;
+ __u32 type;
+ __u32 index;
+ __u32 size;
+} __packed;
+
+#endif
diff --git a/drivers/gpu/drm/msm/sde/sde_kms.c b/drivers/gpu/drm/msm/sde/sde_kms.c
index e906b66dcd95..576e7d1e7189 100644
--- a/drivers/gpu/drm/msm/sde/sde_kms.c
+++ b/drivers/gpu/drm/msm/sde/sde_kms.c
@@ -946,9 +946,8 @@ static int _sde_kms_mmu_destroy(struct sde_kms *sde_kms)
mmu = sde_kms->aspace[i]->mmu;
- mmu->funcs->detach(mmu, (const char **)iommu_ports,
- ARRAY_SIZE(iommu_ports));
- msm_gem_address_space_destroy(sde_kms->aspace[i]);
+ mmu->funcs->detach(mmu);
+ msm_gem_address_space_put(sde_kms->aspace[i]);
sde_kms->aspace[i] = NULL;
}
@@ -987,7 +986,7 @@ static int _sde_kms_mmu_init(struct sde_kms *sde_kms)
ARRAY_SIZE(iommu_ports));
if (ret) {
SDE_ERROR("failed to attach iommu %d: %d\n", i, ret);
- msm_gem_address_space_destroy(aspace);
+ msm_gem_address_space_put(aspace);
goto fail;
}
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index ce15e150277e..ce1eb562be36 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -249,17 +249,6 @@
#define RESUME_RETRY (0 << 0)
#define RESUME_TERMINATE (1 << 0)
-#define TTBCR2_SEP_SHIFT 15
-#define TTBCR2_SEP_UPSTREAM (0x7 << TTBCR2_SEP_SHIFT)
-
-#define TTBCR2_SEP_31 0
-#define TTBCR2_SEP_35 1
-#define TTBCR2_SEP_39 2
-#define TTBCR2_SEP_41 3
-#define TTBCR2_SEP_43 4
-#define TTBCR2_SEP_47 5
-#define TTBCR2_SEP_NOSIGN 7
-
#define TTBRn_ASID_SHIFT 48
#define FSR_MULTI (1 << 31)
@@ -1614,7 +1603,6 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);
if (smmu->version > ARM_SMMU_V1) {
reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
- reg |= TTBCR2_SEP_UPSTREAM;
writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR2);
}
} else {
@@ -1745,7 +1733,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
bool is_fast = smmu_domain->attributes & (1 << DOMAIN_ATTR_FAST);
- unsigned long quirks = 0;
+ unsigned long quirks =
+ smmu_domain->attributes & (1 << DOMAIN_ATTR_ENABLE_TTBR1) ?
+ IO_PGTABLE_QUIRK_ARM_TTBR1 : 0;
if (smmu_domain->smmu)
goto out;
@@ -1837,6 +1827,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
};
fmt = ARM_MSM_SECURE;
} else {
smmu_domain->pgtbl_cfg = (struct io_pgtable_cfg) {
.quirks = quirks,
.pgsize_bitmap = arm_smmu_ops.pgsize_bitmap,
@@ -3140,6 +3131,12 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
& (1 << DOMAIN_ATTR_PAGE_TABLE_FORCE_COHERENT));
ret = 0;
break;
+ case DOMAIN_ATTR_ENABLE_TTBR1:
+ *((int *)data) = !!(smmu_domain->attributes
+ & (1 << DOMAIN_ATTR_ENABLE_TTBR1));
+ ret = 0;
+ break;
default:
ret = -ENODEV;
break;
@@ -3283,6 +3280,12 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
ret = 0;
break;
}
+ case DOMAIN_ATTR_ENABLE_TTBR1:
+ if (*((int *)data))
+ smmu_domain->attributes |=
+ 1 << DOMAIN_ATTR_ENABLE_TTBR1;
+ ret = 0;
+ break;
default:
ret = -ENODEV;
break;
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 0d057ca92972..5f2b66286c0c 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -131,14 +131,21 @@
#define ARM_LPAE_TCR_TG0_64K (1 << 14)
#define ARM_LPAE_TCR_TG0_16K (2 << 14)
+#define ARM_LPAE_TCR_TG1_16K 1ULL
+#define ARM_LPAE_TCR_TG1_4K 2ULL
+#define ARM_LPAE_TCR_TG1_64K 3ULL
+
#define ARM_LPAE_TCR_SH0_SHIFT 12
#define ARM_LPAE_TCR_SH0_MASK 0x3
+#define ARM_LPAE_TCR_SH1_SHIFT 28
#define ARM_LPAE_TCR_SH_NS 0
#define ARM_LPAE_TCR_SH_OS 2
#define ARM_LPAE_TCR_SH_IS 3
#define ARM_LPAE_TCR_ORGN0_SHIFT 10
+#define ARM_LPAE_TCR_ORGN1_SHIFT 26
#define ARM_LPAE_TCR_IRGN0_SHIFT 8
+#define ARM_LPAE_TCR_IRGN1_SHIFT 24
#define ARM_LPAE_TCR_RGN_MASK 0x3
#define ARM_LPAE_TCR_RGN_NC 0
#define ARM_LPAE_TCR_RGN_WBWA 1
@@ -151,6 +158,9 @@
#define ARM_LPAE_TCR_T0SZ_SHIFT 0
#define ARM_LPAE_TCR_SZ_MASK 0xf
+#define ARM_LPAE_TCR_T1SZ_SHIFT 16
+#define ARM_LPAE_TCR_T1SZ_MASK 0x3f
+
#define ARM_LPAE_TCR_PS_SHIFT 16
#define ARM_LPAE_TCR_PS_MASK 0x7
@@ -167,6 +177,16 @@
#define ARM_LPAE_TCR_EPD1_SHIFT 23
#define ARM_LPAE_TCR_EPD1_FAULT 1
+#define ARM_LPAE_TCR_SEP_SHIFT (15 + 32)
+
+#define ARM_LPAE_TCR_SEP_31 0ULL
+#define ARM_LPAE_TCR_SEP_35 1ULL
+#define ARM_LPAE_TCR_SEP_39 2ULL
+#define ARM_LPAE_TCR_SEP_41 3ULL
+#define ARM_LPAE_TCR_SEP_43 4ULL
+#define ARM_LPAE_TCR_SEP_47 5ULL
+#define ARM_LPAE_TCR_SEP_UPSTREAM 7ULL
+
#define ARM_LPAE_MAIR_ATTR_SHIFT(n) ((n) << 3)
#define ARM_LPAE_MAIR_ATTR_MASK 0xff
#define ARM_LPAE_MAIR_ATTR_DEVICE 0x04
@@ -206,7 +226,7 @@ struct arm_lpae_io_pgtable {
unsigned long pg_shift;
unsigned long bits_per_level;
- void *pgd;
+ void *pgd[2];
};
typedef u64 arm_lpae_iopte;
@@ -524,14 +544,26 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
return pte;
}
+static inline arm_lpae_iopte *arm_lpae_get_table(
+ struct arm_lpae_io_pgtable *data, unsigned long iova)
+{
+ struct io_pgtable_cfg *cfg = &data->iop.cfg;
+
+ return ((cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) &&
+ (iova & (1UL << (cfg->ias - 1)))) ?
+ data->pgd[1] : data->pgd[0];
+}
+
static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
phys_addr_t paddr, size_t size, int iommu_prot)
{
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
- arm_lpae_iopte *ptep = data->pgd;
+ arm_lpae_iopte *ptep;
int ret, lvl = ARM_LPAE_START_LVL(data);
arm_lpae_iopte prot;
+ ptep = arm_lpae_get_table(data, iova);
+
/* If no access, then nothing to do */
if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
return 0;
@@ -554,7 +586,7 @@ static int arm_lpae_map_sg(struct io_pgtable_ops *ops, unsigned long iova,
{
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
struct io_pgtable_cfg *cfg = &data->iop.cfg;
- arm_lpae_iopte *ptep = data->pgd;
+ arm_lpae_iopte *ptep;
int lvl = ARM_LPAE_START_LVL(data);
arm_lpae_iopte prot;
struct scatterlist *s;
@@ -563,6 +595,8 @@ static int arm_lpae_map_sg(struct io_pgtable_ops *ops, unsigned long iova,
unsigned int min_pagesz;
struct map_state ms;
+ ptep = arm_lpae_get_table(data, iova);
+
/* If no access, then nothing to do */
if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
goto out_err;
@@ -672,7 +706,10 @@ static void arm_lpae_free_pgtable(struct io_pgtable *iop)
{
struct arm_lpae_io_pgtable *data = io_pgtable_to_data(iop);
- __arm_lpae_free_pgtable(data, ARM_LPAE_START_LVL(data), data->pgd);
+ __arm_lpae_free_pgtable(data, ARM_LPAE_START_LVL(data), data->pgd[0]);
+ if (data->pgd[1])
+ __arm_lpae_free_pgtable(data, ARM_LPAE_START_LVL(data),
+ data->pgd[1]);
kfree(data);
}
@@ -800,9 +837,11 @@ static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
size_t unmapped = 0;
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
struct io_pgtable *iop = &data->iop;
- arm_lpae_iopte *ptep = data->pgd;
+ arm_lpae_iopte *ptep;
int lvl = ARM_LPAE_START_LVL(data);
+ ptep = arm_lpae_get_table(data, iova);
+
while (unmapped < size) {
size_t ret, size_to_unmap, remaining;
@@ -828,7 +867,10 @@ static int arm_lpae_iova_to_pte(struct arm_lpae_io_pgtable *data,
unsigned long iova, int *plvl_ret,
arm_lpae_iopte *ptep_ret)
{
- arm_lpae_iopte pte, *ptep = data->pgd;
+ arm_lpae_iopte pte, *ptep;
+
+ ptep = arm_lpae_get_table(data, iova);
+
*plvl_ret = ARM_LPAE_START_LVL(data);
*ptep_ret = 0;
@@ -994,6 +1036,71 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
return data;
}
+static u64 arm64_lpae_setup_ttbr1(struct io_pgtable_cfg *cfg,
+ struct arm_lpae_io_pgtable *data)
+{
+ u64 reg;
+
+	/* If TTBR1 is disabled, disable speculative walks through TTBR1 */
+ if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)) {
+ reg = ARM_LPAE_TCR_EPD1;
+ reg |= (ARM_LPAE_TCR_SEP_UPSTREAM << ARM_LPAE_TCR_SEP_SHIFT);
+ return reg;
+ }
+
+ if (cfg->iommu_dev && cfg->iommu_dev->archdata.dma_coherent)
+ reg = (ARM_LPAE_TCR_SH_OS << ARM_LPAE_TCR_SH1_SHIFT) |
+ (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN1_SHIFT) |
+ (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN1_SHIFT);
+ else
+ reg = (ARM_LPAE_TCR_SH_OS << ARM_LPAE_TCR_SH1_SHIFT) |
+ (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_IRGN1_SHIFT) |
+ (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_ORGN1_SHIFT);
+
+ switch (1 << data->pg_shift) {
+ case SZ_4K:
+ reg |= (ARM_LPAE_TCR_TG1_4K << 30);
+ break;
+ case SZ_16K:
+ reg |= (ARM_LPAE_TCR_TG1_16K << 30);
+ break;
+ case SZ_64K:
+ reg |= (ARM_LPAE_TCR_TG1_64K << 30);
+ break;
+ }
+
+ /* Set T1SZ */
+ reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T1SZ_SHIFT;
+
+	/* Set the sign-extension position (SEP) based on the address size */
+ switch (cfg->ias) {
+ case 32:
+ reg |= (ARM_LPAE_TCR_SEP_31 << ARM_LPAE_TCR_SEP_SHIFT);
+ break;
+ case 36:
+ reg |= (ARM_LPAE_TCR_SEP_35 << ARM_LPAE_TCR_SEP_SHIFT);
+ break;
+ case 40:
+ reg |= (ARM_LPAE_TCR_SEP_39 << ARM_LPAE_TCR_SEP_SHIFT);
+ break;
+ case 42:
+ reg |= (ARM_LPAE_TCR_SEP_41 << ARM_LPAE_TCR_SEP_SHIFT);
+ break;
+ case 44:
+ reg |= (ARM_LPAE_TCR_SEP_43 << ARM_LPAE_TCR_SEP_SHIFT);
+ break;
+ case 48:
+ reg |= (ARM_LPAE_TCR_SEP_47 << ARM_LPAE_TCR_SEP_SHIFT);
+ break;
+ default:
+ reg |= (ARM_LPAE_TCR_SEP_UPSTREAM << ARM_LPAE_TCR_SEP_SHIFT);
+ break;
+ }
+
+ return reg;
+}
+
static struct io_pgtable *
arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
{
@@ -1050,8 +1157,9 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T0SZ_SHIFT;
- /* Disable speculative walks through TTBR1 */
- reg |= ARM_LPAE_TCR_EPD1;
+ /* Bring in the TTBR1 configuration */
+ reg |= arm64_lpae_setup_ttbr1(cfg, data);
+
cfg->arm_lpae_s1_cfg.tcr = reg;
/* MAIRs */
@@ -1066,16 +1174,33 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
cfg->arm_lpae_s1_cfg.mair[1] = 0;
/* Looking good; allocate a pgd */
- data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg, cookie);
- if (!data->pgd)
+ data->pgd[0] = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg,
+ cookie);
+ if (!data->pgd[0])
goto out_free_data;
+
+ if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) {
+ data->pgd[1] = __arm_lpae_alloc_pages(data->pgd_size,
+ GFP_KERNEL, cfg, cookie);
+ if (!data->pgd[1]) {
+ __arm_lpae_free_pages(data->pgd[0], data->pgd_size, cfg,
+ cookie);
+ goto out_free_data;
+ }
+ } else {
+ data->pgd[1] = NULL;
+ }
+
/* Ensure the empty pgd is visible before any actual TTBR write */
wmb();
/* TTBRs */
- cfg->arm_lpae_s1_cfg.ttbr[0] = virt_to_phys(data->pgd);
- cfg->arm_lpae_s1_cfg.ttbr[1] = 0;
+ cfg->arm_lpae_s1_cfg.ttbr[0] = virt_to_phys(data->pgd[0]);
+
+ if (data->pgd[1])
+ cfg->arm_lpae_s1_cfg.ttbr[1] = virt_to_phys(data->pgd[1]);
+
return &data->iop;
out_free_data:
@@ -1155,15 +1280,16 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
cfg->arm_lpae_s2_cfg.vtcr = reg;
/* Allocate pgd pages */
- data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg, cookie);
- if (!data->pgd)
+ data->pgd[0] = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg,
+ cookie);
+ if (!data->pgd[0])
goto out_free_data;
/* Ensure the empty pgd is visible before any actual TTBR write */
wmb();
/* VTTBR */
- cfg->arm_lpae_s2_cfg.vttbr = virt_to_phys(data->pgd);
+ cfg->arm_lpae_s2_cfg.vttbr = virt_to_phys(data->pgd[0]);
return &data->iop;
out_free_data:
@@ -1261,7 +1387,7 @@ static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops)
cfg->pgsize_bitmap, cfg->ias);
pr_err("data: %d levels, 0x%zx pgd_size, %lu pg_shift, %lu bits_per_level, pgd @ %p\n",
data->levels, data->pgd_size, data->pg_shift,
- data->bits_per_level, data->pgd);
+ data->bits_per_level, data->pgd[0]);
}
#define __FAIL(ops, i) ({ \
diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
index a3f366f559a7..f4533040806f 100644
--- a/drivers/iommu/io-pgtable.h
+++ b/drivers/iommu/io-pgtable.h
@@ -62,6 +62,7 @@ struct io_pgtable_cfg {
*/
#define IO_PGTABLE_QUIRK_ARM_NS (1 << 0) /* Set NS bit in PTEs */
#define IO_PGTABLE_QUIRK_PAGE_TABLE_COHERENT (1 << 1)
+ #define IO_PGTABLE_QUIRK_ARM_TTBR1 (1 << 2) /* Allocate TTBR1 PT */
int quirks;
unsigned long pgsize_bitmap;
unsigned int ias;
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 1b3f20e8fb74..c4c25651ff21 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -136,6 +136,7 @@ enum iommu_attr {
DOMAIN_ATTR_EARLY_MAP,
DOMAIN_ATTR_PAGE_TABLE_IS_COHERENT,
DOMAIN_ATTR_PAGE_TABLE_FORCE_COHERENT,
+ DOMAIN_ATTR_ENABLE_TTBR1,
DOMAIN_ATTR_MAX,
};
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index 587d35ce1638..d2f19ac6f536 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -40,6 +40,15 @@
#define MSM_PIPE_2D1 0x02
#define MSM_PIPE_3D0 0x10
+/* The pipe-id just uses the lower bits, so can be OR'd with flags in
+ * the upper 16 bits (which could be extended further, if needed, maybe
+ * we extend/overload the pipe-id some day to deal with multiple rings,
+ * but even then I don't think we need the full lower 16 bits).
+ */
+#define MSM_PIPE_ID_MASK 0xffff
+#define MSM_PIPE_ID(x) ((x) & MSM_PIPE_ID_MASK)
+#define MSM_PIPE_FLAGS(x) ((x) & ~MSM_PIPE_ID_MASK)
+
/* timeouts are specified in clock-monotonic absolute times (to simplify
* restarting interrupted ioctls). The following struct is logically the
* same as 'struct timespec' but 32/64b ABI safe.
@@ -54,6 +63,7 @@ struct drm_msm_timespec {
#define MSM_PARAM_CHIP_ID 0x03
#define MSM_PARAM_MAX_FREQ 0x04
#define MSM_PARAM_TIMESTAMP 0x05
+#define MSM_PARAM_GMEM_BASE 0x06
struct drm_msm_param {
__u32 pipe; /* in, MSM_PIPE_x */
@@ -67,6 +77,7 @@ struct drm_msm_param {
#define MSM_BO_SCANOUT 0x00000001 /* scanout capable */
#define MSM_BO_GPU_READONLY 0x00000002
+#define MSM_BO_PRIVILEGED 0x00000004
#define MSM_BO_CACHE_MASK 0x000f0000
/* cache modes */
#define MSM_BO_CACHED 0x00010000
@@ -177,12 +188,18 @@ struct drm_msm_gem_submit_bo {
__u64 presumed; /* in/out, presumed buffer address */
};
+/* Valid submit ioctl flags: */
+#define MSM_SUBMIT_RING_MASK 0x000F0000
+#define MSM_SUBMIT_RING_SHIFT 16
+
+#define MSM_SUBMIT_FLAGS (MSM_SUBMIT_RING_MASK)
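+
+/* An (illustrative) submit targeting ring 2 on the 3d pipe would set:
+ *
+ *	flags = MSM_PIPE_3D0 | (2 << MSM_SUBMIT_RING_SHIFT);
+ */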
+
/* Each cmdstream submit consists of a table of buffers involved, and
* one or more cmdstream buffers. This allows for conditional execution
* (context-restore), and IB buffers needed for per tile/bin draw cmds.
*/
struct drm_msm_gem_submit {
- __u32 pipe; /* in, MSM_PIPE_x */
+ __u32 flags; /* MSM_PIPE_x | MSM_SUBMIT_x */
__u32 fence; /* out */
__u32 nr_bos; /* in, number of submit_bo's */
__u32 nr_cmds; /* in, number of submit_cmd's */