summaryrefslogtreecommitdiff
path: root/drivers/gpu
diff options
context:
space:
mode:
authorLinux Build Service Account <lnxbuild@quicinc.com>2017-07-04 01:20:54 -0700
committerGerrit - the friendly Code Review server <code-review@localhost>2017-07-04 01:20:53 -0700
commit4a528fd7b8e00235d4f314a4cf30adacede764b4 (patch)
tree4ef7925390eb15f1a2a7006732ce40f1e56ff194 /drivers/gpu
parent01f0e05f48e6a9f0eff1ea603cea23886562b5c9 (diff)
parent130cbfae0872f54fdd73c8e549bd2630de3068cf (diff)
Merge "drm/msm: Add kernel side submit profiling and tracing"
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/msm/Makefile3
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_gpu.c100
-rw-r--r--drivers/gpu/drm/msm/msm_gem.h3
-rw-r--r--drivers/gpu/drm/msm/msm_gem_submit.c8
-rw-r--r--drivers/gpu/drm/msm/msm_gpu.c17
-rw-r--r--drivers/gpu/drm/msm/msm_ringbuffer.h12
-rw-r--r--drivers/gpu/drm/msm/msm_trace.h98
-rw-r--r--drivers/gpu/drm/msm/msm_trace_points.c18
8 files changed, 203 insertions, 56 deletions
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index 999d5e45e5c5..84125b3d1f95 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -149,6 +149,7 @@ msm_drm-$(CONFIG_DRM_MSM) += \
msm_ringbuffer.o \
msm_prop.o \
msm_snapshot.o \
- msm_submitqueue.o
+ msm_submitqueue.o \
+ msm_trace_points.o
obj-$(CONFIG_DRM_MSM) += msm_drm.o
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index f8dbc843f852..687ca96d0636 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -13,6 +13,7 @@
#include "msm_gem.h"
#include "msm_iommu.h"
+#include "msm_trace.h"
#include "a5xx_gpu.h"
#define SECURE_VA_START 0xc0000000
@@ -100,12 +101,31 @@ static void a5xx_set_pagetable(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
OUT_RING(ring, 1);
}
+/* Inline PM4 code to get the current value of the 19.2 MHz always-on counter */
+static void a5xx_get_ticks(struct msm_ringbuffer *ring, uint64_t iova)
+{
+ /*
+ * Set bit[30] to make this command a 64-bit write operation.
+ * Bits[18-29] specify the number of consecutive registers to
+ * copy; set that field to 2 since we want to copy data from
+ * REG_A5XX_RBBM_ALWAYSON_COUNTER_LO and [HI].
+ */
+
+ OUT_PKT7(ring, CP_REG_TO_MEM, 3);
+ OUT_RING(ring, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO |
+ (1 << 30) | (2 << 18));
+ OUT_RING(ring, lower_32_bits(iova));
+ OUT_RING(ring, upper_32_bits(iova));
+}
+
static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
struct msm_ringbuffer *ring = gpu->rb[submit->ring];
unsigned int i, ibs = 0;
+ unsigned long flags;
+ u64 ktime, ticks;
a5xx_set_pagetable(gpu, ring, submit->aspace);
@@ -139,24 +159,15 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
OUT_RING(ring, 1);
}
- /* Record the always on counter before command execution */
- if (submit->profile_buf_iova) {
- uint64_t gpuaddr = submit->profile_buf_iova +
- offsetof(struct drm_msm_gem_submit_profile_buffer,
- ticks_submitted);
+ /* Record the GPU ticks at command start for kernel side profiling */
+ a5xx_get_ticks(ring,
+ RING_TICKS_IOVA(ring, submit->tick_index, started));
- /*
- * Set bit[30] to make this command a 64 bit write operation.
- * bits[18-29] is to specify number of consecutive registers
- * to copy, so set this space with 2, since we want to copy
- * data from REG_A5XX_RBBM_ALWAYSON_COUNTER_LO and [HI].
- */
- OUT_PKT7(ring, CP_REG_TO_MEM, 3);
- OUT_RING(ring, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO |
- (1 << 30) | (2 << 18));
- OUT_RING(ring, lower_32_bits(gpuaddr));
- OUT_RING(ring, upper_32_bits(gpuaddr));
- }
+ /* And for the user profiling too if it is enabled */
+ if (submit->profile_buf_iova)
+ a5xx_get_ticks(ring, submit->profile_buf_iova +
+ offsetof(struct drm_msm_gem_submit_profile_buffer,
+ ticks_submitted));
/* Submit the commands */
for (i = 0; i < submit->nr_cmds; i++) {
@@ -190,18 +201,15 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
OUT_RING(ring, 0x01);
+ /* Record the GPU ticks at command retire for kernel side profiling */
+ a5xx_get_ticks(ring,
+ RING_TICKS_IOVA(ring, submit->tick_index, retired));
+
/* Record the always on counter after command execution */
- if (submit->profile_buf_iova) {
- uint64_t gpuaddr = submit->profile_buf_iova +
+ if (submit->profile_buf_iova)
+ a5xx_get_ticks(ring, submit->profile_buf_iova +
offsetof(struct drm_msm_gem_submit_profile_buffer,
- ticks_retired);
-
- OUT_PKT7(ring, CP_REG_TO_MEM, 3);
- OUT_RING(ring, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO |
- (1 << 30) | (2 << 18));
- OUT_RING(ring, lower_32_bits(gpuaddr));
- OUT_RING(ring, upper_32_bits(gpuaddr));
- }
+ ticks_retired));
/* Write the fence to the scratch register */
OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
@@ -237,33 +245,27 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
/* Set bit 0 to trigger an interrupt on preempt complete */
OUT_RING(ring, 0x01);
- if (submit->profile_buf_iova) {
- unsigned long flags;
- uint64_t ktime;
- struct drm_msm_gem_submit_profile_buffer *profile_buf =
- submit->profile_buf_vaddr;
-
- /*
- * With this profiling, we are trying to create closest
- * possible mapping between the CPU time domain(monotonic clock)
- * and the GPU time domain(ticks). In order to make this
- * happen, we need to briefly turn off interrupts to make sure
- * interrupts do not run between collecting these two samples.
- */
- local_irq_save(flags);
-
- profile_buf->ticks_queued = gpu_read64(gpu,
- REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
- REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);
+ /*
+ * Get the current kernel time and ticks with interrupts off so we don't
+ * get interrupted between the operations and skew the numbers
+ */
- ktime = ktime_get_raw_ns();
+ local_irq_save(flags);
+ ticks = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
+ REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);
+ ktime = ktime_get_raw_ns();
+ local_irq_restore(flags);
- local_irq_restore(flags);
+ if (submit->profile_buf) {
+ /* Write the data into the user-specified profile buffer */
- profile_buf->queue_time = ktime;
- profile_buf->submit_time = ktime;
+ submit->profile_buf->queue_time = ktime;
+ submit->profile_buf->submit_time = ktime;
+ submit->profile_buf->ticks_queued = ticks;
}
+ trace_msm_submitted(submit, ticks, ktime);
+
a5xx_flush(gpu, ring);
/* Check to see if we need to start preemption */
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index e8528892939f..df9ddadc5c5c 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -151,9 +151,10 @@ struct msm_gem_submit {
u32 flags;
bool valid;
uint64_t profile_buf_iova;
- void *profile_buf_vaddr;
+ struct drm_msm_gem_submit_profile_buffer *profile_buf;
bool secure;
struct msm_gpu_submitqueue *queue;
+ int tick_index;
unsigned int nr_cmds;
unsigned int nr_bos;
struct {
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 7ccc146f3ae1..b73379aa9ed7 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -18,6 +18,7 @@
#include "msm_drv.h"
#include "msm_gpu.h"
#include "msm_gem.h"
+#include "msm_trace.h"
/*
* Cmdstream submission:
@@ -55,7 +56,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
submit->nr_bos = 0;
submit->nr_cmds = 0;
- submit->profile_buf_vaddr = NULL;
+ submit->profile_buf = NULL;
submit->profile_buf_iova = 0;
submit->cmd = (void *)&submit->bos[nr_bos];
@@ -510,9 +511,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
if (submit_cmd.type == MSM_SUBMIT_CMD_PROFILE_BUF) {
submit->profile_buf_iova = submit->cmd[i].iova;
- submit->profile_buf_vaddr =
- msm_gem_vaddr(&msm_obj->base) +
- submit_cmd.submit_offset;
+ submit->profile_buf = msm_gem_vaddr(&msm_obj->base)
+ + submit_cmd.submit_offset;
}
if (submit->valid)
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index d896e436251f..6bac1cf6f7c5 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -18,7 +18,7 @@
#include "msm_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"
-
+#include "msm_trace.h"
/*
* Power Management:
@@ -494,9 +494,18 @@ static void retire_submits(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
WARN_ON(!mutex_is_locked(&dev->struct_mutex));
list_for_each_entry_safe(submit, tmp, &ring->submits, node) {
+ struct msm_memptr_ticks *ticks;
+
if (submit->fence > fence)
break;
+ ticks = &(ring->memptrs->ticks[submit->tick_index]);
+
+ /* Add memory barrier to ensure the timer ticks are posted */
+ rmb();
+
+ trace_msm_retired(submit, ticks->started, ticks->retired);
+
msm_gem_submit_free(submit);
}
}
@@ -578,6 +587,12 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
ring->submitted_fence = submit->fence;
+ submit->tick_index = ring->tick_index;
+ ring->tick_index = (ring->tick_index + 1) %
+ ARRAY_SIZE(ring->memptrs->ticks);
+
+ trace_msm_queued(submit);
+
update_sw_cntrs(gpu);
for (i = 0; i < submit->nr_bos; i++) {
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h
index 3eb9a86b2a2e..b19ce75a4cc9 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.h
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.h
@@ -23,13 +23,24 @@
#define rbmemptr(ring, member) \
((ring)->memptrs_iova + offsetof(struct msm_memptrs, member))
+struct msm_memptr_ticks {
+ uint64_t started;
+ uint64_t retired;
+};
+
struct msm_memptrs {
volatile uint32_t rptr;
volatile uint32_t fence;
volatile uint64_t ttbr0;
volatile unsigned int contextidr;
+ struct msm_memptr_ticks ticks[128];
};
+#define RING_TICKS_IOVA(ring, index, field) \
+ ((ring)->memptrs_iova + offsetof(struct msm_memptrs, ticks) + \
+ ((index) * sizeof(struct msm_memptr_ticks)) + \
+ offsetof(struct msm_memptr_ticks, field))
+
struct msm_ringbuffer {
struct msm_gpu *gpu;
int id;
@@ -42,6 +53,7 @@ struct msm_ringbuffer {
struct msm_memptrs *memptrs;
uint64_t memptrs_iova;
+ int tick_index;
};
struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
diff --git a/drivers/gpu/drm/msm/msm_trace.h b/drivers/gpu/drm/msm/msm_trace.h
new file mode 100644
index 000000000000..68c7ff78ffc2
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_trace.h
@@ -0,0 +1,98 @@
+/* Copyright (c) 2017 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#if !defined(_MSM_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MSM_TRACE_H_
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM msm_drm
+#define TRACE_INCLUDE_FILE msm_trace
+
+TRACE_EVENT(msm_queued,
+ TP_PROTO(struct msm_gem_submit *submit),
+ TP_ARGS(submit),
+ TP_STRUCT__entry(
+ __field(uint32_t, queue_id)
+ __field(uint32_t, fence_id)
+ __field(int, ring)
+ ),
+ TP_fast_assign(
+ __entry->queue_id = submit->queue->id;
+ __entry->fence_id = submit->fence;
+ __entry->ring = submit->ring;
+ ),
+ TP_printk(
+ "queue=%u fence=%u ring=%d",
+ __entry->queue_id, __entry->fence_id, __entry->ring
+ )
+);
+
+TRACE_EVENT(msm_submitted,
+ TP_PROTO(struct msm_gem_submit *submit, uint64_t ticks, uint64_t nsecs),
+ TP_ARGS(submit, ticks, nsecs),
+ TP_STRUCT__entry(
+ __field(uint32_t, queue_id)
+ __field(uint32_t, fence_id)
+ __field(int, ring)
+ __field(uint64_t, ticks)
+ __field(uint64_t, nsecs)
+ ),
+ TP_fast_assign(
+ __entry->queue_id = submit->queue->id;
+ __entry->fence_id = submit->fence;
+ __entry->ring = submit->ring;
+ __entry->ticks = ticks;
+ __entry->nsecs = nsecs;
+ ),
+ TP_printk(
+ "queue=%u fence=%u ring=%d ticks=%lld nsecs=%llu",
+ __entry->queue_id, __entry->fence_id, __entry->ring,
+ __entry->ticks, __entry->nsecs
+ )
+);
+
+TRACE_EVENT(msm_retired,
+ TP_PROTO(struct msm_gem_submit *submit, uint64_t start_ticks,
+ uint64_t retire_ticks),
+ TP_ARGS(submit, start_ticks, retire_ticks),
+ TP_STRUCT__entry(
+ __field(uint32_t, queue_id)
+ __field(uint32_t, fence_id)
+ __field(int, ring)
+ __field(uint64_t, start_ticks)
+ __field(uint64_t, retire_ticks)
+ ),
+ TP_fast_assign(
+ __entry->queue_id = submit->queue->id;
+ __entry->fence_id = submit->fence;
+ __entry->ring = submit->ring;
+ __entry->start_ticks = start_ticks;
+ __entry->retire_ticks = retire_ticks;
+ ),
+ TP_printk(
+ "queue=%u fence=%u ring=%d started=%lld retired=%lld",
+ __entry->queue_id, __entry->fence_id, __entry->ring,
+ __entry->start_ticks, __entry->retire_ticks
+ )
+);
+
+
+#endif
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#include <trace/define_trace.h>
+
diff --git a/drivers/gpu/drm/msm/msm_trace_points.c b/drivers/gpu/drm/msm/msm_trace_points.c
new file mode 100644
index 000000000000..41d9a975ac92
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_trace_points.c
@@ -0,0 +1,18 @@
+/* Copyright (c) 2017 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "msm_gem.h"
+#include "msm_gpu.h"
+
+#define CREATE_TRACE_POINTS
+#include "msm_trace.h"