diff options
author | Linux Build Service Account <lnxbuild@quicinc.com> | 2017-07-04 01:20:54 -0700 |
---|---|---|
committer | Gerrit - the friendly Code Review server <code-review@localhost> | 2017-07-04 01:20:53 -0700 |
commit | 4a528fd7b8e00235d4f314a4cf30adacede764b4 (patch) | |
tree | 4ef7925390eb15f1a2a7006732ce40f1e56ff194 /drivers/gpu | |
parent | 01f0e05f48e6a9f0eff1ea603cea23886562b5c9 (diff) | |
parent | 130cbfae0872f54fdd73c8e549bd2630de3068cf (diff) |
Merge "drm/msm: Add kernel side submit profiling and tracing"
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/drm/msm/Makefile | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 100 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_gem.h | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_gem_submit.c | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_gpu.c | 17 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_ringbuffer.h | 12 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_trace.h | 98 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_trace_points.c | 18 |
8 files changed, 203 insertions, 56 deletions
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 999d5e45e5c5..84125b3d1f95 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -149,6 +149,7 @@ msm_drm-$(CONFIG_DRM_MSM) += \ msm_ringbuffer.o \ msm_prop.o \ msm_snapshot.o \ - msm_submitqueue.o + msm_submitqueue.o \ + msm_trace_points.o obj-$(CONFIG_DRM_MSM) += msm_drm.o diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index f8dbc843f852..687ca96d0636 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -13,6 +13,7 @@ #include "msm_gem.h" #include "msm_iommu.h" +#include "msm_trace.h" #include "a5xx_gpu.h" #define SECURE_VA_START 0xc0000000 @@ -100,12 +101,31 @@ static void a5xx_set_pagetable(struct msm_gpu *gpu, struct msm_ringbuffer *ring, OUT_RING(ring, 1); } +/* Inline PM4 code to get the current value of the 19.2 Mhz always on counter */ +static void a5xx_get_ticks(struct msm_ringbuffer *ring, uint64_t iova) +{ + /* + * Set bit[30] to make this command a 64 bit write operation. + * bits[18-29] is to specify number of consecutive registers + * to copy, so set this space with 2, since we want to copy + * data from REG_A5XX_RBBM_ALWAYSON_COUNTER_LO and [HI]. + */ + + OUT_PKT7(ring, CP_REG_TO_MEM, 3); + OUT_RING(ring, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO | + (1 << 30) | (2 << 18)); + OUT_RING(ring, lower_32_bits(iova)); + OUT_RING(ring, upper_32_bits(iova)); +} + static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); struct msm_ringbuffer *ring = gpu->rb[submit->ring]; unsigned int i, ibs = 0; + unsigned long flags; + u64 ktime, ticks; a5xx_set_pagetable(gpu, ring, submit->aspace); @@ -139,24 +159,15 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_RING(ring, 1); } - /* Record the always on counter before command execution */ - if (submit->profile_buf_iova) { - uint64_t gpuaddr = submit->profile_buf_iova + - offsetof(struct drm_msm_gem_submit_profile_buffer, - ticks_submitted); + /* Record the GPU ticks at command start for kernel side profiling */ + a5xx_get_ticks(ring, + RING_TICKS_IOVA(ring, submit->tick_index, started)); - /* - * Set bit[30] to make this command a 64 bit write operation. - * bits[18-29] is to specify number of consecutive registers - * to copy, so set this space with 2, since we want to copy - * data from REG_A5XX_RBBM_ALWAYSON_COUNTER_LO and [HI]. - */ - OUT_PKT7(ring, CP_REG_TO_MEM, 3); - OUT_RING(ring, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO | - (1 << 30) | (2 << 18)); - OUT_RING(ring, lower_32_bits(gpuaddr)); - OUT_RING(ring, upper_32_bits(gpuaddr)); - } + /* And for the user profiling too if it is enabled */ + if (submit->profile_buf_iova) + a5xx_get_ticks(ring, submit->profile_buf_iova + + offsetof(struct drm_msm_gem_submit_profile_buffer, + ticks_submitted)); /* Submit the commands */ for (i = 0; i < submit->nr_cmds; i++) { @@ -190,18 +201,15 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) OUT_PKT7(ring, CP_YIELD_ENABLE, 1); OUT_RING(ring, 0x01); + /* Record the GPU ticks at command retire for kernel side profiling */ + a5xx_get_ticks(ring, + RING_TICKS_IOVA(ring, submit->tick_index, retired)); + /* Record the always on counter after command execution */ - if (submit->profile_buf_iova) { - uint64_t gpuaddr = submit->profile_buf_iova + + if (submit->profile_buf_iova) + a5xx_get_ticks(ring, submit->profile_buf_iova + offsetof(struct drm_msm_gem_submit_profile_buffer, - ticks_retired); - - OUT_PKT7(ring, CP_REG_TO_MEM, 3); - OUT_RING(ring, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO | - (1 << 30) | (2 << 18)); - OUT_RING(ring, lower_32_bits(gpuaddr)); - OUT_RING(ring, upper_32_bits(gpuaddr)); - } + ticks_retired)); /* Write the fence to the scratch register */ OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1); @@ -237,33 +245,27 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) /* Set bit 0 to trigger an interrupt on preempt complete */ OUT_RING(ring, 0x01); - if (submit->profile_buf_iova) { - unsigned long flags; - uint64_t ktime; - struct drm_msm_gem_submit_profile_buffer *profile_buf = - submit->profile_buf_vaddr; - - /* - * With this profiling, we are trying to create closest - * possible mapping between the CPU time domain(monotonic clock) - * and the GPU time domain(ticks). In order to make this - * happen, we need to briefly turn off interrupts to make sure - * interrupts do not run between collecting these two samples. - */ - local_irq_save(flags); - - profile_buf->ticks_queued = gpu_read64(gpu, - REG_A5XX_RBBM_ALWAYSON_COUNTER_LO, - REG_A5XX_RBBM_ALWAYSON_COUNTER_HI); + /* + * Get the current kernel time and ticks with interrupts off so we don't + * get interrupted between the operations and skew the numbers + */ - ktime = ktime_get_raw_ns(); + local_irq_save(flags); + ticks = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO, + REG_A5XX_RBBM_ALWAYSON_COUNTER_HI); + ktime = ktime_get_raw_ns(); + local_irq_restore(flags); - local_irq_restore(flags); + if (submit->profile_buf) { + /* Write the data into the use-specified profile buffer */ - profile_buf->queue_time = ktime; - profile_buf->submit_time = ktime; + submit->profile_buf->queue_time = ktime; + submit->profile_buf->submit_time = ktime; + submit->profile_buf->ticks_queued = ticks; } + trace_msm_submitted(submit, ticks, ktime); + a5xx_flush(gpu, ring); /* Check to see if we need to start preemption */ diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index e8528892939f..df9ddadc5c5c 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -151,9 +151,10 @@ struct msm_gem_submit { u32 flags; bool valid; uint64_t profile_buf_iova; - void *profile_buf_vaddr; + struct drm_msm_gem_submit_profile_buffer *profile_buf; bool secure; struct msm_gpu_submitqueue *queue; + int tick_index; unsigned int nr_cmds; unsigned int nr_bos; struct { diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 7ccc146f3ae1..b73379aa9ed7 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -18,6 +18,7 @@ #include "msm_drv.h" #include "msm_gpu.h" #include "msm_gem.h" +#include "msm_trace.h" /* * Cmdstream submission: @@ -55,7 +56,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, submit->nr_bos = 0; submit->nr_cmds = 0; - submit->profile_buf_vaddr = NULL; + submit->profile_buf = NULL; submit->profile_buf_iova = 0; submit->cmd = (void *)&submit->bos[nr_bos]; @@ -510,9 +511,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (submit_cmd.type == MSM_SUBMIT_CMD_PROFILE_BUF) { submit->profile_buf_iova = submit->cmd[i].iova; - submit->profile_buf_vaddr = - msm_gem_vaddr(&msm_obj->base) + - submit_cmd.submit_offset; + submit->profile_buf = msm_gem_vaddr(&msm_obj->base) + + submit_cmd.submit_offset; } if (submit->valid) diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index d896e436251f..6bac1cf6f7c5 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -18,7 +18,7 @@ #include "msm_gpu.h" #include "msm_gem.h" #include "msm_mmu.h" - +#include "msm_trace.h" /* * Power Management: @@ -494,9 +494,18 @@ static void retire_submits(struct msm_gpu *gpu, struct msm_ringbuffer *ring, WARN_ON(!mutex_is_locked(&dev->struct_mutex)); list_for_each_entry_safe(submit, tmp, &ring->submits, node) { + struct msm_memptr_ticks *ticks; + if (submit->fence > fence) break; + ticks = &(ring->memptrs->ticks[submit->tick_index]); + + /* Add memory barrier to ensure the timer ticks are posted */ + rmb(); + + trace_msm_retired(submit, ticks->started, ticks->retired); + msm_gem_submit_free(submit); } } @@ -578,6 +587,12 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) ring->submitted_fence = submit->fence; + submit->tick_index = ring->tick_index; + ring->tick_index = (ring->tick_index + 1) % + ARRAY_SIZE(ring->memptrs->ticks); + + trace_msm_queued(submit); + update_sw_cntrs(gpu); for (i = 0; i < submit->nr_bos; i++) { diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h index 3eb9a86b2a2e..b19ce75a4cc9 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.h +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h @@ -23,13 +23,24 @@ #define rbmemptr(ring, member) \ ((ring)->memptrs_iova + offsetof(struct msm_memptrs, member)) +struct msm_memptr_ticks { + uint64_t started; + uint64_t retired; +}; + struct msm_memptrs { volatile uint32_t rptr; volatile uint32_t fence; volatile uint64_t ttbr0; volatile unsigned int contextidr; + struct msm_memptr_ticks ticks[128]; }; +#define RING_TICKS_IOVA(ring, index, field) \ + ((ring)->memptrs_iova + offsetof(struct msm_memptrs, ticks) + \ + ((index) * sizeof(struct msm_memptr_ticks)) + \ + offsetof(struct msm_memptr_ticks, field)) + struct msm_ringbuffer { struct msm_gpu *gpu; int id; @@ -42,6 +53,7 @@ struct msm_ringbuffer { struct msm_memptrs *memptrs; uint64_t memptrs_iova; + int tick_index; }; struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, diff --git a/drivers/gpu/drm/msm/msm_trace.h b/drivers/gpu/drm/msm/msm_trace.h new file mode 100644 index 000000000000..68c7ff78ffc2 --- /dev/null +++ b/drivers/gpu/drm/msm/msm_trace.h @@ -0,0 +1,98 @@ +/* Copyright (c) 2017 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#if !defined(_MSM_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _MSM_TRACE_H_ + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM msm_drm +#define TRACE_INCLUDE_FILE msm_trace + +TRACE_EVENT(msm_queued, + TP_PROTO(struct msm_gem_submit *submit), + TP_ARGS(submit), + TP_STRUCT__entry( + __field(uint32_t, queue_id) + __field(uint32_t, fence_id) + __field(int, ring) + ), + TP_fast_assign( + __entry->queue_id = submit->queue->id; + __entry->fence_id = submit->fence; + __entry->ring = submit->ring; + ), + TP_printk( + "queue=%u fence=%u ring=%d", + __entry->queue_id, __entry->fence_id, __entry->ring + ) +); + +TRACE_EVENT(msm_submitted, + TP_PROTO(struct msm_gem_submit *submit, uint64_t ticks, uint64_t nsecs), + TP_ARGS(submit, ticks, nsecs), + TP_STRUCT__entry( + __field(uint32_t, queue_id) + __field(uint32_t, fence_id) + __field(int, ring) + __field(uint64_t, ticks) + __field(uint64_t, nsecs) + ), + TP_fast_assign( + __entry->queue_id = submit->queue->id; + __entry->fence_id = submit->fence; + __entry->ring = submit->ring; + __entry->ticks = ticks; + __entry->nsecs = nsecs; + ), + TP_printk( + "queue=%u fence=%u ring=%d ticks=%lld nsecs=%llu", + __entry->queue_id, __entry->fence_id, __entry->ring, + __entry->ticks, __entry->nsecs + ) +); + +TRACE_EVENT(msm_retired, + TP_PROTO(struct msm_gem_submit *submit, uint64_t start_ticks, + uint64_t retire_ticks), + TP_ARGS(submit, start_ticks, retire_ticks), + TP_STRUCT__entry( + __field(uint32_t, queue_id) + __field(uint32_t, fence_id) + __field(int, ring) + __field(uint64_t, start_ticks) + __field(uint64_t, retire_ticks) + ), + TP_fast_assign( + __entry->queue_id = submit->queue->id; + __entry->fence_id = submit->fence; + __entry->ring = submit->ring; + __entry->start_ticks = start_ticks; + __entry->retire_ticks = retire_ticks; + ), + TP_printk( + "queue=%u fence=%u ring=%d started=%lld retired=%lld", + __entry->queue_id, __entry->fence_id, __entry->ring, + __entry->start_ticks, __entry->retire_ticks + ) +); + + +#endif + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#include <trace/define_trace.h> + diff --git a/drivers/gpu/drm/msm/msm_trace_points.c b/drivers/gpu/drm/msm/msm_trace_points.c new file mode 100644 index 000000000000..41d9a975ac92 --- /dev/null +++ b/drivers/gpu/drm/msm/msm_trace_points.c @@ -0,0 +1,18 @@ +/* Copyright (c) 2017 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "msm_gem.h" +#include "msm_gpu.h" + +#define CREATE_TRACE_POINTS +#include "msm_trace.h" |