diff options
-rw-r--r-- | drivers/gpu/msm/adreno.c | 7 | ||||
-rw-r--r-- | drivers/gpu/msm/adreno.h | 2 | ||||
-rw-r--r-- | drivers/gpu/msm/adreno_a4xx_snapshot.c | 10 | ||||
-rw-r--r-- | drivers/gpu/msm/adreno_a5xx.c | 28 | ||||
-rw-r--r-- | drivers/gpu/msm/adreno_a5xx.h | 2 | ||||
-rw-r--r-- | drivers/gpu/msm/adreno_a5xx_snapshot.c | 174 | ||||
-rw-r--r-- | drivers/gpu/msm/adreno_dispatch.c | 34 | ||||
-rw-r--r-- | drivers/gpu/msm/adreno_snapshot.c | 3 | ||||
-rw-r--r-- | drivers/gpu/msm/kgsl.c | 20 | ||||
-rw-r--r-- | drivers/gpu/msm/kgsl.h | 7 | ||||
-rw-r--r-- | drivers/gpu/msm/kgsl_debugfs.c | 2 | ||||
-rw-r--r-- | drivers/gpu/msm/kgsl_iommu.c | 50 | ||||
-rw-r--r-- | drivers/gpu/msm/kgsl_pool.c | 25 | ||||
-rw-r--r-- | drivers/gpu/msm/kgsl_pool.h | 1 | ||||
-rw-r--r-- | drivers/gpu/msm/kgsl_pwrctrl.c | 31 | ||||
-rw-r--r-- | drivers/gpu/msm/kgsl_pwrctrl.h | 2 | ||||
-rw-r--r-- | drivers/gpu/msm/kgsl_pwrscale.c | 3 | ||||
-rw-r--r-- | drivers/gpu/msm/kgsl_sharedmem.c | 131 | ||||
-rw-r--r-- | drivers/gpu/msm/kgsl_sharedmem.h | 43 |
19 files changed, 411 insertions, 164 deletions
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index a802671acba0..90f855c52c7a 100644 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -1130,7 +1130,10 @@ static int adreno_init(struct kgsl_device *device) struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev); int ret; - kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT); + ret = kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT); + if (ret) + return ret; + /* * initialization only needs to be done once initially until * device is shutdown @@ -1595,6 +1598,8 @@ static int adreno_stop(struct kgsl_device *device) adreno_ringbuffer_stop(adreno_dev); + kgsl_pwrscale_update_stats(device); + adreno_irqctrl(adreno_dev, 0); adreno_ocmem_free(adreno_dev); diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h index 9f462bca26ce..f5fb4e48c3ee 100644 --- a/drivers/gpu/msm/adreno.h +++ b/drivers/gpu/msm/adreno.h @@ -613,11 +613,13 @@ struct adreno_vbif_platform { * struct adreno_vbif_snapshot_registers - Holds an array of vbif registers * listed for snapshot dump for a particular core * @version: vbif version + * @mask: vbif revision mask * @registers: vbif registers listed for snapshot dump * @count: count of vbif registers listed for snapshot */ struct adreno_vbif_snapshot_registers { const unsigned int version; + const unsigned int mask; const unsigned int *registers; const int count; }; diff --git a/drivers/gpu/msm/adreno_a4xx_snapshot.c b/drivers/gpu/msm/adreno_a4xx_snapshot.c index 6921af5c0ab5..540b42b984c0 100644 --- a/drivers/gpu/msm/adreno_a4xx_snapshot.c +++ b/drivers/gpu/msm/adreno_a4xx_snapshot.c @@ -168,15 +168,15 @@ static const unsigned int a4xx_vbif_ver_20050000_registers[] = { static const struct adreno_vbif_snapshot_registers a4xx_vbif_snapshot_registers[] = { - { 0x20000000, a4xx_vbif_ver_20000000_registers, + { 0x20000000, 0xFFFF0000, a4xx_vbif_ver_20000000_registers, ARRAY_SIZE(a4xx_vbif_ver_20000000_registers)/2}, - { 0x20020000, a4xx_vbif_ver_20020000_registers, + { 0x20020000, 0xFFFF0000, a4xx_vbif_ver_20020000_registers, ARRAY_SIZE(a4xx_vbif_ver_20020000_registers)/2}, - { 0x20050000, a4xx_vbif_ver_20050000_registers, + { 0x20050000, 0xFFFF0000, a4xx_vbif_ver_20050000_registers, ARRAY_SIZE(a4xx_vbif_ver_20050000_registers)/2}, - { 0x20070000, a4xx_vbif_ver_20020000_registers, + { 0x20070000, 0xFFFF0000, a4xx_vbif_ver_20020000_registers, ARRAY_SIZE(a4xx_vbif_ver_20020000_registers)/2}, - { 0x20090000, a4xx_vbif_ver_20050000_registers, + { 0x20090000, 0xFFFF0000, a4xx_vbif_ver_20050000_registers, ARRAY_SIZE(a4xx_vbif_ver_20050000_registers)/2}, }; diff --git a/drivers/gpu/msm/adreno_a5xx.c b/drivers/gpu/msm/adreno_a5xx.c index 96f72c59e4cd..1a0fbf6728be 100644 --- a/drivers/gpu/msm/adreno_a5xx.c +++ b/drivers/gpu/msm/adreno_a5xx.c @@ -2373,17 +2373,25 @@ static int a5xx_microcode_read(struct adreno_device *adreno_dev) { int ret; - ret = _load_firmware(KGSL_DEVICE(adreno_dev), - adreno_dev->gpucore->pm4fw_name, &adreno_dev->pm4, - &adreno_dev->pm4_fw_size, &adreno_dev->pm4_fw_version); - if (ret) - return ret; + if (adreno_dev->pm4.hostptr == NULL) { + ret = _load_firmware(KGSL_DEVICE(adreno_dev), + adreno_dev->gpucore->pm4fw_name, + &adreno_dev->pm4, + &adreno_dev->pm4_fw_size, + &adreno_dev->pm4_fw_version); + if (ret) + return ret; + } - ret = _load_firmware(KGSL_DEVICE(adreno_dev), - adreno_dev->gpucore->pfpfw_name, &adreno_dev->pfp, - &adreno_dev->pfp_fw_size, &adreno_dev->pfp_fw_version); - if (ret) - return ret; + if (adreno_dev->pfp.hostptr == NULL) { + ret = _load_firmware(KGSL_DEVICE(adreno_dev), + adreno_dev->gpucore->pfpfw_name, + &adreno_dev->pfp, + &adreno_dev->pfp_fw_size, + &adreno_dev->pfp_fw_version); + if (ret) + return ret; + } ret = _load_gpmu_firmware(adreno_dev); if (ret) diff --git a/drivers/gpu/msm/adreno_a5xx.h b/drivers/gpu/msm/adreno_a5xx.h index 7965bb7b5440..27d5a4b31c71 100644 --- a/drivers/gpu/msm/adreno_a5xx.h +++ b/drivers/gpu/msm/adreno_a5xx.h @@ -52,7 +52,7 @@ #define A5XX_CP_CTXRECORD_MAGIC_REF 0x27C4BAFCUL /* Size of each CP preemption record */ -#define A5XX_CP_CTXRECORD_SIZE_IN_BYTES 0x100000 +#define A5XX_CP_CTXRECORD_SIZE_IN_BYTES 0x10000 /* Size of the preemption counter block (in bytes) */ #define A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE (16 * 4) diff --git a/drivers/gpu/msm/adreno_a5xx_snapshot.c b/drivers/gpu/msm/adreno_a5xx_snapshot.c index 4f368a8f93f3..04d82844a5e9 100644 --- a/drivers/gpu/msm/adreno_a5xx_snapshot.c +++ b/drivers/gpu/msm/adreno_a5xx_snapshot.c @@ -128,6 +128,9 @@ static const struct adreno_debugbus_block a5xx_debugbus_blocks[] = { #define A5XX_NUM_AXI_ARB_BLOCKS 2 #define A5XX_NUM_XIN_BLOCKS 4 +/* Width of A5XX_CP_DRAW_STATE_ADDR is 8 bits */ +#define A5XX_CP_DRAW_STATE_ADDR_WIDTH 8 + /* a5xx_snapshot_cp_pm4() - Dump PM4 data in snapshot */ static size_t a5xx_snapshot_cp_pm4(struct kgsl_device *device, u8 *buf, size_t remain, void *priv) @@ -326,8 +329,7 @@ static void a5xx_snapshot_debugbus(struct kgsl_device *device, } } -static const unsigned int a5xx_vbif_ver_20040000_registers[] = { - /* VBIF version 0x20040000*/ +static const unsigned int a5xx_vbif_ver_20xxxxxx_registers[] = { 0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x302C, 0x3030, 0x3030, 0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, 0x3042, 0x3042, 0x3049, 0x3049, 0x3058, 0x3058, 0x305A, 0x3061, @@ -341,10 +343,8 @@ static const unsigned int a5xx_vbif_ver_20040000_registers[] = { static const struct adreno_vbif_snapshot_registers a5xx_vbif_snapshot_registers[] = { - { 0x20040000, a5xx_vbif_ver_20040000_registers, - ARRAY_SIZE(a5xx_vbif_ver_20040000_registers)/2}, - { 0x20040001, a5xx_vbif_ver_20040000_registers, - ARRAY_SIZE(a5xx_vbif_ver_20040000_registers)/2}, + { 0x20000000, 0xFF000000, a5xx_vbif_ver_20xxxxxx_registers, + ARRAY_SIZE(a5xx_vbif_ver_20xxxxxx_registers)/2}, }; /* @@ -379,7 +379,7 @@ static const unsigned int a5xx_registers[] = { /* VPC */ 0x0E60, 0x0E7C, /* UCHE */ - 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0xEA0, 0xEA8, 0xEB0, 0xEB2, + 0x0E80, 0x0E8F, 0x0E90, 0x0E96, 0xEA0, 0xEA8, 0xEB0, 0xEB2, /* RB CTX 0 */ 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9, 0xE1B0, 0xE1B6, @@ -414,49 +414,49 @@ static const unsigned int a5xx_registers[] = { 0xB000, 0xB97F, 0xB9A0, 0xB9BF, }; -/* - * The HLSQ registers can only be read via the crash dumper (not AHB) so they - * need to be in their own array because the array above does double duty for - * the fallback path too - */ -static const unsigned int a5xx_hlsq_registers[] = { +struct a5xx_hlsq_sp_tp_regs { + unsigned int statetype; + unsigned int ahbaddr; + unsigned int size; + uint64_t offset; +}; + +static struct a5xx_hlsq_sp_tp_regs a5xx_hlsq_sp_tp_registers[] = { + /* HSLQ non context. 0xe32 - 0xe3f are holes so don't include them */ + { 0x35, 0xE00, 0x32 }, + /* HLSQ CTX 0 2D */ + { 0x31, 0x2080, 0x1 }, + /* HLSQ CTX 1 2D */ + { 0x33, 0x2480, 0x1 }, + /* HLSQ CTX 0 3D. 0xe7e2 - 0xe7ff are holes so don't inculde them */ + { 0x32, 0xE780, 0x62 }, + /* HLSQ CTX 1 3D. 0xefe2 - 0xefff are holes so don't include them */ + { 0x34, 0xEF80, 0x62 }, + /* SP non context */ - 0x0EC0, 0xEC2, 0xED0, 0xEE0, 0xEF0, 0xEF2, 0xEFA, 0xEFF, + { 0x3f, 0x0EC0, 0x40 }, /* SP CTX 0 2D */ - 0x2040, 0x2040, + { 0x3d, 0x2040, 0x1 }, /* SP CTX 1 2D */ - 0x2440, 0x2440, - /* SP CTXT 0 3D */ - 0xE580, 0xE580, 0xE584, 0xE58B, 0xE590, 0xE5B1, 0xE5C0, 0xE5DF, - 0xE5F0, 0xE5F9, 0xE600, 0xE608, 0xE610, 0xE631, 0xE640, 0xE661, - 0xE670, 0xE673, 0xE6F0, 0xE6F0, - /* SP CTXT 1 3D */ - 0xED80, 0xED80, 0xED84, 0xED8B, 0xED90, 0xEDB1, 0xEDC0, 0xEDDF, - 0xEDF0, 0xEDF9, 0xEE00, 0xEE08, 0xEE10, 0xEE31, 0xEE40, 0xEE61, - 0xEE70, 0xEE73, 0xEEF0, 0xEEF0, - /* TP non context */ - 0xF00, 0xF03, 0xF08, 0xF08, 0xF10, 0xF1B, - /* TP CTX 0 2D */ - 0x2000, 0x2009, - /* TP CTX 1 2D */ - 0x2400, 0x2409, + { 0x3b, 0x2440, 0x1 }, + /* SP CTX 0 3D */ + { 0x3e, 0xE580, 0x180 }, + /* SP CTX 1 3D */ + { 0x3c, 0xED80, 0x180 }, + + /* TP non context. 0x0f1c - 0x0f3f are holes so don't include them */ + { 0x3a, 0x0F00, 0x1c }, + /* TP CTX 0 2D. 0x200a - 0x200f are holes so don't include them */ + { 0x38, 0x2000, 0xa }, + /* TP CTX 1 2D. 0x240a - 0x240f are holes so don't include them */ + { 0x36, 0x2400, 0xa }, /* TP CTX 0 3D */ - 0xE700, 0xE707, 0xE70E, 0xE731, - 0xE750, 0xE751, 0xE75A, 0xE764, 0xE76C, 0xE77F, + { 0x39, 0xE700, 0x80 }, /* TP CTX 1 3D */ - 0xEF00, 0xEF07, 0xEF0E, 0xEF31, - 0xEF50, 0xEF51, 0xEF5A, 0xEF64, 0xEF6C, 0xEF7F, - /* HLSQ non context */ - 0xE00, 0xE01, 0xE04, 0xE06, 0xE08, 0xE09, 0xE10, 0xE17, - 0xE20, 0xE25, - /* HLSQ CTXT 0 3D */ - 0xE784, 0xE789, 0xE78B, 0xE796, 0xE7A0, 0xE7A2, 0xE7B0, 0xE7BB, - 0xE7C0, 0xE7DD, 0xE7E0, 0xE7E1, - /* HLSQ CTXT 1 3D */ - 0xEF84, 0xEF89, 0xEF8B, 0xEF96, 0xEFA0, 0xEFA2, 0xEFB0, 0xEFBB, - 0xEFC0, 0xEFDD, 0xEFE0, 0xEFE1, + { 0x37, 0xEF00, 0x80 }, }; + #define A5XX_NUM_SHADER_BANKS 4 #define A5XX_SHADER_STATETYPE_SHIFT 8 @@ -652,7 +652,6 @@ static struct cdregs { unsigned int size; } _a5xx_cd_registers[] = { { a5xx_registers, ARRAY_SIZE(a5xx_registers) }, - { a5xx_hlsq_registers, ARRAY_SIZE(a5xx_hlsq_registers) }, }; #define REG_PAIR_COUNT(_a, _i) \ @@ -776,6 +775,46 @@ static void _a5xx_do_crashdump(struct kgsl_device *device) crash_dump_valid = true; } +static int get_hlsq_registers(struct kgsl_device *device, + const struct a5xx_hlsq_sp_tp_regs *regs, unsigned int *data) +{ + unsigned int i; + unsigned int *src = registers.hostptr + regs->offset; + + for (i = 0; i < regs->size; i++) { + *data++ = regs->ahbaddr + i; + *data++ = *(src + i); + } + + return (2 * regs->size); +} + +static size_t a5xx_snapshot_dump_hlsq_sp_tp_regs(struct kgsl_device *device, + u8 *buf, size_t remain, void *priv) +{ + struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf; + unsigned int *data = (unsigned int *)(buf + sizeof(*header)); + int count = 0, i; + + /* Figure out how many registers we are going to dump */ + for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++) + count += a5xx_hlsq_sp_tp_registers[i].size; + + if (remain < (count * 8) + sizeof(*header)) { + SNAPSHOT_ERR_NOMEM(device, "REGISTERS"); + return 0; + } + + for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++) + data += get_hlsq_registers(device, + &a5xx_hlsq_sp_tp_registers[i], data); + + header->count = count; + + /* Return the size of the section */ + return (count * 8) + sizeof(*header); +} + /* * a5xx_snapshot() - A5XX GPU snapshot function * @adreno_dev: Device being snapshotted @@ -806,6 +845,10 @@ void a5xx_snapshot(struct adreno_device *adreno_dev, a5xx_vbif_snapshot_registers, ARRAY_SIZE(a5xx_vbif_snapshot_registers)); + /* Dump SP TP HLSQ registers */ + kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, snapshot, + a5xx_snapshot_dump_hlsq_sp_tp_regs, NULL); + /* CP_PFP indexed registers */ kgsl_snapshot_indexed_registers(device, snapshot, A5XX_CP_PFP_STAT_ADDR, A5XX_CP_PFP_STAT_DATA, @@ -819,7 +862,7 @@ void a5xx_snapshot(struct adreno_device *adreno_dev, /* CP_DRAW_STATE */ kgsl_snapshot_indexed_registers(device, snapshot, A5XX_CP_DRAW_STATE_ADDR, A5XX_CP_DRAW_STATE_DATA, - 0, 128); + 0, 1 << A5XX_CP_DRAW_STATE_ADDR_WIDTH); /* * CP needs to be halted on a530v1 before reading CP_PFP_UCODE_DBG_DATA @@ -878,8 +921,8 @@ void a5xx_snapshot(struct adreno_device *adreno_dev, } -static int _a5xx_crashdump_init(struct a5xx_shader_block *block, uint64_t *ptr, - uint64_t *offset) +static int _a5xx_crashdump_init_shader(struct a5xx_shader_block *block, + uint64_t *ptr, uint64_t *offset) { int qwords = 0; unsigned int j; @@ -908,6 +951,31 @@ static int _a5xx_crashdump_init(struct a5xx_shader_block *block, uint64_t *ptr, return qwords; } +static int _a5xx_crashdump_init_hlsq(struct a5xx_hlsq_sp_tp_regs *regs, + uint64_t *ptr, uint64_t *offset) +{ + int qwords = 0; + + /* Program the aperture */ + ptr[qwords++] = + (regs->statetype << A5XX_SHADER_STATETYPE_SHIFT); + ptr[qwords++] = (((uint64_t) A5XX_HLSQ_DBG_READ_SEL << 44)) | + (1 << 21) | 1; + + /* Read all the data in one chunk */ + ptr[qwords++] = registers.gpuaddr + *offset; + ptr[qwords++] = + (((uint64_t) A5XX_HLSQ_DBG_AHB_READ_APERTURE << 44)) | + regs->size; + + /* Remember the offset of the first bank for easy access */ + regs->offset = *offset; + + *offset += regs->size * sizeof(unsigned int); + + return qwords; +} + void a5xx_crashdump_init(struct adreno_device *adreno_dev) { struct kgsl_device *device = KGSL_DEVICE(adreno_dev); @@ -954,6 +1022,11 @@ void a5xx_crashdump_init(struct adreno_device *adreno_dev) data_size += a5xx_shader_blocks[i].sz * sizeof(unsigned int) * A5XX_NUM_SHADER_BANKS; } + for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++) { + script_size += 32; + data_size += + a5xx_hlsq_sp_tp_registers[i].size * sizeof(unsigned int); + } /* Now allocate the script and data buffers */ @@ -968,7 +1041,6 @@ void a5xx_crashdump_init(struct adreno_device *adreno_dev) kgsl_free_global(KGSL_DEVICE(adreno_dev), &capturescript); return; } - /* Build the crash script */ ptr = (uint64_t *) capturescript.hostptr; @@ -987,9 +1059,13 @@ void a5xx_crashdump_init(struct adreno_device *adreno_dev) /* Program each shader block */ for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) { - ptr += _a5xx_crashdump_init(&a5xx_shader_blocks[i], ptr, + ptr += _a5xx_crashdump_init_shader(&a5xx_shader_blocks[i], ptr, &offset); } + /* Program the hlsq sp tp register sets */ + for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++) + ptr += _a5xx_crashdump_init_hlsq(&a5xx_hlsq_sp_tp_registers[i], + ptr, &offset); *ptr++ = 0; *ptr++ = 0; diff --git a/drivers/gpu/msm/adreno_dispatch.c b/drivers/gpu/msm/adreno_dispatch.c index ac3805800691..bfc547fe16d1 100644 --- a/drivers/gpu/msm/adreno_dispatch.c +++ b/drivers/gpu/msm/adreno_dispatch.c @@ -284,6 +284,7 @@ static void _retire_marker(struct kgsl_cmdbatch *cmdbatch) struct kgsl_context *context = cmdbatch->context; struct adreno_context *drawctxt = ADRENO_CONTEXT(cmdbatch->context); struct kgsl_device *device = context->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); /* * Write the start and end timestamp to the memstore to keep the @@ -301,7 +302,16 @@ static void _retire_marker(struct kgsl_cmdbatch *cmdbatch) /* Retire pending GPU events for the object */ kgsl_process_event_group(device, &context->events); - trace_adreno_cmdbatch_retired(cmdbatch, -1, 0, 0, drawctxt->rb, + /* + * For A3xx we still get the rptr from the CP_RB_RPTR instead of + * rptr scratch out address. At this point GPU clocks turned off. + * So avoid reading GPU register directly for A3xx. + */ + if (adreno_is_a3xx(adreno_dev)) + trace_adreno_cmdbatch_retired(cmdbatch, -1, 0, 0, drawctxt->rb, + 0); + else + trace_adreno_cmdbatch_retired(cmdbatch, -1, 0, 0, drawctxt->rb, adreno_get_rptr(drawctxt->rb)); kgsl_cmdbatch_destroy(cmdbatch); } @@ -613,12 +623,13 @@ static int sendcmd(struct adreno_device *adreno_dev, } } - mutex_unlock(&device->mutex); if (ret) { dispatcher->inflight--; dispatch_q->inflight--; + mutex_unlock(&device->mutex); + /* * Don't log a message in case of: * -ENOENT means that the context was detached before the @@ -642,6 +653,8 @@ static int sendcmd(struct adreno_device *adreno_dev, time.ticks, (unsigned long) secs, nsecs / 1000, drawctxt->rb, adreno_get_rptr(drawctxt->rb)); + mutex_unlock(&device->mutex); + cmdbatch->submit_ticks = time.ticks; dispatch_q->cmd_q[dispatch_q->tail] = cmdbatch; @@ -1923,9 +1936,20 @@ static void retire_cmdbatch(struct adreno_device *adreno_dev, if (test_bit(CMDBATCH_FLAG_PROFILE, &cmdbatch->priv)) cmdbatch_profile_ticks(adreno_dev, cmdbatch, &start, &end); - trace_adreno_cmdbatch_retired(cmdbatch, (int) dispatcher->inflight, - start, end, ADRENO_CMDBATCH_RB(cmdbatch), - adreno_get_rptr(drawctxt->rb)); + /* + * For A3xx we still get the rptr from the CP_RB_RPTR instead of + * rptr scratch out address. At this point GPU clocks turned off. + * So avoid reading GPU register directly for A3xx. + */ + if (adreno_is_a3xx(adreno_dev)) + trace_adreno_cmdbatch_retired(cmdbatch, + (int) dispatcher->inflight, start, end, + ADRENO_CMDBATCH_RB(cmdbatch), 0); + else + trace_adreno_cmdbatch_retired(cmdbatch, + (int) dispatcher->inflight, start, end, + ADRENO_CMDBATCH_RB(cmdbatch), + adreno_get_rptr(drawctxt->rb)); drawctxt->submit_retire_ticks[drawctxt->ticks_index] = end - cmdbatch->submit_ticks; diff --git a/drivers/gpu/msm/adreno_snapshot.c b/drivers/gpu/msm/adreno_snapshot.c index b069b16c75ef..0eff3da0e494 100644 --- a/drivers/gpu/msm/adreno_snapshot.c +++ b/drivers/gpu/msm/adreno_snapshot.c @@ -1118,7 +1118,8 @@ static const struct adreno_vbif_snapshot_registers *vbif_registers( adreno_readreg(adreno_dev, ADRENO_REG_VBIF_VERSION, &version); for (i = 0; i < count; i++) { - if (list[i].version == version) + if ((list[i].version & list[i].mask) == + (version & list[i].mask)) return &list[i]; } diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c index aec5533ad65d..48061b1a6f50 100644 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -2144,8 +2144,8 @@ static int kgsl_setup_dmabuf_useraddr(struct kgsl_device *device, } up_read(¤t->mm->mmap_sem); - if (dmabuf == NULL) - return -ENODEV; + if (IS_ERR_OR_NULL(dmabuf)) + return dmabuf ? PTR_ERR(dmabuf) : -ENODEV; ret = kgsl_setup_dma_buf(device, pagetable, entry, dmabuf); if (ret) { @@ -3663,19 +3663,15 @@ static int kgsl_mmap(struct file *file, struct vm_area_struct *vma) if (cache == KGSL_CACHEMODE_WRITEBACK || cache == KGSL_CACHEMODE_WRITETHROUGH) { - struct scatterlist *s; int i; unsigned long addr = vma->vm_start; + struct kgsl_memdesc *m = &entry->memdesc; + + for (i = 0; i < m->page_count; i++) { + struct page *page = m->pages[i]; - for_each_sg(entry->memdesc.sgt->sgl, s, - entry->memdesc.sgt->nents, i) { - int j; - for (j = 0; j < (s->length >> PAGE_SHIFT); j++) { - struct page *page = sg_page(s); - page = nth_page(page, j); - vm_insert_page(vma, addr, page); - addr += PAGE_SIZE; - } + vm_insert_page(vma, addr, page); + addr += PAGE_SIZE; } } diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h index 7ee71d102ca2..ee7149e1fd41 100644 --- a/drivers/gpu/msm/kgsl.h +++ b/drivers/gpu/msm/kgsl.h @@ -181,8 +181,9 @@ struct kgsl_memdesc_ops { * @ops: Function hooks for the memdesc memory type * @flags: Flags set from userspace * @dev: Pointer to the struct device that owns this memory - * @memmap: bitmap of pages for mmapsize - * @memmap_len: Number of bits for memmap + * @attrs: dma attributes for this memory + * @pages: An array of pointers to allocated pages + * @page_count: Total number of pages allocated */ struct kgsl_memdesc { struct kgsl_pagetable *pagetable; @@ -199,6 +200,8 @@ struct kgsl_memdesc { uint64_t flags; struct device *dev; struct dma_attrs attrs; + struct page **pages; + unsigned int page_count; }; /* diff --git a/drivers/gpu/msm/kgsl_debugfs.c b/drivers/gpu/msm/kgsl_debugfs.c index 766cd811588c..93ac790f3a55 100644 --- a/drivers/gpu/msm/kgsl_debugfs.c +++ b/drivers/gpu/msm/kgsl_debugfs.c @@ -150,7 +150,7 @@ static int print_mem_entry(int id, void *ptr, void *data) (unsigned long *) m->useraddr, m->size, entry->id, flags, memtype_str(kgsl_memdesc_usermem_type(m)), - usage, m->sgt->nents, m->mapsize); + usage, (m->sgt ? m->sgt->nents : 0), m->mapsize); if (entry->metadata[0] != 0) seq_printf(s, " %s", entry->metadata); diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c index 865cd9d8f498..b467ef81d257 100644 --- a/drivers/gpu/msm/kgsl_iommu.c +++ b/drivers/gpu/msm/kgsl_iommu.c @@ -1627,16 +1627,34 @@ kgsl_iommu_map(struct kgsl_pagetable *pt, uint64_t addr = memdesc->gpuaddr; uint64_t size = memdesc->size; unsigned int flags = _get_protection_flags(memdesc); + struct sg_table *sgt = NULL; - ret = _iommu_map_sg_sync_pc(pt, addr, memdesc, memdesc->sgt->sgl, - memdesc->sgt->nents, flags); + /* + * For paged memory allocated through kgsl, memdesc->pages is not NULL. + * Allocate sgt here just for its map operation. Contiguous memory + * already has its sgt, so no need to allocate it here. + */ + if (memdesc->pages != NULL) + sgt = kgsl_alloc_sgt_from_pages(memdesc); + else + sgt = memdesc->sgt; + + if (IS_ERR(sgt)) + return PTR_ERR(sgt); + + ret = _iommu_map_sg_sync_pc(pt, addr, memdesc, sgt->sgl, + sgt->nents, flags); if (ret) - return ret; + goto done; ret = _iommu_map_guard_page(pt, memdesc, addr + size, flags); if (ret) _iommu_unmap_sync_pc(pt, memdesc, addr, size); +done: + if (memdesc->pages != NULL) + kgsl_free_sgt(sgt); + return ret; } @@ -1647,6 +1665,8 @@ static int kgsl_iommu_map_offset(struct kgsl_pagetable *pt, { int pg_sz; unsigned int protflags = _get_protection_flags(memdesc); + int ret; + struct sg_table *sgt = NULL; pg_sz = (1 << kgsl_memdesc_get_align(memdesc)); if (!IS_ALIGNED(virtaddr | virtoffset | physoffset | size, pg_sz)) @@ -1655,9 +1675,27 @@ static int kgsl_iommu_map_offset(struct kgsl_pagetable *pt, if (size == 0) return -EINVAL; - return _iommu_map_sg_offset_sync_pc(pt, virtaddr + virtoffset, - memdesc, memdesc->sgt->sgl, memdesc->sgt->nents, - physoffset, size, protflags); + /* + * For paged memory allocated through kgsl, memdesc->pages is not NULL. + * Allocate sgt here just for its map operation. Contiguous memory + * already has its sgt, so no need to allocate it here. + */ + if (memdesc->pages != NULL) + sgt = kgsl_alloc_sgt_from_pages(memdesc); + else + sgt = memdesc->sgt; + + if (IS_ERR(sgt)) + return PTR_ERR(sgt); + + ret = _iommu_map_sg_offset_sync_pc(pt, virtaddr + virtoffset, + memdesc, sgt->sgl, sgt->nents, + physoffset, size, protflags); + + if (memdesc->pages != NULL) + kgsl_free_sgt(sgt); + + return ret; } /* This function must be called with context bank attached */ diff --git a/drivers/gpu/msm/kgsl_pool.c b/drivers/gpu/msm/kgsl_pool.c index 7fb3b37ac191..7967b19779db 100644 --- a/drivers/gpu/msm/kgsl_pool.c +++ b/drivers/gpu/msm/kgsl_pool.c @@ -263,6 +263,31 @@ void kgsl_pool_free_sgt(struct sg_table *sgt) } } +/** + * kgsl_pool_free_pages() - Free pages in the pages array + * @pages: pointer of the pages array + * + * Free the pages by collapsing any physical adjacent pages. + * Pages are added back to the pool, if pool has sufficient space + * otherwise they are given back to system. + */ +void kgsl_pool_free_pages(struct page **pages, unsigned int pcount) +{ + int i; + + if (pages == NULL || pcount == 0) + return; + + for (i = 0; i < pcount;) { + /* + * Free each page or compound page group individually. + */ + struct page *p = pages[i]; + + i += 1 << compound_order(p); + kgsl_pool_free_page(p); + } +} static int kgsl_pool_idx_lookup(unsigned int order) { int i; diff --git a/drivers/gpu/msm/kgsl_pool.h b/drivers/gpu/msm/kgsl_pool.h index f2cdda19140b..efbfa96f1498 100644 --- a/drivers/gpu/msm/kgsl_pool.h +++ b/drivers/gpu/msm/kgsl_pool.h @@ -34,6 +34,7 @@ kgsl_gfp_mask(unsigned int page_order) } void kgsl_pool_free_sgt(struct sg_table *sgt); +void kgsl_pool_free_pages(struct page **pages, unsigned int page_count); void kgsl_init_page_pools(void); void kgsl_exit_page_pools(void); int kgsl_pool_alloc_page(int *page_size, struct page **pages, diff --git a/drivers/gpu/msm/kgsl_pwrctrl.c b/drivers/gpu/msm/kgsl_pwrctrl.c index 2b9eef8b6351..11b323e9d40c 100644 --- a/drivers/gpu/msm/kgsl_pwrctrl.c +++ b/drivers/gpu/msm/kgsl_pwrctrl.c @@ -363,6 +363,8 @@ void kgsl_pwrctrl_pwrlevel_change(struct kgsl_device *device, if (new_level == old_level) return; + kgsl_pwrscale_update_stats(device); + /* * Set the active and previous powerlevel first in case the clocks are * off - if we don't do this then the pwrlevel change won't take effect @@ -934,6 +936,31 @@ static ssize_t kgsl_pwrctrl_gpu_available_frequencies_show( return num_chars; } +static ssize_t kgsl_pwrctrl_gpu_clock_stats_show( + struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kgsl_device *device = kgsl_device_from_dev(dev); + struct kgsl_pwrctrl *pwr; + int index, num_chars = 0; + + if (device == NULL) + return 0; + pwr = &device->pwrctrl; + mutex_lock(&device->mutex); + kgsl_pwrscale_update_stats(device); + mutex_unlock(&device->mutex); + for (index = 0; index < pwr->num_pwrlevels - 1; index++) + num_chars += snprintf(buf + num_chars, PAGE_SIZE - num_chars, + "%llu ", pwr->clock_times[index]); + + if (num_chars < PAGE_SIZE) + buf[num_chars++] = '\n'; + + return num_chars; +} + static ssize_t kgsl_pwrctrl_reset_count_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1201,6 +1228,9 @@ static DEVICE_ATTR(gpubusy, 0444, kgsl_pwrctrl_gpubusy_show, static DEVICE_ATTR(gpu_available_frequencies, 0444, kgsl_pwrctrl_gpu_available_frequencies_show, NULL); +static DEVICE_ATTR(gpu_clock_stats, 0444, + kgsl_pwrctrl_gpu_clock_stats_show, + NULL); static DEVICE_ATTR(max_pwrlevel, 0644, kgsl_pwrctrl_max_pwrlevel_show, kgsl_pwrctrl_max_pwrlevel_store); @@ -1249,6 +1279,7 @@ static const struct device_attribute *pwrctrl_attr_list[] = { &dev_attr_deep_nap_timer, &dev_attr_gpubusy, &dev_attr_gpu_available_frequencies, + &dev_attr_gpu_clock_stats, &dev_attr_max_pwrlevel, &dev_attr_min_pwrlevel, &dev_attr_thermal_pwrlevel, diff --git a/drivers/gpu/msm/kgsl_pwrctrl.h b/drivers/gpu/msm/kgsl_pwrctrl.h index 0029c389484f..8fd06531aa81 100644 --- a/drivers/gpu/msm/kgsl_pwrctrl.h +++ b/drivers/gpu/msm/kgsl_pwrctrl.h @@ -122,6 +122,7 @@ struct kgsl_regulator { * @min_pwrlevel - minimum allowable powerlevel per the user * @num_pwrlevels - number of available power levels * @interval_timeout - timeout in jiffies to be idle before a power event + * @clock_times - Each GPU frequency's accumulated active time in us * @strtstp_sleepwake - true if the device supports low latency GPU start/stop * @regulators - array of pointers to kgsl_regulator structs * @pcl - bus scale identifier @@ -178,6 +179,7 @@ struct kgsl_pwrctrl { unsigned int min_pwrlevel; unsigned int num_pwrlevels; unsigned long interval_timeout; + u64 clock_times[KGSL_MAX_PWRLEVELS]; bool strtstp_sleepwake; struct kgsl_regulator regulators[KGSL_MAX_REGULATORS]; uint32_t pcl; diff --git a/drivers/gpu/msm/kgsl_pwrscale.c b/drivers/gpu/msm/kgsl_pwrscale.c index 4f6677d9a1de..d90aec42f30a 100644 --- a/drivers/gpu/msm/kgsl_pwrscale.c +++ b/drivers/gpu/msm/kgsl_pwrscale.c @@ -127,6 +127,7 @@ EXPORT_SYMBOL(kgsl_pwrscale_busy); */ void kgsl_pwrscale_update_stats(struct kgsl_device *device) { + struct kgsl_pwrctrl *pwrctrl = &device->pwrctrl; struct kgsl_pwrscale *psc = &device->pwrscale; BUG_ON(!mutex_is_locked(&device->mutex)); @@ -150,6 +151,8 @@ void kgsl_pwrscale_update_stats(struct kgsl_device *device) device->pwrscale.accum_stats.busy_time += stats.busy_time; device->pwrscale.accum_stats.ram_time += stats.ram_time; device->pwrscale.accum_stats.ram_wait += stats.ram_wait; + pwrctrl->clock_times[pwrctrl->active_pwrlevel] += + stats.busy_time; } } EXPORT_SYMBOL(kgsl_pwrscale_update_stats); diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c index b20f0d6d51a2..73edc3f7e146 100644 --- a/drivers/gpu/msm/kgsl_sharedmem.c +++ b/drivers/gpu/msm/kgsl_sharedmem.c @@ -354,8 +354,7 @@ static int kgsl_page_alloc_vmfault(struct kgsl_memdesc *memdesc, struct vm_area_struct *vma, struct vm_fault *vmf) { - int i, pgoff; - struct scatterlist *s = memdesc->sgt->sgl; + int pgoff; unsigned int offset; offset = ((unsigned long) vmf->virtual_address - vma->vm_start); @@ -365,30 +364,15 @@ static int kgsl_page_alloc_vmfault(struct kgsl_memdesc *memdesc, pgoff = offset >> PAGE_SHIFT; - /* - * The sglist might be comprised of mixed blocks of memory depending - * on how many 64K pages were allocated. This means we have to do math - * to find the actual 4K page to map in user space - */ - - for (i = 0; i < memdesc->sgt->nents; i++) { - int npages = s->length >> PAGE_SHIFT; + if (pgoff < memdesc->page_count) { + struct page *page = memdesc->pages[pgoff]; - if (pgoff < npages) { - struct page *page = sg_page(s); + get_page(page); + vmf->page = page; - page = nth_page(page, pgoff); + memdesc->mapsize += PAGE_SIZE; - get_page(page); - vmf->page = page; - - memdesc->mapsize += PAGE_SIZE; - - return 0; - } - - pgoff -= npages; - s = sg_next(s); + return 0; } return VM_FAULT_SIGBUS; @@ -451,9 +435,15 @@ static void kgsl_page_alloc_free(struct kgsl_memdesc *memdesc) for_each_sg_page(memdesc->sgt->sgl, &sg_iter, memdesc->sgt->nents, 0) ClearPagePrivate(sg_page_iter_page(&sg_iter)); + } - kgsl_pool_free_sgt(memdesc->sgt); + /* Free pages using the pages array for non secure paged memory */ + if (memdesc->pages != NULL) + kgsl_pool_free_pages(memdesc->pages, memdesc->page_count); + else + kgsl_pool_free_sgt(memdesc->sgt); + } /* @@ -473,31 +463,10 @@ static int kgsl_page_alloc_map_kernel(struct kgsl_memdesc *memdesc) return -ENOMEM; mutex_lock(&kernel_map_global_lock); - if (!memdesc->hostptr) { + if ((!memdesc->hostptr) && (memdesc->pages != NULL)) { pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL); - struct page **pages = NULL; - struct scatterlist *sg; - int npages = PAGE_ALIGN(memdesc->size) >> PAGE_SHIFT; - int sglen = memdesc->sgt->nents; - int i, count = 0; - - /* create a list of pages to call vmap */ - pages = kgsl_malloc(npages * sizeof(struct page *)); - if (pages == NULL) { - ret = -ENOMEM; - goto done; - } - - for_each_sg(memdesc->sgt->sgl, sg, sglen, i) { - struct page *page = sg_page(sg); - int j; - - for (j = 0; j < sg->length >> PAGE_SHIFT; j++) - pages[count++] = page++; - } - - memdesc->hostptr = vmap(pages, count, + memdesc->hostptr = vmap(memdesc->pages, memdesc->page_count, VM_IOREMAP, page_prot); if (memdesc->hostptr) KGSL_STATS_ADD(memdesc->size, @@ -505,11 +474,10 @@ static int kgsl_page_alloc_map_kernel(struct kgsl_memdesc *memdesc) &kgsl_driver.stats.vmalloc_max); else ret = -ENOMEM; - kgsl_free(pages); } if (memdesc->hostptr) memdesc->hostptr_count++; -done: + mutex_unlock(&kernel_map_global_lock); return ret; @@ -677,7 +645,6 @@ kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc, unsigned int j, page_size, len_alloc; unsigned int pcount = 0; size_t len; - struct page **pages = NULL; unsigned int align; size = PAGE_ALIGN(size); @@ -708,18 +675,17 @@ kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc, memdesc->pagetable = pagetable; memdesc->ops = &kgsl_page_alloc_ops; - memdesc->sgt = kmalloc(sizeof(struct sg_table), GFP_KERNEL); - if (memdesc->sgt == NULL) - return -ENOMEM; - /* - * Allocate space to store the list of pages to send to vmap. This is an - * array of pointers so we can track 1024 pages per page of allocation + * Allocate space to store the list of pages. This is an array of + * pointers so we can track 1024 pages per page of allocation. + * Keep this array around for non global non secure buffers that + * are allocated by kgsl. This helps with improving the vm fault + * routine by finding the faulted page in constant time. */ - pages = kgsl_malloc(len_alloc * sizeof(struct page *)); + memdesc->pages = kgsl_malloc(len_alloc * sizeof(struct page *)); - if (pages == NULL) { + if (memdesc->pages == NULL) { ret = -ENOMEM; goto done; } @@ -730,9 +696,9 @@ kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc, int page_count; page_count = kgsl_pool_alloc_page(&page_size, - pages + pcount, len_alloc - pcount, + memdesc->pages + pcount, + len_alloc - pcount, &align); - if (page_count <= 0) { if (page_count == -EAGAIN) continue; @@ -756,16 +722,12 @@ kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc, pcount += page_count; len -= page_size; memdesc->size += page_size; + memdesc->page_count += page_count; /* Get the needed page size for the next iteration */ page_size = get_page_size(len, align); } - ret = sg_alloc_table_from_pages(memdesc->sgt, pages, pcount, 0, - memdesc->size, GFP_KERNEL); - if (ret) - goto done; - /* Call to the hypervisor to lock any secure buffer allocations */ if (memdesc->flags & KGSL_MEMFLAGS_SECURE) { unsigned int i; @@ -774,10 +736,27 @@ kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc, int source_vm = VMID_HLOS; int dest_vm = VMID_CP_PIXEL; + memdesc->sgt = kmalloc(sizeof(struct sg_table), GFP_KERNEL); + if (memdesc->sgt == NULL) { + ret = -ENOMEM; + goto done; + } + + ret = sg_alloc_table_from_pages(memdesc->sgt, memdesc->pages, + memdesc->page_count, 0, memdesc->size, GFP_KERNEL); + if (ret) { + kfree(memdesc->sgt); + goto done; + } + ret = hyp_assign_table(memdesc->sgt, &source_vm, 1, &dest_vm, &dest_perms, 1); - if (ret) + if (ret) { + sg_free_table(memdesc->sgt); + kfree(memdesc->sgt); + memdesc->sgt = NULL; goto done; + } /* Set private bit for each sg to indicate that its secured */ for_each_sg(memdesc->sgt->sgl, sg, memdesc->sgt->nents, i) @@ -789,6 +768,14 @@ kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc, KGSL_STATS_ADD(memdesc->size, &kgsl_driver.stats.secure, &kgsl_driver.stats.secure_max); + /* + * We don't need the array for secure buffers because they are + * not mapped to CPU + */ + kgsl_free(memdesc->pages); + memdesc->pages = NULL; + memdesc->page_count = 0; + /* Don't map and zero the locked secure buffer */ goto done; } @@ -798,19 +785,18 @@ kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc, done: if (ret) { - if (pages) { + if (memdesc->pages) { unsigned int count = 1; for (j = 0; j < pcount; j += count) { - count = 1 << compound_order(pages[j]); - kgsl_pool_free_page(pages[j]); + count = 1 << compound_order(memdesc->pages[j]); + kgsl_pool_free_page(memdesc->pages[j]); } } - kfree(memdesc->sgt); + kgsl_free(memdesc->pages); memset(memdesc, 0, sizeof(*memdesc)); } - kgsl_free(pages); return ret; } @@ -833,6 +819,9 @@ void kgsl_sharedmem_free(struct kgsl_memdesc *memdesc) kfree(memdesc->sgt); } + if (memdesc->pages) + kgsl_free(memdesc->pages); + memset(memdesc, 0, sizeof(*memdesc)); } EXPORT_SYMBOL(kgsl_sharedmem_free); diff --git a/drivers/gpu/msm/kgsl_sharedmem.h b/drivers/gpu/msm/kgsl_sharedmem.h index b1a964da5143..c05aaecb5284 100644 --- a/drivers/gpu/msm/kgsl_sharedmem.h +++ b/drivers/gpu/msm/kgsl_sharedmem.h @@ -306,4 +306,47 @@ static inline void kgsl_free_global(struct kgsl_device *device, void kgsl_sharedmem_set_noretry(bool val); bool kgsl_sharedmem_get_noretry(void); +/** + * kgsl_alloc_sgt_from_pages() - Allocate a sg table + * + * @memdesc: memory descriptor of the allocation + * + * Allocate and return pointer to a sg table + */ +static inline struct sg_table *kgsl_alloc_sgt_from_pages( + struct kgsl_memdesc *m) +{ + int ret; + struct sg_table *sgt; + + sgt = kmalloc(sizeof(struct sg_table), GFP_KERNEL); + if (sgt == NULL) + return ERR_PTR(-ENOMEM); + + ret = sg_alloc_table_from_pages(sgt, m->pages, m->page_count, 0, + m->size, GFP_KERNEL); + if (ret) { + kfree(sgt); + return ERR_PTR(ret); + } + + return sgt; +} + +/** + * kgsl_free_sgt() - Free a sg table structure + * + * @sgt: sg table pointer to be freed + * + * Free the sg table allocated using sgt and free the + * sgt structure itself + */ +static inline void kgsl_free_sgt(struct sg_table *sgt) +{ + if (sgt != NULL) { + sg_free_table(sgt); + kfree(sgt); + } +} + #endif /* __KGSL_SHAREDMEM_H */ |