summary refs log tree commit diff
path: root/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
diff options
context:
space:
mode:
author André Fabian Silva Delgado <emulatorman@parabola.nu> 2016-10-20 00:10:27 -0300
committer André Fabian Silva Delgado <emulatorman@parabola.nu> 2016-10-20 00:10:27 -0300
commit d0b2f91bede3bd5e3d24dd6803e56eee959c1797 (patch)
tree 7fee4ab0509879c373c4f2cbd5b8a5be5b4041ee /drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
parent e914f8eb445e8f74b00303c19c2ffceaedd16a05 (diff)
Linux-libre 4.8.2-gnu pck-4.8.2-gnu
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 480
1 files changed, 363 insertions, 117 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 1127b2b62..1c8f2754f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -225,7 +225,8 @@ static const u32 tonga_mgcg_cgcg_init[] =
static const u32 golden_settings_polaris11_a11[] =
{
- mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
+ mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
+ mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
mmDB_DEBUG2, 0xf00fffff, 0x00000400,
mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
@@ -234,7 +235,7 @@ static const u32 golden_settings_polaris11_a11[] =
mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
- mmSQ_CONFIG, 0x07f80000, 0x07180000,
+ mmSQ_CONFIG, 0x07f80000, 0x01180000,
mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
mmTCC_CTRL, 0x00100000, 0xf31fff7f,
mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
@@ -256,8 +257,8 @@ static const u32 polaris11_golden_common_all[] =
static const u32 golden_settings_polaris10_a11[] =
{
mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
- mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
- mmCB_HW_CONTROL_2, 0, 0x0f000000,
+ mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
+ mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
mmDB_DEBUG2, 0xf00fffff, 0x00000400,
mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
@@ -364,6 +365,7 @@ static const u32 golden_settings_iceland_a11[] =
mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
+ mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
mmTCC_CTRL, 0x00100000, 0xf31fff7f,
@@ -460,8 +462,10 @@ static const u32 cz_golden_settings_a11[] =
mmGB_GPU_ID, 0x0000000f, 0x00000000,
mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+ mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
+ mmTCC_CTRL, 0x00100000, 0xf31fff7f,
mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
@@ -742,26 +746,25 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
return r;
}
-static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
+static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib ib;
struct fence *f = NULL;
uint32_t scratch;
uint32_t tmp = 0;
- unsigned i;
- int r;
+ long r;
r = amdgpu_gfx_scratch_get(adev, &scratch);
if (r) {
- DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
+ DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
return r;
}
WREG32(scratch, 0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 256, &ib);
if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
goto err1;
}
ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
@@ -773,28 +776,25 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
if (r)
goto err2;
- r = fence_wait(f, false);
- if (r) {
- DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+ r = fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ DRM_ERROR("amdgpu: IB test timed out.\n");
+ r = -ETIMEDOUT;
goto err2;
- }
- for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(scratch);
- if (tmp == 0xDEADBEEF)
- break;
- DRM_UDELAY(1);
- }
- if (i < adev->usec_timeout) {
- DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
- ring->idx, i);
+ } else if (r < 0) {
+ DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err2;
+ }
+ tmp = RREG32(scratch);
+ if (tmp == 0xDEADBEEF) {
+ DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+ r = 0;
} else {
DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
scratch, tmp);
r = -EINVAL;
}
err2:
- fence_put(f);
amdgpu_ib_free(adev, &ib, NULL);
fence_put(f);
err1:
@@ -1115,6 +1115,71 @@ static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
buffer[count++] = cpu_to_le32(0);
}
+static void cz_init_cp_jump_table(struct amdgpu_device *adev)
+{
+ const __le32 *fw_data;
+ volatile u32 *dst_ptr;
+ int me, i, max_me = 4;
+ u32 bo_offset = 0;
+ u32 table_offset, table_size;
+
+ if (adev->asic_type == CHIP_CARRIZO)
+ max_me = 5;
+
+ /* write the cp table buffer */
+ dst_ptr = adev->gfx.rlc.cp_table_ptr;
+ for (me = 0; me < max_me; me++) {
+ if (me == 0) {
+ const struct gfx_firmware_header_v1_0 *hdr =
+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
+ fw_data = (const __le32 *)
+ (adev->gfx.ce_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ table_offset = le32_to_cpu(hdr->jt_offset);
+ table_size = le32_to_cpu(hdr->jt_size);
+ } else if (me == 1) {
+ const struct gfx_firmware_header_v1_0 *hdr =
+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
+ fw_data = (const __le32 *)
+ (adev->gfx.pfp_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ table_offset = le32_to_cpu(hdr->jt_offset);
+ table_size = le32_to_cpu(hdr->jt_size);
+ } else if (me == 2) {
+ const struct gfx_firmware_header_v1_0 *hdr =
+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
+ fw_data = (const __le32 *)
+ (adev->gfx.me_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ table_offset = le32_to_cpu(hdr->jt_offset);
+ table_size = le32_to_cpu(hdr->jt_size);
+ } else if (me == 3) {
+ const struct gfx_firmware_header_v1_0 *hdr =
+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+ fw_data = (const __le32 *)
+ (adev->gfx.mec_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ table_offset = le32_to_cpu(hdr->jt_offset);
+ table_size = le32_to_cpu(hdr->jt_size);
+ } else if (me == 4) {
+ const struct gfx_firmware_header_v1_0 *hdr =
+ (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
+ fw_data = (const __le32 *)
+ (adev->gfx.mec2_fw->data +
+ le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ table_offset = le32_to_cpu(hdr->jt_offset);
+ table_size = le32_to_cpu(hdr->jt_size);
+ }
+
+ for (i = 0; i < table_size; i ++) {
+ dst_ptr[bo_offset + i] =
+ cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
+ }
+
+ bo_offset += table_size;
+ }
+}
+
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
int r;
@@ -1130,6 +1195,18 @@ static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
adev->gfx.rlc.clear_state_obj = NULL;
}
+
+ /* jump table block */
+ if (adev->gfx.rlc.cp_table_obj) {
+ r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
+ if (unlikely(r != 0))
+ dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
+ amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
+ amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
+
+ amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
+ adev->gfx.rlc.cp_table_obj = NULL;
+ }
}
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
@@ -1186,6 +1263,46 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
}
+ if ((adev->asic_type == CHIP_CARRIZO) ||
+ (adev->asic_type == CHIP_STONEY)) {
+ adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
+ if (adev->gfx.rlc.cp_table_obj == NULL) {
+ r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ NULL, NULL,
+ &adev->gfx.rlc.cp_table_obj);
+ if (r) {
+ dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
+ return r;
+ }
+ }
+
+ r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
+ if (unlikely(r != 0)) {
+ dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
+ return r;
+ }
+ r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->gfx.rlc.cp_table_gpu_addr);
+ if (r) {
+ amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
+ dev_warn(adev->dev, "(%d) pin RLC cp_table bo failed\n", r);
+ return r;
+ }
+ r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
+ if (r) {
+ dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
+ return r;
+ }
+
+ cz_init_cp_jump_table(adev);
+
+ amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
+ amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
+
+ }
+
return 0;
}
@@ -1567,7 +1684,6 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
RREG32(sec_ded_counter_registers[i]);
fail:
- fence_put(f);
amdgpu_ib_free(adev, &ib, NULL);
fence_put(f);
@@ -3294,9 +3410,15 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
}
}
-void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
+static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
+ u32 se_num, u32 sh_num, u32 instance)
{
- u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
+ u32 data;
+
+ if (instance == 0xffffffff)
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
+ else
+ data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
@@ -3346,13 +3468,13 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v8_0_select_se_sh(adev, i, j);
+ gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
data = gfx_v8_0_get_rb_active_bitmap(adev);
active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
rb_bitmap_width_per_sh);
}
}
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex);
adev->gfx.config.backend_enable_mask = active_rbs;
@@ -3456,7 +3578,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
* making sure that the following register writes will be broadcasted
* to all the shaders
*/
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
WREG32(mmPA_SC_FIFO_SIZE,
(adev->gfx.config.sc_prim_fifo_size_frontend <<
@@ -3479,7 +3601,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
- gfx_v8_0_select_se_sh(adev, i, j);
+ gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
for (k = 0; k < adev->usec_timeout; k++) {
if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
break;
@@ -3487,7 +3609,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
}
}
}
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex);
mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
@@ -3648,13 +3770,13 @@ static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
WREG32(mmRLC_SRM_CNTL, data);
}
-static void polaris11_init_power_gating(struct amdgpu_device *adev)
+static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
uint32_t data;
if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
- AMD_PG_SUPPORT_GFX_SMG |
- AMD_PG_SUPPORT_GFX_DMG)) {
+ AMD_PG_SUPPORT_GFX_SMG |
+ AMD_PG_SUPPORT_GFX_DMG)) {
data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
@@ -3679,6 +3801,53 @@ static void polaris11_init_power_gating(struct amdgpu_device *adev)
}
}
+static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(mmRLC_PG_CNTL);
+
+ if (enable)
+ data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
+ else
+ data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
+
+ if (orig != data)
+ WREG32(mmRLC_PG_CNTL, data);
+}
+
+static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(mmRLC_PG_CNTL);
+
+ if (enable)
+ data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
+ else
+ data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
+
+ if (orig != data)
+ WREG32(mmRLC_PG_CNTL, data);
+}
+
+static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(mmRLC_PG_CNTL);
+
+ if (enable)
+ data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK;
+ else
+ data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK;
+
+ if (orig != data)
+ WREG32(mmRLC_PG_CNTL, data);
+}
+
static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
@@ -3691,8 +3860,25 @@ static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
gfx_v8_0_init_save_restore_list(adev);
gfx_v8_0_enable_save_restore_machine(adev);
- if (adev->asic_type == CHIP_POLARIS11)
- polaris11_init_power_gating(adev);
+ if ((adev->asic_type == CHIP_CARRIZO) ||
+ (adev->asic_type == CHIP_STONEY)) {
+ WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
+ gfx_v8_0_init_power_gating(adev);
+ WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
+ if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
+ cz_enable_sck_slow_down_on_power_up(adev, true);
+ cz_enable_sck_slow_down_on_power_down(adev, true);
+ } else {
+ cz_enable_sck_slow_down_on_power_up(adev, false);
+ cz_enable_sck_slow_down_on_power_down(adev, false);
+ }
+ if (adev->pg_flags & AMD_PG_SUPPORT_CP)
+ cz_enable_cp_power_gating(adev, true);
+ else
+ cz_enable_cp_power_gating(adev, false);
+ } else if (adev->asic_type == CHIP_POLARIS11) {
+ gfx_v8_0_init_power_gating(adev);
+ }
}
}
@@ -4931,7 +5117,7 @@ static int gfx_v8_0_soft_reset(void *handle)
* Fetches a GPU clock counter snapshot.
* Returns the 64 bit clock counter snapshot.
*/
-uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
uint64_t clock;
@@ -4991,12 +5177,18 @@ static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
+static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
+ .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
+ .select_se_sh = &gfx_v8_0_select_se_sh,
+};
+
static int gfx_v8_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
+ adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
gfx_v8_0_set_ring_funcs(adev);
gfx_v8_0_set_irq_funcs(adev);
gfx_v8_0_set_gds_init(adev);
@@ -5029,51 +5221,43 @@ static int gfx_v8_0_late_init(void *handle)
return 0;
}
-static void polaris11_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
- bool enable)
+static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
+ bool enable)
{
uint32_t data, temp;
- /* Send msg to SMU via Powerplay */
- amdgpu_set_powergating_state(adev,
- AMD_IP_BLOCK_TYPE_SMC,
- enable ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
+ if (adev->asic_type == CHIP_POLARIS11)
+ /* Send msg to SMU via Powerplay */
+ amdgpu_set_powergating_state(adev,
+ AMD_IP_BLOCK_TYPE_SMC,
+ enable ?
+ AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
- if (enable) {
- /* Enable static MGPG */
- temp = data = RREG32(mmRLC_PG_CNTL);
+ temp = data = RREG32(mmRLC_PG_CNTL);
+ /* Enable static MGPG */
+ if (enable)
data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
-
- if (temp != data)
- WREG32(mmRLC_PG_CNTL, data);
- } else {
- temp = data = RREG32(mmRLC_PG_CNTL);
+ else
data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
- if (temp != data)
- WREG32(mmRLC_PG_CNTL, data);
- }
+ if (temp != data)
+ WREG32(mmRLC_PG_CNTL, data);
}
-static void polaris11_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
- bool enable)
+static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
+ bool enable)
{
uint32_t data, temp;
- if (enable) {
- /* Enable dynamic MGPG */
- temp = data = RREG32(mmRLC_PG_CNTL);
+ temp = data = RREG32(mmRLC_PG_CNTL);
+ /* Enable dynamic MGPG */
+ if (enable)
data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
-
- if (temp != data)
- WREG32(mmRLC_PG_CNTL, data);
- } else {
- temp = data = RREG32(mmRLC_PG_CNTL);
+ else
data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
- if (temp != data)
- WREG32(mmRLC_PG_CNTL, data);
- }
+ if (temp != data)
+ WREG32(mmRLC_PG_CNTL, data);
}
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
@@ -5081,19 +5265,63 @@ static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *ade
{
uint32_t data, temp;
- if (enable) {
- /* Enable quick PG */
- temp = data = RREG32(mmRLC_PG_CNTL);
- data |= 0x100000;
+ temp = data = RREG32(mmRLC_PG_CNTL);
+ /* Enable quick PG */
+ if (enable)
+ data |= RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
+ else
+ data &= ~RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
- if (temp != data)
- WREG32(mmRLC_PG_CNTL, data);
- } else {
- temp = data = RREG32(mmRLC_PG_CNTL);
- data &= ~0x100000;
+ if (temp != data)
+ WREG32(mmRLC_PG_CNTL, data);
+}
- if (temp != data)
- WREG32(mmRLC_PG_CNTL, data);
+static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(mmRLC_PG_CNTL);
+
+ if (enable)
+ data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+ else
+ data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+
+ if (orig != data)
+ WREG32(mmRLC_PG_CNTL, data);
+}
+
+static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ u32 data, orig;
+
+ orig = data = RREG32(mmRLC_PG_CNTL);
+
+ if (enable)
+ data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
+ else
+ data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
+
+ if (orig != data)
+ WREG32(mmRLC_PG_CNTL, data);
+
+ /* Read any GFX register to wake up GFX. */
+ if (!enable)
+ data = RREG32(mmDB_RENDER_CONTROL);
+}
+
+static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
+ bool enable)
+{
+ if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
+ cz_enable_gfx_cg_power_gating(adev, true);
+ if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
+ cz_enable_gfx_pipeline_power_gating(adev, true);
+ } else {
+ cz_enable_gfx_cg_power_gating(adev, false);
+ cz_enable_gfx_pipeline_power_gating(adev, false);
}
}
@@ -5101,21 +5329,42 @@ static int gfx_v8_0_set_powergating_state(void *handle,
enum amd_powergating_state state)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
return 0;
switch (adev->asic_type) {
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
+ cz_update_gfx_cg_power_gating(adev, enable);
+
+ if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
+ gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
+ else
+ gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
+
+ if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
+ gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
+ else
+ gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
+ break;
case CHIP_POLARIS11:
- if (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG)
- polaris11_enable_gfx_static_mg_power_gating(adev,
- state == AMD_PG_STATE_GATE ? true : false);
- else if (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG)
- polaris11_enable_gfx_dynamic_mg_power_gating(adev,
- state == AMD_PG_STATE_GATE ? true : false);
+ if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
+ gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
+ else
+ gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
+
+ if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
+ gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
+ else
+ gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
+
+ if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
+ polaris11_enable_gfx_quick_mg_power_gating(adev, true);
else
- polaris11_enable_gfx_quick_mg_power_gating(adev,
- state == AMD_PG_STATE_GATE ? true : false);
+ polaris11_enable_gfx_quick_mg_power_gating(adev, false);
break;
default:
break;
@@ -5129,7 +5378,7 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
{
uint32_t data;
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
@@ -5517,6 +5766,8 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
WREG32(mmRLC_CGCG_CGLS_CTRL, data);
}
+ gfx_v8_0_wait_for_rlc_serdes(adev);
+
adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
@@ -5642,17 +5893,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
unsigned vm_id, bool ctx_switch)
{
u32 header, control = 0;
- u32 next_rptr = ring->wptr + 5;
-
- if (ctx_switch)
- next_rptr += 2;
-
- next_rptr += 4;
- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
- amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
- amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
- amdgpu_ring_write(ring, next_rptr);
/* insert SWITCH_BUFFER packet before first IB in the ring frame */
if (ctx_switch) {
@@ -5681,23 +5921,9 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
unsigned vm_id, bool ctx_switch)
{
- u32 header, control = 0;
- u32 next_rptr = ring->wptr + 5;
-
- control |= INDIRECT_BUFFER_VALID;
-
- next_rptr += 4;
- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
- amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
- amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
- amdgpu_ring_write(ring, next_rptr);
+ u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
- header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
-
- control |= ib->length_dw | (vm_id << 24);
-
- amdgpu_ring_write(ring, header);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
(2 << 0) |
@@ -6150,9 +6376,9 @@ static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_TOPAZ:
- case CHIP_STONEY:
adev->gfx.rlc.funcs = &iceland_rlc_funcs;
break;
+ case CHIP_STONEY:
case CHIP_CARRIZO:
adev->gfx.rlc.funcs = &cz_rlc_funcs;
break;
@@ -6190,6 +6416,20 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
}
}
+static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
+ u32 bitmap)
+{
+ u32 data;
+
+ if (!bitmap)
+ return;
+
+ data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+ data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+
+ WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
+}
+
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
u32 data, mask;
@@ -6210,16 +6450,22 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
int i, j, k, counter, active_cu_number = 0;
u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
+ unsigned disable_masks[4 * 2];
memset(cu_info, 0, sizeof(*cu_info));
+ amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
+
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
mask = 1;
ao_bitmap = 0;
counter = 0;
- gfx_v8_0_select_se_sh(adev, i, j);
+ gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
+ if (i < 4 && j < 2)
+ gfx_v8_0_set_user_cu_inactive_bitmap(
+ adev, disable_masks[i * 2 + j]);
bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
cu_info->bitmap[i][j] = bitmap;
@@ -6235,7 +6481,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
}
}
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;