summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
authorAndré Fabian Silva Delgado <emulatorman@parabola.nu>2015-12-15 14:52:16 -0300
committerAndré Fabian Silva Delgado <emulatorman@parabola.nu>2015-12-15 14:52:16 -0300
commit8d91c1e411f55d7ea91b1183a2e9f8088fb4d5be (patch)
treee9891aa6c295060d065adffd610c4f49ecf884f3 /drivers/gpu/drm/amd/amdkfd
parenta71852147516bc1cb5b0b3cbd13639bfd4022dc8 (diff)
Linux-libre 4.3.2-gnu
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Kconfig2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Makefile3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cik_regs.h11
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c12
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c103
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c20
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c249
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c99
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h398
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.h1
14 files changed, 875 insertions, 39 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index 8dfac37ff..e13c67c8d 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -4,6 +4,6 @@
config HSA_AMD
tristate "HSA kernel driver for AMD GPU devices"
- depends on DRM_RADEON && AMD_IOMMU_V2 && X86_64
+ depends on (DRM_RADEON || DRM_AMDGPU) && AMD_IOMMU_V2 && X86_64
help
Enable this if you want to use HSA features on AMD GPU devices.
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index 28551153e..7fc9b0f44 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -2,7 +2,8 @@
# Makefile for Heterogenous System Architecture support for AMD GPU devices
#
-ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/
+ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/ \
+ -Idrivers/gpu/drm/amd/include/asic_reg
amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_regs.h b/drivers/gpu/drm/amd/amdkfd/cik_regs.h
index 183be5b84..48769d12d 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_regs.h
+++ b/drivers/gpu/drm/amd/amdkfd/cik_regs.h
@@ -65,17 +65,6 @@
#define AQL_ENABLE 1
-#define SDMA_RB_VMID(x) (x << 24)
-#define SDMA_RB_ENABLE (1 << 0)
-#define SDMA_RB_SIZE(x) ((x) << 1) /* log2 */
-#define SDMA_RPTR_WRITEBACK_ENABLE (1 << 12)
-#define SDMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
-#define SDMA_OFFSET(x) (x << 0)
-#define SDMA_DB_ENABLE (1 << 28)
-#define SDMA_ATC (1 << 0)
-#define SDMA_VA_PTR32 (1 << 4)
-#define SDMA_VA_SHARED_BASE(x) (x << 8)
-
#define GRBM_GFX_INDEX 0x30800
#define ATC_VMID_PASID_MAPPING_VALID (1U << 31)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index c99197301..c6a1b4cc6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -31,7 +31,7 @@
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include <linux/mm.h>
-#include <uapi/asm-generic/mman-common.h>
+#include <linux/mman.h>
#include <asm/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 75312c829..3f95f7cb4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -80,7 +80,12 @@ static const struct kfd_deviceid supported_devices[] = {
{ 0x1318, &kaveri_device_info }, /* Kaveri */
{ 0x131B, &kaveri_device_info }, /* Kaveri */
{ 0x131C, &kaveri_device_info }, /* Kaveri */
- { 0x131D, &kaveri_device_info } /* Kaveri */
+ { 0x131D, &kaveri_device_info }, /* Kaveri */
+ { 0x9870, &carrizo_device_info }, /* Carrizo */
+ { 0x9874, &carrizo_device_info }, /* Carrizo */
+ { 0x9875, &carrizo_device_info }, /* Carrizo */
+ { 0x9876, &carrizo_device_info }, /* Carrizo */
+ { 0x9877, &carrizo_device_info } /* Carrizo */
};
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index 9ce8a20a7..c6f435aa8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -23,6 +23,7 @@
#include "kfd_device_queue_manager.h"
#include "cik_regs.h"
+#include "oss/oss_2_4_sh_mask.h"
static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
@@ -135,13 +136,16 @@ static int register_process_cik(struct device_queue_manager *dqm,
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd)
{
- uint32_t value = SDMA_ATC;
+ uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT);
if (q->process->is_32bit_user_mode)
- value |= SDMA_VA_PTR32 | get_sh_mem_bases_32(qpd_to_pdd(qpd));
+ value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) |
+ get_sh_mem_bases_32(qpd_to_pdd(qpd));
else
- value |= SDMA_VA_SHARED_BASE(get_sh_mem_bases_nybble_64(
- qpd_to_pdd(qpd)));
+ value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
+ SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
+ SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
+
q->properties.sdma_vm_addr = value;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
index 4c15212a3..7e9cae9d3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -22,6 +22,10 @@
*/
#include "kfd_device_queue_manager.h"
+#include "gca/gfx_8_0_enum.h"
+#include "gca/gfx_8_0_sh_mask.h"
+#include "gca/gfx_8_0_enum.h"
+#include "oss/oss_3_0_sh_mask.h"
static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
@@ -37,14 +41,40 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops)
{
- pr_warn("amdkfd: VI DQM is not currently supported\n");
-
ops->set_cache_memory_policy = set_cache_memory_policy_vi;
ops->register_process = register_process_vi;
ops->initialize = initialize_cpsch_vi;
ops->init_sdma_vm = init_sdma_vm;
}
+static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
+{
+ /* In 64-bit mode, we can only control the top 3 bits of the LDS,
+ * scratch and GPUVM apertures.
+ * The hardware fills in the remaining 59 bits according to the
+ * following pattern:
+ * LDS: X0000000'00000000 - X0000001'00000000 (4GB)
+ * Scratch: X0000001'00000000 - X0000002'00000000 (4GB)
+ * GPUVM: Y0010000'00000000 - Y0020000'00000000 (1TB)
+ *
+ * (where X/Y is the configurable nybble with the low-bit 0)
+ *
+ * LDS and scratch will have the same top nybble programmed in the
+ * top 3 bits of SH_MEM_BASES.PRIVATE_BASE.
+ * GPUVM can have a different top nybble programmed in the
+ * top 3 bits of SH_MEM_BASES.SHARED_BASE.
+ * We don't bother to support different top nybbles
+ * for LDS/Scratch and GPUVM.
+ */
+
+ BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
+ top_address_nybble == 0);
+
+ return top_address_nybble << 12 |
+ (top_address_nybble << 12) <<
+ SH_MEM_BASES__SHARED_BASE__SHIFT;
+}
+
static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
@@ -52,18 +82,83 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size)
{
- return false;
+ uint32_t default_mtype;
+ uint32_t ape1_mtype;
+
+ default_mtype = (default_policy == cache_policy_coherent) ?
+ MTYPE_CC :
+ MTYPE_NC;
+
+ ape1_mtype = (alternate_policy == cache_policy_coherent) ?
+ MTYPE_CC :
+ MTYPE_NC;
+
+ qpd->sh_mem_config = (qpd->sh_mem_config &
+ SH_MEM_CONFIG__ADDRESS_MODE_MASK) |
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
+ default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
+ ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT |
+ SH_MEM_CONFIG__PRIVATE_ATC_MASK;
+
+ return true;
}
static int register_process_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
- return -1;
+ struct kfd_process_device *pdd;
+ unsigned int temp;
+
+ BUG_ON(!dqm || !qpd);
+
+ pdd = qpd_to_pdd(qpd);
+
+ /* check if sh_mem_config register already configured */
+ if (qpd->sh_mem_config == 0) {
+ qpd->sh_mem_config =
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
+ MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
+ MTYPE_CC << SH_MEM_CONFIG__APE1_MTYPE__SHIFT |
+ SH_MEM_CONFIG__PRIVATE_ATC_MASK;
+
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ }
+
+ if (qpd->pqm->process->is_32bit_user_mode) {
+ temp = get_sh_mem_bases_32(pdd);
+ qpd->sh_mem_bases = temp << SH_MEM_BASES__SHARED_BASE__SHIFT;
+ qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA32 <<
+ SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
+ } else {
+ temp = get_sh_mem_bases_nybble_64(pdd);
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
+ qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 <<
+ SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
+ }
+
+ pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
+ qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
+
+ return 0;
}
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd)
{
+ uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT);
+
+ if (q->process->is_32bit_user_mode)
+ value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) |
+ get_sh_mem_bases_32(qpd_to_pdd(qpd));
+ else
+ value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
+ SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
+ SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
+
+ q->properties.sdma_vm_addr = value;
}
static int initialize_cpsch_vi(struct device_queue_manager *dqm)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 35b987574..2b655103b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -33,7 +33,7 @@
#include <linux/time.h>
#include "kfd_priv.h"
#include <linux/mm.h>
-#include <uapi/asm-generic/mman-common.h>
+#include <linux/mman.h>
#include <asm/processor.h>
/*
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 434979428..d83de985e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -27,6 +27,7 @@
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "cik_structs.h"
+#include "oss/oss_2_4_sh_mask.h"
static inline struct cik_mqd *get_mqd(void *mqd)
{
@@ -214,17 +215,20 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
BUG_ON(!mm || !mqd || !q);
m = get_sdma_mqd(mqd);
- m->sdma_rlc_rb_cntl =
- SDMA_RB_SIZE((ffs(q->queue_size / sizeof(unsigned int)))) |
- SDMA_RB_VMID(q->vmid) |
- SDMA_RPTR_WRITEBACK_ENABLE |
- SDMA_RPTR_WRITEBACK_TIMER(6);
+ m->sdma_rlc_rb_cntl = ffs(q->queue_size / sizeof(unsigned int)) <<
+ SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+ q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
+ 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+ 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
m->sdma_rlc_rb_base = lower_32_bits(q->queue_address >> 8);
m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8);
m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
- m->sdma_rlc_doorbell = SDMA_OFFSET(q->doorbell_off) | SDMA_DB_ENABLE;
+ m->sdma_rlc_doorbell = q->doorbell_off <<
+ SDMA0_RLC0_DOORBELL__OFFSET__SHIFT |
+ 1 << SDMA0_RLC0_DOORBELL__ENABLE__SHIFT;
+
m->sdma_rlc_virtual_addr = q->sdma_vm_addr;
m->sdma_engine_id = q->sdma_engine_id;
@@ -234,7 +238,9 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
if (q->queue_size > 0 &&
q->queue_address != 0 &&
q->queue_percent > 0) {
- m->sdma_rlc_rb_cntl |= SDMA_RB_ENABLE;
+ m->sdma_rlc_rb_cntl |=
+ 1 << SDMA0_RLC0_RB_CNTL__RB_ENABLE__SHIFT;
+
q->is_active = true;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index b3a7e3ba1..fa32c32fa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -22,12 +22,255 @@
*/
#include <linux/printk.h>
+#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_mqd_manager.h"
+#include "vi_structs.h"
+#include "gca/gfx_8_0_sh_mask.h"
+#include "gca/gfx_8_0_enum.h"
+
+#define CP_MQD_CONTROL__PRIV_STATE__SHIFT 0x8
+
+static inline struct vi_mqd *get_mqd(void *mqd)
+{
+ return (struct vi_mqd *)mqd;
+}
+
+static int init_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ int retval;
+ uint64_t addr;
+ struct vi_mqd *m;
+
+ retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct vi_mqd),
+ mqd_mem_obj);
+ if (retval != 0)
+ return -ENOMEM;
+
+ m = (struct vi_mqd *) (*mqd_mem_obj)->cpu_ptr;
+ addr = (*mqd_mem_obj)->gpu_addr;
+
+ memset(m, 0, sizeof(struct vi_mqd));
+
+ m->header = 0xC0310800;
+ m->compute_pipelinestat_enable = 1;
+ m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+
+ m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
+ 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+
+ m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT |
+ MTYPE_UC << CP_MQD_CONTROL__MTYPE__SHIFT;
+
+ m->cp_mqd_base_addr_lo = lower_32_bits(addr);
+ m->cp_mqd_base_addr_hi = upper_32_bits(addr);
+
+ m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
+ 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
+ 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
+
+ m->cp_hqd_pipe_priority = 1;
+ m->cp_hqd_queue_priority = 15;
+
+ m->cp_hqd_eop_rptr = 1 << CP_HQD_EOP_RPTR__INIT_FETCHER__SHIFT;
+
+ if (q->format == KFD_QUEUE_FORMAT_AQL)
+ m->cp_hqd_iq_rptr = 1;
+
+ *mqd = m;
+ if (gart_addr != NULL)
+ *gart_addr = addr;
+ retval = mm->update_mqd(mm, m, q);
+
+ return retval;
+}
+
+static int load_mqd(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr)
+{
+ return mm->dev->kfd2kgd->hqd_load
+ (mm->dev->kgd, mqd, pipe_id, queue_id, wptr);
+}
+
+static int __update_mqd(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q, unsigned int mtype,
+ unsigned int atc_bit)
+{
+ struct vi_mqd *m;
+
+ BUG_ON(!mm || !q || !mqd);
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ m = get_mqd(mqd);
+
+ m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT |
+ atc_bit << CP_HQD_PQ_CONTROL__PQ_ATC__SHIFT |
+ mtype << CP_HQD_PQ_CONTROL__MTYPE__SHIFT;
+ m->cp_hqd_pq_control |=
+ ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
+ pr_debug("kfd: cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
+
+ m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
+ m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
+
+ m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+ m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+
+ m->cp_hqd_pq_doorbell_control =
+ 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN__SHIFT |
+ q->doorbell_off <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ pr_debug("kfd: cp_hqd_pq_doorbell_control 0x%x\n",
+ m->cp_hqd_pq_doorbell_control);
+
+ m->cp_hqd_eop_control = atc_bit << CP_HQD_EOP_CONTROL__EOP_ATC__SHIFT |
+ mtype << CP_HQD_EOP_CONTROL__MTYPE__SHIFT;
+
+ m->cp_hqd_ib_control = atc_bit << CP_HQD_IB_CONTROL__IB_ATC__SHIFT |
+ 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT |
+ mtype << CP_HQD_IB_CONTROL__MTYPE__SHIFT;
+
+ m->cp_hqd_eop_control |=
+ ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1;
+ m->cp_hqd_eop_base_addr_lo =
+ lower_32_bits(q->eop_ring_buffer_address >> 8);
+ m->cp_hqd_eop_base_addr_hi =
+ upper_32_bits(q->eop_ring_buffer_address >> 8);
+
+ m->cp_hqd_iq_timer = atc_bit << CP_HQD_IQ_TIMER__IQ_ATC__SHIFT |
+ mtype << CP_HQD_IQ_TIMER__MTYPE__SHIFT;
+
+ m->cp_hqd_vmid = q->vmid;
+
+ if (q->format == KFD_QUEUE_FORMAT_AQL) {
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
+ 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
+ }
+
+ m->cp_hqd_active = 0;
+ q->is_active = false;
+ if (q->queue_size > 0 &&
+ q->queue_address != 0 &&
+ q->queue_percent > 0) {
+ m->cp_hqd_active = 1;
+ q->is_active = true;
+ }
+
+ return 0;
+}
+
+
+static int update_mqd(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q)
+{
+ return __update_mqd(mm, mqd, q, MTYPE_CC, 1);
+}
+
+static int destroy_mqd(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type,
+ unsigned int timeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_destroy
+ (mm->dev->kgd, type, timeout,
+ pipe_id, queue_id);
+}
+
+static void uninit_mqd(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj)
+{
+ BUG_ON(!mm || !mqd);
+ kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+}
+
+static bool is_occupied(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_is_occupied(
+ mm->dev->kgd, queue_address,
+ pipe_id, queue_id);
+}
+
+static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ struct vi_mqd *m;
+ int retval = init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
+
+ if (retval != 0)
+ return retval;
+
+ m = get_mqd(*mqd);
+
+ m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
+ 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
+
+ return retval;
+}
+
+static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q)
+{
+ struct vi_mqd *m;
+ int retval = __update_mqd(mm, mqd, q, MTYPE_UC, 0);
+
+ if (retval != 0)
+ return retval;
+
+ m = get_mqd(mqd);
+ m->cp_hqd_vmid = q->vmid;
+ return retval;
+}
struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
- struct kfd_dev *dev)
+ struct kfd_dev *dev)
{
- pr_warn("amdkfd: VI MQD is not currently supported\n");
- return NULL;
+ struct mqd_manager *mqd;
+
+ BUG_ON(!dev);
+ BUG_ON(type >= KFD_MQD_TYPE_MAX);
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL);
+ if (!mqd)
+ return NULL;
+
+ mqd->dev = dev;
+
+ switch (type) {
+ case KFD_MQD_TYPE_CP:
+ case KFD_MQD_TYPE_COMPUTE:
+ mqd->init_mqd = init_mqd;
+ mqd->uninit_mqd = uninit_mqd;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd;
+ mqd->destroy_mqd = destroy_mqd;
+ mqd->is_occupied = is_occupied;
+ break;
+ case KFD_MQD_TYPE_HIQ:
+ mqd->init_mqd = init_mqd_hiq;
+ mqd->uninit_mqd = uninit_mqd;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd_hiq;
+ mqd->destroy_mqd = destroy_mqd;
+ mqd->is_occupied = is_occupied;
+ break;
+ case KFD_MQD_TYPE_SDMA:
+ break;
+ default:
+ kfree(mqd);
+ return NULL;
+ }
+
+ return mqd;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 99b6d28a1..90f391434 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -27,6 +27,7 @@
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"
#include "kfd_pm4_headers.h"
+#include "kfd_pm4_headers_vi.h"
#include "kfd_pm4_opcodes.h"
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
@@ -55,6 +56,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
bool *over_subscription)
{
unsigned int process_count, queue_count;
+ unsigned int map_queue_size;
BUG_ON(!pm || !rlib_size || !over_subscription);
@@ -69,9 +71,13 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
pr_debug("kfd: over subscribed runlist\n");
}
+ map_queue_size =
+ (pm->dqm->dev->device_info->asic_family == CHIP_CARRIZO) ?
+ sizeof(struct pm4_mes_map_queues) :
+ sizeof(struct pm4_map_queues);
/* calculate run list ib allocation size */
*rlib_size = process_count * sizeof(struct pm4_map_process) +
- queue_count * sizeof(struct pm4_map_queues);
+ queue_count * map_queue_size;
/*
* Increase the allocation size in case we need a chained run list
@@ -176,6 +182,71 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
return 0;
}
+static int pm_create_map_queue_vi(struct packet_manager *pm, uint32_t *buffer,
+ struct queue *q, bool is_static)
+{
+ struct pm4_mes_map_queues *packet;
+ bool use_static = is_static;
+
+ BUG_ON(!pm || !buffer || !q);
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ packet = (struct pm4_mes_map_queues *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_map_queues));
+
+ packet->header.u32all = build_pm4_header(IT_MAP_QUEUES,
+ sizeof(struct pm4_map_queues));
+ packet->bitfields2.alloc_format =
+ alloc_format__mes_map_queues__one_per_pipe_vi;
+ packet->bitfields2.num_queues = 1;
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
+
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_map_queues__compute_vi;
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__normal_compute_vi;
+
+ switch (q->properties.type) {
+ case KFD_QUEUE_TYPE_COMPUTE:
+ if (use_static)
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__normal_latency_static_queue_vi;
+ break;
+ case KFD_QUEUE_TYPE_DIQ:
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__debug_interface_queue_vi;
+ break;
+ case KFD_QUEUE_TYPE_SDMA:
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_map_queues__sdma0_vi;
+ use_static = false; /* no static queues under SDMA */
+ break;
+ default:
+ pr_err("kfd: in %s queue type %d\n", __func__,
+ q->properties.type);
+ BUG();
+ break;
+ }
+ packet->bitfields3.doorbell_offset =
+ q->properties.doorbell_off;
+
+ packet->mqd_addr_lo =
+ lower_32_bits(q->gart_mqd_addr);
+
+ packet->mqd_addr_hi =
+ upper_32_bits(q->gart_mqd_addr);
+
+ packet->wptr_addr_lo =
+ lower_32_bits((uint64_t)q->properties.write_ptr);
+
+ packet->wptr_addr_hi =
+ upper_32_bits((uint64_t)q->properties.write_ptr);
+
+ return 0;
+}
+
static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
struct queue *q, bool is_static)
{
@@ -292,8 +363,17 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n",
kq->queue->queue, qpd->is_debug);
- retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr],
- kq->queue, qpd->is_debug);
+ if (pm->dqm->dev->device_info->asic_family ==
+ CHIP_CARRIZO)
+ retval = pm_create_map_queue_vi(pm,
+ &rl_buffer[rl_wptr],
+ kq->queue,
+ qpd->is_debug);
+ else
+ retval = pm_create_map_queue(pm,
+ &rl_buffer[rl_wptr],
+ kq->queue,
+ qpd->is_debug);
if (retval != 0)
return retval;
@@ -309,8 +389,17 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n",
q->queue, qpd->is_debug);
- retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr],
- q, qpd->is_debug);
+ if (pm->dqm->dev->device_info->asic_family ==
+ CHIP_CARRIZO)
+ retval = pm_create_map_queue_vi(pm,
+ &rl_buffer[rl_wptr],
+ q,
+ qpd->is_debug);
+ else
+ retval = pm_create_map_queue(pm,
+ &rl_buffer[rl_wptr],
+ q,
+ qpd->is_debug);
if (retval != 0)
return retval;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
new file mode 100644
index 000000000..08c721922
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
@@ -0,0 +1,398 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef F32_MES_PM4_PACKETS_H
+#define F32_MES_PM4_PACKETS_H
+
+#ifndef PM4_MES_HEADER_DEFINED
+#define PM4_MES_HEADER_DEFINED
+union PM4_MES_TYPE_3_HEADER {
+ struct {
+ uint32_t reserved1 : 8; /* < reserved */
+ uint32_t opcode : 8; /* < IT opcode */
+ uint32_t count : 14;/* < number of DWORDs - 1 in the
+ information body. */
+ uint32_t type : 2; /* < packet identifier.
+ It should be 3 for type 3 packets */
+ };
+ uint32_t u32All;
+};
+#endif /* PM4_MES_HEADER_DEFINED */
+
+/*--------------------MES_SET_RESOURCES--------------------*/
+
+#ifndef PM4_MES_SET_RESOURCES_DEFINED
+#define PM4_MES_SET_RESOURCES_DEFINED
+enum mes_set_resources_queue_type_enum {
+ queue_type__mes_set_resources__kernel_interface_queue_kiq = 0,
+ queue_type__mes_set_resources__hsa_interface_queue_hiq = 1,
+ queue_type__mes_set_resources__hsa_debug_interface_queue = 4
+};
+
+
+struct pm4_mes_set_resources {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t vmid_mask:16;
+ uint32_t unmap_latency:8;
+ uint32_t reserved1:5;
+ enum mes_set_resources_queue_type_enum queue_type:3;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ uint32_t queue_mask_lo;
+ uint32_t queue_mask_hi;
+ uint32_t gws_mask_lo;
+ uint32_t gws_mask_hi;
+
+ union {
+ struct {
+ uint32_t oac_mask:16;
+ uint32_t reserved2:16;
+ } bitfields7;
+ uint32_t ordinal7;
+ };
+
+ union {
+ struct {
+ uint32_t gds_heap_base:6;
+ uint32_t reserved3:5;
+ uint32_t gds_heap_size:6;
+ uint32_t reserved4:15;
+ } bitfields8;
+ uint32_t ordinal8;
+ };
+
+};
+#endif
+
+/*--------------------MES_RUN_LIST--------------------*/
+
+#ifndef PM4_MES_RUN_LIST_DEFINED
+#define PM4_MES_RUN_LIST_DEFINED
+
+struct pm4_mes_runlist {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t reserved1:2;
+ uint32_t ib_base_lo:30;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ union {
+ struct {
+ uint32_t ib_base_hi:16;
+ uint32_t reserved2:16;
+ } bitfields3;
+ uint32_t ordinal3;
+ };
+
+ union {
+ struct {
+ uint32_t ib_size:20;
+ uint32_t chain:1;
+ uint32_t offload_polling:1;
+ uint32_t reserved3:1;
+ uint32_t valid:1;
+ uint32_t reserved4:8;
+ } bitfields4;
+ uint32_t ordinal4;
+ };
+
+};
+#endif
+
+/*--------------------MES_MAP_PROCESS--------------------*/
+
+#ifndef PM4_MES_MAP_PROCESS_DEFINED
+#define PM4_MES_MAP_PROCESS_DEFINED
+
+struct pm4_mes_map_process {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t pasid:16;
+ uint32_t reserved1:8;
+ uint32_t diq_enable:1;
+ uint32_t process_quantum:7;
+ } bitfields2;
+ uint32_t ordinal2;
+};
+
+ union {
+ struct {
+ uint32_t page_table_base:28;
+ uint32_t reserved2:4;
+ } bitfields3;
+ uint32_t ordinal3;
+ };
+
+ uint32_t sh_mem_bases;
+ uint32_t sh_mem_ape1_base;
+ uint32_t sh_mem_ape1_limit;
+ uint32_t sh_mem_config;
+ uint32_t gds_addr_lo;
+ uint32_t gds_addr_hi;
+
+ union {
+ struct {
+ uint32_t num_gws:6;
+ uint32_t reserved3:2;
+ uint32_t num_oac:4;
+ uint32_t reserved4:4;
+ uint32_t gds_size:6;
+ uint32_t num_queues:10;
+ } bitfields10;
+ uint32_t ordinal10;
+ };
+
+};
+#endif
+
+/*--------------------MES_MAP_QUEUES--------------------*/
+
+#ifndef PM4_MES_MAP_QUEUES_VI_DEFINED
+#define PM4_MES_MAP_QUEUES_VI_DEFINED
+enum mes_map_queues_queue_sel_vi_enum {
+ queue_sel__mes_map_queues__map_to_specified_queue_slots_vi = 0,
+queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi = 1
+};
+
+enum mes_map_queues_queue_type_vi_enum {
+ queue_type__mes_map_queues__normal_compute_vi = 0,
+ queue_type__mes_map_queues__debug_interface_queue_vi = 1,
+ queue_type__mes_map_queues__normal_latency_static_queue_vi = 2,
+queue_type__mes_map_queues__low_latency_static_queue_vi = 3
+};
+
+enum mes_map_queues_alloc_format_vi_enum {
+ alloc_format__mes_map_queues__one_per_pipe_vi = 0,
+alloc_format__mes_map_queues__all_on_one_pipe_vi = 1
+};
+
+enum mes_map_queues_engine_sel_vi_enum {
+ engine_sel__mes_map_queues__compute_vi = 0,
+ engine_sel__mes_map_queues__sdma0_vi = 2,
+ engine_sel__mes_map_queues__sdma1_vi = 3
+};
+
+
+struct pm4_mes_map_queues {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t reserved1:4;
+ enum mes_map_queues_queue_sel_vi_enum queue_sel:2;
+ uint32_t reserved2:15;
+ enum mes_map_queues_queue_type_vi_enum queue_type:3;
+ enum mes_map_queues_alloc_format_vi_enum alloc_format:2;
+ enum mes_map_queues_engine_sel_vi_enum engine_sel:3;
+ uint32_t num_queues:3;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ union {
+ struct {
+ uint32_t reserved3:1;
+ uint32_t check_disable:1;
+ uint32_t doorbell_offset:21;
+ uint32_t reserved4:3;
+ uint32_t queue:6;
+ } bitfields3;
+ uint32_t ordinal3;
+ };
+
+ uint32_t mqd_addr_lo;
+ uint32_t mqd_addr_hi;
+ uint32_t wptr_addr_lo;
+ uint32_t wptr_addr_hi;
+};
+#endif
+
+/*--------------------MES_QUERY_STATUS--------------------*/
+
+#ifndef PM4_MES_QUERY_STATUS_DEFINED
+#define PM4_MES_QUERY_STATUS_DEFINED
+enum mes_query_status_interrupt_sel_enum {
+ interrupt_sel__mes_query_status__completion_status = 0,
+ interrupt_sel__mes_query_status__process_status = 1,
+ interrupt_sel__mes_query_status__queue_status = 2
+};
+
+enum mes_query_status_command_enum {
+ command__mes_query_status__interrupt_only = 0,
+ command__mes_query_status__fence_only_immediate = 1,
+ command__mes_query_status__fence_only_after_write_ack = 2,
+ command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3
+};
+
+enum mes_query_status_engine_sel_enum {
+ engine_sel__mes_query_status__compute = 0,
+ engine_sel__mes_query_status__sdma0_queue = 2,
+ engine_sel__mes_query_status__sdma1_queue = 3
+};
+
+struct pm4_mes_query_status {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t context_id:28;
+ enum mes_query_status_interrupt_sel_enum
+ interrupt_sel:2;
+ enum mes_query_status_command_enum command:2;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ union {
+ struct {
+ uint32_t pasid:16;
+ uint32_t reserved1:16;
+ } bitfields3a;
+ struct {
+ uint32_t reserved2:2;
+ uint32_t doorbell_offset:21;
+ uint32_t reserved3:2;
+ enum mes_query_status_engine_sel_enum engine_sel:3;
+ uint32_t reserved4:4;
+ } bitfields3b;
+ uint32_t ordinal3;
+ };
+
+ uint32_t addr_lo;
+ uint32_t addr_hi;
+ uint32_t data_lo;
+ uint32_t data_hi;
+};
+#endif
+
+/*--------------------MES_UNMAP_QUEUES--------------------*/
+
+#ifndef PM4_MES_UNMAP_QUEUES_DEFINED
+#define PM4_MES_UNMAP_QUEUES_DEFINED
+enum mes_unmap_queues_action_enum {
+ action__mes_unmap_queues__preempt_queues = 0,
+ action__mes_unmap_queues__reset_queues = 1,
+ action__mes_unmap_queues__disable_process_queues = 2,
+ action__mes_unmap_queues__reserved = 3
+};
+
+enum mes_unmap_queues_queue_sel_enum {
+ queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
+ queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
+ queue_sel__mes_unmap_queues__unmap_all_queues = 2,
+ queue_sel__mes_unmap_queues__unmap_all_non_static_queues = 3
+};
+
+enum mes_unmap_queues_engine_sel_enum {
+ engine_sel__mes_unmap_queues__compute = 0,
+ engine_sel__mes_unmap_queues__sdma0 = 2,
+ engine_sel__mes_unmap_queues__sdmal = 3
+};
+
+struct PM4_MES_UNMAP_QUEUES {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ enum mes_unmap_queues_action_enum action:2;
+ uint32_t reserved1:2;
+ enum mes_unmap_queues_queue_sel_enum queue_sel:2;
+ uint32_t reserved2:20;
+ enum mes_unmap_queues_engine_sel_enum engine_sel:3;
+ uint32_t num_queues:3;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ union {
+ struct {
+ uint32_t pasid:16;
+ uint32_t reserved3:16;
+ } bitfields3a;
+ struct {
+ uint32_t reserved4:2;
+ uint32_t doorbell_offset0:21;
+ uint32_t reserved5:9;
+ } bitfields3b;
+ uint32_t ordinal3;
+ };
+
+ union {
+ struct {
+ uint32_t reserved6:2;
+ uint32_t doorbell_offset1:21;
+ uint32_t reserved7:9;
+ } bitfields4;
+ uint32_t ordinal4;
+ };
+
+ union {
+ struct {
+ uint32_t reserved8:2;
+ uint32_t doorbell_offset2:21;
+ uint32_t reserved9:9;
+ } bitfields5;
+ uint32_t ordinal5;
+ };
+
+ union {
+ struct {
+ uint32_t reserved10:2;
+ uint32_t doorbell_offset3:21;
+ uint32_t reserved11:9;
+ } bitfields6;
+ uint32_t ordinal6;
+ };
+};
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index c25728bc3..74909e72a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1186,6 +1186,11 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
* TODO: Retrieve max engine clock values from KGD
*/
+ if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
+ dev->node_props.capability |= HSA_CAP_DOORBELL_PACKET_TYPE;
+ pr_info("amdkfd: adding doorbell packet type capability\n");
+ }
+
res = 0;
err:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 989624b3c..c3ddb9b95 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -40,6 +40,7 @@
#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK 0x00000f00
#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8
#define HSA_CAP_RESERVED 0xfffff000
+#define HSA_CAP_DOORBELL_PACKET_TYPE 0x00001000
struct kfd_node_properties {
uint32_t cpu_cores_count;