From 863981e96738983919de841ec669e157e6bdaeb0 Mon Sep 17 00:00:00 2001 From: André Fabian Silva Delgado Date: Sun, 11 Sep 2016 04:34:46 -0300 Subject: Linux-libre 4.7.1-gnu --- drivers/gpu/drm/amd/amdgpu/Kconfig | 10 + drivers/gpu/drm/amd/amdgpu/amdgpu.h | 218 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c | 9 +- drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 45 +- drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c | 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 172 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 145 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 306 ++++- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 23 +- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 37 +- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 35 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 53 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 22 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 95 +- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 71 +- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 11 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 81 +- drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | 39 +- drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 7 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 85 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 105 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 70 +- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 131 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 21 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 395 +++--- drivers/gpu/drm/amd/amdgpu/atom.h | 2 +- drivers/gpu/drm/amd/amdgpu/atombios_crtc.c | 98 +- drivers/gpu/drm/amd/amdgpu/atombios_crtc.h | 2 + drivers/gpu/drm/amd/amdgpu/atombios_encoders.c | 93 +- drivers/gpu/drm/amd/amdgpu/atombios_i2c.c | 15 + drivers/gpu/drm/amd/amdgpu/atombios_i2c.h | 2 + drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 227 +--- drivers/gpu/drm/amd/amdgpu/cik.c | 17 +- drivers/gpu/drm/amd/amdgpu/cik_ih.c | 40 +- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 88 +- drivers/gpu/drm/amd/amdgpu/cikd.h | 4 +- drivers/gpu/drm/amd/amdgpu/cz_dpm.c | 15 +- drivers/gpu/drm/amd/amdgpu/cz_ih.c | 40 +- drivers/gpu/drm/amd/amdgpu/cz_smumgr.h | 2 +- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 42 +- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 235 ++-- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 37 +- drivers/gpu/drm/amd/amdgpu/fiji_dpm.c | 7 +- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 346 +---- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h | 1 - drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1644 ++++++++++++++++++++---- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h | 1 - drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 114 +- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 152 +-- drivers/gpu/drm/amd/amdgpu/iceland_dpm.c | 7 +- drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 38 +- drivers/gpu/drm/amd/amdgpu/kv_dpm.c | 70 +- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 84 +- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 271 ++-- drivers/gpu/drm/amd/amdgpu/smu_ucode_xfer_vi.h | 1 + drivers/gpu/drm/amd/amdgpu/tonga_dpm.c | 7 +- drivers/gpu/drm/amd/amdgpu/tonga_ih.c | 38 +- drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 123 +- 
drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 241 ++-- drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 420 +++--- drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 98 +- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 85 +- drivers/gpu/drm/amd/amdgpu/vi.c | 211 ++- drivers/gpu/drm/amd/amdgpu/vid.h | 5 +- 76 files changed, 4055 insertions(+), 3102 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu') diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index b30fcfa4b..7335c0420 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -15,3 +15,13 @@ config DRM_AMDGPU_USERPTR help This option selects CONFIG_MMU_NOTIFIER if it isn't already selected to enabled full userptr support. + +config DRM_AMDGPU_GART_DEBUGFS + bool "Allow GART access through debugfs" + depends on DRM_AMDGPU + depends on DEBUG_FS + default n + help + Selecting this option creates a debugfs file to inspect the mapped + pages. Uses more memory for housekeeping, enable only for debugging. + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 1bcbade47..e055d5be1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -283,7 +283,8 @@ struct amdgpu_ring_funcs { int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx); /* command emit functions */ void (*emit_ib)(struct amdgpu_ring *ring, - struct amdgpu_ib *ib); + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch); void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr, uint64_t seq, unsigned flags); void (*emit_pipeline_sync)(struct amdgpu_ring *ring); @@ -302,6 +303,8 @@ struct amdgpu_ring_funcs { void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count); /* pad the indirect buffer to the necessary number of dw */ void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib); + unsigned (*init_cond_exec)(struct amdgpu_ring *ring); + void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset); }; /* @@ -365,13 +368,6 @@ struct amdgpu_fence_driver { #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) #define AMDGPU_FENCE_FLAG_INT (1 << 1) -struct amdgpu_user_fence { - /* write-back bo */ - struct amdgpu_bo *bo; - /* write-back address offset to bo start */ - uint32_t offset; -}; - int amdgpu_fence_driver_init(struct amdgpu_device *adev); void amdgpu_fence_driver_fini(struct amdgpu_device *adev); void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev); @@ -391,6 +387,14 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); /* * TTM. 
*/ + +#define AMDGPU_TTM_LRU_SIZE 20 + +struct amdgpu_mman_lru { + struct list_head *lru[TTM_NUM_MEM_TYPES]; + struct list_head *swap_lru; +}; + struct amdgpu_mman { struct ttm_bo_global_ref bo_global_ref; struct drm_global_reference mem_global_ref; @@ -408,6 +412,9 @@ struct amdgpu_mman { struct amdgpu_ring *buffer_funcs_ring; /* Scheduler entity for buffer moves */ struct amd_sched_entity entity; + + /* custom LRU management */ + struct amdgpu_mman_lru log2_size[AMDGPU_TTM_LRU_SIZE]; }; int amdgpu_copy_buffer(struct amdgpu_ring *ring, @@ -494,9 +501,10 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, struct drm_file *file_priv); unsigned long amdgpu_gem_timeout(uint64_t timeout_ns); struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj); -struct drm_gem_object *amdgpu_gem_prime_import_sg_table(struct drm_device *dev, - struct dma_buf_attachment *attach, - struct sg_table *sg); +struct drm_gem_object * +amdgpu_gem_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sg); struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gobj, int flags); @@ -586,11 +594,16 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct reservation_object *resv, void *owner); +bool amdgpu_sync_is_idle(struct amdgpu_sync *sync); +int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src, + struct fence *fence); struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); int amdgpu_sync_wait(struct amdgpu_sync *sync); void amdgpu_sync_free(struct amdgpu_sync *sync); int amdgpu_sync_init(void); void amdgpu_sync_fini(void); +int amdgpu_fence_slab_init(void); +void amdgpu_fence_slab_fini(void); /* * GART structures, functions & helpers @@ -609,8 +622,9 @@ struct amdgpu_gart { unsigned num_gpu_pages; unsigned num_cpu_pages; unsigned table_size; +#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS struct page **pages; - dma_addr_t *pages_addr; +#endif bool ready; const struct amdgpu_gart_funcs *gart_funcs; }; @@ -709,6 +723,7 @@ struct amdgpu_flip_work { unsigned shared_count; struct fence **shared; struct fence_cb cb; + bool async; }; @@ -721,17 +736,7 @@ struct amdgpu_ib { uint32_t length_dw; uint64_t gpu_addr; uint32_t *ptr; - struct amdgpu_user_fence *user; - struct amdgpu_vm *vm; - unsigned vm_id; - uint64_t vm_pd_addr; - struct amdgpu_ctx *ctx; - uint32_t gds_base, gds_size; - uint32_t gws_base, gws_size; - uint32_t oa_base, oa_size; uint32_t flags; - /* resulting sequence number */ - uint64_t sequence; }; enum amdgpu_ring_type { @@ -742,22 +747,25 @@ enum amdgpu_ring_type { AMDGPU_RING_TYPE_VCE }; -extern struct amd_sched_backend_ops amdgpu_sched_ops; +extern const struct amd_sched_backend_ops amdgpu_sched_ops; int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, - struct amdgpu_job **job); + struct amdgpu_job **job, struct amdgpu_vm *vm); int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, struct amdgpu_job **job); + void amdgpu_job_free(struct amdgpu_job *job); +void amdgpu_job_free_func(struct kref *refcount); int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, struct amd_sched_entity *entity, void *owner, struct fence **f); +void amdgpu_job_timeout_func(struct work_struct *work); struct amdgpu_ring { struct amdgpu_device *adev; const struct amdgpu_ring_funcs *funcs; struct amdgpu_fence_driver fence_drv; - struct amd_gpu_scheduler sched; + struct amd_gpu_scheduler sched; spinlock_t fence_lock; struct 
amdgpu_bo *ring_obj; @@ -785,9 +793,12 @@ struct amdgpu_ring { unsigned wptr_offs; unsigned next_rptr_offs; unsigned fence_offs; - struct amdgpu_ctx *current_ctx; + uint64_t current_ctx; enum amdgpu_ring_type type; char name[16]; + unsigned cond_exe_offs; + u64 cond_exe_gpu_addr; + volatile u32 *cond_exe_cpu_addr; }; /* @@ -830,13 +841,6 @@ struct amdgpu_vm_pt { uint64_t addr; }; -struct amdgpu_vm_id { - struct amdgpu_vm_manager_id *mgr_id; - uint64_t pd_gpu_addr; - /* last flushed PD/PT update */ - struct fence *flushed_updates; -}; - struct amdgpu_vm { /* tree of virtual addresses mapped */ struct rb_root va; @@ -862,19 +866,29 @@ struct amdgpu_vm { struct amdgpu_vm_pt *page_tables; /* for id and flush management per ring */ - struct amdgpu_vm_id ids[AMDGPU_MAX_RINGS]; + struct amdgpu_vm_id *ids[AMDGPU_MAX_RINGS]; /* protecting freed */ spinlock_t freed_lock; /* Scheduler entity for page table updates */ struct amd_sched_entity entity; + + /* client id */ + u64 client_id; }; -struct amdgpu_vm_manager_id { +struct amdgpu_vm_id { struct list_head list; - struct fence *active; - atomic_long_t owner; + struct fence *first; + struct amdgpu_sync active; + struct fence *last_flush; + struct amdgpu_ring *last_user; + atomic64_t owner; + + uint64_t pd_gpu_addr; + /* last flushed PD/PT update */ + struct fence *flushed_updates; uint32_t gds_base; uint32_t gds_size; @@ -889,7 +903,7 @@ struct amdgpu_vm_manager { struct mutex lock; unsigned num_ids; struct list_head ids_lru; - struct amdgpu_vm_manager_id ids[AMDGPU_NUM_VM]; + struct amdgpu_vm_id ids[AMDGPU_NUM_VM]; uint32_t max_pfn; /* vram base address for page table entry */ @@ -901,6 +915,8 @@ struct amdgpu_vm_manager { struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS]; unsigned vm_pte_num_rings; atomic_t vm_pte_next_ring; + /* client id counter */ + atomic64_t client_counter; }; void amdgpu_vm_manager_init(struct amdgpu_device *adev); @@ -916,11 +932,11 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, struct amdgpu_sync *sync, struct fence *fence, unsigned *vm_id, uint64_t *vm_pd_addr); -void amdgpu_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr, - uint32_t gds_base, uint32_t gds_size, - uint32_t gws_base, uint32_t gws_size, - uint32_t oa_base, uint32_t oa_size); +int amdgpu_vm_flush(struct amdgpu_ring *ring, + unsigned vm_id, uint64_t pd_addr, + uint32_t gds_base, uint32_t gds_size, + uint32_t gws_base, uint32_t gws_size, + uint32_t oa_base, uint32_t oa_size); void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id); uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr); int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, @@ -1026,6 +1042,11 @@ void amdgpu_bo_list_free(struct amdgpu_bo_list *list); */ #include "clearstate_defs.h" +struct amdgpu_rlc_funcs { + void (*enter_safe_mode)(struct amdgpu_device *adev); + void (*exit_safe_mode)(struct amdgpu_device *adev); +}; + struct amdgpu_rlc { /* for power gating */ struct amdgpu_bo *save_restore_obj; @@ -1044,6 +1065,24 @@ struct amdgpu_rlc { uint64_t cp_table_gpu_addr; volatile uint32_t *cp_table_ptr; u32 cp_table_size; + + /* safe mode for updating CG/PG state */ + bool in_safe_mode; + const struct amdgpu_rlc_funcs *funcs; + + /* for firmware data */ + u32 save_and_restore_offset; + u32 clear_state_descriptor_offset; + u32 avail_scratch_ram_locations; + u32 reg_restore_list_size; + u32 reg_list_format_start; + u32 reg_list_format_separate_start; + 
u32 starting_offsets_start; + u32 reg_list_format_size_bytes; + u32 reg_list_size_bytes; + + u32 *register_list_format; + u32 *register_restore; }; struct amdgpu_mec { @@ -1097,6 +1136,12 @@ struct amdgpu_gca_config { uint32_t macrotile_mode_array[16]; }; +struct amdgpu_cu_info { + uint32_t number; /* total active CU number */ + uint32_t ao_cu_mask; + uint32_t bitmap[4][4]; +}; + struct amdgpu_gfx { struct mutex gpu_clock_mutex; struct amdgpu_gca_config config; @@ -1129,17 +1174,19 @@ struct amdgpu_gfx { struct amdgpu_irq_src priv_reg_irq; struct amdgpu_irq_src priv_inst_irq; /* gfx status */ - uint32_t gfx_current_status; + uint32_t gfx_current_status; /* ce ram size*/ - unsigned ce_ram_size; + unsigned ce_ram_size; + struct amdgpu_cu_info cu_info; }; int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned size, struct amdgpu_ib *ib); -void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fence *f); +void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, + struct fence *f); int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, struct amdgpu_ib *ib, struct fence *last_vm_update, - struct fence **f); + struct amdgpu_job *job, struct fence **f); int amdgpu_ib_pool_init(struct amdgpu_device *adev); void amdgpu_ib_pool_fini(struct amdgpu_device *adev); int amdgpu_ib_ring_tests(struct amdgpu_device *adev); @@ -1164,7 +1211,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring); struct amdgpu_cs_chunk { uint32_t chunk_id; uint32_t length_dw; - uint32_t *kdata; + void *kdata; }; struct amdgpu_cs_parser { @@ -1195,13 +1242,25 @@ struct amdgpu_cs_parser { struct amdgpu_job { struct amd_sched_job base; struct amdgpu_device *adev; + struct amdgpu_vm *vm; struct amdgpu_ring *ring; struct amdgpu_sync sync; struct amdgpu_ib *ibs; struct fence *fence; /* the hw fence */ uint32_t num_ibs; void *owner; - struct amdgpu_user_fence uf; + uint64_t ctx; + unsigned vm_id; + uint64_t vm_pd_addr; + uint32_t gds_base, gds_size; + uint32_t gws_base, gws_size; + uint32_t oa_base, oa_size; + + /* user fence handling */ + struct amdgpu_bo *uf_bo; + uint32_t uf_offset; + uint64_t uf_sequence; + }; #define to_amdgpu_job(sched_job) \ container_of((sched_job), struct amdgpu_job, base) @@ -1582,10 +1641,12 @@ void amdgpu_get_pcie_info(struct amdgpu_device *adev); /* * UVD */ -#define AMDGPU_MAX_UVD_HANDLES 10 -#define AMDGPU_UVD_STACK_SIZE (1024*1024) -#define AMDGPU_UVD_HEAP_SIZE (1024*1024) -#define AMDGPU_UVD_FIRMWARE_OFFSET 256 +#define AMDGPU_DEFAULT_UVD_HANDLES 10 +#define AMDGPU_MAX_UVD_HANDLES 40 +#define AMDGPU_UVD_STACK_SIZE (200*1024) +#define AMDGPU_UVD_HEAP_SIZE (256*1024) +#define AMDGPU_UVD_SESSION_SIZE (50*1024) +#define AMDGPU_UVD_FIRMWARE_OFFSET 256 struct amdgpu_uvd { struct amdgpu_bo *vcpu_bo; @@ -1593,6 +1654,7 @@ struct amdgpu_uvd { uint64_t gpu_addr; unsigned fw_version; void *saved_bo; + unsigned max_handles; atomic_t handles[AMDGPU_MAX_UVD_HANDLES]; struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES]; struct delayed_work idle_work; @@ -1645,7 +1707,7 @@ struct amdgpu_sdma { struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES]; struct amdgpu_irq_src trap_irq; struct amdgpu_irq_src illegal_inst_irq; - int num_instances; + int num_instances; }; /* @@ -1691,12 +1753,12 @@ static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {} * Debugfs */ struct amdgpu_debugfs { - struct drm_info_list *files; + const struct drm_info_list *files; unsigned num_files; }; int amdgpu_debugfs_add_files(struct amdgpu_device *adev, - struct 
drm_info_list *files, + const struct drm_info_list *files, unsigned nfiles); int amdgpu_debugfs_fence_init(struct amdgpu_device *adev); @@ -1738,13 +1800,6 @@ struct amdgpu_allowed_register_entry { bool grbm_indexed; }; -struct amdgpu_cu_info { - uint32_t number; /* total active CU number */ - uint32_t ao_cu_mask; - uint32_t bitmap[4][4]; -}; - - /* * ASIC specific functions. */ @@ -1762,10 +1817,11 @@ struct amdgpu_asic_funcs { u32 (*get_xclk)(struct amdgpu_device *adev); /* get the gpu clock counter */ uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev); - int (*get_cu_info)(struct amdgpu_device *adev, struct amdgpu_cu_info *info); /* MM block clocks */ int (*set_uvd_clocks)(struct amdgpu_device *adev, u32 vclk, u32 dclk); int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk); + /* query virtual capabilities */ + u32 (*get_virtual_caps)(struct amdgpu_device *adev); }; /* @@ -1855,20 +1911,17 @@ struct amdgpu_atcs { /* * CGS */ -void *amdgpu_cgs_create_device(struct amdgpu_device *adev); -void amdgpu_cgs_destroy_device(void *cgs_device); - - -/* - * CGS - */ -void *amdgpu_cgs_create_device(struct amdgpu_device *adev); -void amdgpu_cgs_destroy_device(void *cgs_device); +struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev); +void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device); /* GPU virtualization */ +#define AMDGPU_VIRT_CAPS_SRIOV_EN (1 << 0) +#define AMDGPU_VIRT_CAPS_IS_VF (1 << 1) struct amdgpu_virtualization { bool supports_sr_iov; + bool is_virtual; + u32 caps; }; /* @@ -1904,16 +1957,15 @@ struct amdgpu_device { int usec_timeout; const struct amdgpu_asic_funcs *asic_funcs; bool shutdown; - bool suspend; bool need_dma32; bool accel_working; - struct work_struct reset_work; + struct work_struct reset_work; struct notifier_block acpi_nb; struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS]; struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS]; - unsigned debugfs_count; + unsigned debugfs_count; #if defined(CONFIG_DEBUG_FS) - struct dentry *debugfs_regs; + struct dentry *debugfs_regs[AMDGPU_DEBUGFS_MAX_COMPONENTS]; #endif struct amdgpu_atif atif; struct amdgpu_atcs atcs; @@ -1926,7 +1978,6 @@ struct amdgpu_device { /* BIOS */ uint8_t *bios; bool is_atom_bios; - uint16_t bios_header_start; struct amdgpu_bo *stollen_vga_memory; uint32_t bios_scratch[AMDGPU_BIOS_NUM_SCRATCH]; @@ -2159,11 +2210,11 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev)) #define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d)) #define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec)) +#define amdgpu_asic_get_virtual_caps(adev) ((adev)->asic_funcs->get_virtual_caps((adev))) #define amdgpu_asic_get_gpu_clock_counter(adev) (adev)->asic_funcs->get_gpu_clock_counter((adev)) #define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev)) #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l)) #define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v))) -#define amdgpu_asic_get_cu_info(adev, info) (adev)->asic_funcs->get_cu_info((adev), (info)) #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid)) #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), 
(idx), (addr), (flags)) #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) @@ -2175,7 +2226,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r)) #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r)) -#define amdgpu_ring_emit_ib(r, ib) (r)->funcs->emit_ib((r), (ib)) +#define amdgpu_ring_emit_ib(r, ib, vm_id, c) (r)->funcs->emit_ib((r), (ib), (vm_id), (c)) #define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r)) #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr)) #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) @@ -2183,6 +2234,8 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) +#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) +#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) #define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev)) #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv)) #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev)) @@ -2196,7 +2249,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_display_hpd_set_polarity(adev, h) (adev)->mode_info.funcs->hpd_set_polarity((adev), (h)) #define amdgpu_display_hpd_get_gpio_reg(adev) (adev)->mode_info.funcs->hpd_get_gpio_reg((adev)) #define amdgpu_display_bandwidth_update(adev) (adev)->mode_info.funcs->bandwidth_update((adev)) -#define amdgpu_display_page_flip(adev, crtc, base) (adev)->mode_info.funcs->page_flip((adev), (crtc), (base)) +#define amdgpu_display_page_flip(adev, crtc, base, async) (adev)->mode_info.funcs->page_flip((adev), (crtc), (base), (async)) #define amdgpu_display_page_flip_get_scanoutpos(adev, crtc, vbl, pos) (adev)->mode_info.funcs->page_flip_get_scanoutpos((adev), (crtc), (vbl), (pos)) #define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c)) #define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r)) @@ -2339,7 +2392,7 @@ static inline void amdgpu_unregister_atpx_handler(void) {} * KMS */ extern const struct drm_ioctl_desc amdgpu_ioctls_kms[]; -extern int amdgpu_max_kms_ioctl; +extern const int amdgpu_max_kms_ioctl; int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags); int amdgpu_driver_unload_kms(struct drm_device *dev); @@ -2398,5 +2451,4 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, uint64_t addr, struct amdgpu_bo **bo); #include "amdgpu_object.h" - #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index b7b583c42..252edba16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -467,13 +467,6 @@ static int acp_soft_reset(void *handle) return 0; } -static void acp_print_status(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - dev_info(adev->dev, "ACP STATUS\n"); -} - static int acp_set_clockgating_state(void *handle, enum amd_clockgating_state 
state) { @@ -487,6 +480,7 @@ static int acp_set_powergating_state(void *handle, } const struct amd_ip_funcs acp_ip_funcs = { + .name = "acp_ip", .early_init = acp_early_init, .late_init = NULL, .sw_init = acp_sw_init, @@ -498,7 +492,6 @@ const struct amd_ip_funcs acp_ip_funcs = { .is_idle = acp_is_idle, .wait_for_idle = acp_wait_for_idle, .soft_reset = acp_soft_reset, - .print_status = acp_print_status, .set_clockgating_state = acp_set_clockgating_state, .set_powergating_state = acp_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h index f6e32a639..8a396313c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h @@ -30,7 +30,7 @@ struct amdgpu_acp { struct device *parent; - void *cgs_device; + struct cgs_device *cgs_device; struct amd_acp_private *private; struct mfd_cell *acp_cell; struct resource *acp_res; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 84b0ce39e..9df1bcb35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -234,16 +234,6 @@ amdgpu_atombios_get_hpd_info_from_gpio(struct amdgpu_device *adev, return hpd; } -static bool amdgpu_atombios_apply_quirks(struct amdgpu_device *adev, - uint32_t supported_device, - int *connector_type, - struct amdgpu_i2c_bus_rec *i2c_bus, - uint16_t *line_mux, - struct amdgpu_hpd *hpd) -{ - return true; -} - static const int object_connector_convert[] = { DRM_MODE_CONNECTOR_Unknown, DRM_MODE_CONNECTOR_DVII, @@ -514,11 +504,6 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device * conn_id = le16_to_cpu(path->usConnObjectId); - if (!amdgpu_atombios_apply_quirks - (adev, le16_to_cpu(path->usDeviceTag), &connector_type, - &ddc_bus, &conn_id, &hpd)) - continue; - amdgpu_display_add_connector(adev, conn_id, le16_to_cpu(path->usDeviceTag), @@ -699,6 +684,36 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev) return ret; } +union gfx_info { + ATOM_GFX_INFO_V2_1 info; +}; + +int amdgpu_atombios_get_gfx_info(struct amdgpu_device *adev) +{ + struct amdgpu_mode_info *mode_info = &adev->mode_info; + int index = GetIndexIntoMasterTable(DATA, GFX_Info); + uint8_t frev, crev; + uint16_t data_offset; + int ret = -EINVAL; + + if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) { + union gfx_info *gfx_info = (union gfx_info *) + (mode_info->atom_context->bios + data_offset); + + adev->gfx.config.max_shader_engines = gfx_info->info.max_shader_engines; + adev->gfx.config.max_tile_pipes = gfx_info->info.max_tile_pipes; + adev->gfx.config.max_cu_per_sh = gfx_info->info.max_cu_per_sh; + adev->gfx.config.max_sh_per_se = gfx_info->info.max_sh_per_se; + adev->gfx.config.max_backends_per_se = gfx_info->info.max_backends_per_se; + adev->gfx.config.max_texture_channel_caches = + gfx_info->info.max_texture_channel_caches; + + ret = 0; + } + return ret; +} + union igp_info { struct _ATOM_INTEGRATED_SYSTEM_INFO info; struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h index 9e1442053..8c2e69661 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h @@ -144,6 +144,8 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device * int amdgpu_atombios_get_clock_info(struct amdgpu_device 
*adev); +int amdgpu_atombios_get_gfx_info(struct amdgpu_device *adev); + bool amdgpu_atombios_get_asic_ss_info(struct amdgpu_device *adev, struct amdgpu_atom_ss *ss, int id, u32 clock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index cd639c362..33e47a43a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -141,7 +141,7 @@ out_cleanup: void amdgpu_benchmark(struct amdgpu_device *adev, int test_number) { int i; - int common_modes[AMDGPU_BENCHMARK_COMMON_MODES_N] = { + static const int common_modes[AMDGPU_BENCHMARK_COMMON_MODES_N] = { 640 * 480 * 4, 720 * 480 * 4, 800 * 600 * 4, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 80add2237..99ca75baa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -349,7 +349,7 @@ static inline bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev) bool amdgpu_get_bios(struct amdgpu_device *adev) { bool r; - uint16_t tmp; + uint16_t tmp, bios_header_start; r = amdgpu_atrm_get_bios(adev); if (r == false) @@ -383,11 +383,11 @@ bool amdgpu_get_bios(struct amdgpu_device *adev) goto free_bios; } - adev->bios_header_start = RBIOS16(0x48); - if (!adev->bios_header_start) { + bios_header_start = RBIOS16(0x48); + if (!bios_header_start) { goto free_bios; } - tmp = adev->bios_header_start + 4; + tmp = bios_header_start + 4; if (!memcmp(adev->bios + tmp, "ATOM", 4) || !memcmp(adev->bios + tmp, "MOTA", 4)) { adev->is_atom_bios = true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index eacd810fc..823bf5e0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -106,7 +106,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, struct amdgpu_bo *bo; struct mm_struct *usermm; - gobj = drm_gem_object_lookup(adev->ddev, filp, info[i].bo_handle); + gobj = drm_gem_object_lookup(filp, info[i].bo_handle); if (!gobj) { r = -ENOENT; goto error_free; @@ -263,7 +263,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, for (i = 0; i < args->in.bo_number; ++i) { if (copy_from_user(&info[i], uptr, bytes)) goto error_free; - + uptr += args->in.bo_info_size; } } @@ -271,7 +271,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, switch (args->in.operation) { case AMDGPU_BO_LIST_OP_CREATE: r = amdgpu_bo_list_create(fpriv, &list, &handle); - if (r) + if (r) goto error_free; r = amdgpu_bo_list_set(adev, filp, list, info, @@ -281,7 +281,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, goto error_free; break; - + case AMDGPU_BO_LIST_OP_DESTROY: amdgpu_bo_list_destroy(fpriv, handle); handle = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index fa197c9af..6f9dcfddc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -42,7 +42,7 @@ struct amdgpu_cgs_device { struct amdgpu_device *adev = \ ((struct amdgpu_cgs_device *)cgs_device)->adev -static int amdgpu_cgs_gpu_mem_info(void *cgs_device, enum cgs_gpu_mem_type type, +static int amdgpu_cgs_gpu_mem_info(struct cgs_device *cgs_device, enum cgs_gpu_mem_type type, uint64_t *mc_start, uint64_t *mc_size, uint64_t *mem_size) { @@ -73,7 +73,7 @@ static int amdgpu_cgs_gpu_mem_info(void *cgs_device, enum cgs_gpu_mem_type type, return 0; } -static int 
amdgpu_cgs_gmap_kmem(void *cgs_device, void *kmem, +static int amdgpu_cgs_gmap_kmem(struct cgs_device *cgs_device, void *kmem, uint64_t size, uint64_t min_offset, uint64_t max_offset, cgs_handle_t *kmem_handle, uint64_t *mcaddr) @@ -102,7 +102,7 @@ static int amdgpu_cgs_gmap_kmem(void *cgs_device, void *kmem, return ret; } -static int amdgpu_cgs_gunmap_kmem(void *cgs_device, cgs_handle_t kmem_handle) +static int amdgpu_cgs_gunmap_kmem(struct cgs_device *cgs_device, cgs_handle_t kmem_handle) { struct amdgpu_bo *obj = (struct amdgpu_bo *)kmem_handle; @@ -118,7 +118,7 @@ static int amdgpu_cgs_gunmap_kmem(void *cgs_device, cgs_handle_t kmem_handle) return 0; } -static int amdgpu_cgs_alloc_gpu_mem(void *cgs_device, +static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device, enum cgs_gpu_mem_type type, uint64_t size, uint64_t align, uint64_t min_offset, uint64_t max_offset, @@ -208,7 +208,7 @@ static int amdgpu_cgs_alloc_gpu_mem(void *cgs_device, return ret; } -static int amdgpu_cgs_free_gpu_mem(void *cgs_device, cgs_handle_t handle) +static int amdgpu_cgs_free_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle) { struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; @@ -225,7 +225,7 @@ static int amdgpu_cgs_free_gpu_mem(void *cgs_device, cgs_handle_t handle) return 0; } -static int amdgpu_cgs_gmap_gpu_mem(void *cgs_device, cgs_handle_t handle, +static int amdgpu_cgs_gmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle, uint64_t *mcaddr) { int r; @@ -246,7 +246,7 @@ static int amdgpu_cgs_gmap_gpu_mem(void *cgs_device, cgs_handle_t handle, return r; } -static int amdgpu_cgs_gunmap_gpu_mem(void *cgs_device, cgs_handle_t handle) +static int amdgpu_cgs_gunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle) { int r; struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; @@ -258,7 +258,7 @@ static int amdgpu_cgs_gunmap_gpu_mem(void *cgs_device, cgs_handle_t handle) return r; } -static int amdgpu_cgs_kmap_gpu_mem(void *cgs_device, cgs_handle_t handle, +static int amdgpu_cgs_kmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle, void **map) { int r; @@ -271,7 +271,7 @@ static int amdgpu_cgs_kmap_gpu_mem(void *cgs_device, cgs_handle_t handle, return r; } -static int amdgpu_cgs_kunmap_gpu_mem(void *cgs_device, cgs_handle_t handle) +static int amdgpu_cgs_kunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle) { int r; struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; @@ -283,20 +283,20 @@ static int amdgpu_cgs_kunmap_gpu_mem(void *cgs_device, cgs_handle_t handle) return r; } -static uint32_t amdgpu_cgs_read_register(void *cgs_device, unsigned offset) +static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, unsigned offset) { CGS_FUNC_ADEV; return RREG32(offset); } -static void amdgpu_cgs_write_register(void *cgs_device, unsigned offset, +static void amdgpu_cgs_write_register(struct cgs_device *cgs_device, unsigned offset, uint32_t value) { CGS_FUNC_ADEV; WREG32(offset, value); } -static uint32_t amdgpu_cgs_read_ind_register(void *cgs_device, +static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device, enum cgs_ind_reg space, unsigned index) { @@ -320,7 +320,7 @@ static uint32_t amdgpu_cgs_read_ind_register(void *cgs_device, return 0; } -static void amdgpu_cgs_write_ind_register(void *cgs_device, +static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device, enum cgs_ind_reg space, unsigned index, uint32_t value) { @@ -343,7 +343,7 @@ static void amdgpu_cgs_write_ind_register(void *cgs_device, WARN(1, 
"Invalid indirect register space"); } -static uint8_t amdgpu_cgs_read_pci_config_byte(void *cgs_device, unsigned addr) +static uint8_t amdgpu_cgs_read_pci_config_byte(struct cgs_device *cgs_device, unsigned addr) { CGS_FUNC_ADEV; uint8_t val; @@ -353,7 +353,7 @@ static uint8_t amdgpu_cgs_read_pci_config_byte(void *cgs_device, unsigned addr) return val; } -static uint16_t amdgpu_cgs_read_pci_config_word(void *cgs_device, unsigned addr) +static uint16_t amdgpu_cgs_read_pci_config_word(struct cgs_device *cgs_device, unsigned addr) { CGS_FUNC_ADEV; uint16_t val; @@ -363,7 +363,7 @@ static uint16_t amdgpu_cgs_read_pci_config_word(void *cgs_device, unsigned addr) return val; } -static uint32_t amdgpu_cgs_read_pci_config_dword(void *cgs_device, +static uint32_t amdgpu_cgs_read_pci_config_dword(struct cgs_device *cgs_device, unsigned addr) { CGS_FUNC_ADEV; @@ -374,7 +374,7 @@ static uint32_t amdgpu_cgs_read_pci_config_dword(void *cgs_device, return val; } -static void amdgpu_cgs_write_pci_config_byte(void *cgs_device, unsigned addr, +static void amdgpu_cgs_write_pci_config_byte(struct cgs_device *cgs_device, unsigned addr, uint8_t value) { CGS_FUNC_ADEV; @@ -382,7 +382,7 @@ static void amdgpu_cgs_write_pci_config_byte(void *cgs_device, unsigned addr, WARN(ret, "pci_write_config_byte error"); } -static void amdgpu_cgs_write_pci_config_word(void *cgs_device, unsigned addr, +static void amdgpu_cgs_write_pci_config_word(struct cgs_device *cgs_device, unsigned addr, uint16_t value) { CGS_FUNC_ADEV; @@ -390,7 +390,7 @@ static void amdgpu_cgs_write_pci_config_word(void *cgs_device, unsigned addr, WARN(ret, "pci_write_config_word error"); } -static void amdgpu_cgs_write_pci_config_dword(void *cgs_device, unsigned addr, +static void amdgpu_cgs_write_pci_config_dword(struct cgs_device *cgs_device, unsigned addr, uint32_t value) { CGS_FUNC_ADEV; @@ -399,7 +399,7 @@ static void amdgpu_cgs_write_pci_config_dword(void *cgs_device, unsigned addr, } -static int amdgpu_cgs_get_pci_resource(void *cgs_device, +static int amdgpu_cgs_get_pci_resource(struct cgs_device *cgs_device, enum cgs_resource_type resource_type, uint64_t size, uint64_t offset, @@ -433,7 +433,7 @@ static int amdgpu_cgs_get_pci_resource(void *cgs_device, } } -static const void *amdgpu_cgs_atom_get_data_table(void *cgs_device, +static const void *amdgpu_cgs_atom_get_data_table(struct cgs_device *cgs_device, unsigned table, uint16_t *size, uint8_t *frev, uint8_t *crev) { @@ -449,7 +449,7 @@ static const void *amdgpu_cgs_atom_get_data_table(void *cgs_device, return NULL; } -static int amdgpu_cgs_atom_get_cmd_table_revs(void *cgs_device, unsigned table, +static int amdgpu_cgs_atom_get_cmd_table_revs(struct cgs_device *cgs_device, unsigned table, uint8_t *frev, uint8_t *crev) { CGS_FUNC_ADEV; @@ -462,7 +462,7 @@ static int amdgpu_cgs_atom_get_cmd_table_revs(void *cgs_device, unsigned table, return -EINVAL; } -static int amdgpu_cgs_atom_exec_cmd_table(void *cgs_device, unsigned table, +static int amdgpu_cgs_atom_exec_cmd_table(struct cgs_device *cgs_device, unsigned table, void *args) { CGS_FUNC_ADEV; @@ -471,33 +471,33 @@ static int amdgpu_cgs_atom_exec_cmd_table(void *cgs_device, unsigned table, adev->mode_info.atom_context, table, args); } -static int amdgpu_cgs_create_pm_request(void *cgs_device, cgs_handle_t *request) +static int amdgpu_cgs_create_pm_request(struct cgs_device *cgs_device, cgs_handle_t *request) { /* TODO */ return 0; } -static int amdgpu_cgs_destroy_pm_request(void *cgs_device, cgs_handle_t request) +static int 
amdgpu_cgs_destroy_pm_request(struct cgs_device *cgs_device, cgs_handle_t request) { /* TODO */ return 0; } -static int amdgpu_cgs_set_pm_request(void *cgs_device, cgs_handle_t request, +static int amdgpu_cgs_set_pm_request(struct cgs_device *cgs_device, cgs_handle_t request, int active) { /* TODO */ return 0; } -static int amdgpu_cgs_pm_request_clock(void *cgs_device, cgs_handle_t request, +static int amdgpu_cgs_pm_request_clock(struct cgs_device *cgs_device, cgs_handle_t request, enum cgs_clock clock, unsigned freq) { /* TODO */ return 0; } -static int amdgpu_cgs_pm_request_engine(void *cgs_device, cgs_handle_t request, +static int amdgpu_cgs_pm_request_engine(struct cgs_device *cgs_device, cgs_handle_t request, enum cgs_engine engine, int powered) { /* TODO */ @@ -506,7 +506,7 @@ static int amdgpu_cgs_pm_request_engine(void *cgs_device, cgs_handle_t request, -static int amdgpu_cgs_pm_query_clock_limits(void *cgs_device, +static int amdgpu_cgs_pm_query_clock_limits(struct cgs_device *cgs_device, enum cgs_clock clock, struct cgs_clock_limits *limits) { @@ -514,7 +514,7 @@ static int amdgpu_cgs_pm_query_clock_limits(void *cgs_device, return 0; } -static int amdgpu_cgs_set_camera_voltages(void *cgs_device, uint32_t mask, +static int amdgpu_cgs_set_camera_voltages(struct cgs_device *cgs_device, uint32_t mask, const uint32_t *voltages) { DRM_ERROR("not implemented"); @@ -565,7 +565,7 @@ static const struct amdgpu_irq_src_funcs cgs_irq_funcs = { .process = cgs_process_irq, }; -static int amdgpu_cgs_add_irq_source(void *cgs_device, unsigned src_id, +static int amdgpu_cgs_add_irq_source(struct cgs_device *cgs_device, unsigned src_id, unsigned num_types, cgs_irq_source_set_func_t set, cgs_irq_handler_func_t handler, @@ -600,19 +600,19 @@ static int amdgpu_cgs_add_irq_source(void *cgs_device, unsigned src_id, return ret; } -static int amdgpu_cgs_irq_get(void *cgs_device, unsigned src_id, unsigned type) +static int amdgpu_cgs_irq_get(struct cgs_device *cgs_device, unsigned src_id, unsigned type) { CGS_FUNC_ADEV; return amdgpu_irq_get(adev, adev->irq.sources[src_id], type); } -static int amdgpu_cgs_irq_put(void *cgs_device, unsigned src_id, unsigned type) +static int amdgpu_cgs_irq_put(struct cgs_device *cgs_device, unsigned src_id, unsigned type) { CGS_FUNC_ADEV; return amdgpu_irq_put(adev, adev->irq.sources[src_id], type); } -int amdgpu_cgs_set_clockgating_state(void *cgs_device, +int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device, enum amd_ip_block_type block_type, enum amd_clockgating_state state) { @@ -633,7 +633,7 @@ int amdgpu_cgs_set_clockgating_state(void *cgs_device, return r; } -int amdgpu_cgs_set_powergating_state(void *cgs_device, +int amdgpu_cgs_set_powergating_state(struct cgs_device *cgs_device, enum amd_ip_block_type block_type, enum amd_powergating_state state) { @@ -655,7 +655,7 @@ int amdgpu_cgs_set_powergating_state(void *cgs_device, } -static uint32_t fw_type_convert(void *cgs_device, uint32_t fw_type) +static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type) { CGS_FUNC_ADEV; enum AMDGPU_UCODE_ID result = AMDGPU_UCODE_ID_MAXIMUM; @@ -681,9 +681,10 @@ static uint32_t fw_type_convert(void *cgs_device, uint32_t fw_type) result = AMDGPU_UCODE_ID_CP_MEC1; break; case CGS_UCODE_ID_CP_MEC_JT2: - if (adev->asic_type == CHIP_TONGA) + if (adev->asic_type == CHIP_TONGA || adev->asic_type == CHIP_POLARIS11 + || adev->asic_type == CHIP_POLARIS10) result = AMDGPU_UCODE_ID_CP_MEC2; - else if (adev->asic_type == CHIP_CARRIZO) + else result = 
AMDGPU_UCODE_ID_CP_MEC1; break; case CGS_UCODE_ID_RLC_G: @@ -695,13 +696,24 @@ static uint32_t fw_type_convert(void *cgs_device, uint32_t fw_type) return result; } -static int amdgpu_cgs_get_firmware_info(void *cgs_device, +static int amdgpu_cgs_rel_firmware(struct cgs_device *cgs_device, enum cgs_ucode_id type) +{ + CGS_FUNC_ADEV; + if ((CGS_UCODE_ID_SMU == type) || (CGS_UCODE_ID_SMU_SK == type)) { + release_firmware(adev->pm.fw); + return 0; + } + /* cannot release other firmware because they are not created by cgs */ + return -EINVAL; +} + +static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, enum cgs_ucode_id type, struct cgs_firmware_info *info) { CGS_FUNC_ADEV; - if (CGS_UCODE_ID_SMU != type) { + if ((CGS_UCODE_ID_SMU != type) && (CGS_UCODE_ID_SMU_SK != type)) { uint64_t gpu_addr; uint32_t data_size; const struct gfx_firmware_header_v1_0 *header; @@ -734,30 +746,44 @@ static int amdgpu_cgs_get_firmware_info(void *cgs_device, const uint8_t *src; const struct smc_firmware_header_v1_0 *hdr; - switch (adev->asic_type) { - case CHIP_TONGA: - strcpy(fw_name, "/*(DEBLOBBED)*/"); - break; - case CHIP_FIJI: - strcpy(fw_name, "/*(DEBLOBBED)*/"); - break; - default: - DRM_ERROR("SMC firmware not supported\n"); - return -EINVAL; - } + if (!adev->pm.fw) { + switch (adev->asic_type) { + case CHIP_TONGA: + strcpy(fw_name, "/*(DEBLOBBED)*/"); + break; + case CHIP_FIJI: + strcpy(fw_name, "/*(DEBLOBBED)*/"); + break; + case CHIP_POLARIS11: + if (type == CGS_UCODE_ID_SMU) + strcpy(fw_name, "/*(DEBLOBBED)*/"); + else if (type == CGS_UCODE_ID_SMU_SK) + strcpy(fw_name, "/*(DEBLOBBED)*/"); + break; + case CHIP_POLARIS10: + if (type == CGS_UCODE_ID_SMU) + strcpy(fw_name, "/*(DEBLOBBED)*/"); + else if (type == CGS_UCODE_ID_SMU_SK) + strcpy(fw_name, "/*(DEBLOBBED)*/"); + break; + default: + DRM_ERROR("SMC firmware not supported\n"); + return -EINVAL; + } - err = reject_firmware(&adev->pm.fw, fw_name, adev->dev); - if (err) { - DRM_ERROR("Failed to request firmware\n"); - return err; - } + err = reject_firmware(&adev->pm.fw, fw_name, adev->dev); + if (err) { + DRM_ERROR("Failed to request firmware\n"); + return err; + } - err = amdgpu_ucode_validate(adev->pm.fw); - if (err) { - DRM_ERROR("Failed to load firmware \"%s\"", fw_name); - release_firmware(adev->pm.fw); - adev->pm.fw = NULL; - return err; + err = amdgpu_ucode_validate(adev->pm.fw); + if (err) { + DRM_ERROR("Failed to load firmware \"%s\"", fw_name); + release_firmware(adev->pm.fw); + adev->pm.fw = NULL; + return err; + } } hdr = (const struct smc_firmware_header_v1_0 *) adev->pm.fw->data; @@ -774,7 +800,7 @@ static int amdgpu_cgs_get_firmware_info(void *cgs_device, return 0; } -static int amdgpu_cgs_query_system_info(void *cgs_device, +static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device, struct cgs_system_info *sys_info) { CGS_FUNC_ADEV; @@ -801,6 +827,9 @@ static int amdgpu_cgs_query_system_info(void *cgs_device, case CGS_SYSTEM_INFO_PG_FLAGS: sys_info->value = adev->pg_flags; break; + case CGS_SYSTEM_INFO_GFX_CU_INFO: + sys_info->value = adev->gfx.cu_info.number; + break; default: return -ENODEV; } @@ -808,7 +837,7 @@ static int amdgpu_cgs_query_system_info(void *cgs_device, return 0; } -static int amdgpu_cgs_get_active_displays_info(void *cgs_device, +static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device, struct cgs_display_info *info) { CGS_FUNC_ADEV; @@ -851,7 +880,7 @@ static int amdgpu_cgs_get_active_displays_info(void *cgs_device, } -static int amdgpu_cgs_notify_dpm_enabled(void 
*cgs_device, bool enabled) +static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device *cgs_device, bool enabled) { CGS_FUNC_ADEV; @@ -867,7 +896,7 @@ static int amdgpu_cgs_notify_dpm_enabled(void *cgs_device, bool enabled) */ #if defined(CONFIG_ACPI) -static int amdgpu_cgs_acpi_eval_object(void *cgs_device, +static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device, struct cgs_acpi_method_info *info) { CGS_FUNC_ADEV; @@ -1030,14 +1059,14 @@ error: return result; } #else -static int amdgpu_cgs_acpi_eval_object(void *cgs_device, +static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device, struct cgs_acpi_method_info *info) { return -EIO; } #endif -int amdgpu_cgs_call_acpi_method(void *cgs_device, +int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device, uint32_t acpi_method, uint32_t acpi_function, void *pinput, void *poutput, @@ -1107,6 +1136,7 @@ static const struct cgs_ops amdgpu_cgs_ops = { amdgpu_cgs_pm_query_clock_limits, amdgpu_cgs_set_camera_voltages, amdgpu_cgs_get_firmware_info, + amdgpu_cgs_rel_firmware, amdgpu_cgs_set_powergating_state, amdgpu_cgs_set_clockgating_state, amdgpu_cgs_get_active_displays_info, @@ -1121,7 +1151,7 @@ static const struct cgs_os_ops amdgpu_cgs_os_ops = { amdgpu_cgs_irq_put }; -void *amdgpu_cgs_create_device(struct amdgpu_device *adev) +struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev) { struct amdgpu_cgs_device *cgs_device = kmalloc(sizeof(*cgs_device), GFP_KERNEL); @@ -1135,10 +1165,10 @@ void *amdgpu_cgs_create_device(struct amdgpu_device *adev) cgs_device->base.os_ops = &amdgpu_cgs_os_ops; cgs_device->adev = adev; - return cgs_device; + return (struct cgs_device *)cgs_device; } -void amdgpu_cgs_destroy_device(void *cgs_device) +void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device) { kfree(cgs_device); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 7ef2c1392..cb07da411 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -439,7 +439,7 @@ static void amdgpu_connector_add_common_modes(struct drm_encoder *encoder, struct drm_display_mode *mode = NULL; struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode; int i; - struct mode_size { + static const struct mode_size { int w; int h; } common_modes[17] = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9392e50a7..9bc8f1d99 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -24,7 +24,6 @@ * Authors: * Jerome Glisse */ -#include #include #include #include @@ -88,44 +87,41 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, } static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, - struct amdgpu_user_fence *uf, - struct drm_amdgpu_cs_chunk_fence *fence_data) + struct drm_amdgpu_cs_chunk_fence *data, + uint32_t *offset) { struct drm_gem_object *gobj; - uint32_t handle; - handle = fence_data->handle; - gobj = drm_gem_object_lookup(p->adev->ddev, p->filp, - fence_data->handle); + gobj = drm_gem_object_lookup(p->filp, data->handle); if (gobj == NULL) return -EINVAL; - uf->bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); - uf->offset = fence_data->offset; - - if (amdgpu_ttm_tt_get_usermm(uf->bo->tbo.ttm)) { - drm_gem_object_unreference_unlocked(gobj); - return -EINVAL; - } - - p->uf_entry.robj = amdgpu_bo_ref(uf->bo); + p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); 
p->uf_entry.priority = 0; p->uf_entry.tv.bo = &p->uf_entry.robj->tbo; p->uf_entry.tv.shared = true; p->uf_entry.user_pages = NULL; + *offset = data->offset; drm_gem_object_unreference_unlocked(gobj); + + if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) { + amdgpu_bo_unref(&p->uf_entry.robj); + return -EINVAL; + } + return 0; } int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; union drm_amdgpu_cs *cs = data; uint64_t *chunk_array_user; uint64_t *chunk_array; - struct amdgpu_user_fence uf = {}; unsigned size, num_ibs = 0; + uint32_t uf_offset = 0; int i; int ret; @@ -200,7 +196,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) goto free_partial_kdata; } - ret = amdgpu_cs_user_fence_chunk(p, &uf, (void *)p->chunks[i].kdata); + ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata, + &uf_offset); if (ret) goto free_partial_kdata; @@ -215,11 +212,14 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) } } - ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job); + ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm); if (ret) goto free_all_kdata; - p->job->uf = uf; + if (p->uf_entry.robj) { + p->job->uf_bo = amdgpu_bo_ref(p->uf_entry.robj); + p->job->uf_offset = uf_offset; + } kfree(chunk_array); return 0; @@ -377,7 +377,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, INIT_LIST_HEAD(&duplicates); amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); - if (p->job->uf.bo) + if (p->uf_entry.robj) list_add(&p->uf_entry.tv.head, &p->validated); if (need_mmap_lock) @@ -473,6 +473,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, goto error_validate; if (p->bo_list) { + struct amdgpu_bo *gds = p->bo_list->gds_obj; + struct amdgpu_bo *gws = p->bo_list->gws_obj; + struct amdgpu_bo *oa = p->bo_list->oa_obj; struct amdgpu_vm *vm = &fpriv->vm; unsigned i; @@ -481,6 +484,19 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo); } + + if (gds) { + p->job->gds_base = amdgpu_bo_gpu_offset(gds); + p->job->gds_size = amdgpu_bo_size(gds); + } + if (gws) { + p->job->gws_base = amdgpu_bo_gpu_offset(gws); + p->job->gws_size = amdgpu_bo_size(gws); + } + if (oa) { + p->job->oa_base = amdgpu_bo_gpu_offset(oa); + p->job->oa_size = amdgpu_bo_size(oa); + } } error_validate: @@ -527,16 +543,6 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) return 0; } -static int cmp_size_smaller_first(void *priv, struct list_head *a, - struct list_head *b) -{ - struct amdgpu_bo_list_entry *la = list_entry(a, struct amdgpu_bo_list_entry, tv.head); - struct amdgpu_bo_list_entry *lb = list_entry(b, struct amdgpu_bo_list_entry, tv.head); - - /* Sort A before B if A is smaller. */ - return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages; -} - /** * cs_parser_fini() - clean parser states * @parser: parser structure holding parsing context. @@ -553,18 +559,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo if (!error) { amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm); - /* Sort the buffer list from the smallest to largest buffer, - * which affects the order of buffers in the LRU list. - * This assures that the smallest buffers are added first - * to the LRU list, so they are likely to be later evicted - * first, instead of large buffers whose eviction is more - * expensive. 
- * - * This slightly lowers the number of bytes moved by TTM - * per frame under memory pressure. - */ - list_sort(NULL, &parser->validated, cmp_size_smaller_first); - ttm_eu_fence_buffer_objects(&parser->ticket, &parser->validated, parser->fence); @@ -763,41 +757,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, ib->length_dw = chunk_ib->ib_bytes / 4; ib->flags = chunk_ib->flags; - ib->ctx = parser->ctx; j++; } - /* add GDS resources to first IB */ - if (parser->bo_list) { - struct amdgpu_bo *gds = parser->bo_list->gds_obj; - struct amdgpu_bo *gws = parser->bo_list->gws_obj; - struct amdgpu_bo *oa = parser->bo_list->oa_obj; - struct amdgpu_ib *ib = &parser->job->ibs[0]; - - if (gds) { - ib->gds_base = amdgpu_bo_gpu_offset(gds); - ib->gds_size = amdgpu_bo_size(gds); - } - if (gws) { - ib->gws_base = amdgpu_bo_gpu_offset(gws); - ib->gws_size = amdgpu_bo_size(gws); - } - if (oa) { - ib->oa_base = amdgpu_bo_gpu_offset(oa); - ib->oa_size = amdgpu_bo_size(oa); - } - } - /* wrap the last IB with user fence */ - if (parser->job->uf.bo) { - struct amdgpu_ib *ib = &parser->job->ibs[parser->job->num_ibs - 1]; - - /* UVD & VCE fw doesn't support user fences */ - if (parser->job->ring->type == AMDGPU_RING_TYPE_UVD || - parser->job->ring->type == AMDGPU_RING_TYPE_VCE) - return -EINVAL; - - ib->user = &parser->job->uf; - } + /* UVD & VCE fw doesn't support user fences */ + if (parser->job->uf_bo && ( + parser->job->ring->type == AMDGPU_RING_TYPE_UVD || + parser->job->ring->type == AMDGPU_RING_TYPE_VCE)) + return -EINVAL; return 0; } @@ -862,28 +829,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_ring *ring = p->job->ring; - struct amd_sched_fence *fence; + struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity; + struct fence *fence; struct amdgpu_job *job; + int r; job = p->job; p->job = NULL; - job->base.sched = &ring->sched; - job->base.s_entity = &p->ctx->rings[ring->idx].entity; - job->owner = p->filp; - - fence = amd_sched_fence_create(job->base.s_entity, p->filp); - if (!fence) { + r = amd_sched_job_init(&job->base, &ring->sched, + entity, amdgpu_job_timeout_func, + amdgpu_job_free_func, + p->filp, &fence); + if (r) { amdgpu_job_free(job); - return -ENOMEM; + return r; } - job->base.s_fence = fence; - p->fence = fence_get(&fence->base); - - cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, - &fence->base); - job->ibs[job->num_ibs - 1].sequence = cs->out.handle; + job->owner = p->filp; + job->ctx = entity->fence_context; + p->fence = fence_get(fence); + cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, fence); + job->uf_sequence = cs->out.handle; trace_amdgpu_cs_ioctl(job); amd_sched_entity_push_job(&job->base); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2139da773..6e920086a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -59,6 +59,8 @@ static const char *amdgpu_asic_name[] = { "FIJI", "CARRIZO", "STONEY", + "POLARIS10", + "POLARIS11", "LAST", }; @@ -346,7 +348,7 @@ static int amdgpu_doorbell_init(struct amdgpu_device *adev) adev->doorbell.base = pci_resource_start(adev->pdev, 2); adev->doorbell.size = pci_resource_len(adev->pdev, 2); - adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32), + adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32), AMDGPU_DOORBELL_MAX_ASSIGNMENT+1); if (adev->doorbell.num_doorbells == 0) return -EINVAL; @@ -825,8 
+827,10 @@ static uint32_t cail_ioreg_read(struct card_info *info, uint32_t reg) */ static void amdgpu_atombios_fini(struct amdgpu_device *adev) { - if (adev->mode_info.atom_context) + if (adev->mode_info.atom_context) { kfree(adev->mode_info.atom_context->scratch); + kfree(adev->mode_info.atom_context->iio); + } kfree(adev->mode_info.atom_context); adev->mode_info.atom_context = NULL; kfree(adev->mode_info.atom_card_info); @@ -936,15 +940,11 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev) } if (amdgpu_gart_size != -1) { - /* gtt size must be power of two and greater or equal to 32M */ + /* gtt size must be greater or equal to 32M */ if (amdgpu_gart_size < 32) { dev_warn(adev->dev, "gart size (%d) too small\n", amdgpu_gart_size); amdgpu_gart_size = -1; - } else if (!amdgpu_check_pot_argument(amdgpu_gart_size)) { - dev_warn(adev->dev, "gart size (%d) must be a power of 2\n", - amdgpu_gart_size); - amdgpu_gart_size = -1; } } @@ -1144,6 +1144,8 @@ static int amdgpu_early_init(struct amdgpu_device *adev) case CHIP_TOPAZ: case CHIP_TONGA: case CHIP_FIJI: + case CHIP_POLARIS11: + case CHIP_POLARIS10: case CHIP_CARRIZO: case CHIP_STONEY: if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY) @@ -1196,7 +1198,7 @@ static int amdgpu_early_init(struct amdgpu_device *adev) if (r == -ENOENT) { adev->ip_block_status[i].valid = false; } else if (r) { - DRM_ERROR("early_init %d failed %d\n", i, r); + DRM_ERROR("early_init of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r); return r; } else { adev->ip_block_status[i].valid = true; @@ -1219,7 +1221,7 @@ static int amdgpu_init(struct amdgpu_device *adev) continue; r = adev->ip_blocks[i].funcs->sw_init((void *)adev); if (r) { - DRM_ERROR("sw_init %d failed %d\n", i, r); + DRM_ERROR("sw_init of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r); return r; } adev->ip_block_status[i].sw = true; @@ -1252,7 +1254,7 @@ static int amdgpu_init(struct amdgpu_device *adev) continue; r = adev->ip_blocks[i].funcs->hw_init((void *)adev); if (r) { - DRM_ERROR("hw_init %d failed %d\n", i, r); + DRM_ERROR("hw_init of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r); return r; } adev->ip_block_status[i].hw = true; @@ -1272,13 +1274,13 @@ static int amdgpu_late_init(struct amdgpu_device *adev) r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev, AMD_CG_STATE_GATE); if (r) { - DRM_ERROR("set_clockgating_state(gate) %d failed %d\n", i, r); + DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r); return r; } if (adev->ip_blocks[i].funcs->late_init) { r = adev->ip_blocks[i].funcs->late_init((void *)adev); if (r) { - DRM_ERROR("late_init %d failed %d\n", i, r); + DRM_ERROR("late_init of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r); return r; } } @@ -1302,13 +1304,13 @@ static int amdgpu_fini(struct amdgpu_device *adev) r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev, AMD_CG_STATE_UNGATE); if (r) { - DRM_ERROR("set_clockgating_state(ungate) %d failed %d\n", i, r); + DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r); return r; } r = adev->ip_blocks[i].funcs->hw_fini((void *)adev); /* XXX handle errors */ if (r) { - DRM_DEBUG("hw_fini %d failed %d\n", i, r); + DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r); } adev->ip_block_status[i].hw = false; } @@ -1319,12 +1321,17 @@ static int amdgpu_fini(struct amdgpu_device 
*adev) r = adev->ip_blocks[i].funcs->sw_fini((void *)adev); /* XXX handle errors */ if (r) { - DRM_DEBUG("sw_fini %d failed %d\n", i, r); + DRM_DEBUG("sw_fini of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r); } adev->ip_block_status[i].sw = false; adev->ip_block_status[i].valid = false; } + for (i = adev->num_ip_blocks - 1; i >= 0; i--) { + if (adev->ip_blocks[i].funcs->late_fini) + adev->ip_blocks[i].funcs->late_fini((void *)adev); + } + return 0; } @@ -1332,20 +1339,29 @@ static int amdgpu_suspend(struct amdgpu_device *adev) { int i, r; + /* ungate SMC block first */ + r = amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC, + AMD_CG_STATE_UNGATE); + if (r) { + DRM_ERROR("set_clockgating_state(ungate) SMC failed %d\n",r); + } + for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_block_status[i].valid) continue; /* ungate blocks so that suspend can properly shut them down */ - r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev, - AMD_CG_STATE_UNGATE); - if (r) { - DRM_ERROR("set_clockgating_state(ungate) %d failed %d\n", i, r); + if (i != AMD_IP_BLOCK_TYPE_SMC) { + r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev, + AMD_CG_STATE_UNGATE); + if (r) { + DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r); + } } /* XXX handle errors */ r = adev->ip_blocks[i].funcs->suspend(adev); /* XXX handle errors */ if (r) { - DRM_ERROR("suspend %d failed %d\n", i, r); + DRM_ERROR("suspend of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r); } } @@ -1361,7 +1377,7 @@ static int amdgpu_resume(struct amdgpu_device *adev) continue; r = adev->ip_blocks[i].funcs->resume(adev); if (r) { - DRM_ERROR("resume %d failed %d\n", i, r); + DRM_ERROR("resume of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r); return r; } } @@ -1369,6 +1385,15 @@ static int amdgpu_resume(struct amdgpu_device *adev) return 0; } +static bool amdgpu_device_is_virtual(void) +{ +#ifdef CONFIG_X86 + return boot_cpu_has(X86_FEATURE_HYPERVISOR); +#else + return false; +#endif +} + /** * amdgpu_device_init - initialize the driver * @@ -1503,9 +1528,14 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->virtualization.supports_sr_iov = amdgpu_atombios_has_gpu_virtualization_table(adev); + /* Check if we are executing in a virtualized environment */ + adev->virtualization.is_virtual = amdgpu_device_is_virtual(); + adev->virtualization.caps = amdgpu_asic_get_virtual_caps(adev); + /* Post card if necessary */ if (!amdgpu_card_posted(adev) || - adev->virtualization.supports_sr_iov) { + (adev->virtualization.is_virtual && + !(adev->virtualization.caps & AMDGPU_VIRT_CAPS_SRIOV_EN))) { if (!adev->bios) { dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n"); return -EINVAL; @@ -2007,7 +2037,7 @@ void amdgpu_get_pcie_info(struct amdgpu_device *adev) * Debugfs */ int amdgpu_debugfs_add_files(struct amdgpu_device *adev, - struct drm_info_list *files, + const struct drm_info_list *files, unsigned nfiles) { unsigned i; @@ -2119,32 +2149,246 @@ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, return result; } +static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = f->f_inode->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + value = RREG32_PCIE(*pos >> 2); + r = put_user(value, (uint32_t *)buf); + if 
(r) + return r; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + +static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = f->f_inode->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + r = get_user(value, (uint32_t *)buf); + if (r) + return r; + + WREG32_PCIE(*pos >> 2, value); + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + +static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = f->f_inode->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + value = RREG32_DIDT(*pos >> 2); + r = put_user(value, (uint32_t *)buf); + if (r) + return r; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + +static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = f->f_inode->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + r = get_user(value, (uint32_t *)buf); + if (r) + return r; + + WREG32_DIDT(*pos >> 2, value); + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + +static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = f->f_inode->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + value = RREG32_SMC(*pos >> 2); + r = put_user(value, (uint32_t *)buf); + if (r) + return r; + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + +static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = f->f_inode->i_private; + ssize_t result = 0; + int r; + + if (size & 0x3 || *pos & 0x3) + return -EINVAL; + + while (size) { + uint32_t value; + + r = get_user(value, (uint32_t *)buf); + if (r) + return r; + + WREG32_SMC(*pos >> 2, value); + + result += 4; + buf += 4; + *pos += 4; + size -= 4; + } + + return result; +} + static const struct file_operations amdgpu_debugfs_regs_fops = { .owner = THIS_MODULE, .read = amdgpu_debugfs_regs_read, .write = amdgpu_debugfs_regs_write, .llseek = default_llseek }; +static const struct file_operations amdgpu_debugfs_regs_didt_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_regs_didt_read, + .write = amdgpu_debugfs_regs_didt_write, + .llseek = default_llseek +}; +static const struct file_operations amdgpu_debugfs_regs_pcie_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_regs_pcie_read, + .write = amdgpu_debugfs_regs_pcie_write, + .llseek = default_llseek +}; +static const struct file_operations amdgpu_debugfs_regs_smc_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_regs_smc_read, + .write = amdgpu_debugfs_regs_smc_write, + .llseek = default_llseek +}; + +static const struct file_operations *debugfs_regs[] = { + &amdgpu_debugfs_regs_fops, + &amdgpu_debugfs_regs_didt_fops, + &amdgpu_debugfs_regs_pcie_fops, + &amdgpu_debugfs_regs_smc_fops, +}; + +static const char *debugfs_regs_names[] = { + "amdgpu_regs", + "amdgpu_regs_didt", + 
"amdgpu_regs_pcie", + "amdgpu_regs_smc", +}; static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) { struct drm_minor *minor = adev->ddev->primary; struct dentry *ent, *root = minor->debugfs_root; + unsigned i, j; + + for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) { + ent = debugfs_create_file(debugfs_regs_names[i], + S_IFREG | S_IRUGO, root, + adev, debugfs_regs[i]); + if (IS_ERR(ent)) { + for (j = 0; j < i; j++) { + debugfs_remove(adev->debugfs_regs[i]); + adev->debugfs_regs[i] = NULL; + } + return PTR_ERR(ent); + } - ent = debugfs_create_file("amdgpu_regs", S_IFREG | S_IRUGO, root, - adev, &amdgpu_debugfs_regs_fops); - if (IS_ERR(ent)) - return PTR_ERR(ent); - i_size_write(ent->d_inode, adev->rmmio_size); - adev->debugfs_regs = ent; + if (!i) + i_size_write(ent->d_inode, adev->rmmio_size); + adev->debugfs_regs[i] = ent; + } return 0; } static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) { - debugfs_remove(adev->debugfs_regs); - adev->debugfs_regs = NULL; + unsigned i; + + for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) { + if (adev->debugfs_regs[i]) { + debugfs_remove(adev->debugfs_regs[i]); + adev->debugfs_regs[i] = NULL; + } + } } int amdgpu_debugfs_init(struct drm_minor *minor) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 3fb405b3a..b0832da2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -131,12 +131,17 @@ static void amdgpu_flip_work_func(struct work_struct *__work) vblank->framedur_ns / 1000, vblank->linedur_ns / 1000, stat, vpos, hpos); - /* set the flip status */ + /* Do the flip (mmio) */ + adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base, work->async); + + /* Set the flip status */ amdgpuCrtc->pflip_status = AMDGPU_FLIP_SUBMITTED; spin_unlock_irqrestore(&crtc->dev->event_lock, flags); - /* Do the flip (mmio) */ - adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base); + + DRM_DEBUG_DRIVER("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_SUBMITTED, work: %p,\n", + amdgpuCrtc->crtc_id, amdgpuCrtc, work); + } /* @@ -192,6 +197,7 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc, work->event = event; work->adev = adev; work->crtc_id = amdgpu_crtc->crtc_id; + work->async = (page_flip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0; /* schedule unpin of the old buffer */ old_amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb); @@ -252,6 +258,9 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc, amdgpu_crtc->pflip_status = AMDGPU_FLIP_PENDING; amdgpu_crtc->pflip_works = work; + + DRM_DEBUG_DRIVER("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_PENDING, work: %p,\n", + amdgpu_crtc->crtc_id, amdgpu_crtc, work); /* update crtc fb */ crtc->primary->fb = fb; spin_unlock_irqrestore(&crtc->dev->event_lock, flags); @@ -554,7 +563,7 @@ amdgpu_user_framebuffer_create(struct drm_device *dev, struct amdgpu_framebuffer *amdgpu_fb; int ret; - obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]); + obj = drm_gem_object_lookup(file_priv, mode_cmd->handles[0]); if (obj == NULL) { dev_err(&dev->pdev->dev, "No GEM object associated to handle 0x%08X, " "can't create framebuffer\n", mode_cmd->handles[0]); @@ -588,20 +597,20 @@ const struct drm_mode_config_funcs amdgpu_mode_funcs = { .output_poll_changed = amdgpu_output_poll_changed }; -static struct drm_prop_enum_list amdgpu_underscan_enum_list[] = +static const struct drm_prop_enum_list amdgpu_underscan_enum_list[] = { { UNDERSCAN_OFF, "off" }, { UNDERSCAN_ON, "on" }, { UNDERSCAN_AUTO, "auto" 
}, }; -static struct drm_prop_enum_list amdgpu_audio_enum_list[] = +static const struct drm_prop_enum_list amdgpu_audio_enum_list[] = { { AMDGPU_AUDIO_DISABLE, "off" }, { AMDGPU_AUDIO_ENABLE, "on" }, { AMDGPU_AUDIO_AUTO, "auto" }, }; /* XXX support different dither options? spatial, temporal, both, etc. */ -static struct drm_prop_enum_list amdgpu_dither_enum_list[] = +static const struct drm_prop_enum_list amdgpu_dither_enum_list[] = { { AMDGPU_FMT_DITHER_DISABLE, "off" }, { AMDGPU_FMT_DITHER_ENABLE, "on" }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f1e17d600..f888c015f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -50,9 +50,11 @@ * KMS wrapper. * - 3.0.0 - initial driver * - 3.1.0 - allow reading more status registers (GRBM, SRBM, SDMA, CP) + * - 3.2.0 - GFX8: Uses EOP_TC_WB_ACTION_EN, so UMDs don't have to do the same + * at the end of IBs. */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 1 +#define KMS_DRIVER_MINOR 2 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; @@ -166,7 +168,7 @@ module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444); MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))"); module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444); -static struct pci_device_id pciidlist[] = { +static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_CIK /* Kaveri */ {0x1002, 0x1304, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|AMD_IS_MOBILITY|AMD_IS_APU}, @@ -277,6 +279,28 @@ static struct pci_device_id pciidlist[] = { {0x1002, 0x9877, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CARRIZO|AMD_IS_APU}, /* stoney */ {0x1002, 0x98E4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_STONEY|AMD_IS_APU}, + /* Polaris11 */ + {0x1002, 0x67E0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11}, + {0x1002, 0x67E3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11}, + {0x1002, 0x67E8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11}, + {0x1002, 0x67EB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11}, + {0x1002, 0x67EF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11}, + {0x1002, 0x67FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11}, + {0x1002, 0x67E1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11}, + {0x1002, 0x67E7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11}, + {0x1002, 0x67E9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS11}, + /* Polaris10 */ + {0x1002, 0x67C0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, + {0x1002, 0x67C1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, + {0x1002, 0x67C2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, + {0x1002, 0x67C4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, + {0x1002, 0x67C7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, + {0x1002, 0x67DF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, + {0x1002, 0x67C8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, + {0x1002, 0x67C9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, + {0x1002, 0x67CA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, + {0x1002, 0x67CC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, + {0x1002, 0x67CF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_POLARIS10}, {0, 0, 0} }; @@ -514,7 +538,7 @@ static struct drm_driver kms_driver = { .irq_uninstall = amdgpu_irq_uninstall, .irq_handler = amdgpu_irq_handler, .ioctls = amdgpu_ioctls_kms, - .gem_free_object = amdgpu_gem_object_free, + .gem_free_object_unlocked = amdgpu_gem_object_free, .gem_open_object = amdgpu_gem_object_open, .gem_close_object = amdgpu_gem_object_close, .dumb_create = 
amdgpu_mode_dumb_create, @@ -553,22 +577,22 @@ static struct pci_driver amdgpu_kms_pci_driver = { .driver.pm = &amdgpu_pm_ops, }; + + static int __init amdgpu_init(void) { amdgpu_sync_init(); -#ifdef CONFIG_VGA_CONSOLE + amdgpu_fence_slab_init(); if (vgacon_text_force()) { DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n"); return -EINVAL; } -#endif DRM_INFO("amdgpu kernel modesetting enabled.\n"); driver = &kms_driver; pdriver = &amdgpu_kms_pci_driver; driver->driver_features |= DRIVER_MODESET; driver->num_ioctls = amdgpu_max_kms_ioctl; amdgpu_register_atpx_handler(); - /* let modprobe override vga console setting */ return drm_pci_init(driver, pdriver); } @@ -579,6 +603,7 @@ static void __exit amdgpu_exit(void) drm_pci_exit(driver, pdriver); amdgpu_unregister_atpx_handler(); amdgpu_sync_fini(); + amdgpu_fence_slab_fini(); } module_init(amdgpu_init); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index d81f1f488..d1558768c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -55,8 +55,21 @@ struct amdgpu_fence { }; static struct kmem_cache *amdgpu_fence_slab; -static atomic_t amdgpu_fence_slab_ref = ATOMIC_INIT(0); +int amdgpu_fence_slab_init(void) +{ + amdgpu_fence_slab = kmem_cache_create( + "amdgpu_fence", sizeof(struct amdgpu_fence), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!amdgpu_fence_slab) + return -ENOMEM; + return 0; +} + +void amdgpu_fence_slab_fini(void) +{ + kmem_cache_destroy(amdgpu_fence_slab); +} /* * Cast helper */ @@ -198,7 +211,7 @@ void amdgpu_fence_process(struct amdgpu_ring *ring) /* There is always exactly one thread signaling this fence slot */ fence = rcu_dereference_protected(*ptr, 1); - rcu_assign_pointer(*ptr, NULL); + RCU_INIT_POINTER(*ptr, NULL); BUG_ON(!fence); @@ -352,9 +365,9 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, (unsigned long)ring); - ring->fence_drv.num_fences_mask = num_hw_submission - 1; + ring->fence_drv.num_fences_mask = num_hw_submission * 2 - 1; spin_lock_init(&ring->fence_drv.lock); - ring->fence_drv.fences = kcalloc(num_hw_submission, sizeof(void *), + ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *), GFP_KERNEL); if (!ring->fence_drv.fences) return -ENOMEM; @@ -396,13 +409,6 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, */ int amdgpu_fence_driver_init(struct amdgpu_device *adev) { - if (atomic_inc_return(&amdgpu_fence_slab_ref) == 1) { - amdgpu_fence_slab = kmem_cache_create( - "amdgpu_fence", sizeof(struct amdgpu_fence), 0, - SLAB_HWCACHE_ALIGN, NULL); - if (!amdgpu_fence_slab) - return -ENOMEM; - } if (amdgpu_debugfs_fence_init(adev)) dev_err(adev->dev, "fence debugfs file creation failed\n"); @@ -437,13 +443,10 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) amd_sched_fini(&ring->sched); del_timer_sync(&ring->fence_drv.fallback_timer); for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j) - fence_put(ring->fence_drv.fences[i]); + fence_put(ring->fence_drv.fences[j]); kfree(ring->fence_drv.fences); ring->fence_drv.initialized = false; } - - if (atomic_dec_and_test(&amdgpu_fence_slab_ref)) - kmem_cache_destroy(amdgpu_fence_slab); } /** @@ -639,7 +642,7 @@ static int amdgpu_debugfs_gpu_reset(struct seq_file *m, void *data) return 0; } -static struct drm_info_list amdgpu_debugfs_fence_list[] = { +static const struct drm_info_list amdgpu_debugfs_fence_list[] = { {"amdgpu_fence_info", 
&amdgpu_debugfs_fence_info, 0, NULL}, {"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL} }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 7312d729d..921bce2df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -238,18 +238,17 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset, t = offset / AMDGPU_GPU_PAGE_SIZE; p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); for (i = 0; i < pages; i++, p++) { - if (adev->gart.pages[p]) { - adev->gart.pages[p] = NULL; - adev->gart.pages_addr[p] = adev->dummy_page.addr; - page_base = adev->gart.pages_addr[p]; - if (!adev->gart.ptr) - continue; +#ifdef CONFIG_AMDGPU_GART_DEBUGFS + adev->gart.pages[p] = NULL; +#endif + page_base = adev->dummy_page.addr; + if (!adev->gart.ptr) + continue; - for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { - amdgpu_gart_set_pte_pde(adev, adev->gart.ptr, - t, page_base, flags); - page_base += AMDGPU_GPU_PAGE_SIZE; - } + for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { + amdgpu_gart_set_pte_pde(adev, adev->gart.ptr, + t, page_base, flags); + page_base += AMDGPU_GPU_PAGE_SIZE; } } mb(); @@ -287,10 +286,11 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset, p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); for (i = 0; i < pages; i++, p++) { - adev->gart.pages_addr[p] = dma_addr[i]; +#ifdef CONFIG_AMDGPU_GART_DEBUGFS adev->gart.pages[p] = pagelist[i]; +#endif if (adev->gart.ptr) { - page_base = adev->gart.pages_addr[p]; + page_base = dma_addr[i]; for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { amdgpu_gart_set_pte_pde(adev, adev->gart.ptr, t, page_base, flags); page_base += AMDGPU_GPU_PAGE_SIZE; @@ -312,11 +312,11 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset, */ int amdgpu_gart_init(struct amdgpu_device *adev) { - int r, i; + int r; - if (adev->gart.pages) { + if (adev->dummy_page.page) return 0; - } + /* We need PAGE_SIZE >= AMDGPU_GPU_PAGE_SIZE */ if (PAGE_SIZE < AMDGPU_GPU_PAGE_SIZE) { DRM_ERROR("Page size is smaller than GPU page size!\n"); @@ -330,22 +330,16 @@ int amdgpu_gart_init(struct amdgpu_device *adev) adev->gart.num_gpu_pages = adev->mc.gtt_size / AMDGPU_GPU_PAGE_SIZE; DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n", adev->gart.num_cpu_pages, adev->gart.num_gpu_pages); + +#ifdef CONFIG_AMDGPU_GART_DEBUGFS /* Allocate pages table */ adev->gart.pages = vzalloc(sizeof(void *) * adev->gart.num_cpu_pages); if (adev->gart.pages == NULL) { amdgpu_gart_fini(adev); return -ENOMEM; } - adev->gart.pages_addr = vzalloc(sizeof(dma_addr_t) * - adev->gart.num_cpu_pages); - if (adev->gart.pages_addr == NULL) { - amdgpu_gart_fini(adev); - return -ENOMEM; - } - /* set GART entry to point to the dummy page by default */ - for (i = 0; i < adev->gart.num_cpu_pages; i++) { - adev->gart.pages_addr[i] = adev->dummy_page.addr; - } +#endif + return 0; } @@ -358,15 +352,14 @@ int amdgpu_gart_init(struct amdgpu_device *adev) */ void amdgpu_gart_fini(struct amdgpu_device *adev) { - if (adev->gart.pages && adev->gart.pages_addr && adev->gart.ready) { + if (adev->gart.ready) { /* unbind pages */ amdgpu_gart_unbind(adev, 0, adev->gart.num_cpu_pages); } adev->gart.ready = false; +#ifdef CONFIG_AMDGPU_GART_DEBUGFS vfree(adev->gart.pages); - vfree(adev->gart.pages_addr); adev->gart.pages = NULL; - adev->gart.pages_addr = NULL; - +#endif amdgpu_dummy_page_fini(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h index c3f4e8559..503d54098 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h @@ -43,7 +43,7 @@ struct amdgpu_ring; struct amdgpu_bo; struct amdgpu_gds_asic_info { - uint32_t total_size; + uint32_t total_size; uint32_t gfx_partition_size; uint32_t cs_partition_size; }; @@ -52,8 +52,8 @@ struct amdgpu_gds { struct amdgpu_gds_asic_info mem; struct amdgpu_gds_asic_info gws; struct amdgpu_gds_asic_info oa; - /* At present, GDS, GWS and OA resources for gfx (graphics) - * is always pre-allocated and available for graphics operation. + /* At present, GDS, GWS and OA resources for gfx (graphics) + * is always pre-allocated and available for graphics operation. * Such resource is shared between all gfx clients. * TODO: move this operation to user space * */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index fa6a27bff..8fab64860 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -93,7 +93,7 @@ void amdgpu_gem_force_release(struct amdgpu_device *adev) struct drm_device *ddev = adev->ddev; struct drm_file *file; - mutex_lock(&ddev->struct_mutex); + mutex_lock(&ddev->filelist_mutex); list_for_each_entry(file, &ddev->filelist, lhead) { struct drm_gem_object *gobj; @@ -103,13 +103,13 @@ void amdgpu_gem_force_release(struct amdgpu_device *adev) spin_lock(&file->table_lock); idr_for_each_entry(&file->object_idr, gobj, handle) { WARN_ONCE(1, "And also active allocations!\n"); - drm_gem_object_unreference(gobj); + drm_gem_object_unreference_unlocked(gobj); } idr_destroy(&file->object_idr); spin_unlock(&file->table_lock); } - mutex_unlock(&ddev->struct_mutex); + mutex_unlock(&ddev->filelist_mutex); } /* @@ -338,7 +338,7 @@ int amdgpu_mode_dumb_mmap(struct drm_file *filp, struct drm_gem_object *gobj; struct amdgpu_bo *robj; - gobj = drm_gem_object_lookup(dev, filp, handle); + gobj = drm_gem_object_lookup(filp, handle); if (gobj == NULL) { return -ENOENT; } @@ -402,7 +402,7 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, int r = 0; long ret; - gobj = drm_gem_object_lookup(dev, filp, handle); + gobj = drm_gem_object_lookup(filp, handle); if (gobj == NULL) { return -ENOENT; } @@ -436,7 +436,7 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data, int r = -1; DRM_DEBUG("%d \n", args->handle); - gobj = drm_gem_object_lookup(dev, filp, args->handle); + gobj = drm_gem_object_lookup(filp, args->handle); if (gobj == NULL) return -ENOENT; robj = gem_to_amdgpu_bo(gobj); @@ -584,7 +584,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - gobj = drm_gem_object_lookup(dev, filp, args->handle); + gobj = drm_gem_object_lookup(filp, args->handle); if (gobj == NULL) return -ENOENT; rbo = gem_to_amdgpu_bo(gobj); @@ -646,7 +646,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, struct amdgpu_bo *robj; int r; - gobj = drm_gem_object_lookup(dev, filp, args->handle); + gobj = drm_gem_object_lookup(filp, args->handle); if (gobj == NULL) { return -ENOENT; } @@ -769,7 +769,7 @@ static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data) struct drm_file *file; int r; - r = mutex_lock_interruptible(&dev->struct_mutex); + r = mutex_lock_interruptible(&dev->filelist_mutex); if (r) return r; @@ -793,11 +793,11 @@ static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data) spin_unlock(&file->table_lock); } - mutex_unlock(&dev->struct_mutex); + 
mutex_unlock(&dev->filelist_mutex); return 0; } -static struct drm_info_list amdgpu_debugfs_gem_list[] = { +static const struct drm_info_list amdgpu_debugfs_gem_list[] = { {"amdgpu_gem_info", &amdgpu_debugfs_gem_info, 0, NULL}, }; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 8443cea68..34e35423b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -74,9 +74,6 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); } - ib->vm = vm; - ib->vm_id = 0; - return 0; } @@ -89,7 +86,8 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, * * Free an IB (all asics). */ -void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fence *f) +void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, + struct fence *f) { amdgpu_sa_bo_free(adev, &ib->sa_bo, f); } @@ -117,28 +115,37 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fen */ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, struct amdgpu_ib *ibs, struct fence *last_vm_update, - struct fence **f) + struct amdgpu_job *job, struct fence **f) { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib *ib = &ibs[0]; - struct amdgpu_ctx *ctx, *old_ctx; + bool skip_preamble, need_ctx_switch; + unsigned patch_offset = ~0; struct amdgpu_vm *vm; struct fence *hwf; + uint64_t ctx; + unsigned i; int r = 0; if (num_ibs == 0) return -EINVAL; - ctx = ibs->ctx; - vm = ibs->vm; + /* ring tests don't use a job */ + if (job) { + vm = job->vm; + ctx = job->ctx; + } else { + vm = NULL; + ctx = 0; + } if (!ring->ready) { dev_err(adev->dev, "couldn't schedule ib\n"); return -EINVAL; } - if (vm && !ibs->vm_id) { + if (vm && !job->vm_id) { dev_err(adev->dev, "VM IB without ID\n"); return -EINVAL; } @@ -149,58 +156,68 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, return r; } + if (ring->type == AMDGPU_RING_TYPE_SDMA && ring->funcs->init_cond_exec) + patch_offset = amdgpu_ring_init_cond_exec(ring); + if (vm) { - /* do context switch */ - amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr, - ib->gds_base, ib->gds_size, - ib->gws_base, ib->gws_size, - ib->oa_base, ib->oa_size); - - if (ring->funcs->emit_hdp_flush) - amdgpu_ring_emit_hdp_flush(ring); + r = amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr, + job->gds_base, job->gds_size, + job->gws_base, job->gws_size, + job->oa_base, job->oa_size); + if (r) { + amdgpu_ring_undo(ring); + return r; + } } - old_ctx = ring->current_ctx; + if (ring->funcs->emit_hdp_flush) + amdgpu_ring_emit_hdp_flush(ring); + + /* always set cond_exec_polling to CONTINUE */ + *ring->cond_exe_cpu_addr = 1; + + skip_preamble = ring->current_ctx == ctx; + need_ctx_switch = ring->current_ctx != ctx; for (i = 0; i < num_ibs; ++i) { ib = &ibs[i]; - if (ib->ctx != ctx || ib->vm != vm) { - ring->current_ctx = old_ctx; - if (ib->vm_id) - amdgpu_vm_reset_id(adev, ib->vm_id); - amdgpu_ring_undo(ring); - return -EINVAL; - } - amdgpu_ring_emit_ib(ring, ib); - ring->current_ctx = ctx; - } + /* drop preamble IBs if we don't have a context switch */ + if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble) + continue; - if (vm) { - if (ring->funcs->emit_hdp_invalidate) - amdgpu_ring_emit_hdp_invalidate(ring); + amdgpu_ring_emit_ib(ring, ib, job ? 
job->vm_id : 0, + need_ctx_switch); + need_ctx_switch = false; } + if (ring->funcs->emit_hdp_invalidate) + amdgpu_ring_emit_hdp_invalidate(ring); + r = amdgpu_fence_emit(ring, &hwf); if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); - ring->current_ctx = old_ctx; - if (ib->vm_id) - amdgpu_vm_reset_id(adev, ib->vm_id); + if (job && job->vm_id) + amdgpu_vm_reset_id(adev, job->vm_id); amdgpu_ring_undo(ring); return r; } /* wrap the last IB with fence */ - if (ib->user) { - uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo); - addr += ib->user->offset; - amdgpu_ring_emit_fence(ring, addr, ib->sequence, + if (job && job->uf_bo) { + uint64_t addr = amdgpu_bo_gpu_offset(job->uf_bo); + + addr += job->uf_offset; + amdgpu_ring_emit_fence(ring, addr, job->uf_sequence, AMDGPU_FENCE_FLAG_64BIT); } if (f) *f = fence_get(hwf); + if (patch_offset != ~0 && ring->funcs->patch_cond_exec) + amdgpu_ring_patch_cond_exec(ring, patch_offset); + + ring->current_ctx = ctx; amdgpu_ring_commit(ring); return 0; } @@ -315,7 +332,7 @@ static int amdgpu_debugfs_sa_info(struct seq_file *m, void *data) } -static struct drm_info_list amdgpu_debugfs_sa_list[] = { +static const struct drm_info_list amdgpu_debugfs_sa_list[] = { {"amdgpu_sa_info", &amdgpu_debugfs_sa_info, 0, NULL}, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 762cfdb85..835a3fa8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -219,7 +219,6 @@ int amdgpu_irq_init(struct amdgpu_device *adev) if (r) { return r; } - adev->ddev->vblank_disable_allowed = true; /* enable msi */ adev->irq.msi_enabled = false; @@ -498,7 +497,7 @@ static int amdgpu_irqdomain_map(struct irq_domain *d, return 0; } -static struct irq_domain_ops amdgpu_hw_irqdomain_ops = { +static const struct irq_domain_ops amdgpu_hw_irqdomain_ops = { .map = amdgpu_irqdomain_map, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 9c9b19e2f..f0dafa514 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -28,8 +28,25 @@ #include "amdgpu.h" #include "amdgpu_trace.h" +static void amdgpu_job_free_handler(struct work_struct *ws) +{ + struct amdgpu_job *job = container_of(ws, struct amdgpu_job, base.work_free_job); + amd_sched_job_put(&job->base); +} + +void amdgpu_job_timeout_func(struct work_struct *work) +{ + struct amdgpu_job *job = container_of(work, struct amdgpu_job, base.work_tdr.work); + DRM_ERROR("ring %s timeout, last signaled seq=%u, last emitted seq=%u\n", + job->base.sched->name, + (uint32_t)atomic_read(&job->ring->fence_drv.last_seq), + job->ring->fence_drv.sync_seq); + + amd_sched_job_put(&job->base); +} + int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, - struct amdgpu_job **job) + struct amdgpu_job **job, struct amdgpu_vm *vm) { size_t size = sizeof(struct amdgpu_job); @@ -43,8 +60,10 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, return -ENOMEM; (*job)->adev = adev; + (*job)->vm = vm; (*job)->ibs = (void *)&(*job)[1]; (*job)->num_ibs = num_ibs; + INIT_WORK(&(*job)->base.work_free_job, amdgpu_job_free_handler); amdgpu_sync_create(&(*job)->sync); @@ -56,7 +75,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, { int r; - r = amdgpu_job_alloc(adev, 1, job); + r = amdgpu_job_alloc(adev, 1, job, NULL); if (r) return r; @@ -78,8 +97,16 @@ void amdgpu_job_free(struct amdgpu_job *job) amdgpu_sa_bo_free(job->adev, 
&job->ibs[i].sa_bo, f); fence_put(job->fence); - amdgpu_bo_unref(&job->uf.bo); + amdgpu_bo_unref(&job->uf_bo); amdgpu_sync_free(&job->sync); + + if (!job->base.use_sched) + kfree(job); +} + +void amdgpu_job_free_func(struct kref *refcount) +{ + struct amdgpu_job *job = container_of(refcount, struct amdgpu_job, base.refcount); kfree(job); } @@ -87,16 +114,22 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, struct amd_sched_entity *entity, void *owner, struct fence **f) { + struct fence *fence; + int r; job->ring = ring; - job->base.sched = &ring->sched; - job->base.s_entity = entity; - job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner); - if (!job->base.s_fence) - return -ENOMEM; - *f = fence_get(&job->base.s_fence->base); + if (!f) + return -EINVAL; + + r = amd_sched_job_init(&job->base, &ring->sched, + entity, amdgpu_job_timeout_func, + amdgpu_job_free_func, owner, &fence); + if (r) + return r; job->owner = owner; + job->ctx = entity->fence_context; + *f = fence_get(fence); amd_sched_entity_push_job(&job->base); return 0; @@ -105,27 +138,19 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) { struct amdgpu_job *job = to_amdgpu_job(sched_job); - struct amdgpu_vm *vm = job->ibs->vm; + struct amdgpu_vm *vm = job->vm; struct fence *fence = amdgpu_sync_get_fence(&job->sync); - if (fence == NULL && vm && !job->ibs->vm_id) { + if (fence == NULL && vm && !job->vm_id) { struct amdgpu_ring *ring = job->ring; - unsigned i, vm_id; - uint64_t vm_pd_addr; int r; r = amdgpu_vm_grab_id(vm, ring, &job->sync, &job->base.s_fence->base, - &vm_id, &vm_pd_addr); + &job->vm_id, &job->vm_pd_addr); if (r) DRM_ERROR("Error getting VM ID (%d)\n", r); - else { - for (i = 0; i < job->num_ibs; ++i) { - job->ibs[i].vm_id = vm_id; - job->ibs[i].vm_pd_addr = vm_pd_addr; - } - } fence = amdgpu_sync_get_fence(&job->sync); } @@ -153,7 +178,7 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job) trace_amdgpu_sched_run_job(job); r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, - job->sync.last_vm_update, &fence); + job->sync.last_vm_update, job, &fence); if (r) { DRM_ERROR("Error scheduling IBs (%d)\n", r); goto err; @@ -165,7 +190,9 @@ err: return fence; } -struct amd_sched_backend_ops amdgpu_sched_ops = { +const struct amd_sched_backend_ops amdgpu_sched_ops = { .dependency = amdgpu_job_dependency, .run_job = amdgpu_job_run, + .begin_job = amd_sched_job_begin, + .finish_job = amd_sched_job_finish, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index d78739d29..d851ea150 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -427,7 +427,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file } case AMDGPU_INFO_DEV_INFO: { struct drm_amdgpu_info_device dev_info = {}; - struct amdgpu_cu_info cu_info; dev_info.device_id = dev->pdev->device; dev_info.chip_rev = adev->rev_id; @@ -462,11 +461,11 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file AMDGPU_GPU_PAGE_SIZE; dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE; - amdgpu_asic_get_cu_info(adev, &cu_info); - dev_info.cu_active_number = cu_info.number; - dev_info.cu_ao_mask = cu_info.ao_cu_mask; + dev_info.cu_active_number = adev->gfx.cu_info.number; + dev_info.cu_ao_mask = adev->gfx.cu_info.ao_cu_mask; dev_info.ce_ram_size = adev->gfx.ce_ram_size; - 
memcpy(&dev_info.cu_bitmap[0], &cu_info.bitmap[0], sizeof(cu_info.bitmap)); + memcpy(&dev_info.cu_bitmap[0], &adev->gfx.cu_info.bitmap[0], + sizeof(adev->gfx.cu_info.bitmap)); dev_info.vram_type = adev->mc.vram_type; dev_info.vram_bit_width = adev->mc.vram_width; dev_info.vce_harvest_config = adev->vce.harvest_config; @@ -756,4 +755,4 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), }; -int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms); +const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 9f4a45cd2..32fa7b791 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -232,7 +232,10 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) int r; mutex_lock(&adev->mn_lock); - down_write(&mm->mmap_sem); + if (down_write_killable(&mm->mmap_sem)) { + mutex_unlock(&adev->mn_lock); + return ERR_PTR(-EINTR); + } hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm) if (rmn->mm == mm) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 81bd964d3..6b1d7d306 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -283,7 +283,7 @@ struct amdgpu_display_funcs { u32 (*hpd_get_gpio_reg)(struct amdgpu_device *adev); /* pageflipping */ void (*page_flip)(struct amdgpu_device *adev, - int crtc_id, u64 crtc_base); + int crtc_id, u64 crtc_base, bool async); int (*page_flip_get_scanoutpos)(struct amdgpu_device *adev, int crtc, u32 *vbl, u32 *position); /* display topology setup */ @@ -530,7 +530,7 @@ struct amdgpu_framebuffer { ((em) == ATOM_ENCODER_MODE_DP_MST)) /* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */ -#define USE_REAL_VBLANKSTART (1 << 30) +#define USE_REAL_VBLANKSTART (1 << 30) #define GET_DISTANCE_TO_VBLANKSTART (1 << 31) void amdgpu_link_encoder_connector(struct drm_device *dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index acc08018c..bdb01d932 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -71,7 +71,7 @@ static inline int amdgpu_bo_reserve(struct amdgpu_bo *bo, bool no_intr) { int r; - r = ttm_bo_reserve(&bo->tbo, !no_intr, false, false, 0); + r = ttm_bo_reserve(&bo->tbo, !no_intr, false, NULL); if (unlikely(r != 0)) { if (r != -ERESTARTSYS) dev_err(bo->adev->dev, "%p reserve failed\n", bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index ff9597ce2..0e13d80d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -270,30 +270,28 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; enum amd_pm_state_type state = 0; - long idx; + unsigned long idx; int ret; if (strlen(buf) == 1) adev->pp_force_state_enabled = false; - else { - ret = kstrtol(buf, 0, &idx); + else if (adev->pp_enabled) { + struct pp_states_info data; - if (ret) { + ret = kstrtoul(buf, 0, &idx); + if (ret || idx >= ARRAY_SIZE(data.states)) { count = -EINVAL; goto fail; } - if (adev->pp_enabled) { - struct pp_states_info data; - 
amdgpu_dpm_get_pp_num_states(adev, &data); - state = data.states[idx]; - /* only set user selected power states */ - if (state != POWER_STATE_TYPE_INTERNAL_BOOT && - state != POWER_STATE_TYPE_DEFAULT) { - amdgpu_dpm_dispatch_task(adev, - AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL); - adev->pp_force_state_enabled = true; - } + amdgpu_dpm_get_pp_num_states(adev, &data); + state = data.states[idx]; + /* only set user selected power states */ + if (state != POWER_STATE_TYPE_INTERNAL_BOOT && + state != POWER_STATE_TYPE_DEFAULT) { + amdgpu_dpm_dispatch_task(adev, + AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL); + adev->pp_force_state_enabled = true; } } fail: @@ -362,16 +360,23 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int ret; long level; + uint32_t i, mask = 0; + char sub_str[2]; - ret = kstrtol(buf, 0, &level); + for (i = 0; i < strlen(buf) - 1; i++) { + sub_str[0] = *(buf + i); + sub_str[1] = '\0'; + ret = kstrtol(sub_str, 0, &level); - if (ret) { - count = -EINVAL; - goto fail; + if (ret) { + count = -EINVAL; + goto fail; + } + mask |= 1 << level; } if (adev->pp_enabled) - amdgpu_dpm_force_clock_level(adev, PP_SCLK, level); + amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); fail: return count; } @@ -399,16 +404,23 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int ret; long level; + uint32_t i, mask = 0; + char sub_str[2]; - ret = kstrtol(buf, 0, &level); + for (i = 0; i < strlen(buf) - 1; i++) { + sub_str[0] = *(buf + i); + sub_str[1] = '\0'; + ret = kstrtol(sub_str, 0, &level); - if (ret) { - count = -EINVAL; - goto fail; + if (ret) { + count = -EINVAL; + goto fail; + } + mask |= 1 << level; } if (adev->pp_enabled) - amdgpu_dpm_force_clock_level(adev, PP_MCLK, level); + amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); fail: return count; } @@ -436,16 +448,23 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int ret; long level; + uint32_t i, mask = 0; + char sub_str[2]; - ret = kstrtol(buf, 0, &level); + for (i = 0; i < strlen(buf) - 1; i++) { + sub_str[0] = *(buf + i); + sub_str[1] = '\0'; + ret = kstrtol(sub_str, 0, &level); - if (ret) { - count = -EINVAL; - goto fail; + if (ret) { + count = -EINVAL; + goto fail; + } + mask |= 1 << level; } if (adev->pp_enabled) - amdgpu_dpm_force_clock_level(adev, PP_PCIE, level); + amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); fail: return count; } @@ -1212,7 +1231,7 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) return 0; } -static struct drm_info_list amdgpu_pm_info_list[] = { +static const struct drm_info_list amdgpu_pm_info_list[] = { {"amdgpu_pm_info", amdgpu_debugfs_pm_info, 0, NULL}, }; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index e9c6ae6ed..82256558e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -99,6 +99,10 @@ static int amdgpu_pp_early_init(void *handle) #ifdef CONFIG_DRM_AMD_POWERPLAY switch (adev->asic_type) { + case CHIP_POLARIS11: + case CHIP_POLARIS10: + adev->pp_enabled = true; + break; case CHIP_TONGA: case CHIP_FIJI: adev->pp_enabled = (amdgpu_powerplay == 0) ? 
false : true; @@ -179,13 +183,6 @@ static int amdgpu_pp_sw_fini(void *handle) if (ret) return ret; -#ifdef CONFIG_DRM_AMD_POWERPLAY - if (adev->pp_enabled) { - amdgpu_pm_sysfs_fini(adev); - amd_powerplay_fini(adev->powerplay.pp_handle); - } -#endif - return ret; } @@ -219,6 +216,22 @@ static int amdgpu_pp_hw_fini(void *handle) return ret; } +static void amdgpu_pp_late_fini(void *handle) +{ +#ifdef CONFIG_DRM_AMD_POWERPLAY + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (adev->pp_enabled) { + amdgpu_pm_sysfs_fini(adev); + amd_powerplay_fini(adev->powerplay.pp_handle); + } + + if (adev->powerplay.ip_funcs->late_fini) + adev->powerplay.ip_funcs->late_fini( + adev->powerplay.pp_handle); +#endif +} + static int amdgpu_pp_suspend(void *handle) { int ret = 0; @@ -299,28 +312,20 @@ static int amdgpu_pp_soft_reset(void *handle) return ret; } -static void amdgpu_pp_print_status(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->print_status) - adev->powerplay.ip_funcs->print_status( - adev->powerplay.pp_handle); -} - const struct amd_ip_funcs amdgpu_pp_ip_funcs = { + .name = "amdgpu_powerplay", .early_init = amdgpu_pp_early_init, .late_init = amdgpu_pp_late_init, .sw_init = amdgpu_pp_sw_init, .sw_fini = amdgpu_pp_sw_fini, .hw_init = amdgpu_pp_hw_init, .hw_fini = amdgpu_pp_hw_fini, + .late_fini = amdgpu_pp_late_fini, .suspend = amdgpu_pp_suspend, .resume = amdgpu_pp_resume, .is_idle = amdgpu_pp_is_idle, .wait_for_idle = amdgpu_pp_wait_for_idle, .soft_reset = amdgpu_pp_soft_reset, - .print_status = amdgpu_pp_print_status, .set_clockgating_state = amdgpu_pp_set_clockgating_state, .set_powergating_state = amdgpu_pp_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index be6388f73..7700dc22f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -57,9 +57,10 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) ttm_bo_kunmap(&bo->dma_buf_vmap); } -struct drm_gem_object *amdgpu_gem_prime_import_sg_table(struct drm_device *dev, - struct dma_buf_attachment *attach, - struct sg_table *sg) +struct drm_gem_object * +amdgpu_gem_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sg) { struct reservation_object *resv = attach->dmabuf->resv; struct amdgpu_device *adev = dev->dev_private; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 972eed2ef..870f94942 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -46,7 +46,8 @@ * wptr. The GPU then starts fetching commands and executes * them until the pointers are equal again. */ -static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring); +static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, + struct amdgpu_ring *ring); /** * amdgpu_ring_alloc - allocate space on the ring buffer @@ -215,18 +216,17 @@ int amdgpu_ring_restore(struct amdgpu_ring *ring, * * @adev: amdgpu_device pointer * @ring: amdgpu_ring structure holding ring information - * @ring_size: size of the ring + * @max_ndw: maximum number of dw for ring alloc * @nop: nop packet for this ring * * Initialize the driver information for the selected ring (all asics). * Returns 0 on success, error on failure. 
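 *
 * A hedged usage sketch (the ring, nop value and IRQ names below are
 * illustrative assumptions, not taken from this patch): a caller now
 * passes the maximum dword count of a single submission instead of a
 * byte size,
 *
 *   r = amdgpu_ring_init(adev, ring, 1024, 0xffff, 0xff,
 *                        &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
 *                        AMDGPU_RING_TYPE_GFX);
 *
 * and the ring buffer is then sized as roundup_pow_of_two(max_dw * 4 *
 * amdgpu_sched_hw_submission), i.e. 1024 * 4 * 2 = 8192 bytes with the
 * default scheduler depth of 2.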
*/ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, - unsigned ring_size, u32 nop, u32 align_mask, + unsigned max_dw, u32 nop, u32 align_mask, struct amdgpu_irq_src *irq_src, unsigned irq_type, enum amdgpu_ring_type ring_type) { - u32 rb_bufsz; int r; if (ring->adev == NULL) { @@ -265,8 +265,17 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, dev_err(adev->dev, "(%d) ring next_rptr wb alloc failed\n", r); return r; } - ring->next_rptr_gpu_addr = adev->wb.gpu_addr + (ring->next_rptr_offs * 4); + ring->next_rptr_gpu_addr = adev->wb.gpu_addr + ring->next_rptr_offs * 4; ring->next_rptr_cpu_addr = &adev->wb.wb[ring->next_rptr_offs]; + + r = amdgpu_wb_get(adev, &ring->cond_exe_offs); + if (r) { + dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r); + return r; + } + ring->cond_exe_gpu_addr = adev->wb.gpu_addr + (ring->cond_exe_offs * 4); + ring->cond_exe_cpu_addr = &adev->wb.wb[ring->cond_exe_offs]; + spin_lock_init(&ring->fence_lock); r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type); if (r) { @@ -274,10 +283,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, return r; } - /* Align ring size */ - rb_bufsz = order_base_2(ring_size / 8); - ring_size = (1 << (rb_bufsz + 1)) * 4; - ring->ring_size = ring_size; + ring->ring_size = roundup_pow_of_two(max_dw * 4 * + amdgpu_sched_hw_submission); ring->align_mask = align_mask; ring->nop = nop; ring->type = ring_type; @@ -310,8 +317,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, } } ring->ptr_mask = (ring->ring_size / 4) - 1; - ring->max_dw = DIV_ROUND_UP(ring->ring_size / 4, - amdgpu_sched_hw_submission); + ring->max_dw = max_dw; if (amdgpu_debugfs_ring_init(adev, ring)) { DRM_ERROR("Failed to register debugfs file for rings !\n"); @@ -337,6 +343,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) ring->ring = NULL; ring->ring_obj = NULL; + amdgpu_wb_free(ring->adev, ring->cond_exe_offs); amdgpu_wb_free(ring->adev, ring->fence_offs); amdgpu_wb_free(ring->adev, ring->rptr_offs); amdgpu_wb_free(ring->adev, ring->wptr_offs); @@ -363,9 +370,8 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *) m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; - int roffset = *(int*)node->info_ent->data; + int roffset = (unsigned long)node->info_ent->data; struct amdgpu_ring *ring = (void *)(((uint8_t*)adev) + roffset); - uint32_t rptr, wptr, rptr_next; unsigned i; @@ -408,46 +414,37 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data) return 0; } -/* TODO: clean this up !*/ -static int amdgpu_gfx_index = offsetof(struct amdgpu_device, gfx.gfx_ring[0]); -static int cayman_cp1_index = offsetof(struct amdgpu_device, gfx.compute_ring[0]); -static int cayman_cp2_index = offsetof(struct amdgpu_device, gfx.compute_ring[1]); -static int amdgpu_dma1_index = offsetof(struct amdgpu_device, sdma.instance[0].ring); -static int amdgpu_dma2_index = offsetof(struct amdgpu_device, sdma.instance[1].ring); -static int r600_uvd_index = offsetof(struct amdgpu_device, uvd.ring); -static int si_vce1_index = offsetof(struct amdgpu_device, vce.ring[0]); -static int si_vce2_index = offsetof(struct amdgpu_device, vce.ring[1]); - -static struct drm_info_list amdgpu_debugfs_ring_info_list[] = { - {"amdgpu_ring_gfx", amdgpu_debugfs_ring_info, 0, &amdgpu_gfx_index}, - {"amdgpu_ring_cp1", amdgpu_debugfs_ring_info, 0, 
&cayman_cp1_index}, - {"amdgpu_ring_cp2", amdgpu_debugfs_ring_info, 0, &cayman_cp2_index}, - {"amdgpu_ring_dma1", amdgpu_debugfs_ring_info, 0, &amdgpu_dma1_index}, - {"amdgpu_ring_dma2", amdgpu_debugfs_ring_info, 0, &amdgpu_dma2_index}, - {"amdgpu_ring_uvd", amdgpu_debugfs_ring_info, 0, &r600_uvd_index}, - {"amdgpu_ring_vce1", amdgpu_debugfs_ring_info, 0, &si_vce1_index}, - {"amdgpu_ring_vce2", amdgpu_debugfs_ring_info, 0, &si_vce2_index}, -}; +static struct drm_info_list amdgpu_debugfs_ring_info_list[AMDGPU_MAX_RINGS]; +static char amdgpu_debugfs_ring_names[AMDGPU_MAX_RINGS][32]; #endif -static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring) +static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, + struct amdgpu_ring *ring) { #if defined(CONFIG_DEBUG_FS) + unsigned offset = (uint8_t*)ring - (uint8_t*)adev; unsigned i; + struct drm_info_list *info; + char *name; + for (i = 0; i < ARRAY_SIZE(amdgpu_debugfs_ring_info_list); ++i) { - struct drm_info_list *info = &amdgpu_debugfs_ring_info_list[i]; - int roffset = *(int*)amdgpu_debugfs_ring_info_list[i].data; - struct amdgpu_ring *other = (void *)(((uint8_t*)adev) + roffset); - unsigned r; + info = &amdgpu_debugfs_ring_info_list[i]; + if (!info->data) + break; + } - if (other != ring) - continue; + if (i == ARRAY_SIZE(amdgpu_debugfs_ring_info_list)) + return -ENOSPC; - r = amdgpu_debugfs_add_files(adev, info, 1); - if (r) - return r; - } + name = &amdgpu_debugfs_ring_names[i][0]; + sprintf(name, "amdgpu_ring_%s", ring->name); + info->name = name; + info->show = amdgpu_debugfs_ring_info; + info->driver_features = 0; + info->data = (void*)(uintptr_t)offset; + + return amdgpu_debugfs_add_files(adev, info, 1); #endif return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 8bf84efaf..48618ee32 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -115,6 +115,7 @@ int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev, return r; } r = amdgpu_bo_kmap(sa_manager->bo, &sa_manager->cpu_ptr); + memset(sa_manager->cpu_ptr, 0, sa_manager->size); amdgpu_bo_unreserve(sa_manager->bo); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index c48b4fce5..34a92808b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -108,6 +108,29 @@ static void amdgpu_sync_keep_later(struct fence **keep, struct fence *fence) *keep = fence_get(fence); } +/** + * amdgpu_sync_add_later - add the fence to the hash + * + * @sync: sync object to add the fence to + * @f: fence to add + * + * Tries to add the fence to an existing hash entry. Returns true when an entry + * was found, false otherwise. 
+ */ +static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct fence *f) +{ + struct amdgpu_sync_entry *e; + + hash_for_each_possible(sync->fences, e, node, f->context) { + if (unlikely(e->fence->context != f->context)) + continue; + + amdgpu_sync_keep_later(&e->fence, f); + return true; + } + return false; +} + /** * amdgpu_sync_fence - remember to sync to this fence * @@ -127,13 +150,8 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, amdgpu_sync_get_owner(f) == AMDGPU_FENCE_OWNER_VM) amdgpu_sync_keep_later(&sync->last_vm_update, f); - hash_for_each_possible(sync->fences, e, node, f->context) { - if (unlikely(e->fence->context != f->context)) - continue; - - amdgpu_sync_keep_later(&e->fence, f); + if (amdgpu_sync_add_later(sync, f)) return 0; - } e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL); if (!e) @@ -204,6 +222,81 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, return r; } +/** + * amdgpu_sync_is_idle - test if all fences are signaled + * + * @sync: the sync object + * + * Returns true if all fences in the sync object are signaled. + */ +bool amdgpu_sync_is_idle(struct amdgpu_sync *sync) +{ + struct amdgpu_sync_entry *e; + struct hlist_node *tmp; + int i; + + hash_for_each_safe(sync->fences, i, tmp, e, node) { + struct fence *f = e->fence; + + if (fence_is_signaled(f)) { + hash_del(&e->node); + fence_put(f); + kmem_cache_free(amdgpu_sync_slab, e); + continue; + } + + return false; + } + + return true; +} + +/** + * amdgpu_sync_cycle_fences - move fences from one sync object into another + * + * @dst: the destination sync object + * @src: the source sync object + * @fence: fence to add to source + * + * Remove all fences from source and put them into destination and add + * fence as new one into source. 
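+ *
+ * Hedged usage sketch (the caller and variable names here are
+ * illustrative assumptions, not part of this patch):
+ *
+ *   r = amdgpu_sync_cycle_fences(&busy_sync, &id->active, new_fence);
+ *   if (r)
+ *           return r;
+ *
+ * Signaled fences are dropped during the move, unsignaled ones are
+ * merged into @dst with one entry per fence context, and @src afterwards
+ * tracks only @fence. Returns 0, or -ENOMEM when the new entry for @src
+ * cannot be allocated (in which case nothing is moved).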
+ */ +int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src, + struct fence *fence) +{ + struct amdgpu_sync_entry *e, *newone; + struct hlist_node *tmp; + int i; + + /* Allocate the new entry before moving the old ones */ + newone = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL); + if (!newone) + return -ENOMEM; + + hash_for_each_safe(src->fences, i, tmp, e, node) { + struct fence *f = e->fence; + + hash_del(&e->node); + if (fence_is_signaled(f)) { + fence_put(f); + kmem_cache_free(amdgpu_sync_slab, e); + continue; + } + + if (amdgpu_sync_add_later(dst, f)) { + kmem_cache_free(amdgpu_sync_slab, e); + continue; + } + + hash_add(dst->fences, &e->node, f->context); + } + + hash_add(src->fences, &newone->node, fence->context); + newone->fence = fence_get(fence); + + return 0; +} + struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) { struct amdgpu_sync_entry *e; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 11af4492b..3b9053af4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -911,6 +911,52 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, return flags; } +static void amdgpu_ttm_lru_removal(struct ttm_buffer_object *tbo) +{ + struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev); + unsigned i, j; + + for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) { + struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i]; + + for (j = 0; j < TTM_NUM_MEM_TYPES; ++j) + if (&tbo->lru == lru->lru[j]) + lru->lru[j] = tbo->lru.prev; + + if (&tbo->swap == lru->swap_lru) + lru->swap_lru = tbo->swap.prev; + } +} + +static struct amdgpu_mman_lru *amdgpu_ttm_lru(struct ttm_buffer_object *tbo) +{ + struct amdgpu_device *adev = amdgpu_get_adev(tbo->bdev); + unsigned log2_size = min(ilog2(tbo->num_pages), + AMDGPU_TTM_LRU_SIZE - 1); + + return &adev->mman.log2_size[log2_size]; +} + +static struct list_head *amdgpu_ttm_lru_tail(struct ttm_buffer_object *tbo) +{ + struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo); + struct list_head *res = lru->lru[tbo->mem.mem_type]; + + lru->lru[tbo->mem.mem_type] = &tbo->lru; + + return res; +} + +static struct list_head *amdgpu_ttm_swap_lru_tail(struct ttm_buffer_object *tbo) +{ + struct amdgpu_mman_lru *lru = amdgpu_ttm_lru(tbo); + struct list_head *res = lru->swap_lru; + + lru->swap_lru = &tbo->swap; + + return res; +} + static struct ttm_bo_driver amdgpu_bo_driver = { .ttm_tt_create = &amdgpu_ttm_tt_create, .ttm_tt_populate = &amdgpu_ttm_tt_populate, @@ -924,10 +970,14 @@ static struct ttm_bo_driver amdgpu_bo_driver = { .fault_reserve_notify = &amdgpu_bo_fault_reserve_notify, .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, .io_mem_free = &amdgpu_ttm_io_mem_free, + .lru_removal = &amdgpu_ttm_lru_removal, + .lru_tail = &amdgpu_ttm_lru_tail, + .swap_lru_tail = &amdgpu_ttm_swap_lru_tail, }; int amdgpu_ttm_init(struct amdgpu_device *adev) { + unsigned i, j; int r; r = amdgpu_ttm_global_init(adev); @@ -945,6 +995,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_ERROR("failed initializing buffer object driver(%d).\n", r); return r; } + + for (i = 0; i < AMDGPU_TTM_LRU_SIZE; ++i) { + struct amdgpu_mman_lru *lru = &adev->mman.log2_size[i]; + + for (j = 0; j < TTM_NUM_MEM_TYPES; ++j) + lru->lru[j] = &adev->mman.bdev.man[j].lru; + lru->swap_lru = &adev->mman.bdev.glob->swap_lru; + } + adev->mman.initialized = true; r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM, adev->mc.real_vram_size >> PAGE_SHIFT); @@ -1167,7 
+1226,7 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data) static int ttm_pl_vram = TTM_PL_VRAM; static int ttm_pl_tt = TTM_PL_TT; -static struct drm_info_list amdgpu_ttm_debugfs_list[] = { +static const struct drm_info_list amdgpu_ttm_debugfs_list[] = { {"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, &ttm_pl_vram}, {"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, &ttm_pl_tt}, {"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL}, @@ -1218,6 +1277,8 @@ static const struct file_operations amdgpu_ttm_vram_fops = { .llseek = default_llseek }; +#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS + static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -1265,6 +1326,8 @@ static const struct file_operations amdgpu_ttm_gtt_fops = { #endif +#endif + static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) { #if defined(CONFIG_DEBUG_FS) @@ -1280,6 +1343,7 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) i_size_write(ent->d_inode, adev->mc.mc_vram_size); adev->mman.vram = ent; +#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS ent = debugfs_create_file("amdgpu_gtt", S_IFREG | S_IRUGO, root, adev, &amdgpu_ttm_gtt_fops); if (IS_ERR(ent)) @@ -1287,6 +1351,7 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) i_size_write(ent->d_inode, adev->mc.gtt_size); adev->mman.gtt = ent; +#endif count = ARRAY_SIZE(amdgpu_ttm_debugfs_list); #ifdef CONFIG_SWIOTLB @@ -1308,7 +1373,10 @@ static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev) debugfs_remove(adev->mman.vram); adev->mman.vram = NULL; +#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS debugfs_remove(adev->mman.gtt); adev->mman.gtt = NULL; #endif + +#endif } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index cdb963cc2..3959055eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -41,19 +41,23 @@ /* 1 second timeout */ #define UVD_IDLE_TIMEOUT_MS 1000 +/* Polaris10/11 firmware version */ +#define FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8)) /* Firmware Names */ #ifdef CONFIG_DRM_AMDGPU_CIK #define FIRMWARE_BONAIRE "/*(DEBLOBBED)*/" -#define FIRMWARE_KABINI "/*(DEBLOBBED)*/" -#define FIRMWARE_KAVERI "/*(DEBLOBBED)*/" -#define FIRMWARE_HAWAII "/*(DEBLOBBED)*/" +#define FIRMWARE_KABINI "/*(DEBLOBBED)*/" +#define FIRMWARE_KAVERI "/*(DEBLOBBED)*/" +#define FIRMWARE_HAWAII "/*(DEBLOBBED)*/" #define FIRMWARE_MULLINS "/*(DEBLOBBED)*/" #endif #define FIRMWARE_TONGA "/*(DEBLOBBED)*/" #define FIRMWARE_CARRIZO "/*(DEBLOBBED)*/" #define FIRMWARE_FIJI "/*(DEBLOBBED)*/" #define FIRMWARE_STONEY "/*(DEBLOBBED)*/" +#define FIRMWARE_POLARIS10 "/*(DEBLOBBED)*/" +#define FIRMWARE_POLARIS11 "/*(DEBLOBBED)*/" /** * amdgpu_uvd_cs_ctx - Command submission parser context @@ -124,6 +128,12 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) case CHIP_STONEY: fw_name = FIRMWARE_STONEY; break; + case CHIP_POLARIS10: + fw_name = FIRMWARE_POLARIS10; + break; + case CHIP_POLARIS11: + fw_name = FIRMWARE_POLARIS11; + break; default: return -EINVAL; } @@ -144,6 +154,9 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) return r; } + /* Set the default UVD handles that the firmware can handle */ + adev->uvd.max_handles = AMDGPU_DEFAULT_UVD_HANDLES; + hdr = (const struct common_firmware_header *)adev->uvd.fw->data; family_id = le32_to_cpu(hdr->ucode_version) & 0xff; version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; @@ -151,11 +164,28 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) DRM_INFO("Found UVD firmware 
Version: %hu.%hu Family ID: %hu\n", version_major, version_minor, family_id); + /* + * Limit the number of UVD handles depending on microcode major + * and minor versions. The firmware version which has 40 UVD + * instances support is 1.80. So all subsequent versions should + * also have the same support. + */ + if ((version_major > 0x01) || + ((version_major == 0x01) && (version_minor >= 0x50))) + adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES; + adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) | (family_id << 8)); + if ((adev->asic_type == CHIP_POLARIS10 || + adev->asic_type == CHIP_POLARIS11) && + (adev->uvd.fw_version < FW_1_66_16)) + DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too old.\n", + version_major, version_minor); + bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8) - + AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE; + + AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE + + AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles; r = amdgpu_bo_create(adev, bo_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, @@ -198,7 +228,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) return r; } - for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) { + for (i = 0; i < adev->uvd.max_handles; ++i) { atomic_set(&adev->uvd.handles[i], 0); adev->uvd.filp[i] = NULL; } @@ -214,19 +244,20 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) { int r; - if (adev->uvd.vcpu_bo == NULL) - return 0; + kfree(adev->uvd.saved_bo); amd_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity); - r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false); - if (!r) { - amdgpu_bo_kunmap(adev->uvd.vcpu_bo); - amdgpu_bo_unpin(adev->uvd.vcpu_bo); - amdgpu_bo_unreserve(adev->uvd.vcpu_bo); - } + if (adev->uvd.vcpu_bo) { + r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false); + if (!r) { + amdgpu_bo_kunmap(adev->uvd.vcpu_bo); + amdgpu_bo_unpin(adev->uvd.vcpu_bo); + amdgpu_bo_unreserve(adev->uvd.vcpu_bo); + } - amdgpu_bo_unref(&adev->uvd.vcpu_bo); + amdgpu_bo_unref(&adev->uvd.vcpu_bo); + } amdgpu_ring_fini(&adev->uvd.ring); @@ -244,7 +275,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) if (adev->uvd.vcpu_bo == NULL) return 0; - for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) + for (i = 0; i < adev->uvd.max_handles; ++i) if (atomic_read(&adev->uvd.handles[i])) break; @@ -301,7 +332,7 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp) struct amdgpu_ring *ring = &adev->uvd.ring; int i, r; - for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) { + for (i = 0; i < adev->uvd.max_handles; ++i) { uint32_t handle = atomic_read(&adev->uvd.handles[i]); if (handle != 0 && adev->uvd.filp[i] == filp) { struct fence *fence; @@ -383,7 +414,8 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx) * * Peek into the decode message and calculate the necessary buffer sizes. 
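Two conventions from the UVD hunks above are worth spelling out. Firmware versions are packed into a single integer, as in the FW_1_66_16 macro, so releases can be compared with a plain relational operator; and the handle count is only raised for firmware at version 1.80 or newer (0x50 hex is 80 decimal, matching the comment). A compact restatement; the value 40 comes from the comment above, while 10 for the default is an assumption about the header constant:

#include <stdbool.h>
#include <stdint.h>

/* (major << 24) | (minor << 16) | (family << 8), as in FW_1_66_16 */
#define FW_VER(maj, min, fam) \
	(((uint32_t)(maj) << 24) | ((uint32_t)(min) << 16) | ((uint32_t)(fam) << 8))

static unsigned uvd_max_handles(uint8_t major, uint8_t minor)
{
	/* 1.80 is the first firmware with 40-instance support */
	if (major > 0x01 || (major == 0x01 && minor >= 0x50))
		return 40;	/* AMDGPU_MAX_UVD_HANDLES */
	return 10;		/* AMDGPU_DEFAULT_UVD_HANDLES, assumed value */
}

static bool polaris_uvd_fw_too_old(uint32_t fw_version)
{
	return fw_version < FW_VER(1, 66, 16);	/* mirrors the FW_1_66_16 check */
}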
*/ -static int amdgpu_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[]) +static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg, + unsigned buf_sizes[]) { unsigned stream_type = msg[4]; unsigned width = msg[6]; @@ -405,7 +437,6 @@ static int amdgpu_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[]) switch (stream_type) { case 0: /* H264 */ - case 7: /* H264 Perf */ switch(level) { case 30: num_dpb_buffer = 8100 / fs_in_mb; @@ -483,6 +514,54 @@ static int amdgpu_uvd_cs_msg_decode(uint32_t *msg, unsigned buf_sizes[]) min_dpb_size += ALIGN(width_in_mb * height_in_mb * 32, 64); break; + case 7: /* H264 Perf */ + switch(level) { + case 30: + num_dpb_buffer = 8100 / fs_in_mb; + break; + case 31: + num_dpb_buffer = 18000 / fs_in_mb; + break; + case 32: + num_dpb_buffer = 20480 / fs_in_mb; + break; + case 41: + num_dpb_buffer = 32768 / fs_in_mb; + break; + case 42: + num_dpb_buffer = 34816 / fs_in_mb; + break; + case 50: + num_dpb_buffer = 110400 / fs_in_mb; + break; + case 51: + num_dpb_buffer = 184320 / fs_in_mb; + break; + default: + num_dpb_buffer = 184320 / fs_in_mb; + break; + } + num_dpb_buffer++; + if (num_dpb_buffer > 17) + num_dpb_buffer = 17; + + /* reference picture buffer */ + min_dpb_size = image_size * num_dpb_buffer; + + if (adev->asic_type < CHIP_POLARIS10){ + /* macroblock context buffer */ + min_dpb_size += + width_in_mb * height_in_mb * num_dpb_buffer * 192; + + /* IT surface buffer */ + min_dpb_size += width_in_mb * height_in_mb * 32; + } else { + /* macroblock context buffer */ + min_ctx_size = + width_in_mb * height_in_mb * num_dpb_buffer * 192; + } + break; + case 16: /* H265 */ image_size = (ALIGN(width, 16) * ALIGN(height, 16) * 3) / 2; image_size = ALIGN(image_size, 256); @@ -561,7 +640,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, amdgpu_bo_kunmap(bo); /* try to alloc a new handle */ - for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) { + for (i = 0; i < adev->uvd.max_handles; ++i) { if (atomic_read(&adev->uvd.handles[i]) == handle) { DRM_ERROR("Handle 0x%x already in use!\n", handle); return -EINVAL; @@ -578,13 +657,13 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, case 1: /* it's a decode msg, calc buffer sizes */ - r = amdgpu_uvd_cs_msg_decode(msg, ctx->buf_sizes); + r = amdgpu_uvd_cs_msg_decode(adev, msg, ctx->buf_sizes); amdgpu_bo_kunmap(bo); if (r) return r; /* validate the handle */ - for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) { + for (i = 0; i < adev->uvd.max_handles; ++i) { if (atomic_read(&adev->uvd.handles[i]) == handle) { if (adev->uvd.filp[i] != ctx->parser->filp) { DRM_ERROR("UVD handle collision detected!\n"); @@ -599,7 +678,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, case 2: /* it's a destroy msg, free the handle */ - for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) + for (i = 0; i < adev->uvd.max_handles; ++i) atomic_cmpxchg(&adev->uvd.handles[i], handle, 0); amdgpu_bo_kunmap(bo); return 0; @@ -879,7 +958,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, ib->length_dw = 16; if (direct) { - r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); + r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f); job->fence = f; if (r) goto err_free; @@ -1011,13 +1090,17 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work) fences = amdgpu_fence_count_emitted(&adev->uvd.ring); - for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) + for (i = 0; i < adev->uvd.max_handles; ++i) if (atomic_read(&adev->uvd.handles[i])) ++handles; if (fences == 0 && handles == 
0) { if (adev->pm.dpm_enabled) { amdgpu_dpm_enable_uvd(adev, false); + /* just work around for uvd clock remain high even + * when uvd dpm disabled on Polaris10 */ + if (adev->asic_type == CHIP_POLARIS10) + amdgpu_asic_set_uvd_clocks(adev, 0, 0); } else { amdgpu_asic_set_uvd_clocks(adev, 0, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index e933cb785..0a08cf930 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -41,15 +41,17 @@ /* Firmware Names */ #ifdef CONFIG_DRM_AMDGPU_CIK #define FIRMWARE_BONAIRE "/*(DEBLOBBED)*/" -#define FIRMWARE_KABINI "/*(DEBLOBBED)*/" -#define FIRMWARE_KAVERI "/*(DEBLOBBED)*/" -#define FIRMWARE_HAWAII "/*(DEBLOBBED)*/" +#define FIRMWARE_KABINI "/*(DEBLOBBED)*/" +#define FIRMWARE_KAVERI "/*(DEBLOBBED)*/" +#define FIRMWARE_HAWAII "/*(DEBLOBBED)*/" #define FIRMWARE_MULLINS "/*(DEBLOBBED)*/" #endif #define FIRMWARE_TONGA "/*(DEBLOBBED)*/" #define FIRMWARE_CARRIZO "/*(DEBLOBBED)*/" #define FIRMWARE_FIJI "/*(DEBLOBBED)*/" #define FIRMWARE_STONEY "/*(DEBLOBBED)*/" +#define FIRMWARE_POLARIS10 "/*(DEBLOBBED)*/" +#define FIRMWARE_POLARIS11 "/*(DEBLOBBED)*/" #ifdef CONFIG_DRM_AMDGPU_CIK /*(DEBLOBBED)*/ @@ -106,6 +108,12 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) case CHIP_STONEY: fw_name = FIRMWARE_STONEY; break; + case CHIP_POLARIS10: + fw_name = FIRMWARE_POLARIS10; + break; + case CHIP_POLARIS11: + fw_name = FIRMWARE_POLARIS11; + break; default: return -EINVAL; @@ -419,7 +427,7 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); + r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f); job->fence = f; if (r) goto err; @@ -481,7 +489,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, ib->ptr[i] = 0x0; if (direct) { - r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); + r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f); job->fence = f; if (r) goto err; @@ -745,7 +753,8 @@ out: * @ib: the IB to execute * */ -void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) +void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) { amdgpu_ring_write(ring, VCE_CMD_IB); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index ef99d2370..f40cf761c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -34,7 +34,8 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, bool direct, struct fence **fence); void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); -void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); +void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch); void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags); int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b6c011b83..9f36ed30b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -53,6 +53,18 @@ /* Special value that no flush is 
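Restating the idle policy this hunk adjusts: UVD is powered down only when the ring has no emitted fences and no session handles remain open, and Polaris10 additionally forces the clocks to zero because disabling DPM alone leaves them high. A self-contained model of that decision under those assumptions; all helper names below are hypothetical stand-ins, not kernel API:

#include <stdbool.h>

struct uvd_state {
	unsigned max_handles;
	bool handle_in_use[40];
	unsigned fences_emitted;
	bool dpm_enabled;
	bool is_polaris10;
};

/* Stubs standing in for amdgpu_dpm_enable_uvd() and friends. */
static void dpm_enable_uvd(struct uvd_state *s, bool on) { (void)s; (void)on; }
static void set_uvd_clocks(struct uvd_state *s, unsigned a, unsigned b) { (void)s; (void)a; (void)b; }
static void reschedule_idle_work(struct uvd_state *s) { (void)s; }

static void uvd_idle_tick(struct uvd_state *s)
{
	unsigned handles = 0, i;

	for (i = 0; i < s->max_handles; ++i)
		if (s->handle_in_use[i])
			++handles;

	if (s->fences_emitted == 0 && handles == 0) {
		if (s->dpm_enabled) {
			dpm_enable_uvd(s, false);
			if (s->is_polaris10)	/* stuck-clock workaround */
				set_uvd_clocks(s, 0, 0);
		} else {
			set_uvd_clocks(s, 0, 0);
		}
	} else {
		reschedule_idle_work(s);	/* still busy, check again later */
	}
}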
necessary */ #define AMDGPU_VM_NO_FLUSH (~0ll) +/* Local structure. Encapsulate some VM table update parameters to reduce + * the number of function parameters + */ +struct amdgpu_vm_update_params { + /* address where to copy page table entries from */ + uint64_t src; + /* DMA addresses to use for mapping */ + dma_addr_t *pages_addr; + /* indirect buffer to fill with commands */ + struct amdgpu_ib *ib; +}; + /** * amdgpu_vm_num_pde - return the number of page directory entries * @@ -166,74 +178,109 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, { uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); struct amdgpu_device *adev = ring->adev; - struct amdgpu_vm_id *id = &vm->ids[ring->idx]; struct fence *updates = sync->last_vm_update; + struct amdgpu_vm_id *id; + unsigned i = ring->idx; int r; mutex_lock(&adev->vm_manager.lock); - /* check if the id is still valid */ - if (id->mgr_id) { - struct fence *flushed = id->flushed_updates; - bool is_later; - long owner; + /* Check if we can use a VMID already assigned to this VM */ + do { + struct fence *flushed; - if (!flushed) - is_later = true; - else if (!updates) - is_later = false; - else - is_later = fence_is_later(updates, flushed); + id = vm->ids[i++]; + if (i == AMDGPU_MAX_RINGS) + i = 0; + + /* Check all the prerequisites to using this VMID */ + if (!id) + continue; + + if (atomic64_read(&id->owner) != vm->client_id) + continue; - owner = atomic_long_read(&id->mgr_id->owner); - if (!is_later && owner == (long)id && - pd_addr == id->pd_gpu_addr) { + if (pd_addr != id->pd_gpu_addr) + continue; + + if (id->last_user != ring && + (!id->last_flush || !fence_is_signaled(id->last_flush))) + continue; + + flushed = id->flushed_updates; + if (updates && (!flushed || fence_is_later(updates, flushed))) + continue; + /* Good we can use this VMID */ + if (id->last_user == ring) { r = amdgpu_sync_fence(ring->adev, sync, - id->mgr_id->active); - if (r) { - mutex_unlock(&adev->vm_manager.lock); - return r; - } + id->first); + if (r) + goto error; + } + + /* And remember this submission as user of the VMID */ + r = amdgpu_sync_fence(ring->adev, &id->active, fence); + if (r) + goto error; + + list_move_tail(&id->list, &adev->vm_manager.ids_lru); + vm->ids[ring->idx] = id; + + *vm_id = id - adev->vm_manager.ids; + *vm_pd_addr = AMDGPU_VM_NO_FLUSH; + trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr); + + mutex_unlock(&adev->vm_manager.lock); + return 0; - fence_put(id->mgr_id->active); - id->mgr_id->active = fence_get(fence); + } while (i != ring->idx); - list_move_tail(&id->mgr_id->list, - &adev->vm_manager.ids_lru); + id = list_first_entry(&adev->vm_manager.ids_lru, + struct amdgpu_vm_id, + list); - *vm_id = id->mgr_id - adev->vm_manager.ids; - *vm_pd_addr = AMDGPU_VM_NO_FLUSH; - trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, - *vm_pd_addr); + if (!amdgpu_sync_is_idle(&id->active)) { + struct list_head *head = &adev->vm_manager.ids_lru; + struct amdgpu_vm_id *tmp; - mutex_unlock(&adev->vm_manager.lock); - return 0; + list_for_each_entry_safe(id, tmp, &adev->vm_manager.ids_lru, + list) { + if (amdgpu_sync_is_idle(&id->active)) { + list_move(&id->list, head); + head = &id->list; + } } + id = list_first_entry(&adev->vm_manager.ids_lru, + struct amdgpu_vm_id, + list); } - id->mgr_id = list_first_entry(&adev->vm_manager.ids_lru, - struct amdgpu_vm_manager_id, - list); + r = amdgpu_sync_cycle_fences(sync, &id->active, fence); + if (r) + goto error; - r = amdgpu_sync_fence(ring->adev, sync, id->mgr_id->active); - if 
(!r) { - fence_put(id->mgr_id->active); - id->mgr_id->active = fence_get(fence); + fence_put(id->first); + id->first = fence_get(fence); - fence_put(id->flushed_updates); - id->flushed_updates = fence_get(updates); + fence_put(id->last_flush); + id->last_flush = NULL; - id->pd_gpu_addr = pd_addr; + fence_put(id->flushed_updates); + id->flushed_updates = fence_get(updates); - list_move_tail(&id->mgr_id->list, &adev->vm_manager.ids_lru); - atomic_long_set(&id->mgr_id->owner, (long)id); + id->pd_gpu_addr = pd_addr; - *vm_id = id->mgr_id - adev->vm_manager.ids; - *vm_pd_addr = pd_addr; - trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr); - } + list_move_tail(&id->list, &adev->vm_manager.ids_lru); + id->last_user = ring; + atomic64_set(&id->owner, vm->client_id); + vm->ids[ring->idx] = id; + + *vm_id = id - adev->vm_manager.ids; + *vm_pd_addr = pd_addr; + trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr); +error: mutex_unlock(&adev->vm_manager.lock); return r; } @@ -247,43 +294,62 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, * * Emit a VM flush when it is necessary. */ -void amdgpu_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr, - uint32_t gds_base, uint32_t gds_size, - uint32_t gws_base, uint32_t gws_size, - uint32_t oa_base, uint32_t oa_size) +int amdgpu_vm_flush(struct amdgpu_ring *ring, + unsigned vm_id, uint64_t pd_addr, + uint32_t gds_base, uint32_t gds_size, + uint32_t gws_base, uint32_t gws_size, + uint32_t oa_base, uint32_t oa_size) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id]; + struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id]; bool gds_switch_needed = ring->funcs->emit_gds_switch && ( - mgr_id->gds_base != gds_base || - mgr_id->gds_size != gds_size || - mgr_id->gws_base != gws_base || - mgr_id->gws_size != gws_size || - mgr_id->oa_base != oa_base || - mgr_id->oa_size != oa_size); + id->gds_base != gds_base || + id->gds_size != gds_size || + id->gws_base != gws_base || + id->gws_size != gws_size || + id->oa_base != oa_base || + id->oa_size != oa_size); + int r; if (ring->funcs->emit_pipeline_sync && ( - pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed)) + pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed || + ring->type == AMDGPU_RING_TYPE_COMPUTE)) amdgpu_ring_emit_pipeline_sync(ring); - if (pd_addr != AMDGPU_VM_NO_FLUSH) { + if (ring->funcs->emit_vm_flush && + pd_addr != AMDGPU_VM_NO_FLUSH) { + struct fence *fence; + trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id); amdgpu_ring_emit_vm_flush(ring, vm_id, pd_addr); + + mutex_lock(&adev->vm_manager.lock); + if ((id->pd_gpu_addr == pd_addr) && (id->last_user == ring)) { + r = amdgpu_fence_emit(ring, &fence); + if (r) { + mutex_unlock(&adev->vm_manager.lock); + return r; + } + fence_put(id->last_flush); + id->last_flush = fence; + } + mutex_unlock(&adev->vm_manager.lock); } if (gds_switch_needed) { - mgr_id->gds_base = gds_base; - mgr_id->gds_size = gds_size; - mgr_id->gws_base = gws_base; - mgr_id->gws_size = gws_size; - mgr_id->oa_base = oa_base; - mgr_id->oa_size = oa_size; + id->gds_base = gds_base; + id->gds_size = gds_size; + id->gws_base = gws_base; + id->gws_size = gws_size; + id->oa_base = oa_base; + id->oa_size = oa_size; amdgpu_ring_emit_gds_switch(ring, vm_id, gds_base, gds_size, gws_base, gws_size, oa_base, oa_size); } + + return 0; } /** @@ -296,14 +362,14 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring, */ void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id) { - 
struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id]; - - mgr_id->gds_base = 0; - mgr_id->gds_size = 0; - mgr_id->gws_base = 0; - mgr_id->gws_size = 0; - mgr_id->oa_base = 0; - mgr_id->oa_size = 0; + struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id]; + + id->gds_base = 0; + id->gds_size = 0; + id->gws_base = 0; + id->gws_size = 0; + id->oa_base = 0; + id->oa_size = 0; } /** @@ -335,9 +401,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, * amdgpu_vm_update_pages - helper to call the right asic function * * @adev: amdgpu_device pointer - * @gtt: GART instance to use for mapping - * @gtt_flags: GTT hw access flags - * @ib: indirect buffer to fill with commands + * @vm_update_params: see amdgpu_vm_update_params definition * @pe: addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update @@ -348,30 +412,29 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, * to setup the page table using the DMA. */ static void amdgpu_vm_update_pages(struct amdgpu_device *adev, - struct amdgpu_gart *gtt, - uint32_t gtt_flags, - struct amdgpu_ib *ib, + struct amdgpu_vm_update_params + *vm_update_params, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags) { trace_amdgpu_vm_set_page(pe, addr, count, incr, flags); - if ((gtt == &adev->gart) && (flags == gtt_flags)) { - uint64_t src = gtt->table_addr + (addr >> 12) * 8; - amdgpu_vm_copy_pte(adev, ib, pe, src, count); + if (vm_update_params->src) { + amdgpu_vm_copy_pte(adev, vm_update_params->ib, + pe, (vm_update_params->src + (addr >> 12) * 8), count); - } else if (gtt) { - dma_addr_t *pages_addr = gtt->pages_addr; - amdgpu_vm_write_pte(adev, ib, pages_addr, pe, addr, - count, incr, flags); + } else if (vm_update_params->pages_addr) { + amdgpu_vm_write_pte(adev, vm_update_params->ib, + vm_update_params->pages_addr, + pe, addr, count, incr, flags); } else if (count < 3) { - amdgpu_vm_write_pte(adev, ib, NULL, pe, addr, + amdgpu_vm_write_pte(adev, vm_update_params->ib, NULL, pe, addr, count, incr, flags); } else { - amdgpu_vm_set_pte_pde(adev, ib, pe, addr, + amdgpu_vm_set_pte_pde(adev, vm_update_params->ib, pe, addr, count, incr, flags); } } @@ -391,10 +454,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, struct amdgpu_ring *ring; struct fence *fence = NULL; struct amdgpu_job *job; + struct amdgpu_vm_update_params vm_update_params; unsigned entries; uint64_t addr; int r; + memset(&vm_update_params, 0, sizeof(vm_update_params)); ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); r = reservation_object_reserve_shared(bo->tbo.resv); @@ -412,7 +477,8 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, if (r) goto error; - amdgpu_vm_update_pages(adev, NULL, 0, &job->ibs[0], addr, 0, entries, + vm_update_params.ib = &job->ibs[0]; + amdgpu_vm_update_pages(adev, &vm_update_params, addr, 0, entries, 0, 0); amdgpu_ring_pad_ib(ring, &job->ibs[0]); @@ -485,11 +551,12 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, uint64_t last_pde = ~0, last_pt = ~0; unsigned count = 0, pt_idx, ndw; struct amdgpu_job *job; - struct amdgpu_ib *ib; + struct amdgpu_vm_update_params vm_update_params; struct fence *fence = NULL; int r; + memset(&vm_update_params, 0, sizeof(vm_update_params)); ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); /* padding, etc. 
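amdgpu_vm_update_pages() above is a four-way dispatch, and the refactoring makes its inputs explicit in struct amdgpu_vm_update_params instead of threading them through every caller: a prepared copy source wins, then scattered DMA addresses, then a short write, and only long linear runs use the wide set-pte-pde command. The same priority order as a standalone function, with illustrative types:

#include <stdint.h>

enum pte_method {
	PTE_COPY,		/* copy entries straight out of the GART table */
	PTE_WRITE_PAGES,	/* addresses are scattered, write them one by one */
	PTE_WRITE,		/* too few entries to amortize a SET command */
	PTE_SET			/* large linear run */
};

static enum pte_method pick_pte_method(uint64_t src, const uint64_t *pages_addr,
				       unsigned count)
{
	if (src)
		return PTE_COPY;
	if (pages_addr)
		return PTE_WRITE_PAGES;
	if (count < 3)
		return PTE_WRITE;
	return PTE_SET;
}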
*/ @@ -502,7 +569,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, if (r) return r; - ib = &job->ibs[0]; + vm_update_params.ib = &job->ibs[0]; /* walk over the address space and update the page directory */ for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) { @@ -522,7 +589,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, ((last_pt + incr * count) != pt)) { if (count) { - amdgpu_vm_update_pages(adev, NULL, 0, ib, + amdgpu_vm_update_pages(adev, &vm_update_params, last_pde, last_pt, count, incr, AMDGPU_PTE_VALID); @@ -537,14 +604,15 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, } if (count) - amdgpu_vm_update_pages(adev, NULL, 0, ib, last_pde, last_pt, - count, incr, AMDGPU_PTE_VALID); + amdgpu_vm_update_pages(adev, &vm_update_params, + last_pde, last_pt, + count, incr, AMDGPU_PTE_VALID); - if (ib->length_dw != 0) { - amdgpu_ring_pad_ib(ring, ib); + if (vm_update_params.ib->length_dw != 0) { + amdgpu_ring_pad_ib(ring, vm_update_params.ib); amdgpu_sync_resv(adev, &job->sync, pd->tbo.resv, AMDGPU_FENCE_OWNER_VM); - WARN_ON(ib->length_dw > ndw); + WARN_ON(vm_update_params.ib->length_dw > ndw); r = amdgpu_job_submit(job, ring, &vm->entity, AMDGPU_FENCE_OWNER_VM, &fence); if (r) @@ -570,18 +638,15 @@ error_free: * amdgpu_vm_frag_ptes - add fragment information to PTEs * * @adev: amdgpu_device pointer - * @gtt: GART instance to use for mapping - * @gtt_flags: GTT hw mapping flags - * @ib: IB for the update + * @vm_update_params: see amdgpu_vm_update_params definition * @pe_start: first PTE to handle * @pe_end: last PTE to handle * @addr: addr those PTEs should point to * @flags: hw mapping flags */ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, - struct amdgpu_gart *gtt, - uint32_t gtt_flags, - struct amdgpu_ib *ib, + struct amdgpu_vm_update_params + *vm_update_params, uint64_t pe_start, uint64_t pe_end, uint64_t addr, uint32_t flags) { @@ -618,10 +683,11 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, return; /* system pages are non continuously */ - if (gtt || !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) { + if (vm_update_params->src || vm_update_params->pages_addr || + !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) { count = (pe_end - pe_start) / 8; - amdgpu_vm_update_pages(adev, gtt, gtt_flags, ib, pe_start, + amdgpu_vm_update_pages(adev, vm_update_params, pe_start, addr, count, AMDGPU_GPU_PAGE_SIZE, flags); return; @@ -630,21 +696,21 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, /* handle the 4K area at the beginning */ if (pe_start != frag_start) { count = (frag_start - pe_start) / 8; - amdgpu_vm_update_pages(adev, NULL, 0, ib, pe_start, addr, + amdgpu_vm_update_pages(adev, vm_update_params, pe_start, addr, count, AMDGPU_GPU_PAGE_SIZE, flags); addr += AMDGPU_GPU_PAGE_SIZE * count; } /* handle the area in the middle */ count = (frag_end - frag_start) / 8; - amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_start, addr, count, + amdgpu_vm_update_pages(adev, vm_update_params, frag_start, addr, count, AMDGPU_GPU_PAGE_SIZE, flags | frag_flags); /* handle the 4K area at the end */ if (frag_end != pe_end) { addr += AMDGPU_GPU_PAGE_SIZE * count; count = (pe_end - frag_end) / 8; - amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_end, addr, + amdgpu_vm_update_pages(adev, vm_update_params, frag_end, addr, count, AMDGPU_GPU_PAGE_SIZE, flags); } } @@ -653,8 +719,7 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, * amdgpu_vm_update_ptes - make sure that page tables 
are valid * * @adev: amdgpu_device pointer - * @gtt: GART instance to use for mapping - * @gtt_flags: GTT hw mapping flags + * @vm_update_params: see amdgpu_vm_update_params definition * @vm: requested vm * @start: start of GPU address range * @end: end of GPU address range @@ -664,10 +729,9 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, * Update the page tables in the range @start - @end. */ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev, - struct amdgpu_gart *gtt, - uint32_t gtt_flags, + struct amdgpu_vm_update_params + *vm_update_params, struct amdgpu_vm *vm, - struct amdgpu_ib *ib, uint64_t start, uint64_t end, uint64_t dst, uint32_t flags) { @@ -693,7 +757,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev, if (last_pe_end != pe_start) { - amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib, + amdgpu_vm_frag_ptes(adev, vm_update_params, last_pe_start, last_pe_end, last_dst, flags); @@ -708,17 +772,16 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev, dst += nptes * AMDGPU_GPU_PAGE_SIZE; } - amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib, - last_pe_start, last_pe_end, - last_dst, flags); + amdgpu_vm_frag_ptes(adev, vm_update_params, last_pe_start, + last_pe_end, last_dst, flags); } /** * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table * * @adev: amdgpu_device pointer - * @gtt: GART instance to use for mapping - * @gtt_flags: flags as they are used for GTT + * @src: address where to copy page table entries from + * @pages_addr: DMA addresses to use for mapping * @vm: requested vm * @start: start of mapped range * @last: last mapped entry @@ -730,8 +793,8 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev, * Returns 0 for success, -EINVAL for failure. */ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, - struct amdgpu_gart *gtt, - uint32_t gtt_flags, + uint64_t src, + dma_addr_t *pages_addr, struct amdgpu_vm *vm, uint64_t start, uint64_t last, uint32_t flags, uint64_t addr, @@ -741,11 +804,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, void *owner = AMDGPU_FENCE_OWNER_VM; unsigned nptes, ncmds, ndw; struct amdgpu_job *job; - struct amdgpu_ib *ib; + struct amdgpu_vm_update_params vm_update_params; struct fence *f = NULL; int r; ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); + memset(&vm_update_params, 0, sizeof(vm_update_params)); + vm_update_params.src = src; + vm_update_params.pages_addr = pages_addr; /* sync to everything on unmapping */ if (!(flags & AMDGPU_PTE_VALID)) @@ -762,11 +828,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, /* padding, etc. 
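amdgpu_vm_frag_ptes(), called throughout the walk above, splits every update into an unaligned head, a fragment-aligned middle that can carry the cheaper fragment encoding, and an unaligned tail; when no aligned middle exists, or the pages are scattered, it falls back to one plain update. A sketch of the split over PTE indices, assuming a fragment of 2^frag_shift entries (the real code works on PE byte addresses, so treat the units here as illustrative):

#include <stdint.h>
#include <stdio.h>

static void split_by_fragment(uint64_t first, uint64_t last_excl, unsigned frag_shift)
{
	uint64_t frag_align = 1ull << frag_shift;
	uint64_t frag_start = (first + frag_align - 1) & ~(frag_align - 1);	/* round up */
	uint64_t frag_end = last_excl & ~(frag_align - 1);			/* round down */

	if (frag_start >= frag_end) {	/* no aligned middle exists */
		printf("plain:  [%llu, %llu)\n",
		       (unsigned long long)first, (unsigned long long)last_excl);
		return;
	}
	if (first != frag_start)
		printf("head:   [%llu, %llu)\n",
		       (unsigned long long)first, (unsigned long long)frag_start);
	printf("middle: [%llu, %llu) with fragment flags\n",
	       (unsigned long long)frag_start, (unsigned long long)frag_end);
	if (frag_end != last_excl)
		printf("tail:   [%llu, %llu)\n",
		       (unsigned long long)frag_end, (unsigned long long)last_excl);
}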
*/ ndw = 64; - if ((gtt == &adev->gart) && (flags == gtt_flags)) { + if (vm_update_params.src) { /* only copy commands needed */ ndw += ncmds * 7; - } else if (gtt) { + } else if (vm_update_params.pages_addr) { /* header for write data commands */ ndw += ncmds * 4; @@ -785,7 +851,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) return r; - ib = &job->ibs[0]; + vm_update_params.ib = &job->ibs[0]; r = amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv, owner); @@ -796,11 +862,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_free; - amdgpu_vm_update_ptes(adev, gtt, gtt_flags, vm, ib, start, last + 1, - addr, flags); + amdgpu_vm_update_ptes(adev, &vm_update_params, vm, start, + last + 1, addr, flags); - amdgpu_ring_pad_ib(ring, ib); - WARN_ON(ib->length_dw > ndw); + amdgpu_ring_pad_ib(ring, vm_update_params.ib); + WARN_ON(vm_update_params.ib->length_dw > ndw); r = amdgpu_job_submit(job, ring, &vm->entity, AMDGPU_FENCE_OWNER_VM, &f); if (r) @@ -823,11 +889,12 @@ error_free: * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks * * @adev: amdgpu_device pointer - * @gtt: GART instance to use for mapping + * @gtt_flags: flags as they are used for GTT + * @pages_addr: DMA addresses to use for mapping * @vm: requested vm * @mapping: mapped range and flags to use for the update * @addr: addr to set the area to - * @gtt_flags: flags as they are used for GTT + * @flags: HW flags for the mapping * @fence: optional resulting fence * * Split the mapping into smaller chunks so that each update fits @@ -835,16 +902,16 @@ error_free: * Returns 0 for success, -EINVAL for failure. */ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, - struct amdgpu_gart *gtt, uint32_t gtt_flags, + dma_addr_t *pages_addr, struct amdgpu_vm *vm, struct amdgpu_bo_va_mapping *mapping, - uint64_t addr, struct fence **fence) + uint32_t flags, uint64_t addr, + struct fence **fence) { const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE; - uint64_t start = mapping->it.start; - uint32_t flags = gtt_flags; + uint64_t src = 0, start = mapping->it.start; int r; /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here @@ -857,10 +924,15 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, trace_amdgpu_vm_bo_update(mapping); + if (pages_addr) { + if (flags == gtt_flags) + src = adev->gart.table_addr + (addr >> 12) * 8; + addr = 0; + } addr += mapping->offset; - if (!gtt || ((gtt == &adev->gart) && (flags == gtt_flags))) - return amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm, + if (!pages_addr || src) + return amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm, start, mapping->it.last, flags, addr, fence); @@ -868,7 +940,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, uint64_t last; last = min((uint64_t)mapping->it.last, start + max_size - 1); - r = amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm, + r = amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm, start, last, flags, addr, fence); if (r) @@ -899,16 +971,20 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, { struct amdgpu_vm *vm = bo_va->vm; struct amdgpu_bo_va_mapping *mapping; - struct amdgpu_gart *gtt = NULL; - uint32_t flags; + dma_addr_t *pages_addr = NULL; + uint32_t gtt_flags, flags; uint64_t addr; int r; if (mem) { + struct ttm_dma_tt *ttm; + addr = (u64)mem->start << PAGE_SHIFT; switch (mem->mem_type) { case TTM_PL_TT: - gtt = &bo_va->bo->adev->gart; + ttm = 
container_of(bo_va->bo->tbo.ttm, struct + ttm_dma_tt, ttm); + pages_addr = ttm->dma_address; break; case TTM_PL_VRAM: @@ -923,6 +999,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, } flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem); + gtt_flags = (adev == bo_va->bo->adev) ? flags : 0; spin_lock(&vm->status_lock); if (!list_empty(&bo_va->vm_status)) @@ -930,7 +1007,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, spin_unlock(&vm->status_lock); list_for_each_entry(mapping, &bo_va->invalids, list) { - r = amdgpu_vm_bo_split_mapping(adev, gtt, flags, vm, mapping, addr, + r = amdgpu_vm_bo_split_mapping(adev, gtt_flags, pages_addr, vm, + mapping, flags, addr, &bo_va->last_pt_update); if (r) return r; @@ -976,8 +1054,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list); - r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, vm, mapping, - 0, NULL); + r = amdgpu_vm_bo_split_mapping(adev, 0, NULL, vm, mapping, + 0, 0, NULL); kfree(mapping); if (r) return r; @@ -1320,11 +1398,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) struct amd_sched_rq *rq; int i, r; - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - vm->ids[i].mgr_id = NULL; - vm->ids[i].flushed_updates = NULL; - } + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) + vm->ids[i] = NULL; vm->va = RB_ROOT; + vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); spin_lock_init(&vm->status_lock); INIT_LIST_HEAD(&vm->invalidated); INIT_LIST_HEAD(&vm->cleared); @@ -1416,15 +1493,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_bo_unref(&vm->page_directory); fence_put(vm->page_directory_fence); - - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_vm_id *id = &vm->ids[i]; - - if (id->mgr_id) - atomic_long_cmpxchg(&id->mgr_id->owner, - (long)id, 0); - fence_put(id->flushed_updates); - } } /** @@ -1443,11 +1511,13 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) /* skip over VMID 0, since it is the system VM */ for (i = 1; i < adev->vm_manager.num_ids; ++i) { amdgpu_vm_reset_id(adev, i); + amdgpu_sync_create(&adev->vm_manager.ids[i].active); list_add_tail(&adev->vm_manager.ids[i].list, &adev->vm_manager.ids_lru); } atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); + atomic64_set(&adev->vm_manager.client_counter, 0); } /** @@ -1461,6 +1531,11 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) { unsigned i; - for (i = 0; i < AMDGPU_NUM_VM; ++i) - fence_put(adev->vm_manager.ids[i].active); + for (i = 0; i < AMDGPU_NUM_VM; ++i) { + struct amdgpu_vm_id *id = &adev->vm_manager.ids[i]; + + fence_put(adev->vm_manager.ids[i].first); + amdgpu_sync_free(&adev->vm_manager.ids[i].active); + fence_put(id->flushed_updates); + } } diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h index fece8f45d..49daf6d72 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.h +++ b/drivers/gpu/drm/amd/amdgpu/atom.h @@ -92,7 +92,7 @@ #define ATOM_WS_AND_MASK 0x45 #define ATOM_WS_FB_WINDOW 0x46 #define ATOM_WS_ATTRIBUTES 0x47 -#define ATOM_WS_REGPTR 0x48 +#define ATOM_WS_REGPTR 0x48 #define ATOM_IIO_NOP 0 #define ATOM_IIO_START 1 diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c index 49aa35016..49a39b1a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c @@ -461,13 +461,14 @@ union set_pixel_clock { PIXEL_CLOCK_PARAMETERS_V3 v3; PIXEL_CLOCK_PARAMETERS_V5 v5; 
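One subtlety in amdgpu_vm_bo_update() above: gtt_flags is deliberately zeroed when the BO belongs to a different device, which makes the later flags == gtt_flags test in amdgpu_vm_bo_split_mapping() fail, so PTEs are written from pages_addr instead of being copied out of the local GART table. A sketch of that eligibility rule, with hypothetical parameters in place of the kernel structures:

#include <stdbool.h>
#include <stdint.h>

/* Returns the GART copy source, or 0 when the safe write path must be used. */
static uint64_t pick_copy_src(bool same_device, uint32_t flags, uint32_t gtt_flags,
			      uint64_t gart_table_addr, uint64_t addr)
{
	/* gtt_flags is forced to 0 for foreign BOs, so the equality below
	 * can only hold when same_device is true and the flag bits agree. */
	uint32_t effective_gtt_flags = same_device ? gtt_flags : 0;

	if (flags == effective_gtt_flags)
		return gart_table_addr + (addr >> 12) * 8;	/* copy PTEs from GART */
	return 0;
}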
PIXEL_CLOCK_PARAMETERS_V6 v6; + PIXEL_CLOCK_PARAMETERS_V7 v7; }; /* on DCE5, make sure the voltage is high enough to support the * required disp clk. */ void amdgpu_atombios_crtc_set_disp_eng_pll(struct amdgpu_device *adev, - u32 dispclk) + u32 dispclk) { u8 frev, crev; int index; @@ -510,6 +511,49 @@ void amdgpu_atombios_crtc_set_disp_eng_pll(struct amdgpu_device *adev, amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); } +union set_dce_clock { + SET_DCE_CLOCK_PS_ALLOCATION_V1_1 v1_1; + SET_DCE_CLOCK_PS_ALLOCATION_V2_1 v2_1; +}; + +u32 amdgpu_atombios_crtc_set_dce_clock(struct amdgpu_device *adev, + u32 freq, u8 clk_type, u8 clk_src) +{ + u8 frev, crev; + int index; + union set_dce_clock args; + u32 ret_freq = 0; + + memset(&args, 0, sizeof(args)); + + index = GetIndexIntoMasterTable(COMMAND, SetDCEClock); + if (!amdgpu_atom_parse_cmd_header(adev->mode_info.atom_context, index, &frev, + &crev)) + return 0; + + switch (frev) { + case 2: + switch (crev) { + case 1: + args.v2_1.asParam.ulDCEClkFreq = cpu_to_le32(freq); /* 10kHz units */ + args.v2_1.asParam.ucDCEClkType = clk_type; + args.v2_1.asParam.ucDCEClkSrc = clk_src; + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + ret_freq = le32_to_cpu(args.v2_1.asParam.ulDCEClkFreq) * 10; + break; + default: + DRM_ERROR("Unknown table version %d %d\n", frev, crev); + return 0; + } + break; + default: + DRM_ERROR("Unknown table version %d %d\n", frev, crev); + return 0; + } + + return ret_freq; +} + static bool is_pixel_clock_source_from_pll(u32 encoder_mode, int pll_id) { if (ENCODER_MODE_IS_DP(encoder_mode)) { @@ -523,18 +567,18 @@ static bool is_pixel_clock_source_from_pll(u32 encoder_mode, int pll_id) } void amdgpu_atombios_crtc_program_pll(struct drm_crtc *crtc, - u32 crtc_id, - int pll_id, - u32 encoder_mode, - u32 encoder_id, - u32 clock, - u32 ref_div, - u32 fb_div, - u32 frac_fb_div, - u32 post_div, - int bpc, - bool ss_enabled, - struct amdgpu_atom_ss *ss) + u32 crtc_id, + int pll_id, + u32 encoder_mode, + u32 encoder_id, + u32 clock, + u32 ref_div, + u32 fb_div, + u32 frac_fb_div, + u32 post_div, + int bpc, + bool ss_enabled, + struct amdgpu_atom_ss *ss) { struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; @@ -652,6 +696,34 @@ void amdgpu_atombios_crtc_program_pll(struct drm_crtc *crtc, args.v6.ucEncoderMode = encoder_mode; args.v6.ucPpll = pll_id; break; + case 7: + args.v7.ulPixelClock = cpu_to_le32(clock * 10); /* 100 hz units */ + args.v7.ucMiscInfo = 0; + if ((encoder_mode == ATOM_ENCODER_MODE_DVI) && + (clock > 165000)) + args.v7.ucMiscInfo |= PIXEL_CLOCK_V7_MISC_DVI_DUALLINK_EN; + args.v7.ucCRTC = crtc_id; + if (encoder_mode == ATOM_ENCODER_MODE_HDMI) { + switch (bpc) { + case 8: + default: + args.v7.ucDeepColorRatio = PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_DIS; + break; + case 10: + args.v7.ucDeepColorRatio = PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_5_4; + break; + case 12: + args.v7.ucDeepColorRatio = PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_3_2; + break; + case 16: + args.v7.ucDeepColorRatio = PIXEL_CLOCK_V7_DEEPCOLOR_RATIO_2_1; + break; + } + } + args.v7.ucTransmitterID = encoder_id; + args.v7.ucEncoderMode = encoder_mode; + args.v7.ucPpll = pll_id; + break; default: DRM_ERROR("Unknown table version %d %d\n", frev, crev); return; diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.h b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.h index c67083335..0eeda8e3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.h +++ 
b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.h @@ -37,6 +37,8 @@ void amdgpu_atombios_crtc_set_dtd_timing(struct drm_crtc *crtc, struct drm_display_mode *mode); void amdgpu_atombios_crtc_set_disp_eng_pll(struct amdgpu_device *adev, u32 dispclk); +u32 amdgpu_atombios_crtc_set_dce_clock(struct amdgpu_device *adev, + u32 freq, u8 clk_type, u8 clk_src); void amdgpu_atombios_crtc_program_pll(struct drm_crtc *crtc, u32 crtc_id, int pll_id, diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 1cd6de575..48b6bd671 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -567,6 +567,7 @@ union dig_encoder_control { DIG_ENCODER_CONTROL_PARAMETERS_V2 v2; DIG_ENCODER_CONTROL_PARAMETERS_V3 v3; DIG_ENCODER_CONTROL_PARAMETERS_V4 v4; + DIG_ENCODER_CONTROL_PARAMETERS_V5 v5; }; void @@ -694,6 +695,47 @@ amdgpu_atombios_encoder_setup_dig_encoder(struct drm_encoder *encoder, else args.v4.ucHPD_ID = hpd_id + 1; break; + case 5: + switch (action) { + case ATOM_ENCODER_CMD_SETUP_PANEL_MODE: + args.v5.asDPPanelModeParam.ucAction = action; + args.v5.asDPPanelModeParam.ucPanelMode = panel_mode; + args.v5.asDPPanelModeParam.ucDigId = dig->dig_encoder; + break; + case ATOM_ENCODER_CMD_STREAM_SETUP: + args.v5.asStreamParam.ucAction = action; + args.v5.asStreamParam.ucDigId = dig->dig_encoder; + args.v5.asStreamParam.ucDigMode = + amdgpu_atombios_encoder_get_encoder_mode(encoder); + if (ENCODER_MODE_IS_DP(args.v5.asStreamParam.ucDigMode)) + args.v5.asStreamParam.ucLaneNum = dp_lane_count; + else if (amdgpu_dig_monitor_is_duallink(encoder, + amdgpu_encoder->pixel_clock)) + args.v5.asStreamParam.ucLaneNum = 8; + else + args.v5.asStreamParam.ucLaneNum = 4; + args.v5.asStreamParam.ulPixelClock = + cpu_to_le32(amdgpu_encoder->pixel_clock / 10); + args.v5.asStreamParam.ucBitPerColor = + amdgpu_atombios_encoder_get_bpc(encoder); + args.v5.asStreamParam.ucLinkRateIn270Mhz = dp_clock / 27000; + break; + case ATOM_ENCODER_CMD_DP_LINK_TRAINING_START: + case ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN1: + case ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN2: + case ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN3: + case ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN4: + case ATOM_ENCODER_CMD_DP_LINK_TRAINING_COMPLETE: + case ATOM_ENCODER_CMD_DP_VIDEO_OFF: + case ATOM_ENCODER_CMD_DP_VIDEO_ON: + args.v5.asCmdParam.ucAction = action; + args.v5.asCmdParam.ucDigId = dig->dig_encoder; + break; + default: + DRM_ERROR("Unsupported action 0x%x\n", action); + break; + } + break; default: DRM_ERROR("Unknown table version %d, %d\n", frev, crev); break; @@ -714,11 +756,12 @@ union dig_transmitter_control { DIG_TRANSMITTER_CONTROL_PARAMETERS_V3 v3; DIG_TRANSMITTER_CONTROL_PARAMETERS_V4 v4; DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_5 v5; + DIG_TRANSMITTER_CONTROL_PARAMETERS_V1_6 v6; }; void amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int action, - uint8_t lane_num, uint8_t lane_set) + uint8_t lane_num, uint8_t lane_set) { struct drm_device *dev = encoder->dev; struct amdgpu_device *adev = dev->dev_private; @@ -1070,6 +1113,54 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a args.v5.ucDigEncoderSel = 1 << dig_encoder; args.v5.ucDPLaneSet = lane_set; break; + case 6: + args.v6.ucAction = action; + if (is_dp) + args.v6.ulSymClock = cpu_to_le32(dp_clock / 10); + else + args.v6.ulSymClock = cpu_to_le32(amdgpu_encoder->pixel_clock / 10); + + switch (amdgpu_encoder->encoder_id) { + case 
ENCODER_OBJECT_ID_INTERNAL_UNIPHY: + if (dig->linkb) + args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYB; + else + args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYA; + break; + case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: + if (dig->linkb) + args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYD; + else + args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYC; + break; + case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2: + if (dig->linkb) + args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYF; + else + args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYE; + break; + case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3: + args.v6.ucPhyId = ATOM_PHY_ID_UNIPHYG; + break; + } + if (is_dp) + args.v6.ucLaneNum = dp_lane_count; + else if (amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock)) + args.v6.ucLaneNum = 8; + else + args.v6.ucLaneNum = 4; + args.v6.ucConnObjId = connector_object_id; + if (action == ATOM_TRANSMITTER_ACTION_SETUP_VSEMPH) + args.v6.ucDPLaneSet = lane_set; + else + args.v6.ucDigMode = amdgpu_atombios_encoder_get_encoder_mode(encoder); + + if (hpd_id == AMDGPU_HPD_NONE) + args.v6.ucHPDSel = 0; + else + args.v6.ucHPDSel = hpd_id + 1; + args.v6.ucDigEncoderSel = 1 << dig_encoder; + break; default: DRM_ERROR("Unknown table version %d, %d\n", frev, crev); break; diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c index 13cdb01e9..bc56c8a18 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c @@ -156,3 +156,18 @@ u32 amdgpu_atombios_i2c_func(struct i2c_adapter *adap) return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; } +void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device* adev, u8 slave_addr, u8 line_number, u8 offset, u8 data) +{ + PROCESS_I2C_CHANNEL_TRANSACTION_PS_ALLOCATION args; + int index = GetIndexIntoMasterTable(COMMAND, ProcessI2cChannelTransaction); + + args.ucRegIndex = offset; + args.lpI2CDataOut = data; + args.ucFlag = 1; + args.ucI2CSpeed = TARGET_HW_I2C_CLOCK; + args.ucTransBytes = 1; + args.ucSlaveAddr = slave_addr; + args.ucLineNumber = line_number; + + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); +} diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h index d6128d9de..251aaf41f 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h +++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h @@ -27,5 +27,7 @@ int amdgpu_atombios_i2c_xfer(struct i2c_adapter *i2c_adap, struct i2c_msg *msgs, int num); u32 amdgpu_atombios_i2c_func(struct i2c_adapter *adap); +void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device* adev, + u8 slave_addr, u8 line_number, u8 offset, u8 data); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index 285ea5ea2..040dd26ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -2548,19 +2548,17 @@ static int ci_get_dependency_volt_by_clk(struct amdgpu_device *adev, return 0; } -static u8 ci_get_sleep_divider_id_from_clock(struct amdgpu_device *adev, - u32 sclk, u32 min_sclk_in_sr) +static u8 ci_get_sleep_divider_id_from_clock(u32 sclk, u32 min_sclk_in_sr) { u32 i; u32 tmp; - u32 min = (min_sclk_in_sr > CISLAND_MINIMUM_ENGINE_CLOCK) ? 
- min_sclk_in_sr : CISLAND_MINIMUM_ENGINE_CLOCK; + u32 min = max(min_sclk_in_sr, (u32)CISLAND_MINIMUM_ENGINE_CLOCK); if (sclk < min) return 0; for (i = CISLAND_MAX_DEEPSLEEP_DIVIDER_ID; ; i--) { - tmp = sclk / (1 << i); + tmp = sclk >> i; if (tmp >= min || i == 0) break; } @@ -3357,8 +3355,7 @@ static int ci_populate_single_graphic_level(struct amdgpu_device *adev, graphic_level->PowerThrottle = 0; if (pi->caps_sclk_ds) - graphic_level->DeepSleepDivId = ci_get_sleep_divider_id_from_clock(adev, - engine_clock, + graphic_level->DeepSleepDivId = ci_get_sleep_divider_id_from_clock(engine_clock, CISLAND_MINIMUM_ENGINE_CLOCK); graphic_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW; @@ -6223,6 +6220,9 @@ static int ci_dpm_sw_fini(void *handle) ci_dpm_fini(adev); mutex_unlock(&adev->pm.mutex); + release_firmware(adev->pm.fw); + adev->pm.fw = NULL; + return 0; } @@ -6308,215 +6308,6 @@ static int ci_dpm_wait_for_idle(void *handle) return 0; } -static void ci_dpm_print_status(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - dev_info(adev->dev, "CIK DPM registers\n"); - dev_info(adev->dev, " BIOS_SCRATCH_4=0x%08X\n", - RREG32(mmBIOS_SCRATCH_4)); - dev_info(adev->dev, " MC_ARB_DRAM_TIMING=0x%08X\n", - RREG32(mmMC_ARB_DRAM_TIMING)); - dev_info(adev->dev, " MC_ARB_DRAM_TIMING2=0x%08X\n", - RREG32(mmMC_ARB_DRAM_TIMING2)); - dev_info(adev->dev, " MC_ARB_BURST_TIME=0x%08X\n", - RREG32(mmMC_ARB_BURST_TIME)); - dev_info(adev->dev, " MC_ARB_DRAM_TIMING_1=0x%08X\n", - RREG32(mmMC_ARB_DRAM_TIMING_1)); - dev_info(adev->dev, " MC_ARB_DRAM_TIMING2_1=0x%08X\n", - RREG32(mmMC_ARB_DRAM_TIMING2_1)); - dev_info(adev->dev, " MC_CG_CONFIG=0x%08X\n", - RREG32(mmMC_CG_CONFIG)); - dev_info(adev->dev, " MC_ARB_CG=0x%08X\n", - RREG32(mmMC_ARB_CG)); - dev_info(adev->dev, " DIDT_SQ_CTRL0=0x%08X\n", - RREG32_DIDT(ixDIDT_SQ_CTRL0)); - dev_info(adev->dev, " DIDT_DB_CTRL0=0x%08X\n", - RREG32_DIDT(ixDIDT_DB_CTRL0)); - dev_info(adev->dev, " DIDT_TD_CTRL0=0x%08X\n", - RREG32_DIDT(ixDIDT_TD_CTRL0)); - dev_info(adev->dev, " DIDT_TCP_CTRL0=0x%08X\n", - RREG32_DIDT(ixDIDT_TCP_CTRL0)); - dev_info(adev->dev, " CG_THERMAL_INT=0x%08X\n", - RREG32_SMC(ixCG_THERMAL_INT)); - dev_info(adev->dev, " CG_THERMAL_CTRL=0x%08X\n", - RREG32_SMC(ixCG_THERMAL_CTRL)); - dev_info(adev->dev, " GENERAL_PWRMGT=0x%08X\n", - RREG32_SMC(ixGENERAL_PWRMGT)); - dev_info(adev->dev, " MC_SEQ_CNTL_3=0x%08X\n", - RREG32(mmMC_SEQ_CNTL_3)); - dev_info(adev->dev, " LCAC_MC0_CNTL=0x%08X\n", - RREG32_SMC(ixLCAC_MC0_CNTL)); - dev_info(adev->dev, " LCAC_MC1_CNTL=0x%08X\n", - RREG32_SMC(ixLCAC_MC1_CNTL)); - dev_info(adev->dev, " LCAC_CPL_CNTL=0x%08X\n", - RREG32_SMC(ixLCAC_CPL_CNTL)); - dev_info(adev->dev, " SCLK_PWRMGT_CNTL=0x%08X\n", - RREG32_SMC(ixSCLK_PWRMGT_CNTL)); - dev_info(adev->dev, " BIF_LNCNT_RESET=0x%08X\n", - RREG32(mmBIF_LNCNT_RESET)); - dev_info(adev->dev, " FIRMWARE_FLAGS=0x%08X\n", - RREG32_SMC(ixFIRMWARE_FLAGS)); - dev_info(adev->dev, " CG_SPLL_FUNC_CNTL=0x%08X\n", - RREG32_SMC(ixCG_SPLL_FUNC_CNTL)); - dev_info(adev->dev, " CG_SPLL_FUNC_CNTL_2=0x%08X\n", - RREG32_SMC(ixCG_SPLL_FUNC_CNTL_2)); - dev_info(adev->dev, " CG_SPLL_FUNC_CNTL_3=0x%08X\n", - RREG32_SMC(ixCG_SPLL_FUNC_CNTL_3)); - dev_info(adev->dev, " CG_SPLL_FUNC_CNTL_4=0x%08X\n", - RREG32_SMC(ixCG_SPLL_FUNC_CNTL_4)); - dev_info(adev->dev, " CG_SPLL_SPREAD_SPECTRUM=0x%08X\n", - RREG32_SMC(ixCG_SPLL_SPREAD_SPECTRUM)); - dev_info(adev->dev, " CG_SPLL_SPREAD_SPECTRUM_2=0x%08X\n", - RREG32_SMC(ixCG_SPLL_SPREAD_SPECTRUM_2)); - dev_info(adev->dev, " DLL_CNTL=0x%08X\n", 
- RREG32(mmDLL_CNTL)); - dev_info(adev->dev, " MCLK_PWRMGT_CNTL=0x%08X\n", - RREG32(mmMCLK_PWRMGT_CNTL)); - dev_info(adev->dev, " MPLL_AD_FUNC_CNTL=0x%08X\n", - RREG32(mmMPLL_AD_FUNC_CNTL)); - dev_info(adev->dev, " MPLL_DQ_FUNC_CNTL=0x%08X\n", - RREG32(mmMPLL_DQ_FUNC_CNTL)); - dev_info(adev->dev, " MPLL_FUNC_CNTL=0x%08X\n", - RREG32(mmMPLL_FUNC_CNTL)); - dev_info(adev->dev, " MPLL_FUNC_CNTL_1=0x%08X\n", - RREG32(mmMPLL_FUNC_CNTL_1)); - dev_info(adev->dev, " MPLL_FUNC_CNTL_2=0x%08X\n", - RREG32(mmMPLL_FUNC_CNTL_2)); - dev_info(adev->dev, " MPLL_SS1=0x%08X\n", - RREG32(mmMPLL_SS1)); - dev_info(adev->dev, " MPLL_SS2=0x%08X\n", - RREG32(mmMPLL_SS2)); - dev_info(adev->dev, " CG_DISPLAY_GAP_CNTL=0x%08X\n", - RREG32_SMC(ixCG_DISPLAY_GAP_CNTL)); - dev_info(adev->dev, " CG_DISPLAY_GAP_CNTL2=0x%08X\n", - RREG32_SMC(ixCG_DISPLAY_GAP_CNTL2)); - dev_info(adev->dev, " CG_STATIC_SCREEN_PARAMETER=0x%08X\n", - RREG32_SMC(ixCG_STATIC_SCREEN_PARAMETER)); - dev_info(adev->dev, " CG_FREQ_TRAN_VOTING_0=0x%08X\n", - RREG32_SMC(ixCG_FREQ_TRAN_VOTING_0)); - dev_info(adev->dev, " CG_FREQ_TRAN_VOTING_1=0x%08X\n", - RREG32_SMC(ixCG_FREQ_TRAN_VOTING_1)); - dev_info(adev->dev, " CG_FREQ_TRAN_VOTING_2=0x%08X\n", - RREG32_SMC(ixCG_FREQ_TRAN_VOTING_2)); - dev_info(adev->dev, " CG_FREQ_TRAN_VOTING_3=0x%08X\n", - RREG32_SMC(ixCG_FREQ_TRAN_VOTING_3)); - dev_info(adev->dev, " CG_FREQ_TRAN_VOTING_4=0x%08X\n", - RREG32_SMC(ixCG_FREQ_TRAN_VOTING_4)); - dev_info(adev->dev, " CG_FREQ_TRAN_VOTING_5=0x%08X\n", - RREG32_SMC(ixCG_FREQ_TRAN_VOTING_5)); - dev_info(adev->dev, " CG_FREQ_TRAN_VOTING_6=0x%08X\n", - RREG32_SMC(ixCG_FREQ_TRAN_VOTING_6)); - dev_info(adev->dev, " CG_FREQ_TRAN_VOTING_7=0x%08X\n", - RREG32_SMC(ixCG_FREQ_TRAN_VOTING_7)); - dev_info(adev->dev, " RCU_UC_EVENTS=0x%08X\n", - RREG32_SMC(ixRCU_UC_EVENTS)); - dev_info(adev->dev, " DPM_TABLE_475=0x%08X\n", - RREG32_SMC(ixDPM_TABLE_475)); - dev_info(adev->dev, " MC_SEQ_RAS_TIMING_LP=0x%08X\n", - RREG32(mmMC_SEQ_RAS_TIMING_LP)); - dev_info(adev->dev, " MC_SEQ_RAS_TIMING=0x%08X\n", - RREG32(mmMC_SEQ_RAS_TIMING)); - dev_info(adev->dev, " MC_SEQ_CAS_TIMING_LP=0x%08X\n", - RREG32(mmMC_SEQ_CAS_TIMING_LP)); - dev_info(adev->dev, " MC_SEQ_CAS_TIMING=0x%08X\n", - RREG32(mmMC_SEQ_CAS_TIMING)); - dev_info(adev->dev, " MC_SEQ_DLL_STBY_LP=0x%08X\n", - RREG32(mmMC_SEQ_DLL_STBY_LP)); - dev_info(adev->dev, " MC_SEQ_DLL_STBY=0x%08X\n", - RREG32(mmMC_SEQ_DLL_STBY)); - dev_info(adev->dev, " MC_SEQ_G5PDX_CMD0_LP=0x%08X\n", - RREG32(mmMC_SEQ_G5PDX_CMD0_LP)); - dev_info(adev->dev, " MC_SEQ_G5PDX_CMD0=0x%08X\n", - RREG32(mmMC_SEQ_G5PDX_CMD0)); - dev_info(adev->dev, " MC_SEQ_G5PDX_CMD1_LP=0x%08X\n", - RREG32(mmMC_SEQ_G5PDX_CMD1_LP)); - dev_info(adev->dev, " MC_SEQ_G5PDX_CMD1=0x%08X\n", - RREG32(mmMC_SEQ_G5PDX_CMD1)); - dev_info(adev->dev, " MC_SEQ_G5PDX_CTRL_LP=0x%08X\n", - RREG32(mmMC_SEQ_G5PDX_CTRL_LP)); - dev_info(adev->dev, " MC_SEQ_G5PDX_CTRL=0x%08X\n", - RREG32(mmMC_SEQ_G5PDX_CTRL)); - dev_info(adev->dev, " MC_SEQ_PMG_DVS_CMD_LP=0x%08X\n", - RREG32(mmMC_SEQ_PMG_DVS_CMD_LP)); - dev_info(adev->dev, " MC_SEQ_PMG_DVS_CMD=0x%08X\n", - RREG32(mmMC_SEQ_PMG_DVS_CMD)); - dev_info(adev->dev, " MC_SEQ_PMG_DVS_CTL_LP=0x%08X\n", - RREG32(mmMC_SEQ_PMG_DVS_CTL_LP)); - dev_info(adev->dev, " MC_SEQ_PMG_DVS_CTL=0x%08X\n", - RREG32(mmMC_SEQ_PMG_DVS_CTL)); - dev_info(adev->dev, " MC_SEQ_MISC_TIMING_LP=0x%08X\n", - RREG32(mmMC_SEQ_MISC_TIMING_LP)); - dev_info(adev->dev, " MC_SEQ_MISC_TIMING=0x%08X\n", - RREG32(mmMC_SEQ_MISC_TIMING)); - dev_info(adev->dev, " MC_SEQ_MISC_TIMING2_LP=0x%08X\n", - 
RREG32(mmMC_SEQ_MISC_TIMING2_LP)); - dev_info(adev->dev, " MC_SEQ_MISC_TIMING2=0x%08X\n", - RREG32(mmMC_SEQ_MISC_TIMING2)); - dev_info(adev->dev, " MC_SEQ_PMG_CMD_EMRS_LP=0x%08X\n", - RREG32(mmMC_SEQ_PMG_CMD_EMRS_LP)); - dev_info(adev->dev, " MC_PMG_CMD_EMRS=0x%08X\n", - RREG32(mmMC_PMG_CMD_EMRS)); - dev_info(adev->dev, " MC_SEQ_PMG_CMD_MRS_LP=0x%08X\n", - RREG32(mmMC_SEQ_PMG_CMD_MRS_LP)); - dev_info(adev->dev, " MC_PMG_CMD_MRS=0x%08X\n", - RREG32(mmMC_PMG_CMD_MRS)); - dev_info(adev->dev, " MC_SEQ_PMG_CMD_MRS1_LP=0x%08X\n", - RREG32(mmMC_SEQ_PMG_CMD_MRS1_LP)); - dev_info(adev->dev, " MC_PMG_CMD_MRS1=0x%08X\n", - RREG32(mmMC_PMG_CMD_MRS1)); - dev_info(adev->dev, " MC_SEQ_WR_CTL_D0_LP=0x%08X\n", - RREG32(mmMC_SEQ_WR_CTL_D0_LP)); - dev_info(adev->dev, " MC_SEQ_WR_CTL_D0=0x%08X\n", - RREG32(mmMC_SEQ_WR_CTL_D0)); - dev_info(adev->dev, " MC_SEQ_WR_CTL_D1_LP=0x%08X\n", - RREG32(mmMC_SEQ_WR_CTL_D1_LP)); - dev_info(adev->dev, " MC_SEQ_WR_CTL_D1=0x%08X\n", - RREG32(mmMC_SEQ_WR_CTL_D1)); - dev_info(adev->dev, " MC_SEQ_RD_CTL_D0_LP=0x%08X\n", - RREG32(mmMC_SEQ_RD_CTL_D0_LP)); - dev_info(adev->dev, " MC_SEQ_RD_CTL_D0=0x%08X\n", - RREG32(mmMC_SEQ_RD_CTL_D0)); - dev_info(adev->dev, " MC_SEQ_RD_CTL_D1_LP=0x%08X\n", - RREG32(mmMC_SEQ_RD_CTL_D1_LP)); - dev_info(adev->dev, " MC_SEQ_RD_CTL_D1=0x%08X\n", - RREG32(mmMC_SEQ_RD_CTL_D1)); - dev_info(adev->dev, " MC_SEQ_PMG_TIMING_LP=0x%08X\n", - RREG32(mmMC_SEQ_PMG_TIMING_LP)); - dev_info(adev->dev, " MC_SEQ_PMG_TIMING=0x%08X\n", - RREG32(mmMC_SEQ_PMG_TIMING)); - dev_info(adev->dev, " MC_SEQ_PMG_CMD_MRS2_LP=0x%08X\n", - RREG32(mmMC_SEQ_PMG_CMD_MRS2_LP)); - dev_info(adev->dev, " MC_PMG_CMD_MRS2=0x%08X\n", - RREG32(mmMC_PMG_CMD_MRS2)); - dev_info(adev->dev, " MC_SEQ_WR_CTL_2_LP=0x%08X\n", - RREG32(mmMC_SEQ_WR_CTL_2_LP)); - dev_info(adev->dev, " MC_SEQ_WR_CTL_2=0x%08X\n", - RREG32(mmMC_SEQ_WR_CTL_2)); - dev_info(adev->dev, " PCIE_LC_SPEED_CNTL=0x%08X\n", - RREG32_PCIE(ixPCIE_LC_SPEED_CNTL)); - dev_info(adev->dev, " PCIE_LC_LINK_WIDTH_CNTL=0x%08X\n", - RREG32_PCIE(ixPCIE_LC_LINK_WIDTH_CNTL)); - dev_info(adev->dev, " SMC_IND_INDEX_0=0x%08X\n", - RREG32(mmSMC_IND_INDEX_0)); - dev_info(adev->dev, " SMC_IND_DATA_0=0x%08X\n", - RREG32(mmSMC_IND_DATA_0)); - dev_info(adev->dev, " SMC_IND_ACCESS_CNTL=0x%08X\n", - RREG32(mmSMC_IND_ACCESS_CNTL)); - dev_info(adev->dev, " SMC_RESP_0=0x%08X\n", - RREG32(mmSMC_RESP_0)); - dev_info(adev->dev, " SMC_MESSAGE_0=0x%08X\n", - RREG32(mmSMC_MESSAGE_0)); - dev_info(adev->dev, " SMC_SYSCON_RESET_CNTL=0x%08X\n", - RREG32_SMC(ixSMC_SYSCON_RESET_CNTL)); - dev_info(adev->dev, " SMC_SYSCON_CLOCK_CNTL_0=0x%08X\n", - RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0)); - dev_info(adev->dev, " SMC_SYSCON_MISC_CNTL=0x%08X\n", - RREG32_SMC(ixSMC_SYSCON_MISC_CNTL)); - dev_info(adev->dev, " SMC_PC_C=0x%08X\n", - RREG32_SMC(ixSMC_PC_C)); -} - static int ci_dpm_soft_reset(void *handle) { return 0; @@ -6571,7 +6362,7 @@ static int ci_dpm_set_interrupt_state(struct amdgpu_device *adev, } static int ci_dpm_process_interrupt(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, + struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { bool queue_thermal = false; @@ -6613,6 +6404,7 @@ static int ci_dpm_set_powergating_state(void *handle, } const struct amd_ip_funcs ci_dpm_ip_funcs = { + .name = "ci_dpm", .early_init = ci_dpm_early_init, .late_init = ci_dpm_late_init, .sw_init = ci_dpm_sw_init, @@ -6624,7 +6416,6 @@ const struct amd_ip_funcs ci_dpm_ip_funcs = { .is_idle = ci_dpm_is_idle, .wait_for_idle = ci_dpm_wait_for_idle, .soft_reset = ci_dpm_soft_reset, 
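A cross-cutting theme of this patch shows up in the deletion above and in the initializer that follows: every per-IP print_status() register dump is dropped, and struct amd_ip_funcs instead gains a .name field so shared code can label diagnostics itself. The shape of that pattern, reduced to plain C with invented demo names:

#include <stdio.h>

struct ip_funcs {
	const char *name;
	int (*soft_reset)(void *handle);
};

static int demo_soft_reset(void *handle) { (void)handle; return 0; }

static const struct ip_funcs demo_ip_funcs = {
	.name = "ci_dpm",
	.soft_reset = demo_soft_reset,
};

int main(void)
{
	/* a generic caller can now report which IP block failed, by name */
	if (demo_ip_funcs.soft_reset(NULL))
		fprintf(stderr, "%s: soft reset failed\n", demo_ip_funcs.name);
	return 0;
}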
- .print_status = ci_dpm_print_status, .set_clockgating_state = ci_dpm_set_clockgating_state, .set_powergating_state = ci_dpm_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index bddc9ba11..910431808 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -962,7 +962,13 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev, return true; } -static struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = { +static u32 cik_get_virtual_caps(struct amdgpu_device *adev) +{ + /* CIK does not support SR-IOV */ + return 0; +} + +static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = { {mmGRBM_STATUS, false}, {mmGB_ADDR_CONFIG, false}, {mmMC_ARB_RAMCFG, false}, @@ -2007,7 +2013,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .get_xclk = &cik_get_xclk, .set_uvd_clocks = &cik_set_uvd_clocks, .set_vce_clocks = &cik_set_vce_clocks, - .get_cu_info = &gfx_v7_0_get_cu_info, + .get_virtual_caps = &cik_get_virtual_caps, /* these should be moved to their own ip modules */ .get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter, .wait_for_mc_idle = &gmc_v7_0_mc_wait_for_idle, @@ -2214,11 +2220,6 @@ static int cik_common_wait_for_idle(void *handle) return 0; } -static void cik_common_print_status(void *handle) -{ - -} - static int cik_common_soft_reset(void *handle) { /* XXX hard reset?? */ @@ -2238,6 +2239,7 @@ static int cik_common_set_powergating_state(void *handle, } const struct amd_ip_funcs cik_common_ip_funcs = { + .name = "cik_common", .early_init = cik_common_early_init, .late_init = NULL, .sw_init = cik_common_sw_init, @@ -2249,7 +2251,6 @@ const struct amd_ip_funcs cik_common_ip_funcs = { .is_idle = cik_common_is_idle, .wait_for_idle = cik_common_wait_for_idle, .soft_reset = cik_common_soft_reset, - .print_status = cik_common_print_status, .set_clockgating_state = cik_common_set_clockgating_state, .set_powergating_state = cik_common_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index 30c9b3bee..be3d6f79a 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -103,7 +103,6 @@ static void cik_ih_disable_interrupts(struct amdgpu_device *adev) */ static int cik_ih_irq_init(struct amdgpu_device *adev) { - int ret = 0; int rb_bufsz; u32 interrupt_cntl, ih_cntl, ih_rb_cntl; u64 wptr_off; @@ -156,7 +155,7 @@ static int cik_ih_irq_init(struct amdgpu_device *adev) /* enable irqs */ cik_ih_enable_interrupts(adev); - return ret; + return 0; } /** @@ -243,7 +242,7 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev, /* wptr/rptr are in bytes! 
*/ u32 ring_index = adev->irq.ih.rptr >> 2; uint32_t dw[4]; - + dw[0] = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]); dw[1] = le32_to_cpu(adev->irq.ih.ring[ring_index + 1]); dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); @@ -372,35 +371,6 @@ static int cik_ih_wait_for_idle(void *handle) return -ETIMEDOUT; } -static void cik_ih_print_status(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - dev_info(adev->dev, "CIK IH registers\n"); - dev_info(adev->dev, " SRBM_STATUS=0x%08X\n", - RREG32(mmSRBM_STATUS)); - dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n", - RREG32(mmSRBM_STATUS2)); - dev_info(adev->dev, " INTERRUPT_CNTL=0x%08X\n", - RREG32(mmINTERRUPT_CNTL)); - dev_info(adev->dev, " INTERRUPT_CNTL2=0x%08X\n", - RREG32(mmINTERRUPT_CNTL2)); - dev_info(adev->dev, " IH_CNTL=0x%08X\n", - RREG32(mmIH_CNTL)); - dev_info(adev->dev, " IH_RB_CNTL=0x%08X\n", - RREG32(mmIH_RB_CNTL)); - dev_info(adev->dev, " IH_RB_BASE=0x%08X\n", - RREG32(mmIH_RB_BASE)); - dev_info(adev->dev, " IH_RB_WPTR_ADDR_LO=0x%08X\n", - RREG32(mmIH_RB_WPTR_ADDR_LO)); - dev_info(adev->dev, " IH_RB_WPTR_ADDR_HI=0x%08X\n", - RREG32(mmIH_RB_WPTR_ADDR_HI)); - dev_info(adev->dev, " IH_RB_RPTR=0x%08X\n", - RREG32(mmIH_RB_RPTR)); - dev_info(adev->dev, " IH_RB_WPTR=0x%08X\n", - RREG32(mmIH_RB_WPTR)); -} - static int cik_ih_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -412,8 +382,6 @@ static int cik_ih_soft_reset(void *handle) srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_IH_MASK; if (srbm_soft_reset) { - cik_ih_print_status((void *)adev); - tmp = RREG32(mmSRBM_SOFT_RESET); tmp |= srbm_soft_reset; dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); @@ -428,8 +396,6 @@ static int cik_ih_soft_reset(void *handle) /* Wait a little for things to settle down */ udelay(50); - - cik_ih_print_status((void *)adev); } return 0; @@ -448,6 +414,7 @@ static int cik_ih_set_powergating_state(void *handle, } const struct amd_ip_funcs cik_ih_ip_funcs = { + .name = "cik_ih", .early_init = cik_ih_early_init, .late_init = NULL, .sw_init = cik_ih_sw_init, @@ -459,7 +426,6 @@ const struct amd_ip_funcs cik_ih_ip_funcs = { .is_idle = cik_ih_is_idle, .wait_for_idle = cik_ih_wait_for_idle, .soft_reset = cik_ih_soft_reset, - .print_status = cik_ih_print_status, .set_clockgating_state = cik_ih_set_clockgating_state, .set_powergating_state = cik_ih_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 263ecd5e4..c56485e4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -57,6 +57,16 @@ static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev); u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev); + +static void cik_sdma_free_microcode(struct amdgpu_device *adev) +{ + int i; + for (i = 0; i < adev->sdma.num_instances; i++) { + release_firmware(adev->sdma.instance[i].fw); + adev->sdma.instance[i].fw = NULL; + } +} + /* * sDMA - System DMA * Starting with CIK, the GPU has new asynchronous @@ -201,9 +211,10 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * Schedule an IB in the DMA ring (CIK). 
*/ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) { - u32 extra_bits = ib->vm_id & 0xf; + u32 extra_bits = vm_id & 0xf; u32 next_rptr = ring->wptr + 5; while ((next_rptr & 7) != 4) @@ -409,6 +420,8 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) /* Initialize the ring buffer's read and write pointers */ WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0); WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0); + WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0); + WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0); /* set the wb address whether it's enabled or not */ WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], @@ -436,7 +449,12 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); ring->ready = true; + } + + cik_sdma_enable(adev, true); + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; r = amdgpu_ring_test_ring(ring); if (r) { ring->ready = false; @@ -519,8 +537,8 @@ static int cik_sdma_start(struct amdgpu_device *adev) if (r) return r; - /* unhalt the MEs */ - cik_sdma_enable(adev, true); + /* halt the engine before programming */ + cik_sdma_enable(adev, false); /* start the gfx rings and rlc compute queues */ r = cik_sdma_gfx_resume(adev); @@ -634,7 +652,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[3] = 1; ib.ptr[4] = 0xDEADBEEF; ib.length_dw = 5; - r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); if (r) goto err1; @@ -967,7 +985,7 @@ static int cik_sdma_sw_init(void *handle) ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; sprintf(ring->name, "sdma%d", i); - r = amdgpu_ring_init(adev, ring, 256 * 1024, + r = amdgpu_ring_init(adev, ring, 1024, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0), 0xf, &adev->sdma.trap_irq, (i == 0) ?
@@ -988,6 +1006,7 @@ static int cik_sdma_sw_fini(void *handle) for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ring_fini(&adev->sdma.instance[i].ring); + cik_sdma_free_microcode(adev); return 0; } @@ -1055,57 +1074,6 @@ static int cik_sdma_wait_for_idle(void *handle) return -ETIMEDOUT; } -static void cik_sdma_print_status(void *handle) -{ - int i, j; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - dev_info(adev->dev, "CIK SDMA registers\n"); - dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n", - RREG32(mmSRBM_STATUS2)); - for (i = 0; i < adev->sdma.num_instances; i++) { - dev_info(adev->dev, " SDMA%d_STATUS_REG=0x%08X\n", - i, RREG32(mmSDMA0_STATUS_REG + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_ME_CNTL=0x%08X\n", - i, RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_CNTL=0x%08X\n", - i, RREG32(mmSDMA0_CNTL + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_SEM_INCOMPLETE_TIMER_CNTL=0x%08X\n", - i, RREG32(mmSDMA0_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_SEM_WAIT_FAIL_TIMER_CNTL=0x%08X\n", - i, RREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_IB_CNTL=0x%08X\n", - i, RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_RB_CNTL=0x%08X\n", - i, RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_RB_RPTR=0x%08X\n", - i, RREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_RB_WPTR=0x%08X\n", - i, RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_RB_RPTR_ADDR_HI=0x%08X\n", - i, RREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_RB_RPTR_ADDR_LO=0x%08X\n", - i, RREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_RB_BASE=0x%08X\n", - i, RREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_RB_BASE_HI=0x%08X\n", - i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_TILING_CONFIG=0x%08X\n", - i, RREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i])); - mutex_lock(&adev->srbm_mutex); - for (j = 0; j < 16; j++) { - cik_srbm_select(adev, 0, 0, 0, j); - dev_info(adev->dev, " VM %d:\n", j); - dev_info(adev->dev, " SDMA0_GFX_VIRTUAL_ADDR=0x%08X\n", - RREG32(mmSDMA0_GFX_VIRTUAL_ADDR + sdma_offsets[i])); - dev_info(adev->dev, " SDMA0_GFX_APE1_CNTL=0x%08X\n", - RREG32(mmSDMA0_GFX_APE1_CNTL + sdma_offsets[i])); - } - cik_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - } -} - static int cik_sdma_soft_reset(void *handle) { u32 srbm_soft_reset = 0; @@ -1128,8 +1096,6 @@ static int cik_sdma_soft_reset(void *handle) } if (srbm_soft_reset) { - cik_sdma_print_status((void *)adev); - tmp = RREG32(mmSRBM_SOFT_RESET); tmp |= srbm_soft_reset; dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); @@ -1144,8 +1110,6 @@ static int cik_sdma_soft_reset(void *handle) /* Wait a little for things to settle down */ udelay(50); - - cik_sdma_print_status((void *)adev); } return 0; @@ -1269,6 +1233,7 @@ static int cik_sdma_set_powergating_state(void *handle, } const struct amd_ip_funcs cik_sdma_ip_funcs = { + .name = "cik_sdma", .early_init = cik_sdma_early_init, .late_init = NULL, .sw_init = cik_sdma_sw_init, @@ -1280,7 +1245,6 @@ const struct amd_ip_funcs cik_sdma_ip_funcs = { .is_idle = cik_sdma_is_idle, .wait_for_idle = cik_sdma_wait_for_idle, .soft_reset = cik_sdma_soft_reset, - .print_status = cik_sdma_print_status, 
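/*
 * In outline, the reworked SDMA bring-up above is a two-phase resume:
 * cik_sdma_start() now halts the engine before programming, and
 * cik_sdma_gfx_resume() only unhalts once every instance's ring
 * registers (including the newly cleared IB_RPTR/IB_OFFSET) are
 * written, testing the rings last. A simplified sketch of that
 * ordering, using only calls visible in the hunks above:
 */
static int sdma_resume_order(struct amdgpu_device *adev)
{
	int i, r;

	cik_sdma_enable(adev, false);	/* halt before programming */

	for (i = 0; i < adev->sdma.num_instances; i++) {
		/* program RB/IB registers for instance i while halted */
	}

	cik_sdma_enable(adev, true);	/* unhalt all instances at once */

	for (i = 0; i < adev->sdma.num_instances; i++) {
		r = amdgpu_ring_test_ring(&adev->sdma.instance[i].ring);
		if (r)
			return r;
	}
	return 0;
}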
.set_clockgating_state = cik_sdma_set_clockgating_state, .set_powergating_state = cik_sdma_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index 60d449320..c4f6f00d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -190,8 +190,8 @@ # define MACRO_TILE_ASPECT(x) ((x) << 4) # define NUM_BANKS(x) ((x) << 6) -#define MSG_ENTER_RLC_SAFE_MODE 1 -#define MSG_EXIT_RLC_SAFE_MODE 0 +#define MSG_ENTER_RLC_SAFE_MODE 1 +#define MSG_EXIT_RLC_SAFE_MODE 0 /* * PM4 diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c index e7ef2261f..933e425a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c @@ -1579,7 +1579,6 @@ static int cz_dpm_update_sclk_limit(struct amdgpu_device *adev) static int cz_dpm_set_deep_sleep_sclk_threshold(struct amdgpu_device *adev) { - int ret = 0; struct cz_power_info *pi = cz_get_pi(adev); if (pi->caps_sclk_ds) { @@ -1588,20 +1587,19 @@ static int cz_dpm_set_deep_sleep_sclk_threshold(struct amdgpu_device *adev) CZ_MIN_DEEP_SLEEP_SCLK); } - return ret; + return 0; } /* ?? without dal support, is this still needed in setpowerstate list*/ static int cz_dpm_set_watermark_threshold(struct amdgpu_device *adev) { - int ret = 0; struct cz_power_info *pi = cz_get_pi(adev); cz_send_msg_to_smc_with_parameter(adev, PPSMC_MSG_SetWatermarkFrequency, pi->sclk_dpm.soft_max_clk); - return ret; + return 0; } static int cz_dpm_enable_nbdpm(struct amdgpu_device *adev) @@ -1636,7 +1634,6 @@ static void cz_dpm_nbdpm_lm_pstate_enable(struct amdgpu_device *adev, static int cz_dpm_update_low_memory_pstate(struct amdgpu_device *adev) { - int ret = 0; struct cz_power_info *pi = cz_get_pi(adev); struct cz_ps *ps = &pi->requested_ps; @@ -1647,21 +1644,19 @@ static int cz_dpm_update_low_memory_pstate(struct amdgpu_device *adev) cz_dpm_nbdpm_lm_pstate_enable(adev, true); } - return ret; + return 0; } /* with dpm enabled */ static int cz_dpm_set_power_state(struct amdgpu_device *adev) { - int ret = 0; - cz_dpm_update_sclk_limit(adev); cz_dpm_set_deep_sleep_sclk_threshold(adev); cz_dpm_set_watermark_threshold(adev); cz_dpm_enable_nbdpm(adev); cz_dpm_update_low_memory_pstate(adev); - return ret; + return 0; } static void cz_dpm_post_set_power_state(struct amdgpu_device *adev) @@ -2230,6 +2225,7 @@ static void cz_dpm_powergate_vce(struct amdgpu_device *adev, bool gate) } const struct amd_ip_funcs cz_dpm_ip_funcs = { + .name = "cz_dpm", .early_init = cz_dpm_early_init, .late_init = cz_dpm_late_init, .sw_init = cz_dpm_sw_init, @@ -2241,7 +2237,6 @@ const struct amd_ip_funcs cz_dpm_ip_funcs = { .is_idle = NULL, .wait_for_idle = NULL, .soft_reset = NULL, - .print_status = NULL, .set_clockgating_state = cz_dpm_set_clockgating_state, .set_powergating_state = cz_dpm_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index c79638f8e..3d23a70b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -103,7 +103,6 @@ static void cz_ih_disable_interrupts(struct amdgpu_device *adev) */ static int cz_ih_irq_init(struct amdgpu_device *adev) { - int ret = 0; int rb_bufsz; u32 interrupt_cntl, ih_cntl, ih_rb_cntl; u64 wptr_off; @@ -157,7 +156,7 @@ static int cz_ih_irq_init(struct amdgpu_device *adev) /* enable interrupts */ cz_ih_enable_interrupts(adev); - return ret; + return 0; } /** @@ -222,7 +221,7 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev, /* 
wptr/rptr are in bytes! */ u32 ring_index = adev->irq.ih.rptr >> 2; uint32_t dw[4]; - + dw[0] = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]); dw[1] = le32_to_cpu(adev->irq.ih.ring[ring_index + 1]); dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); @@ -351,35 +350,6 @@ static int cz_ih_wait_for_idle(void *handle) return -ETIMEDOUT; } -static void cz_ih_print_status(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - dev_info(adev->dev, "CZ IH registers\n"); - dev_info(adev->dev, " SRBM_STATUS=0x%08X\n", - RREG32(mmSRBM_STATUS)); - dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n", - RREG32(mmSRBM_STATUS2)); - dev_info(adev->dev, " INTERRUPT_CNTL=0x%08X\n", - RREG32(mmINTERRUPT_CNTL)); - dev_info(adev->dev, " INTERRUPT_CNTL2=0x%08X\n", - RREG32(mmINTERRUPT_CNTL2)); - dev_info(adev->dev, " IH_CNTL=0x%08X\n", - RREG32(mmIH_CNTL)); - dev_info(adev->dev, " IH_RB_CNTL=0x%08X\n", - RREG32(mmIH_RB_CNTL)); - dev_info(adev->dev, " IH_RB_BASE=0x%08X\n", - RREG32(mmIH_RB_BASE)); - dev_info(adev->dev, " IH_RB_WPTR_ADDR_LO=0x%08X\n", - RREG32(mmIH_RB_WPTR_ADDR_LO)); - dev_info(adev->dev, " IH_RB_WPTR_ADDR_HI=0x%08X\n", - RREG32(mmIH_RB_WPTR_ADDR_HI)); - dev_info(adev->dev, " IH_RB_RPTR=0x%08X\n", - RREG32(mmIH_RB_RPTR)); - dev_info(adev->dev, " IH_RB_WPTR=0x%08X\n", - RREG32(mmIH_RB_WPTR)); -} - static int cz_ih_soft_reset(void *handle) { u32 srbm_soft_reset = 0; @@ -391,8 +361,6 @@ static int cz_ih_soft_reset(void *handle) SOFT_RESET_IH, 1); if (srbm_soft_reset) { - cz_ih_print_status((void *)adev); - tmp = RREG32(mmSRBM_SOFT_RESET); tmp |= srbm_soft_reset; dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); @@ -407,8 +375,6 @@ static int cz_ih_soft_reset(void *handle) /* Wait a little for things to settle down */ udelay(50); - - cz_ih_print_status((void *)adev); } return 0; @@ -429,6 +395,7 @@ static int cz_ih_set_powergating_state(void *handle, } const struct amd_ip_funcs cz_ih_ip_funcs = { + .name = "cz_ih", .early_init = cz_ih_early_init, .late_init = NULL, .sw_init = cz_ih_sw_init, @@ -440,7 +407,6 @@ const struct amd_ip_funcs cz_ih_ip_funcs = { .is_idle = cz_ih_is_idle, .wait_for_idle = cz_ih_wait_for_idle, .soft_reset = cz_ih_soft_reset, - .print_status = cz_ih_print_status, .set_clockgating_state = cz_ih_set_clockgating_state, .set_powergating_state = cz_ih_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/cz_smumgr.h b/drivers/gpu/drm/amd/amdgpu/cz_smumgr.h index 924d355b4..026342fcf 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_smumgr.h +++ b/drivers/gpu/drm/amd/amdgpu/cz_smumgr.h @@ -77,7 +77,7 @@ struct cz_smu_private_data { uint8_t driver_buffer_length; uint8_t scratch_buffer_length; uint16_t toc_entry_used_count; - uint16_t toc_entry_initialize_index; + uint16_t toc_entry_initialize_index; uint16_t toc_entry_power_profiling_index; uint16_t toc_entry_aram; uint16_t toc_entry_ih_register_restore_task_index; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 6de2ce535..8227344d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -284,10 +284,16 @@ static void dce_v10_0_pageflip_interrupt_fini(struct amdgpu_device *adev) * surface base address. 
*/ static void dce_v10_0_page_flip(struct amdgpu_device *adev, - int crtc_id, u64 crtc_base) + int crtc_id, u64 crtc_base, bool async) { struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; + u32 tmp; + /* flip at hsync for async, default is vsync */ + tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset); + tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, + GRPH_SURFACE_UPDATE_H_RETRACE_EN, async ? 1 : 0); + WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); /* update the primary scanout address */ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(crtc_base)); @@ -2211,6 +2217,14 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, dce_v10_0_vga_enable(crtc, false); + /* Make sure surface address is updated at vertical blank rather than + * horizontal blank + */ + tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset); + tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, + GRPH_SURFACE_UPDATE_H_RETRACE_EN, 0); + WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); + WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(fb_location)); WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, @@ -2261,13 +2275,6 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmVIEWPORT_SIZE + amdgpu_crtc->crtc_offset, (viewport_w << 16) | viewport_h); - /* pageflip setup */ - /* make sure flip is at vb rather than hb */ - tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, - GRPH_SURFACE_UPDATE_H_RETRACE_EN, 0); - WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); - /* set pageflip to happen only at start of vblank interval (front porch) */ WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 3); @@ -2587,7 +2594,7 @@ static int dce_v10_0_crtc_cursor_set2(struct drm_crtc *crtc, return -EINVAL; } - obj = drm_gem_object_lookup(crtc->dev, file_priv, handle); + obj = drm_gem_object_lookup(file_priv, handle); if (!obj) { DRM_ERROR("Cannot find cursor object %x for crtc %d\n", handle, amdgpu_crtc->crtc_id); return -ENOENT; @@ -2992,6 +2999,8 @@ static int dce_v10_0_sw_init(void *handle) adev->ddev->mode_config.funcs = &amdgpu_mode_funcs; + adev->ddev->mode_config.async_page_flip = true; + adev->ddev->mode_config.max_width = 16384; adev->ddev->mode_config.max_height = 16384; @@ -3130,14 +3139,6 @@ static int dce_v10_0_wait_for_idle(void *handle) return 0; } -static void dce_v10_0_print_status(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - dev_info(adev->dev, "DCE 10.x registers\n"); - /* XXX todo */ -} - static int dce_v10_0_soft_reset(void *handle) { u32 srbm_soft_reset = 0, tmp; @@ -3147,8 +3148,6 @@ static int dce_v10_0_soft_reset(void *handle) srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK; if (srbm_soft_reset) { - dce_v10_0_print_status((void *)adev); - tmp = RREG32(mmSRBM_SOFT_RESET); tmp |= srbm_soft_reset; dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); @@ -3163,7 +3162,6 @@ static int dce_v10_0_soft_reset(void *handle) /* Wait a little for things to settle down */ udelay(50); - dce_v10_0_print_status((void *)adev); } return 0; } @@ -3370,7 +3368,7 @@ static int dce_v10_0_pageflip_irq(struct amdgpu_device *adev, /* wakeup usersapce */ if (works->event) - drm_send_vblank_event(adev->ddev, crtc_id, works->event); + drm_crtc_send_vblank_event(&amdgpu_crtc->base, works->event); spin_unlock_irqrestore(&adev->ddev->event_lock, flags); @@ -3501,6 
+3499,7 @@ static int dce_v10_0_set_powergating_state(void *handle, } const struct amd_ip_funcs dce_v10_0_ip_funcs = { + .name = "dce_v10_0", .early_init = dce_v10_0_early_init, .late_init = NULL, .sw_init = dce_v10_0_sw_init, @@ -3512,7 +3511,6 @@ const struct amd_ip_funcs dce_v10_0_ip_funcs = { .is_idle = dce_v10_0_is_idle, .wait_for_idle = dce_v10_0_wait_for_idle, .soft_reset = dce_v10_0_soft_reset, - .print_status = dce_v10_0_print_status, .set_clockgating_state = dce_v10_0_set_clockgating_state, .set_powergating_state = dce_v10_0_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index e9ccc6b78..af26ec0bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -132,6 +132,22 @@ static const u32 stoney_golden_settings_a11[] = mmFBC_MISC, 0x1f311fff, 0x14302000, }; +static const u32 polaris11_golden_settings_a11[] = +{ + mmDCI_CLK_CNTL, 0x00000080, 0x00000000, + mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070, + mmFBC_DEBUG1, 0xffffffff, 0x00000008, + mmFBC_MISC, 0x9f313fff, 0x14302008, + mmHDMI_CONTROL, 0x313f031f, 0x00000011, +}; + +static const u32 polaris10_golden_settings_a11[] = +{ + mmDCI_CLK_CNTL, 0x00000080, 0x00000000, + mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070, + mmFBC_MISC, 0x9f313fff, 0x14302008, + mmHDMI_CONTROL, 0x313f031f, 0x00000011, +}; static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev) { @@ -149,6 +165,16 @@ static void dce_v11_0_init_golden_registers(struct amdgpu_device *adev) stoney_golden_settings_a11, (const u32)ARRAY_SIZE(stoney_golden_settings_a11)); break; + case CHIP_POLARIS11: + amdgpu_program_register_sequence(adev, + polaris11_golden_settings_a11, + (const u32)ARRAY_SIZE(polaris11_golden_settings_a11)); + break; + case CHIP_POLARIS10: + amdgpu_program_register_sequence(adev, + polaris10_golden_settings_a11, + (const u32)ARRAY_SIZE(polaris10_golden_settings_a11)); + break; default: break; } @@ -276,10 +302,17 @@ static void dce_v11_0_pageflip_interrupt_fini(struct amdgpu_device *adev) * surface base address. */ static void dce_v11_0_page_flip(struct amdgpu_device *adev, - int crtc_id, u64 crtc_base) + int crtc_id, u64 crtc_base, bool async) { struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; + u32 tmp; + /* flip at hsync for async, default is vsync */ + /* use UPDATE_IMMEDIATE_EN instead for async? */ + tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset); + tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, + GRPH_SURFACE_UPDATE_H_RETRACE_EN, async ? 
1 : 0); + WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); /* update the scanout addresses */ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(crtc_base)); @@ -565,35 +598,14 @@ static void dce_v11_0_stop_mc_access(struct amdgpu_device *adev, crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]), CRTC_CONTROL, CRTC_MASTER_EN); if (crtc_enabled) { -#if 0 - u32 frame_count; - int j; - +#if 1 save->crtc_enabled[i] = true; tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]); if (REG_GET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN) == 0) { - amdgpu_display_vblank_wait(adev, i); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1); + /*it is correct only for RGB ; black is 0*/ + WREG32(mmCRTC_BLANK_DATA_COLOR + crtc_offsets[i], 0); tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 1); WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0); - } - /* wait for the next frame */ - frame_count = amdgpu_display_vblank_get_counter(adev, i); - for (j = 0; j < adev->usec_timeout; j++) { - if (amdgpu_display_vblank_get_counter(adev, i) != frame_count) - break; - udelay(1); - } - tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK) == 0) { - tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 1); - WREG32(mmGRPH_UPDATE + crtc_offsets[i], tmp); - } - tmp = RREG32(mmCRTC_MASTER_UPDATE_LOCK + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, CRTC_MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK) == 0) { - tmp = REG_SET_FIELD(tmp, CRTC_MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK, 1); - WREG32(mmCRTC_MASTER_UPDATE_LOCK + crtc_offsets[i], tmp); } #else /* XXX this is a hack to avoid strange behavior with EFI on certain systems */ @@ -614,54 +626,20 @@ static void dce_v11_0_stop_mc_access(struct amdgpu_device *adev, static void dce_v11_0_resume_mc_access(struct amdgpu_device *adev, struct amdgpu_mode_mc_save *save) { - u32 tmp, frame_count; - int i, j; + u32 tmp; + int i; /* update crtc base addresses */ for (i = 0; i < adev->mode_info.num_crtc; i++) { WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i], upper_32_bits(adev->mc.vram_start)); - WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i], - upper_32_bits(adev->mc.vram_start)); WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i], (u32)adev->mc.vram_start); - WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + crtc_offsets[i], - (u32)adev->mc.vram_start); if (save->crtc_enabled[i]) { - tmp = RREG32(mmCRTC_MASTER_UPDATE_MODE + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, CRTC_MASTER_UPDATE_MODE, MASTER_UPDATE_MODE) != 3) { - tmp = REG_SET_FIELD(tmp, CRTC_MASTER_UPDATE_MODE, MASTER_UPDATE_MODE, 3); - WREG32(mmCRTC_MASTER_UPDATE_MODE + crtc_offsets[i], tmp); - } - tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK)) { - tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 0); - WREG32(mmGRPH_UPDATE + crtc_offsets[i], tmp); - } - tmp = RREG32(mmCRTC_MASTER_UPDATE_LOCK + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, CRTC_MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK)) { - tmp = REG_SET_FIELD(tmp, CRTC_MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK, 0); - WREG32(mmCRTC_MASTER_UPDATE_LOCK + crtc_offsets[i], tmp); - } - for (j = 0; j < adev->usec_timeout; j++) { - tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]); - if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_SURFACE_UPDATE_PENDING) == 0) - break; - udelay(1); - } tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]); 
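/*
 * The async page-flip plumbing added across dce_v8_0/v10_0/v11_0 all
 * turns on a single bit, GRPH_SURFACE_UPDATE_H_RETRACE_EN: set, the
 * new surface address latches at horizontal retrace (async flip);
 * clear, it latches at vertical blank. A condensed form of the
 * read-modify-write used at the DCE 10/11 call sites above (the
 * function name is illustrative only):
 */
static void grph_set_flip_at_hsync(struct amdgpu_device *adev,
				   struct amdgpu_crtc *amdgpu_crtc,
				   bool async)
{
	u32 tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset);

	tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,
			    GRPH_SURFACE_UPDATE_H_RETRACE_EN, async ? 1 : 0);
	WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
}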
tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 0); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1); WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp); - WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0); - /* wait for the next frame */ - frame_count = amdgpu_display_vblank_get_counter(adev, i); - for (j = 0; j < adev->usec_timeout; j++) { - if (amdgpu_display_vblank_get_counter(adev, i) != frame_count) - break; - udelay(1); - } } } @@ -1624,6 +1602,7 @@ static const u32 pin_offsets[] = AUD4_REGISTER_OFFSET, AUD5_REGISTER_OFFSET, AUD6_REGISTER_OFFSET, + AUD7_REGISTER_OFFSET, }; static int dce_v11_0_audio_init(struct amdgpu_device *adev) @@ -1635,7 +1614,20 @@ static int dce_v11_0_audio_init(struct amdgpu_device *adev) adev->mode_info.audio.enabled = true; - adev->mode_info.audio.num_pins = 7; + switch (adev->asic_type) { + case CHIP_CARRIZO: + case CHIP_STONEY: + adev->mode_info.audio.num_pins = 7; + break; + case CHIP_POLARIS10: + adev->mode_info.audio.num_pins = 8; + break; + case CHIP_POLARIS11: + adev->mode_info.audio.num_pins = 6; + break; + default: + return -EINVAL; + } for (i = 0; i < adev->mode_info.audio.num_pins; i++) { adev->mode_info.audio.pin[i].channels = -1; @@ -2201,6 +2193,14 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, dce_v11_0_vga_enable(crtc, false); + /* Make sure surface address is updated at vertical blank rather than + * horizontal blank + */ + tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset); + tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, + GRPH_SURFACE_UPDATE_H_RETRACE_EN, 0); + WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); + WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(fb_location)); WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, @@ -2251,13 +2251,6 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmVIEWPORT_SIZE + amdgpu_crtc->crtc_offset, (viewport_w << 16) | viewport_h); - /* pageflip setup */ - /* make sure flip is at vb rather than hb */ - tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset); - tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, - GRPH_SURFACE_UPDATE_H_RETRACE_EN, 0); - WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); - /* set pageflip to happen only at start of vblank interval (front porch) */ WREG32(mmCRTC_MASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 3); @@ -2427,6 +2420,40 @@ static u32 dce_v11_0_pick_pll(struct drm_crtc *crtc) u32 pll_in_use; int pll; + if ((adev->asic_type == CHIP_POLARIS10) || + (adev->asic_type == CHIP_POLARIS11)) { + struct amdgpu_encoder *amdgpu_encoder = + to_amdgpu_encoder(amdgpu_crtc->encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + + if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder))) + return ATOM_DP_DTO; + + switch (amdgpu_encoder->encoder_id) { + case ENCODER_OBJECT_ID_INTERNAL_UNIPHY: + if (dig->linkb) + return ATOM_COMBOPHY_PLL1; + else + return ATOM_COMBOPHY_PLL0; + break; + case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1: + if (dig->linkb) + return ATOM_COMBOPHY_PLL3; + else + return ATOM_COMBOPHY_PLL2; + break; + case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2: + if (dig->linkb) + return ATOM_COMBOPHY_PLL5; + else + return ATOM_COMBOPHY_PLL4; + break; + default: + DRM_ERROR("invalid encoder_id: 0x%x\n", amdgpu_encoder->encoder_id); + return ATOM_PPLL_INVALID; + } + } + if (ENCODER_MODE_IS_DP(amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder))) { if (adev->clock.dp_extclk) /* skip 
PPLL programming if using ext clock */ @@ -2578,7 +2605,7 @@ static int dce_v11_0_crtc_cursor_set2(struct drm_crtc *crtc, return -EINVAL; } - obj = drm_gem_object_lookup(crtc->dev, file_priv, handle); + obj = drm_gem_object_lookup(file_priv, handle); if (!obj) { DRM_ERROR("Cannot find cursor object %x for crtc %d\n", handle, amdgpu_crtc->crtc_id); return -ENOENT; @@ -2782,7 +2809,17 @@ static void dce_v11_0_crtc_disable(struct drm_crtc *crtc) case ATOM_PPLL2: /* disable the ppll */ amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id, amdgpu_crtc->pll_id, - 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss); + 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss); + break; + case ATOM_COMBOPHY_PLL0: + case ATOM_COMBOPHY_PLL1: + case ATOM_COMBOPHY_PLL2: + case ATOM_COMBOPHY_PLL3: + case ATOM_COMBOPHY_PLL4: + case ATOM_COMBOPHY_PLL5: + /* disable the ppll */ + amdgpu_atombios_crtc_program_pll(crtc, ATOM_CRTC_INVALID, amdgpu_crtc->pll_id, + 0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss); break; default: break; @@ -2800,11 +2837,28 @@ static int dce_v11_0_crtc_mode_set(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *old_fb) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); + struct drm_device *dev = crtc->dev; + struct amdgpu_device *adev = dev->dev_private; if (!amdgpu_crtc->adjusted_clock) return -EINVAL; - amdgpu_atombios_crtc_set_pll(crtc, adjusted_mode); + if ((adev->asic_type == CHIP_POLARIS10) || + (adev->asic_type == CHIP_POLARIS11)) { + struct amdgpu_encoder *amdgpu_encoder = + to_amdgpu_encoder(amdgpu_crtc->encoder); + int encoder_mode = + amdgpu_atombios_encoder_get_encoder_mode(amdgpu_crtc->encoder); + + /* SetPixelClock calculates the plls and ss values now */ + amdgpu_atombios_crtc_program_pll(crtc, amdgpu_crtc->crtc_id, + amdgpu_crtc->pll_id, + encoder_mode, amdgpu_encoder->encoder_id, + adjusted_mode->clock, 0, 0, 0, 0, + amdgpu_crtc->bpc, amdgpu_crtc->ss_enabled, &amdgpu_crtc->ss); + } else { + amdgpu_atombios_crtc_set_pll(crtc, adjusted_mode); + } amdgpu_atombios_crtc_set_dtd_timing(crtc, adjusted_mode); dce_v11_0_crtc_do_set_base(crtc, old_fb, x, y, 0); amdgpu_atombios_crtc_overscan_setup(crtc, mode, adjusted_mode); @@ -2955,6 +3009,16 @@ static int dce_v11_0_early_init(void *handle) adev->mode_info.num_hpd = 6; adev->mode_info.num_dig = 9; break; + case CHIP_POLARIS10: + adev->mode_info.num_crtc = 6; + adev->mode_info.num_hpd = 6; + adev->mode_info.num_dig = 6; + break; + case CHIP_POLARIS11: + adev->mode_info.num_crtc = 5; + adev->mode_info.num_hpd = 5; + adev->mode_info.num_dig = 5; + break; default: /* FIXME: not supported yet */ return -EINVAL; @@ -2987,6 +3051,8 @@ static int dce_v11_0_sw_init(void *handle) adev->ddev->mode_config.funcs = &amdgpu_mode_funcs; + adev->ddev->mode_config.async_page_flip = true; + adev->ddev->mode_config.max_width = 16384; adev->ddev->mode_config.max_height = 16384; @@ -3057,7 +3123,15 @@ static int dce_v11_0_hw_init(void *handle) /* init dig PHYs, disp eng pll */ amdgpu_atombios_crtc_powergate_init(adev); amdgpu_atombios_encoder_init_dig(adev); - amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk); + if ((adev->asic_type == CHIP_POLARIS10) || + (adev->asic_type == CHIP_POLARIS11)) { + amdgpu_atombios_crtc_set_dce_clock(adev, adev->clock.default_dispclk, + DCE_CLOCK_TYPE_DISPCLK, ATOM_GCK_DFS); + amdgpu_atombios_crtc_set_dce_clock(adev, 0, + DCE_CLOCK_TYPE_DPREFCLK, ATOM_GCK_DFS); + } else { + amdgpu_atombios_crtc_set_disp_eng_pll(adev, adev->clock.default_dispclk); + } /* initialize hpd */ 
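/*
 * Polaris10/Polaris11 enablement in this file is a set of per-ASIC
 * switches layered on the existing paths: dedicated golden settings,
 * audio pin counts (8 on Polaris10, 6 on Polaris11, 7 on
 * Carrizo/Stoney), CRTC/HPD/DIG counts, COMBOPHY PLL selection keyed
 * off the UNIPHY encoder and link, and display clocks programmed via
 * amdgpu_atombios_crtc_set_dce_clock() instead of the legacy
 * disp_eng_pll call. The recurring shape, condensed from the hunks
 * above:
 */
switch (adev->asic_type) {
case CHIP_POLARIS10:
	/* 6 CRTCs, 8 audio pins, COMBOPHY PLLs, DFS-based dispclk */
	break;
case CHIP_POLARIS11:
	/* 5 CRTCs, 6 audio pins, COMBOPHY PLLs, DFS-based dispclk */
	break;
default:
	/* pre-Polaris behavior is unchanged */
	break;
}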
dce_v11_0_hpd_init(adev); @@ -3126,14 +3200,6 @@ static int dce_v11_0_wait_for_idle(void *handle) return 0; } -static void dce_v11_0_print_status(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - dev_info(adev->dev, "DCE 10.x registers\n"); - /* XXX todo */ -} - static int dce_v11_0_soft_reset(void *handle) { u32 srbm_soft_reset = 0, tmp; @@ -3143,8 +3209,6 @@ static int dce_v11_0_soft_reset(void *handle) srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK; if (srbm_soft_reset) { - dce_v11_0_print_status((void *)adev); - tmp = RREG32(mmSRBM_SOFT_RESET); tmp |= srbm_soft_reset; dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); @@ -3159,7 +3223,6 @@ static int dce_v11_0_soft_reset(void *handle) /* Wait a little for things to settle down */ udelay(50); - dce_v11_0_print_status((void *)adev); } return 0; } @@ -3366,7 +3429,7 @@ static int dce_v11_0_pageflip_irq(struct amdgpu_device *adev, /* wakeup usersapce */ if(works->event) - drm_send_vblank_event(adev->ddev, crtc_id, works->event); + drm_crtc_send_vblank_event(&amdgpu_crtc->base, works->event); spin_unlock_irqrestore(&adev->ddev->event_lock, flags); @@ -3497,6 +3560,7 @@ static int dce_v11_0_set_powergating_state(void *handle, } const struct amd_ip_funcs dce_v11_0_ip_funcs = { + .name = "dce_v11_0", .early_init = dce_v11_0_early_init, .late_init = NULL, .sw_init = dce_v11_0_sw_init, @@ -3508,7 +3572,6 @@ const struct amd_ip_funcs dce_v11_0_ip_funcs = { .is_idle = dce_v11_0_is_idle, .wait_for_idle = dce_v11_0_wait_for_idle, .soft_reset = dce_v11_0_soft_reset, - .print_status = dce_v11_0_print_status, .set_clockgating_state = dce_v11_0_set_clockgating_state, .set_powergating_state = dce_v11_0_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index e56b55d8c..3fb65e41a 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -233,10 +233,13 @@ static void dce_v8_0_pageflip_interrupt_fini(struct amdgpu_device *adev) * surface base address. */ static void dce_v8_0_page_flip(struct amdgpu_device *adev, - int crtc_id, u64 crtc_base) + int crtc_id, u64 crtc_base, bool async) { struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; + /* flip at hsync for async, default is vsync */ + WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, async ? 
+ GRPH_FLIP_CONTROL__GRPH_SURFACE_UPDATE_H_RETRACE_EN_MASK : 0); /* update the primary scanout addresses */ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(crtc_base)); @@ -1999,7 +2002,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, uint32_t fb_format, fb_pitch_pixels; u32 fb_swap = (GRPH_ENDIAN_NONE << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); u32 pipe_config; - u32 tmp, viewport_w, viewport_h; + u32 viewport_w, viewport_h; int r; bool bypass_lut = false; @@ -2135,6 +2138,11 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, dce_v8_0_vga_enable(crtc, false); + /* Make sure surface address is updated at vertical blank rather than + * horizontal blank + */ + WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, 0); + WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(fb_location)); WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, @@ -2182,12 +2190,6 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, WREG32(mmVIEWPORT_SIZE + amdgpu_crtc->crtc_offset, (viewport_w << 16) | viewport_h); - /* pageflip setup */ - /* make sure flip is at vb rather than hb */ - tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset); - tmp &= ~GRPH_FLIP_CONTROL__GRPH_SURFACE_UPDATE_H_RETRACE_EN_MASK; - WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); - /* set pageflip to happen only at start of vblank interval (front porch) */ WREG32(mmMASTER_UPDATE_MODE + amdgpu_crtc->crtc_offset, 3); @@ -2499,7 +2501,7 @@ static int dce_v8_0_crtc_cursor_set2(struct drm_crtc *crtc, return -EINVAL; } - obj = drm_gem_object_lookup(crtc->dev, file_priv, handle); + obj = drm_gem_object_lookup(file_priv, handle); if (!obj) { DRM_ERROR("Cannot find cursor object %x for crtc %d\n", handle, amdgpu_crtc->crtc_id); return -ENOENT; @@ -2902,6 +2904,8 @@ static int dce_v8_0_sw_init(void *handle) adev->ddev->mode_config.funcs = &amdgpu_mode_funcs; + adev->ddev->mode_config.async_page_flip = true; + adev->ddev->mode_config.max_width = 16384; adev->ddev->mode_config.max_height = 16384; @@ -3038,14 +3042,6 @@ static int dce_v8_0_wait_for_idle(void *handle) return 0; } -static void dce_v8_0_print_status(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - dev_info(adev->dev, "DCE 8.x registers\n"); - /* XXX todo */ -} - static int dce_v8_0_soft_reset(void *handle) { u32 srbm_soft_reset = 0, tmp; @@ -3055,8 +3051,6 @@ static int dce_v8_0_soft_reset(void *handle) srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK; if (srbm_soft_reset) { - dce_v8_0_print_status((void *)adev); - tmp = RREG32(mmSRBM_SOFT_RESET); tmp |= srbm_soft_reset; dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); @@ -3071,7 +3065,6 @@ static int dce_v8_0_soft_reset(void *handle) /* Wait a little for things to settle down */ udelay(50); - dce_v8_0_print_status((void *)adev); } return 0; } @@ -3379,7 +3372,7 @@ static int dce_v8_0_pageflip_irq(struct amdgpu_device *adev, /* wakeup usersapce */ if (works->event) - drm_send_vblank_event(adev->ddev, crtc_id, works->event); + drm_crtc_send_vblank_event(&amdgpu_crtc->base, works->event); spin_unlock_irqrestore(&adev->ddev->event_lock, flags); @@ -3431,6 +3424,7 @@ static int dce_v8_0_set_powergating_state(void *handle, } const struct amd_ip_funcs dce_v8_0_ip_funcs = { + .name = "dce_v8_0", .early_init = dce_v8_0_early_init, .late_init = NULL, .sw_init = dce_v8_0_sw_init, @@ -3442,7 +3436,6 @@ const struct amd_ip_funcs dce_v8_0_ip_funcs = { 
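/*
 * Two drm core API migrations recur throughout these DCE files. The
 * GEM lookup no longer takes a drm_device (it is reachable through
 * file_priv), and vblank completion events are now sent per-CRTC:
 *
 *	obj = drm_gem_object_lookup(file_priv, handle);
 *	drm_crtc_send_vblank_event(&amdgpu_crtc->base, works->event);
 *
 * replacing drm_gem_object_lookup(dev, file_priv, handle) and
 * drm_send_vblank_event(dev, crtc_id, event) at each call site.
 */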
.is_idle = dce_v8_0_is_idle, .wait_for_idle = dce_v8_0_wait_for_idle, .soft_reset = dce_v8_0_soft_reset, - .print_status = dce_v8_0_print_status, .set_clockgating_state = dce_v8_0_set_clockgating_state, .set_powergating_state = dce_v8_0_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c b/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c index ae8d0b55d..07ed7dd92 100644 --- a/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c @@ -72,6 +72,11 @@ static int fiji_dpm_sw_init(void *handle) static int fiji_dpm_sw_fini(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + release_firmware(adev->pm.fw); + adev->pm.fw = NULL; + return 0; } @@ -143,6 +148,7 @@ static int fiji_dpm_set_powergating_state(void *handle, } const struct amd_ip_funcs fiji_dpm_ip_funcs = { + .name = "fiji_dpm", .early_init = fiji_dpm_early_init, .late_init = NULL, .sw_init = fiji_dpm_sw_init, @@ -154,7 +160,6 @@ const struct amd_ip_funcs fiji_dpm_ip_funcs = { .is_idle = NULL, .wait_for_idle = NULL, .soft_reset = NULL, - .print_status = NULL, .set_clockgating_state = fiji_dpm_set_clockgating_state, .set_powergating_state = fiji_dpm_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 699cda831..507160a4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -53,7 +53,6 @@ static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev); -int gfx_v7_0_get_cu_info(struct amdgpu_device *, struct amdgpu_cu_info *); /*(DEBLOBBED)*/ @@ -853,6 +852,7 @@ static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev); static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer); static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev); static void gfx_v7_0_init_pg(struct amdgpu_device *adev); +static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev); /* * Core functions @@ -962,6 +962,22 @@ out: return err; } +static void gfx_v7_0_free_microcode(struct amdgpu_device *adev) +{ + release_firmware(adev->gfx.pfp_fw); + adev->gfx.pfp_fw = NULL; + release_firmware(adev->gfx.me_fw); + adev->gfx.me_fw = NULL; + release_firmware(adev->gfx.ce_fw); + adev->gfx.ce_fw = NULL; + release_firmware(adev->gfx.mec_fw); + adev->gfx.mec_fw = NULL; + release_firmware(adev->gfx.mec2_fw); + adev->gfx.mec2_fw = NULL; + release_firmware(adev->gfx.rlc_fw); + adev->gfx.rlc_fw = NULL; +} + /** * gfx_v7_0_tiling_mode_table_init - init the hw tiling table * @@ -1689,6 +1705,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) gfx_v7_0_tiling_mode_table_init(adev); gfx_v7_0_setup_rb(adev); + gfx_v7_0_get_cu_info(adev); /* set HW defaults for 3D engine */ WREG32(mmCP_MEQ_THRESHOLDS, @@ -2000,17 +2017,13 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring, * on the gfx ring for execution by the GPU. 
*/ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) { - bool need_ctx_switch = ring->current_ctx != ib->ctx; u32 header, control = 0; u32 next_rptr = ring->wptr + 5; - /* drop the CE preamble IB for the same context */ - if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch) - return; - - if (need_ctx_switch) + if (ctx_switch) next_rptr += 2; next_rptr += 4; @@ -2021,7 +2034,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, amdgpu_ring_write(ring, next_rptr); /* insert SWITCH_BUFFER packet before first IB in the ring frame */ - if (need_ctx_switch) { + if (ctx_switch) { amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); amdgpu_ring_write(ring, 0); } @@ -2031,7 +2044,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, else header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); - control |= ib->length_dw | (ib->vm_id << 24); + control |= ib->length_dw | (vm_id << 24); amdgpu_ring_write(ring, header); amdgpu_ring_write(ring, @@ -2044,7 +2057,8 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) { u32 header, control = 0; u32 next_rptr = ring->wptr + 5; @@ -2059,7 +2073,7 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); - control |= ib->length_dw | (ib->vm_id << 24); + control |= ib->length_dw | (vm_id << 24); amdgpu_ring_write(ring, header); amdgpu_ring_write(ring, @@ -2107,7 +2121,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[2] = 0xDEADBEEF; ib.length_dw = 3; - r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); if (r) goto err2; @@ -3024,6 +3038,19 @@ static int gfx_v7_0_cp_resume(struct amdgpu_device *adev) static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) { int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); + uint32_t seq = ring->fence_drv.sync_seq; + uint64_t addr = ring->fence_drv.gpu_addr; + + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ + WAIT_REG_MEM_FUNCTION(3) | /* equal */ + WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */ + amdgpu_ring_write(ring, addr & 0xfffffffc); + amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); + amdgpu_ring_write(ring, seq); + amdgpu_ring_write(ring, 0xffffffff); + amdgpu_ring_write(ring, 4); /* poll interval */ + if (usepfp) { /* synce CE with ME to prevent CE fetch CEIB before context switch done */ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); @@ -3051,18 +3078,6 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr) { int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); - uint32_t seq = ring->fence_drv.sync_seq; - uint64_t addr = ring->fence_drv.gpu_addr; - - amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); - amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ - WAIT_REG_MEM_FUNCTION(3) | /* equal */ - WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */ - amdgpu_ring_write(ring, addr & 0xfffffffc); - amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); - amdgpu_ring_write(ring, seq); - amdgpu_ring_write(ring, 0xffffffff); - amdgpu_ring_write(ring, 4); /* poll interval */ amdgpu_ring_write(ring, 
PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | @@ -3840,18 +3855,13 @@ static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev) static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev) { - uint32_t tmp, active_cu_number; - struct amdgpu_cu_info cu_info; - - gfx_v7_0_get_cu_info(adev, &cu_info); - tmp = cu_info.ao_cu_mask; - active_cu_number = cu_info.number; + u32 tmp; - WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, tmp); + WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask); tmp = RREG32(mmRLC_MAX_PG_CU); tmp &= ~RLC_MAX_PG_CU__MAX_POWERED_UP_CU_MASK; - tmp |= (active_cu_number << RLC_MAX_PG_CU__MAX_POWERED_UP_CU__SHIFT); + tmp |= (adev->gfx.cu_info.number << RLC_MAX_PG_CU__MAX_POWERED_UP_CU__SHIFT); WREG32(mmRLC_MAX_PG_CU, tmp); } @@ -4385,7 +4395,7 @@ static int gfx_v7_0_sw_init(void *handle) ring = &adev->gfx.gfx_ring[i]; ring->ring_obj = NULL; sprintf(ring->name, "gfx"); - r = amdgpu_ring_init(adev, ring, 1024 * 1024, + r = amdgpu_ring_init(adev, ring, 1024, PACKET3(PACKET3_NOP, 0x3FFF), 0xf, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP, AMDGPU_RING_TYPE_GFX); @@ -4409,10 +4419,10 @@ static int gfx_v7_0_sw_init(void *handle) ring->me = 1; /* first MEC */ ring->pipe = i / 8; ring->queue = i % 8; - sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue); + sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; /* type-2 packets are deprecated on MEC, use type-3 instead */ - r = amdgpu_ring_init(adev, ring, 1024 * 1024, + r = amdgpu_ring_init(adev, ring, 1024, PACKET3(PACKET3_NOP, 0x3FFF), 0xf, &adev->gfx.eop_irq, irq_type, AMDGPU_RING_TYPE_COMPUTE); @@ -4466,6 +4476,7 @@ static int gfx_v7_0_sw_fini(void *handle) gfx_v7_0_cp_compute_fini(adev); gfx_v7_0_rlc_fini(adev); gfx_v7_0_mec_fini(adev); + gfx_v7_0_free_microcode(adev); return 0; } @@ -4543,256 +4554,6 @@ static int gfx_v7_0_wait_for_idle(void *handle) return -ETIMEDOUT; } -static void gfx_v7_0_print_status(void *handle) -{ - int i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - dev_info(adev->dev, "GFX 7.x registers\n"); - dev_info(adev->dev, " GRBM_STATUS=0x%08X\n", - RREG32(mmGRBM_STATUS)); - dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n", - RREG32(mmGRBM_STATUS2)); - dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n", - RREG32(mmGRBM_STATUS_SE0)); - dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n", - RREG32(mmGRBM_STATUS_SE1)); - dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n", - RREG32(mmGRBM_STATUS_SE2)); - dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n", - RREG32(mmGRBM_STATUS_SE3)); - dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(mmCP_STAT)); - dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n", - RREG32(mmCP_STALLED_STAT1)); - dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n", - RREG32(mmCP_STALLED_STAT2)); - dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n", - RREG32(mmCP_STALLED_STAT3)); - dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n", - RREG32(mmCP_CPF_BUSY_STAT)); - dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n", - RREG32(mmCP_CPF_STALLED_STAT1)); - dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS)); - dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT)); - dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n", - RREG32(mmCP_CPC_STALLED_STAT1)); - dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS)); - - for (i = 0; i < 32; i++) { - dev_info(adev->dev, " 
GB_TILE_MODE%d=0x%08X\n", - i, RREG32(mmGB_TILE_MODE0 + (i * 4))); - } - for (i = 0; i < 16; i++) { - dev_info(adev->dev, " GB_MACROTILE_MODE%d=0x%08X\n", - i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4))); - } - for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { - dev_info(adev->dev, " se: %d\n", i); - gfx_v7_0_select_se_sh(adev, i, 0xffffffff); - dev_info(adev->dev, " PA_SC_RASTER_CONFIG=0x%08X\n", - RREG32(mmPA_SC_RASTER_CONFIG)); - dev_info(adev->dev, " PA_SC_RASTER_CONFIG_1=0x%08X\n", - RREG32(mmPA_SC_RASTER_CONFIG_1)); - } - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); - - dev_info(adev->dev, " GB_ADDR_CONFIG=0x%08X\n", - RREG32(mmGB_ADDR_CONFIG)); - dev_info(adev->dev, " HDP_ADDR_CONFIG=0x%08X\n", - RREG32(mmHDP_ADDR_CONFIG)); - dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n", - RREG32(mmDMIF_ADDR_CALC)); - - dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n", - RREG32(mmCP_MEQ_THRESHOLDS)); - dev_info(adev->dev, " SX_DEBUG_1=0x%08X\n", - RREG32(mmSX_DEBUG_1)); - dev_info(adev->dev, " TA_CNTL_AUX=0x%08X\n", - RREG32(mmTA_CNTL_AUX)); - dev_info(adev->dev, " SPI_CONFIG_CNTL=0x%08X\n", - RREG32(mmSPI_CONFIG_CNTL)); - dev_info(adev->dev, " SQ_CONFIG=0x%08X\n", - RREG32(mmSQ_CONFIG)); - dev_info(adev->dev, " DB_DEBUG=0x%08X\n", - RREG32(mmDB_DEBUG)); - dev_info(adev->dev, " DB_DEBUG2=0x%08X\n", - RREG32(mmDB_DEBUG2)); - dev_info(adev->dev, " DB_DEBUG3=0x%08X\n", - RREG32(mmDB_DEBUG3)); - dev_info(adev->dev, " CB_HW_CONTROL=0x%08X\n", - RREG32(mmCB_HW_CONTROL)); - dev_info(adev->dev, " SPI_CONFIG_CNTL_1=0x%08X\n", - RREG32(mmSPI_CONFIG_CNTL_1)); - dev_info(adev->dev, " PA_SC_FIFO_SIZE=0x%08X\n", - RREG32(mmPA_SC_FIFO_SIZE)); - dev_info(adev->dev, " VGT_NUM_INSTANCES=0x%08X\n", - RREG32(mmVGT_NUM_INSTANCES)); - dev_info(adev->dev, " CP_PERFMON_CNTL=0x%08X\n", - RREG32(mmCP_PERFMON_CNTL)); - dev_info(adev->dev, " PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n", - RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS)); - dev_info(adev->dev, " VGT_CACHE_INVALIDATION=0x%08X\n", - RREG32(mmVGT_CACHE_INVALIDATION)); - dev_info(adev->dev, " VGT_GS_VERTEX_REUSE=0x%08X\n", - RREG32(mmVGT_GS_VERTEX_REUSE)); - dev_info(adev->dev, " PA_SC_LINE_STIPPLE_STATE=0x%08X\n", - RREG32(mmPA_SC_LINE_STIPPLE_STATE)); - dev_info(adev->dev, " PA_CL_ENHANCE=0x%08X\n", - RREG32(mmPA_CL_ENHANCE)); - dev_info(adev->dev, " PA_SC_ENHANCE=0x%08X\n", - RREG32(mmPA_SC_ENHANCE)); - - dev_info(adev->dev, " CP_ME_CNTL=0x%08X\n", - RREG32(mmCP_ME_CNTL)); - dev_info(adev->dev, " CP_MAX_CONTEXT=0x%08X\n", - RREG32(mmCP_MAX_CONTEXT)); - dev_info(adev->dev, " CP_ENDIAN_SWAP=0x%08X\n", - RREG32(mmCP_ENDIAN_SWAP)); - dev_info(adev->dev, " CP_DEVICE_ID=0x%08X\n", - RREG32(mmCP_DEVICE_ID)); - - dev_info(adev->dev, " CP_SEM_WAIT_TIMER=0x%08X\n", - RREG32(mmCP_SEM_WAIT_TIMER)); - if (adev->asic_type != CHIP_HAWAII) - dev_info(adev->dev, " CP_SEM_INCOMPLETE_TIMER_CNTL=0x%08X\n", - RREG32(mmCP_SEM_INCOMPLETE_TIMER_CNTL)); - - dev_info(adev->dev, " CP_RB_WPTR_DELAY=0x%08X\n", - RREG32(mmCP_RB_WPTR_DELAY)); - dev_info(adev->dev, " CP_RB_VMID=0x%08X\n", - RREG32(mmCP_RB_VMID)); - dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n", - RREG32(mmCP_RB0_CNTL)); - dev_info(adev->dev, " CP_RB0_WPTR=0x%08X\n", - RREG32(mmCP_RB0_WPTR)); - dev_info(adev->dev, " CP_RB0_RPTR_ADDR=0x%08X\n", - RREG32(mmCP_RB0_RPTR_ADDR)); - dev_info(adev->dev, " CP_RB0_RPTR_ADDR_HI=0x%08X\n", - RREG32(mmCP_RB0_RPTR_ADDR_HI)); - dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n", - RREG32(mmCP_RB0_CNTL)); - dev_info(adev->dev, " CP_RB0_BASE=0x%08X\n", - RREG32(mmCP_RB0_BASE)); - dev_info(adev->dev, " 
CP_RB0_BASE_HI=0x%08X\n", - RREG32(mmCP_RB0_BASE_HI)); - dev_info(adev->dev, " CP_MEC_CNTL=0x%08X\n", - RREG32(mmCP_MEC_CNTL)); - dev_info(adev->dev, " CP_CPF_DEBUG=0x%08X\n", - RREG32(mmCP_CPF_DEBUG)); - - dev_info(adev->dev, " SCRATCH_ADDR=0x%08X\n", - RREG32(mmSCRATCH_ADDR)); - dev_info(adev->dev, " SCRATCH_UMSK=0x%08X\n", - RREG32(mmSCRATCH_UMSK)); - - /* init the pipes */ - mutex_lock(&adev->srbm_mutex); - for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) { - int me = (i < 4) ? 1 : 2; - int pipe = (i < 4) ? i : (i - 4); - int queue; - - dev_info(adev->dev, " me: %d, pipe: %d\n", me, pipe); - cik_srbm_select(adev, me, pipe, 0, 0); - dev_info(adev->dev, " CP_HPD_EOP_BASE_ADDR=0x%08X\n", - RREG32(mmCP_HPD_EOP_BASE_ADDR)); - dev_info(adev->dev, " CP_HPD_EOP_BASE_ADDR_HI=0x%08X\n", - RREG32(mmCP_HPD_EOP_BASE_ADDR_HI)); - dev_info(adev->dev, " CP_HPD_EOP_VMID=0x%08X\n", - RREG32(mmCP_HPD_EOP_VMID)); - dev_info(adev->dev, " CP_HPD_EOP_CONTROL=0x%08X\n", - RREG32(mmCP_HPD_EOP_CONTROL)); - - for (queue = 0; queue < 8; queue++) { - cik_srbm_select(adev, me, pipe, queue, 0); - dev_info(adev->dev, " queue: %d\n", queue); - dev_info(adev->dev, " CP_PQ_WPTR_POLL_CNTL=0x%08X\n", - RREG32(mmCP_PQ_WPTR_POLL_CNTL)); - dev_info(adev->dev, " CP_HQD_PQ_DOORBELL_CONTROL=0x%08X\n", - RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL)); - dev_info(adev->dev, " CP_HQD_ACTIVE=0x%08X\n", - RREG32(mmCP_HQD_ACTIVE)); - dev_info(adev->dev, " CP_HQD_DEQUEUE_REQUEST=0x%08X\n", - RREG32(mmCP_HQD_DEQUEUE_REQUEST)); - dev_info(adev->dev, " CP_HQD_PQ_RPTR=0x%08X\n", - RREG32(mmCP_HQD_PQ_RPTR)); - dev_info(adev->dev, " CP_HQD_PQ_WPTR=0x%08X\n", - RREG32(mmCP_HQD_PQ_WPTR)); - dev_info(adev->dev, " CP_HQD_PQ_BASE=0x%08X\n", - RREG32(mmCP_HQD_PQ_BASE)); - dev_info(adev->dev, " CP_HQD_PQ_BASE_HI=0x%08X\n", - RREG32(mmCP_HQD_PQ_BASE_HI)); - dev_info(adev->dev, " CP_HQD_PQ_CONTROL=0x%08X\n", - RREG32(mmCP_HQD_PQ_CONTROL)); - dev_info(adev->dev, " CP_HQD_PQ_WPTR_POLL_ADDR=0x%08X\n", - RREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR)); - dev_info(adev->dev, " CP_HQD_PQ_WPTR_POLL_ADDR_HI=0x%08X\n", - RREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI)); - dev_info(adev->dev, " CP_HQD_PQ_RPTR_REPORT_ADDR=0x%08X\n", - RREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR)); - dev_info(adev->dev, " CP_HQD_PQ_RPTR_REPORT_ADDR_HI=0x%08X\n", - RREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI)); - dev_info(adev->dev, " CP_HQD_PQ_DOORBELL_CONTROL=0x%08X\n", - RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL)); - dev_info(adev->dev, " CP_HQD_PQ_WPTR=0x%08X\n", - RREG32(mmCP_HQD_PQ_WPTR)); - dev_info(adev->dev, " CP_HQD_VMID=0x%08X\n", - RREG32(mmCP_HQD_VMID)); - dev_info(adev->dev, " CP_MQD_BASE_ADDR=0x%08X\n", - RREG32(mmCP_MQD_BASE_ADDR)); - dev_info(adev->dev, " CP_MQD_BASE_ADDR_HI=0x%08X\n", - RREG32(mmCP_MQD_BASE_ADDR_HI)); - dev_info(adev->dev, " CP_MQD_CONTROL=0x%08X\n", - RREG32(mmCP_MQD_CONTROL)); - } - } - cik_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - - dev_info(adev->dev, " CP_INT_CNTL_RING0=0x%08X\n", - RREG32(mmCP_INT_CNTL_RING0)); - dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n", - RREG32(mmRLC_LB_CNTL)); - dev_info(adev->dev, " RLC_CNTL=0x%08X\n", - RREG32(mmRLC_CNTL)); - dev_info(adev->dev, " RLC_CGCG_CGLS_CTRL=0x%08X\n", - RREG32(mmRLC_CGCG_CGLS_CTRL)); - dev_info(adev->dev, " RLC_LB_CNTR_INIT=0x%08X\n", - RREG32(mmRLC_LB_CNTR_INIT)); - dev_info(adev->dev, " RLC_LB_CNTR_MAX=0x%08X\n", - RREG32(mmRLC_LB_CNTR_MAX)); - dev_info(adev->dev, " RLC_LB_INIT_CU_MASK=0x%08X\n", - RREG32(mmRLC_LB_INIT_CU_MASK)); - dev_info(adev->dev, " RLC_LB_PARAMS=0x%08X\n", - 
RREG32(mmRLC_LB_PARAMS)); - dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n", - RREG32(mmRLC_LB_CNTL)); - dev_info(adev->dev, " RLC_MC_CNTL=0x%08X\n", - RREG32(mmRLC_MC_CNTL)); - dev_info(adev->dev, " RLC_UCODE_CNTL=0x%08X\n", - RREG32(mmRLC_UCODE_CNTL)); - - if (adev->asic_type == CHIP_BONAIRE) - dev_info(adev->dev, " RLC_DRIVER_CPDMA_STATUS=0x%08X\n", - RREG32(mmRLC_DRIVER_CPDMA_STATUS)); - - mutex_lock(&adev->srbm_mutex); - for (i = 0; i < 16; i++) { - cik_srbm_select(adev, 0, 0, 0, i); - dev_info(adev->dev, " VM %d:\n", i); - dev_info(adev->dev, " SH_MEM_CONFIG=0x%08X\n", - RREG32(mmSH_MEM_CONFIG)); - dev_info(adev->dev, " SH_MEM_APE1_BASE=0x%08X\n", - RREG32(mmSH_MEM_APE1_BASE)); - dev_info(adev->dev, " SH_MEM_APE1_LIMIT=0x%08X\n", - RREG32(mmSH_MEM_APE1_LIMIT)); - dev_info(adev->dev, " SH_MEM_BASES=0x%08X\n", - RREG32(mmSH_MEM_BASES)); - } - cik_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); -} - static int gfx_v7_0_soft_reset(void *handle) { u32 grbm_soft_reset = 0, srbm_soft_reset = 0; @@ -4826,7 +4587,6 @@ static int gfx_v7_0_soft_reset(void *handle) srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK; if (grbm_soft_reset || srbm_soft_reset) { - gfx_v7_0_print_status((void *)adev); /* disable CG/PG */ gfx_v7_0_fini_pg(adev); gfx_v7_0_update_cg(adev, false); @@ -4869,7 +4629,6 @@ static int gfx_v7_0_soft_reset(void *handle) } /* Wait a little for things to settle down */ udelay(50); - gfx_v7_0_print_status((void *)adev); } return 0; } @@ -5121,6 +4880,7 @@ static int gfx_v7_0_set_powergating_state(void *handle, } const struct amd_ip_funcs gfx_v7_0_ip_funcs = { + .name = "gfx_v7_0", .early_init = gfx_v7_0_early_init, .late_init = gfx_v7_0_late_init, .sw_init = gfx_v7_0_sw_init, @@ -5132,7 +4892,6 @@ const struct amd_ip_funcs gfx_v7_0_ip_funcs = { .is_idle = gfx_v7_0_is_idle, .wait_for_idle = gfx_v7_0_wait_for_idle, .soft_reset = gfx_v7_0_soft_reset, - .print_status = gfx_v7_0_print_status, .set_clockgating_state = gfx_v7_0_set_clockgating_state, .set_powergating_state = gfx_v7_0_set_powergating_state, }; @@ -5239,14 +4998,11 @@ static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev) } -int gfx_v7_0_get_cu_info(struct amdgpu_device *adev, - struct amdgpu_cu_info *cu_info) +static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) { int i, j, k, counter, active_cu_number = 0; u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; - - if (!adev || !cu_info) - return -EINVAL; + struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; memset(cu_info, 0, sizeof(*cu_info)); @@ -5277,6 +5033,4 @@ int gfx_v7_0_get_cu_info(struct amdgpu_device *adev, cu_info->number = active_cu_number; cu_info->ao_cu_mask = ao_cu_mask; - - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h index c04bfbabf..e747aa935 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h @@ -32,6 +32,5 @@ void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev); void gfx_v7_0_rlc_stop(struct amdgpu_device *adev); uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev); void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num); -int gfx_v7_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 667d04755..1127b2b62 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -27,6 +27,8 @@ #include "vi.h" #include "vid.h" #include 
"amdgpu_ucode.h" +#include "amdgpu_atombios.h" +#include "atombios_i2c.h" #include "clearstate_vi.h" #include "gmc/gmc_8_2_d.h" @@ -46,11 +48,14 @@ #include "dce/dce_10_0_d.h" #include "dce/dce_10_0_sh_mask.h" +#include "smu/smu_7_1_3_d.h" + #define GFX8_NUM_GFX_RINGS 1 #define GFX8_NUM_COMPUTE_RINGS 8 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 +#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003 #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT) @@ -84,6 +89,8 @@ enum { BPM_REG_FGCG_MAX }; +#define RLC_FormatDirectRegListLength 14 + /*(DEBLOBBED)*/ static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = @@ -216,6 +223,69 @@ static const u32 tonga_mgcg_cgcg_init[] = mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, }; +static const u32 golden_settings_polaris11_a11[] = +{ + mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208, + mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, + mmDB_DEBUG2, 0xf00fffff, 0x00000400, + mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, + mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, + mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012, + mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000, + mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, + mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, + mmSQ_CONFIG, 0x07f80000, 0x07180000, + mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, + mmTCC_CTRL, 0x00100000, 0xf31fff7f, + mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3, + mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, + mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210, + mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, +}; + +static const u32 polaris11_golden_common_all[] = +{ + mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, + mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002, + mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, + mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, + mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, + mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, +}; + +static const u32 golden_settings_polaris10_a11[] = +{ + mmATC_MISC_CG, 0x000c0fc0, 0x000c0200, + mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208, + mmCB_HW_CONTROL_2, 0, 0x0f000000, + mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, + mmDB_DEBUG2, 0xf00fffff, 0x00000400, + mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, + mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, + mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012, + mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a, + mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, + mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, + mmSQ_CONFIG, 0x07f80000, 0x07180000, + mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, + mmTCC_CTRL, 0x00100000, 0xf31fff7f, + mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7, + mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, + mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, +}; + +static const u32 polaris10_golden_common_all[] = +{ + mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, + mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012, + mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A, + mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, + mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, + mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, + mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, + mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, +}; + static const u32 fiji_golden_common_all[] = { mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, @@ -496,7 +566,7 @@ static const u32 stoney_golden_settings_a11[] = mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 
mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, - mmTCC_CTRL, 0x00100000, 0xf31fff7f, + mmTCC_CTRL, 0x00100000, 0xf31fff7f, mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010, @@ -527,6 +597,9 @@ static const u32 stoney_mgcg_cgcg_init[] = static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev); static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); +static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); +static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); +static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) { @@ -565,6 +638,27 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) tonga_golden_common_all, (const u32)ARRAY_SIZE(tonga_golden_common_all)); break; + case CHIP_POLARIS11: + amdgpu_program_register_sequence(adev, + golden_settings_polaris11_a11, + (const u32)ARRAY_SIZE(golden_settings_polaris11_a11)); + amdgpu_program_register_sequence(adev, + polaris11_golden_common_all, + (const u32)ARRAY_SIZE(polaris11_golden_common_all)); + break; + case CHIP_POLARIS10: + amdgpu_program_register_sequence(adev, + golden_settings_polaris10_a11, + (const u32)ARRAY_SIZE(golden_settings_polaris10_a11)); + amdgpu_program_register_sequence(adev, + polaris10_golden_common_all, + (const u32)ARRAY_SIZE(polaris10_golden_common_all)); + WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C); + if (adev->pdev->revision == 0xc7) { + amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD); + amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0); + } + break; case CHIP_CARRIZO: amdgpu_program_register_sequence(adev, cz_mgcg_cgcg_init, @@ -675,7 +769,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[2] = 0xDEADBEEF; ib.length_dw = 3; - r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); if (r) goto err2; @@ -708,6 +802,26 @@ err1: return r; } + +static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) { + release_firmware(adev->gfx.pfp_fw); + adev->gfx.pfp_fw = NULL; + release_firmware(adev->gfx.me_fw); + adev->gfx.me_fw = NULL; + release_firmware(adev->gfx.ce_fw); + adev->gfx.ce_fw = NULL; + release_firmware(adev->gfx.rlc_fw); + adev->gfx.rlc_fw = NULL; + release_firmware(adev->gfx.mec_fw); + adev->gfx.mec_fw = NULL; + if ((adev->asic_type != CHIP_STONEY) && + (adev->asic_type != CHIP_TOPAZ)) + release_firmware(adev->gfx.mec2_fw); + adev->gfx.mec2_fw = NULL; + + kfree(adev->gfx.rlc.register_list_format); +} + static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; @@ -716,6 +830,8 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; const struct gfx_firmware_header_v1_0 *cp_hdr; + const struct rlc_firmware_header_v2_0 *rlc_hdr; + unsigned int *tmp = NULL, i; DRM_DEBUG("\n"); @@ -732,6 +848,12 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) case CHIP_FIJI: chip_name = "fiji"; break; + case CHIP_POLARIS11: + chip_name = "polaris11"; + break; + case CHIP_POLARIS10: + chip_name = "polaris10"; + break; case CHIP_STONEY: chip_name = "stoney"; break; @@ -777,9 +899,49 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) if (err) goto out; err = amdgpu_ucode_validate(adev->gfx.rlc_fw); - cp_hdr = (const 
struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data; - adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); + adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); + + adev->gfx.rlc.save_and_restore_offset = + le32_to_cpu(rlc_hdr->save_and_restore_offset); + adev->gfx.rlc.clear_state_descriptor_offset = + le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); + adev->gfx.rlc.avail_scratch_ram_locations = + le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); + adev->gfx.rlc.reg_restore_list_size = + le32_to_cpu(rlc_hdr->reg_restore_list_size); + adev->gfx.rlc.reg_list_format_start = + le32_to_cpu(rlc_hdr->reg_list_format_start); + adev->gfx.rlc.reg_list_format_separate_start = + le32_to_cpu(rlc_hdr->reg_list_format_separate_start); + adev->gfx.rlc.starting_offsets_start = + le32_to_cpu(rlc_hdr->starting_offsets_start); + adev->gfx.rlc.reg_list_format_size_bytes = + le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); + adev->gfx.rlc.reg_list_size_bytes = + le32_to_cpu(rlc_hdr->reg_list_size_bytes); + + adev->gfx.rlc.register_list_format = + kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + + adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); + + if (!adev->gfx.rlc.register_list_format) { + err = -ENOMEM; + goto out; + } + + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) + adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); + + adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; + + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) + adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); snprintf(fw_name, sizeof(fw_name), "/*(DEBLOBBED)*/", chip_name); err = reject_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); @@ -880,6 +1042,153 @@ out: return err; } +static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, + volatile u32 *buffer) +{ + u32 count = 0, i; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + + if (adev->gfx.rlc.cs_data == NULL) + return; + if (buffer == NULL) + return; + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + buffer[count++] = cpu_to_le32(0x80000000); + buffer[count++] = cpu_to_le32(0x80000000); + + for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) { + buffer[count++] = + cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); + buffer[count++] = cpu_to_le32(ext->reg_index - + PACKET3_SET_CONTEXT_REG_START); + for (i = 0; i < ext->reg_count; i++) + buffer[count++] = cpu_to_le32(ext->extent[i]); + } else { + return; + } + } + } + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2)); + buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - + PACKET3_SET_CONTEXT_REG_START); + switch (adev->asic_type) { + case CHIP_TONGA: + case CHIP_POLARIS10: + buffer[count++] = cpu_to_le32(0x16000012); + buffer[count++] = 
cpu_to_le32(0x0000002A); + break; + case CHIP_POLARIS11: + buffer[count++] = cpu_to_le32(0x16000012); + buffer[count++] = cpu_to_le32(0x00000000); + break; + case CHIP_FIJI: + buffer[count++] = cpu_to_le32(0x3a00161a); + buffer[count++] = cpu_to_le32(0x0000002e); + break; + case CHIP_TOPAZ: + case CHIP_CARRIZO: + buffer[count++] = cpu_to_le32(0x00000002); + buffer[count++] = cpu_to_le32(0x00000000); + break; + case CHIP_STONEY: + buffer[count++] = cpu_to_le32(0x00000000); + buffer[count++] = cpu_to_le32(0x00000000); + break; + default: + buffer[count++] = cpu_to_le32(0x00000000); + buffer[count++] = cpu_to_le32(0x00000000); + break; + } + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); + buffer[count++] = cpu_to_le32(0); +} + +static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev) +{ + int r; + + /* clear state block */ + if (adev->gfx.rlc.clear_state_obj) { + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); + if (unlikely(r != 0)) + dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r); + amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + + amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); + adev->gfx.rlc.clear_state_obj = NULL; + } +} + +static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) +{ + volatile u32 *dst_ptr; + u32 dws; + const struct cs_section_def *cs_data; + int r; + + adev->gfx.rlc.cs_data = vi_cs_data; + + cs_data = adev->gfx.rlc.cs_data; + + if (cs_data) { + /* clear state block */ + adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev); + + if (adev->gfx.rlc.clear_state_obj == NULL) { + r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true, + AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, + NULL, NULL, + &adev->gfx.rlc.clear_state_obj); + if (r) { + dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); + gfx_v8_0_rlc_fini(adev); + return r; + } + } + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); + if (unlikely(r != 0)) { + gfx_v8_0_rlc_fini(adev); + return r; + } + r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.clear_state_gpu_addr); + if (r) { + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r); + gfx_v8_0_rlc_fini(adev); + return r; + } + + r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr); + if (r) { + dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r); + gfx_v8_0_rlc_fini(adev); + return r; + } + /* set up the cs buffer */ + dst_ptr = adev->gfx.rlc.cs_ptr; + gfx_v8_0_get_csb_buffer(adev, dst_ptr); + amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + } + + return 0; +} + static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) { int r; @@ -1231,7 +1540,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); /* shedule the ib on the ring */ - r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); if (r) { DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); goto fail; @@ -1265,12 +1574,13 @@ fail: return r; } -static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) +static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) { u32 gb_addr_config; u32 mc_shared_chmap, mc_arb_ramcfg; 
u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; u32 tmp; + int ret; switch (adev->asic_type) { case CHIP_TOPAZ: @@ -1301,6 +1611,34 @@ static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.max_gs_threads = 32; adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; + gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; + break; + case CHIP_POLARIS11: + ret = amdgpu_atombios_get_gfx_info(adev); + if (ret) + return ret; + adev->gfx.config.max_gprs = 256; + adev->gfx.config.max_gs_threads = 32; + adev->gfx.config.max_hw_contexts = 8; + + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; + gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN; + break; + case CHIP_POLARIS10: + ret = amdgpu_atombios_get_gfx_info(adev); + if (ret) + return ret; + adev->gfx.config.max_gprs = 256; + adev->gfx.config.max_gs_threads = 32; + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; @@ -1489,6 +1827,8 @@ static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) break; } adev->gfx.config.gb_addr_config = gb_addr_config; + + return 0; } static int gfx_v8_0_sw_init(void *handle) @@ -1522,6 +1862,12 @@ static int gfx_v8_0_sw_init(void *handle) return r; } + r = gfx_v8_0_rlc_init(adev); + if (r) { + DRM_ERROR("Failed to init rlc BOs!\n"); + return r; + } + r = gfx_v8_0_mec_init(adev); if (r) { DRM_ERROR("Failed to init MEC BOs!\n"); @@ -1539,7 +1885,7 @@ static int gfx_v8_0_sw_init(void *handle) ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0; } - r = amdgpu_ring_init(adev, ring, 1024 * 1024, + r = amdgpu_ring_init(adev, ring, 1024, PACKET3(PACKET3_NOP, 0x3FFF), 0xf, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP, AMDGPU_RING_TYPE_GFX); @@ -1563,10 +1909,10 @@ static int gfx_v8_0_sw_init(void *handle) ring->me = 1; /* first MEC */ ring->pipe = i / 8; ring->queue = i % 8; - sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue); + sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; /* type-2 packets are deprecated on MEC, use type-3 instead */ - r = amdgpu_ring_init(adev, ring, 1024 * 1024, + r = amdgpu_ring_init(adev, ring, 1024, PACKET3(PACKET3_NOP, 0x3FFF), 0xf, &adev->gfx.eop_irq, irq_type, AMDGPU_RING_TYPE_COMPUTE); @@ -1598,7 +1944,9 @@ static int gfx_v8_0_sw_init(void *handle) adev->gfx.ce_ram_size = 0x8000; - gfx_v8_0_gpu_early_init(adev); + r = gfx_v8_0_gpu_early_init(adev); + if (r) + return r; return 0; } @@ -1619,6 +1967,10 @@ static int gfx_v8_0_sw_fini(void *handle) gfx_v8_0_mec_fini(adev); + gfx_v8_0_rlc_fini(adev); + + gfx_v8_0_free_microcode(adev); + return 0; } @@ -2168,17 +2520,421 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | NUM_BANKS(ADDR_SURF_16_BANK)); mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK)); + mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK)); + mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK)); + + for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) + WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); + + for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) + if (reg_offset != 7) + WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); + + break; + case CHIP_POLARIS11: + modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | + PIPE_CONFIG(ADDR_SURF_P4_16x16)); + modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | + 
PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + + mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_8_BANK)); + + mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK)); + + for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) + WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); + + for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) + if (reg_offset != 7) + WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); + + break; + case CHIP_POLARIS10: + modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + 
MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); + modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + 
MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + + mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK)); + + mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK)); + mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK)); + mod2array[14] = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK)); for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); @@ -2664,6 +3420,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) gfx_v8_0_tiling_mode_table_init(adev); gfx_v8_0_setup_rb(adev); + gfx_v8_0_get_cu_info(adev); /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ @@ -2757,6 +3514,188 @@ static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, WREG32(mmCP_INT_CNTL_RING0, tmp); } +static void gfx_v8_0_init_csb(struct amdgpu_device *adev) +{ + /* csib */ + WREG32(mmRLC_CSIB_ADDR_HI, + adev->gfx.rlc.clear_state_gpu_addr >> 32); + WREG32(mmRLC_CSIB_ADDR_LO, + adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); + WREG32(mmRLC_CSIB_LENGTH, + adev->gfx.rlc.clear_state_size); +} + +static void gfx_v8_0_parse_ind_reg_list(int *register_list_format, + int ind_offset, + int list_size, + int *unique_indices, + int *indices_count, + int max_indices, + int *ind_start_offsets, + int *offset_count, + int max_offset) +{ + int indices; + bool new_entry = true; + + for (; ind_offset < list_size; ind_offset++) { + + if (new_entry) { + new_entry = false; + ind_start_offsets[*offset_count] = ind_offset; + *offset_count = *offset_count + 1; + BUG_ON(*offset_count >= max_offset); + } + + if (register_list_format[ind_offset] == 0xFFFFFFFF) { + new_entry = true; + continue; + } + + ind_offset += 2; + + /* look for the matching indice */ + for (indices = 0; + indices < *indices_count; + indices++) { + if (unique_indices[indices] == + register_list_format[ind_offset]) + break; + } + + if (indices >= *indices_count) { + unique_indices[*indices_count] = + register_list_format[ind_offset]; + indices = *indices_count; + *indices_count = *indices_count + 1; + BUG_ON(*indices_count >= max_indices); + } + + register_list_format[ind_offset] = indices; + } +} + +static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) +{ + int i, temp, data; + int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0}; + int indices_count = 0; + int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + int offset_count = 0; + + int list_size; + unsigned int *register_list_format = + kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); + if (register_list_format == NULL) + return -ENOMEM; + memcpy(register_list_format, adev->gfx.rlc.register_list_format, + adev->gfx.rlc.reg_list_format_size_bytes); + + gfx_v8_0_parse_ind_reg_list(register_list_format, + RLC_FormatDirectRegListLength, + adev->gfx.rlc.reg_list_format_size_bytes >> 2, + unique_indices, + &indices_count, + sizeof(unique_indices) / sizeof(int), + indirect_start_offsets, + &offset_count, + sizeof(indirect_start_offsets)/sizeof(int)); + + /* save and restore list */ + temp = RREG32(mmRLC_SRM_CNTL); + temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; + WREG32(mmRLC_SRM_CNTL, temp); + + WREG32(mmRLC_SRM_ARAM_ADDR, 0); + for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) + WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]); + + /* indirect list */ + WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start); + for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++) + WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]); + + 
list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; + list_size = list_size >> 1; + WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size); + WREG32(mmRLC_GPM_SCRATCH_DATA, list_size); + + /* starting offsets starts */ + WREG32(mmRLC_GPM_SCRATCH_ADDR, + adev->gfx.rlc.starting_offsets_start); + for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++) + WREG32(mmRLC_GPM_SCRATCH_DATA, + indirect_start_offsets[i]); + + /* unique indices */ + temp = mmRLC_SRM_INDEX_CNTL_ADDR_0; + data = mmRLC_SRM_INDEX_CNTL_DATA_0; + for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) { + amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false); + amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false); + } + kfree(register_list_format); + + return 0; +} + +static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev) +{ + uint32_t data; + + data = RREG32(mmRLC_SRM_CNTL); + data |= RLC_SRM_CNTL__SRM_ENABLE_MASK; + WREG32(mmRLC_SRM_CNTL, data); +} + +static void polaris11_init_power_gating(struct amdgpu_device *adev) +{ + uint32_t data; + + if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_GFX_SMG | + AMD_PG_SUPPORT_GFX_DMG)) { + data = RREG32(mmCP_RB_WPTR_POLL_CNTL); + data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; + data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); + WREG32(mmCP_RB_WPTR_POLL_CNTL, data); + + data = 0; + data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); + data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); + data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); + data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); + WREG32(mmRLC_PG_DELAY, data); + + data = RREG32(mmRLC_PG_DELAY_2); + data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; + data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); + WREG32(mmRLC_PG_DELAY_2, data); + + data = RREG32(mmRLC_AUTO_PG_CTRL); + data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; + data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); + WREG32(mmRLC_AUTO_PG_CTRL, data); + } +} + +static void gfx_v8_0_init_pg(struct amdgpu_device *adev) +{ + if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_GFX_SMG | + AMD_PG_SUPPORT_GFX_DMG | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_GDS | + AMD_PG_SUPPORT_RLC_SMU_HS)) { + gfx_v8_0_init_csb(adev); + gfx_v8_0_init_save_restore_list(adev); + gfx_v8_0_enable_save_restore_machine(adev); + + if (adev->asic_type == CHIP_POLARIS11) + polaris11_init_power_gating(adev); + } +} + void gfx_v8_0_rlc_stop(struct amdgpu_device *adev) { u32 tmp = RREG32(mmRLC_CNTL); @@ -2827,12 +3766,17 @@ static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) /* disable CG */ WREG32(mmRLC_CGCG_CGLS_CTRL, 0); + if (adev->asic_type == CHIP_POLARIS11 || + adev->asic_type == CHIP_POLARIS10) + WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0); /* disable PG */ WREG32(mmRLC_PG_CNTL, 0); gfx_v8_0_rlc_reset(adev); + gfx_v8_0_init_pg(adev); + if (!adev->pp_enabled) { if (!adev->firmware.smu_load) { /* legacy rlc firmware loading */ @@ -3004,18 +3948,27 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); switch (adev->asic_type) { case CHIP_TONGA: + case CHIP_POLARIS10: amdgpu_ring_write(ring, 0x16000012); amdgpu_ring_write(ring, 0x0000002A); break; + case CHIP_POLARIS11: + amdgpu_ring_write(ring, 0x16000012); + amdgpu_ring_write(ring, 0x00000000); + break; case CHIP_FIJI: amdgpu_ring_write(ring, 0x3a00161a); 
amdgpu_ring_write(ring, 0x0000002e); break; - case CHIP_TOPAZ: case CHIP_CARRIZO: amdgpu_ring_write(ring, 0x00000002); amdgpu_ring_write(ring, 0x00000000); break; + case CHIP_TOPAZ: + amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ? + 0x00000000 : 0x00000002); + amdgpu_ring_write(ring, 0x00000000); + break; case CHIP_STONEY: amdgpu_ring_write(ring, 0x00000000); amdgpu_ring_write(ring, 0x00000000); @@ -3090,6 +4043,8 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) if (ring->use_doorbell) { tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_HIT, 0); tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 1); } else { @@ -3648,7 +4603,9 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) if (use_doorbell) { if ((adev->asic_type == CHIP_CARRIZO) || (adev->asic_type == CHIP_FIJI) || - (adev->asic_type == CHIP_STONEY)) { + (adev->asic_type == CHIP_STONEY) || + (adev->asic_type == CHIP_POLARIS11) || + (adev->asic_type == CHIP_POLARIS10)) { WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, @@ -3682,7 +4639,9 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); WREG32(mmCP_HQD_PERSISTENT_STATE, tmp); mqd->cp_hqd_persistent_state = tmp; - if (adev->asic_type == CHIP_STONEY) { + if (adev->asic_type == CHIP_STONEY || + adev->asic_type == CHIP_POLARIS11 || + adev->asic_type == CHIP_POLARIS10) { tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL); tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1); WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp); @@ -3814,6 +4773,9 @@ static int gfx_v8_0_hw_fini(void *handle) gfx_v8_0_rlc_stop(adev); gfx_v8_0_cp_compute_fini(adev); + amdgpu_set_powergating_state(adev, + AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); + return 0; } @@ -3858,185 +4820,6 @@ static int gfx_v8_0_wait_for_idle(void *handle) return -ETIMEDOUT; } -static void gfx_v8_0_print_status(void *handle) -{ - int i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - dev_info(adev->dev, "GFX 8.x registers\n"); - dev_info(adev->dev, " GRBM_STATUS=0x%08X\n", - RREG32(mmGRBM_STATUS)); - dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n", - RREG32(mmGRBM_STATUS2)); - dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n", - RREG32(mmGRBM_STATUS_SE0)); - dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n", - RREG32(mmGRBM_STATUS_SE1)); - dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n", - RREG32(mmGRBM_STATUS_SE2)); - dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n", - RREG32(mmGRBM_STATUS_SE3)); - dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(mmCP_STAT)); - dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n", - RREG32(mmCP_STALLED_STAT1)); - dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n", - RREG32(mmCP_STALLED_STAT2)); - dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n", - RREG32(mmCP_STALLED_STAT3)); - dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n", - RREG32(mmCP_CPF_BUSY_STAT)); - dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n", - RREG32(mmCP_CPF_STALLED_STAT1)); - dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS)); - dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT)); - dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n", - RREG32(mmCP_CPC_STALLED_STAT1)); - dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS)); - - for (i = 0; i < 32; i++) { 
- dev_info(adev->dev, " GB_TILE_MODE%d=0x%08X\n", - i, RREG32(mmGB_TILE_MODE0 + (i * 4))); - } - for (i = 0; i < 16; i++) { - dev_info(adev->dev, " GB_MACROTILE_MODE%d=0x%08X\n", - i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4))); - } - for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { - dev_info(adev->dev, " se: %d\n", i); - gfx_v8_0_select_se_sh(adev, i, 0xffffffff); - dev_info(adev->dev, " PA_SC_RASTER_CONFIG=0x%08X\n", - RREG32(mmPA_SC_RASTER_CONFIG)); - dev_info(adev->dev, " PA_SC_RASTER_CONFIG_1=0x%08X\n", - RREG32(mmPA_SC_RASTER_CONFIG_1)); - } - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); - - dev_info(adev->dev, " GB_ADDR_CONFIG=0x%08X\n", - RREG32(mmGB_ADDR_CONFIG)); - dev_info(adev->dev, " HDP_ADDR_CONFIG=0x%08X\n", - RREG32(mmHDP_ADDR_CONFIG)); - dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n", - RREG32(mmDMIF_ADDR_CALC)); - - dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n", - RREG32(mmCP_MEQ_THRESHOLDS)); - dev_info(adev->dev, " SX_DEBUG_1=0x%08X\n", - RREG32(mmSX_DEBUG_1)); - dev_info(adev->dev, " TA_CNTL_AUX=0x%08X\n", - RREG32(mmTA_CNTL_AUX)); - dev_info(adev->dev, " SPI_CONFIG_CNTL=0x%08X\n", - RREG32(mmSPI_CONFIG_CNTL)); - dev_info(adev->dev, " SQ_CONFIG=0x%08X\n", - RREG32(mmSQ_CONFIG)); - dev_info(adev->dev, " DB_DEBUG=0x%08X\n", - RREG32(mmDB_DEBUG)); - dev_info(adev->dev, " DB_DEBUG2=0x%08X\n", - RREG32(mmDB_DEBUG2)); - dev_info(adev->dev, " DB_DEBUG3=0x%08X\n", - RREG32(mmDB_DEBUG3)); - dev_info(adev->dev, " CB_HW_CONTROL=0x%08X\n", - RREG32(mmCB_HW_CONTROL)); - dev_info(adev->dev, " SPI_CONFIG_CNTL_1=0x%08X\n", - RREG32(mmSPI_CONFIG_CNTL_1)); - dev_info(adev->dev, " PA_SC_FIFO_SIZE=0x%08X\n", - RREG32(mmPA_SC_FIFO_SIZE)); - dev_info(adev->dev, " VGT_NUM_INSTANCES=0x%08X\n", - RREG32(mmVGT_NUM_INSTANCES)); - dev_info(adev->dev, " CP_PERFMON_CNTL=0x%08X\n", - RREG32(mmCP_PERFMON_CNTL)); - dev_info(adev->dev, " PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n", - RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS)); - dev_info(adev->dev, " VGT_CACHE_INVALIDATION=0x%08X\n", - RREG32(mmVGT_CACHE_INVALIDATION)); - dev_info(adev->dev, " VGT_GS_VERTEX_REUSE=0x%08X\n", - RREG32(mmVGT_GS_VERTEX_REUSE)); - dev_info(adev->dev, " PA_SC_LINE_STIPPLE_STATE=0x%08X\n", - RREG32(mmPA_SC_LINE_STIPPLE_STATE)); - dev_info(adev->dev, " PA_CL_ENHANCE=0x%08X\n", - RREG32(mmPA_CL_ENHANCE)); - dev_info(adev->dev, " PA_SC_ENHANCE=0x%08X\n", - RREG32(mmPA_SC_ENHANCE)); - - dev_info(adev->dev, " CP_ME_CNTL=0x%08X\n", - RREG32(mmCP_ME_CNTL)); - dev_info(adev->dev, " CP_MAX_CONTEXT=0x%08X\n", - RREG32(mmCP_MAX_CONTEXT)); - dev_info(adev->dev, " CP_ENDIAN_SWAP=0x%08X\n", - RREG32(mmCP_ENDIAN_SWAP)); - dev_info(adev->dev, " CP_DEVICE_ID=0x%08X\n", - RREG32(mmCP_DEVICE_ID)); - - dev_info(adev->dev, " CP_SEM_WAIT_TIMER=0x%08X\n", - RREG32(mmCP_SEM_WAIT_TIMER)); - - dev_info(adev->dev, " CP_RB_WPTR_DELAY=0x%08X\n", - RREG32(mmCP_RB_WPTR_DELAY)); - dev_info(adev->dev, " CP_RB_VMID=0x%08X\n", - RREG32(mmCP_RB_VMID)); - dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n", - RREG32(mmCP_RB0_CNTL)); - dev_info(adev->dev, " CP_RB0_WPTR=0x%08X\n", - RREG32(mmCP_RB0_WPTR)); - dev_info(adev->dev, " CP_RB0_RPTR_ADDR=0x%08X\n", - RREG32(mmCP_RB0_RPTR_ADDR)); - dev_info(adev->dev, " CP_RB0_RPTR_ADDR_HI=0x%08X\n", - RREG32(mmCP_RB0_RPTR_ADDR_HI)); - dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n", - RREG32(mmCP_RB0_CNTL)); - dev_info(adev->dev, " CP_RB0_BASE=0x%08X\n", - RREG32(mmCP_RB0_BASE)); - dev_info(adev->dev, " CP_RB0_BASE_HI=0x%08X\n", - RREG32(mmCP_RB0_BASE_HI)); - dev_info(adev->dev, " CP_MEC_CNTL=0x%08X\n", - RREG32(mmCP_MEC_CNTL)); 
- dev_info(adev->dev, " CP_CPF_DEBUG=0x%08X\n", - RREG32(mmCP_CPF_DEBUG)); - - dev_info(adev->dev, " SCRATCH_ADDR=0x%08X\n", - RREG32(mmSCRATCH_ADDR)); - dev_info(adev->dev, " SCRATCH_UMSK=0x%08X\n", - RREG32(mmSCRATCH_UMSK)); - - dev_info(adev->dev, " CP_INT_CNTL_RING0=0x%08X\n", - RREG32(mmCP_INT_CNTL_RING0)); - dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n", - RREG32(mmRLC_LB_CNTL)); - dev_info(adev->dev, " RLC_CNTL=0x%08X\n", - RREG32(mmRLC_CNTL)); - dev_info(adev->dev, " RLC_CGCG_CGLS_CTRL=0x%08X\n", - RREG32(mmRLC_CGCG_CGLS_CTRL)); - dev_info(adev->dev, " RLC_LB_CNTR_INIT=0x%08X\n", - RREG32(mmRLC_LB_CNTR_INIT)); - dev_info(adev->dev, " RLC_LB_CNTR_MAX=0x%08X\n", - RREG32(mmRLC_LB_CNTR_MAX)); - dev_info(adev->dev, " RLC_LB_INIT_CU_MASK=0x%08X\n", - RREG32(mmRLC_LB_INIT_CU_MASK)); - dev_info(adev->dev, " RLC_LB_PARAMS=0x%08X\n", - RREG32(mmRLC_LB_PARAMS)); - dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n", - RREG32(mmRLC_LB_CNTL)); - dev_info(adev->dev, " RLC_MC_CNTL=0x%08X\n", - RREG32(mmRLC_MC_CNTL)); - dev_info(adev->dev, " RLC_UCODE_CNTL=0x%08X\n", - RREG32(mmRLC_UCODE_CNTL)); - - mutex_lock(&adev->srbm_mutex); - for (i = 0; i < 16; i++) { - vi_srbm_select(adev, 0, 0, 0, i); - dev_info(adev->dev, " VM %d:\n", i); - dev_info(adev->dev, " SH_MEM_CONFIG=0x%08X\n", - RREG32(mmSH_MEM_CONFIG)); - dev_info(adev->dev, " SH_MEM_APE1_BASE=0x%08X\n", - RREG32(mmSH_MEM_APE1_BASE)); - dev_info(adev->dev, " SH_MEM_APE1_LIMIT=0x%08X\n", - RREG32(mmSH_MEM_APE1_LIMIT)); - dev_info(adev->dev, " SH_MEM_BASES=0x%08X\n", - RREG32(mmSH_MEM_BASES)); - } - vi_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); -} - static int gfx_v8_0_soft_reset(void *handle) { u32 grbm_soft_reset = 0, srbm_soft_reset = 0; @@ -4077,7 +4860,6 @@ static int gfx_v8_0_soft_reset(void *handle) SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); if (grbm_soft_reset || srbm_soft_reset) { - gfx_v8_0_print_status((void *)adev); /* stop the rlc */ gfx_v8_0_rlc_stop(adev); @@ -4137,7 +4919,6 @@ static int gfx_v8_0_soft_reset(void *handle) /* Wait a little for things to settle down */ udelay(50); - gfx_v8_0_print_status((void *)adev); } return 0; } @@ -4219,6 +5000,7 @@ static int gfx_v8_0_early_init(void *handle) gfx_v8_0_set_ring_funcs(adev); gfx_v8_0_set_irq_funcs(adev); gfx_v8_0_set_gds_init(adev); + gfx_v8_0_set_rlc_funcs(adev); return 0; } @@ -4241,17 +5023,109 @@ static int gfx_v8_0_late_init(void *handle) if (r) return r; + amdgpu_set_powergating_state(adev, + AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE); + return 0; } +static void polaris11_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, temp; + + /* Send msg to SMU via Powerplay */ + amdgpu_set_powergating_state(adev, + AMD_IP_BLOCK_TYPE_SMC, + enable ? 
AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE); + + if (enable) { + /* Enable static MGPG */ + temp = data = RREG32(mmRLC_PG_CNTL); + data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; + + if (temp != data) + WREG32(mmRLC_PG_CNTL, data); + } else { + temp = data = RREG32(mmRLC_PG_CNTL); + data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; + + if (temp != data) + WREG32(mmRLC_PG_CNTL, data); + } +} + +static void polaris11_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, temp; + + if (enable) { + /* Enable dynamic MGPG */ + temp = data = RREG32(mmRLC_PG_CNTL); + data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; + + if (temp != data) + WREG32(mmRLC_PG_CNTL, data); + } else { + temp = data = RREG32(mmRLC_PG_CNTL); + data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; + + if (temp != data) + WREG32(mmRLC_PG_CNTL, data); + } +} + +static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, temp; + + if (enable) { + /* Enable quick PG */ + temp = data = RREG32(mmRLC_PG_CNTL); + data |= 0x100000; + + if (temp != data) + WREG32(mmRLC_PG_CNTL, data); + } else { + temp = data = RREG32(mmRLC_PG_CNTL); + data &= ~0x100000; + + if (temp != data) + WREG32(mmRLC_PG_CNTL, data); + } +} + static int gfx_v8_0_set_powergating_state(void *handle, enum amd_powergating_state state) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) + return 0; + + switch (adev->asic_type) { + case CHIP_POLARIS11: + if (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) + polaris11_enable_gfx_static_mg_power_gating(adev, + state == AMD_PG_STATE_GATE ? true : false); + else if (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) + polaris11_enable_gfx_dynamic_mg_power_gating(adev, + state == AMD_PG_STATE_GATE ? true : false); + else + polaris11_enable_gfx_quick_mg_power_gating(adev, + state == AMD_PG_STATE_GATE ? 
true : false); + break; + default: + break; + } + return 0; } -static void fiji_send_serdes_cmd(struct amdgpu_device *adev, - uint32_t reg_addr, uint32_t cmd) +static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, + uint32_t reg_addr, uint32_t cmd) { uint32_t data; @@ -4261,7 +5135,8 @@ static void fiji_send_serdes_cmd(struct amdgpu_device *adev, WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); data = RREG32(mmRLC_SERDES_WR_CTRL); - data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | + if (adev->asic_type == CHIP_STONEY) + data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | RLC_SERDES_WR_CTRL__P1_SELECT_MASK | RLC_SERDES_WR_CTRL__P2_SELECT_MASK | @@ -4269,42 +5144,218 @@ static void fiji_send_serdes_cmd(struct amdgpu_device *adev, RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | RLC_SERDES_WR_CTRL__POWER_UP_MASK | RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | - RLC_SERDES_WR_CTRL__BPM_DATA_MASK | - RLC_SERDES_WR_CTRL__REG_ADDR_MASK | RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); + else + data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | + RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | + RLC_SERDES_WR_CTRL__P1_SELECT_MASK | + RLC_SERDES_WR_CTRL__P2_SELECT_MASK | + RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | + RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | + RLC_SERDES_WR_CTRL__POWER_UP_MASK | + RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | + RLC_SERDES_WR_CTRL__BPM_DATA_MASK | + RLC_SERDES_WR_CTRL__REG_ADDR_MASK | + RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK | - (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | - (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | - (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); + (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | + (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | + (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); WREG32(mmRLC_SERDES_WR_CTRL, data); } -static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev, - bool enable) +#define MSG_ENTER_RLC_SAFE_MODE 1 +#define MSG_EXIT_RLC_SAFE_MODE 0 + +#define RLC_GPR_REG2__REQ_MASK 0x00000001 +#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 +#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e + +static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev) +{ + u32 data = 0; + unsigned i; + + data = RREG32(mmRLC_CNTL); + if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0) + return; + + if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) || + (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | + AMD_PG_SUPPORT_GFX_DMG))) { + data |= RLC_GPR_REG2__REQ_MASK; + data &= ~RLC_GPR_REG2__MESSAGE_MASK; + data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT); + WREG32(mmRLC_GPR_REG2, data); + + for (i = 0; i < adev->usec_timeout; i++) { + if ((RREG32(mmRLC_GPM_STAT) & + (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | + RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == + (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | + RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) + break; + udelay(1); + } + + for (i = 0; i < adev->usec_timeout; i++) { + if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0) + break; + udelay(1); + } + adev->gfx.rlc.in_safe_mode = true; + } +} + +static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev) +{ + u32 data; + unsigned i; + + data = RREG32(mmRLC_CNTL); + if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0) + return; + + if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) || + (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | + AMD_PG_SUPPORT_GFX_DMG))) { + 
data |= RLC_GPR_REG2__REQ_MASK; + data &= ~RLC_GPR_REG2__MESSAGE_MASK; + data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT); + WREG32(mmRLC_GPR_REG2, data); + adev->gfx.rlc.in_safe_mode = false; + } + + for (i = 0; i < adev->usec_timeout; i++) { + if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0) + break; + udelay(1); + } +} + +static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) +{ + u32 data; + unsigned i; + + data = RREG32(mmRLC_CNTL); + if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) + return; + + if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { + data |= RLC_SAFE_MODE__CMD_MASK; + data &= ~RLC_SAFE_MODE__MESSAGE_MASK; + data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); + WREG32(mmRLC_SAFE_MODE, data); + + for (i = 0; i < adev->usec_timeout; i++) { + if ((RREG32(mmRLC_GPM_STAT) & + (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | + RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == + (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | + RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) + break; + udelay(1); + } + + for (i = 0; i < adev->usec_timeout; i++) { + if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0) + break; + udelay(1); + } + adev->gfx.rlc.in_safe_mode = true; + } +} + +static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) +{ + u32 data = 0; + unsigned i; + + data = RREG32(mmRLC_CNTL); + if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) + return; + + if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { + if (adev->gfx.rlc.in_safe_mode) { + data |= RLC_SAFE_MODE__CMD_MASK; + data &= ~RLC_SAFE_MODE__MESSAGE_MASK; + WREG32(mmRLC_SAFE_MODE, data); + adev->gfx.rlc.in_safe_mode = false; + } + } + + for (i = 0; i < adev->usec_timeout; i++) { + if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0) + break; + udelay(1); + } +} + +static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev) +{ + adev->gfx.rlc.in_safe_mode = true; +} + +static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev) +{ + adev->gfx.rlc.in_safe_mode = false; +} + +static const struct amdgpu_rlc_funcs cz_rlc_funcs = { + .enter_safe_mode = cz_enter_rlc_safe_mode, + .exit_safe_mode = cz_exit_rlc_safe_mode +}; + +static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { + .enter_safe_mode = iceland_enter_rlc_safe_mode, + .exit_safe_mode = iceland_exit_rlc_safe_mode +}; + +static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = { + .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode, + .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode +}; + +static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) { uint32_t temp, data; + adev->gfx.rlc.funcs->enter_safe_mode(adev); + /* It is disabled by HW by default */ - if (enable) { - /* 1 - RLC memory Light sleep */ - temp = data = RREG32(mmRLC_MEM_SLP_CNTL); - data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; - if (temp != data) - WREG32(mmRLC_MEM_SLP_CNTL, data); + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { + /* 1 - RLC memory Light sleep */ + temp = data = RREG32(mmRLC_MEM_SLP_CNTL); + data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; + if (temp != data) + WREG32(mmRLC_MEM_SLP_CNTL, data); + } - /* 2 - CP memory Light sleep */ - temp = data = RREG32(mmCP_MEM_SLP_CNTL); - data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; - if (temp != data) - WREG32(mmCP_MEM_SLP_CNTL, data); + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { + /* 2 - CP 
memory Light sleep */ + temp = data = RREG32(mmCP_MEM_SLP_CNTL); + data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; + if (temp != data) + WREG32(mmCP_MEM_SLP_CNTL, data); + } + } /* 3 - RLC_CGTT_MGCG_OVERRIDE */ temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); - data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | - RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | - RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | - RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); + if (adev->flags & AMD_IS_APU) + data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | + RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | + RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK); + else + data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | + RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | + RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | + RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); if (temp != data) WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); @@ -4313,19 +5364,23 @@ static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev, gfx_v8_0_wait_for_rlc_serdes(adev); /* 5 - clear mgcg override */ - fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); - - /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */ - temp = data = RREG32(mmCGTS_SM_CTRL_REG); - data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); - data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); - data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; - data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; - data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; - data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; - data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); - if (temp != data) - WREG32(mmCGTS_SM_CTRL_REG, data); + gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); + + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) { + /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */ + temp = data = RREG32(mmCGTS_SM_CTRL_REG); + data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); + data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); + data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; + data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; + if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) && + (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS)) + data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; + data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; + data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); + if (temp != data) + WREG32(mmCGTS_SM_CTRL_REG, data); + } udelay(50); /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ @@ -4365,23 +5420,27 @@ static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev, gfx_v8_0_wait_for_rlc_serdes(adev); /* 6 - set mgcg override */ - fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD); + gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD); udelay(50); /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ gfx_v8_0_wait_for_rlc_serdes(adev); } + + adev->gfx.rlc.funcs->exit_safe_mode(adev); } -static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev, - bool enable) +static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, + bool enable) { uint32_t temp, temp1, data, data1; temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); - if (enable) { + adev->gfx.rlc.funcs->enter_safe_mode(adev); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/ * Cmp_busy/GFX_Idle interrupts */ @@ -4396,25 +5455,29 @@ static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev, gfx_v8_0_wait_for_rlc_serdes(adev); /* 3 - clear cgcg override */ - fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, 
CLE_BPM_SERDES_CMD); + gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ gfx_v8_0_wait_for_rlc_serdes(adev); /* 4 - write cmd to set CGLS */ - fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); + gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); /* 5 - enable cgcg */ data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; - /* enable cgls*/ - data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { + /* enable cgls*/ + data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; - temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); - data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK; + temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); + data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK; - if (temp1 != data1) - WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); + if (temp1 != data1) + WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); + } else { + data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; + } if (temp != data) WREG32(mmRLC_CGCG_CGLS_CTRL, data); @@ -4439,36 +5502,38 @@ static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev, gfx_v8_0_wait_for_rlc_serdes(adev); /* write cmd to Set CGCG Overrride */ - fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); + gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ gfx_v8_0_wait_for_rlc_serdes(adev); /* write cmd to Clear CGLS */ - fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD); + gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD); /* disable cgcg, cgls should be disabled too. */ data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | - RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); + RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); if (temp != data) WREG32(mmRLC_CGCG_CGLS_CTRL, data); } + + adev->gfx.rlc.funcs->exit_safe_mode(adev); } -static int fiji_update_gfx_clock_gating(struct amdgpu_device *adev, - bool enable) +static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, + bool enable) { if (enable) { /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) * === MGCG + MGLS + TS(CG/LS) === */ - fiji_update_medium_grain_clock_gating(adev, enable); - fiji_update_coarse_grain_clock_gating(adev, enable); + gfx_v8_0_update_medium_grain_clock_gating(adev, enable); + gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); } else { /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) * === CGCG + CGLS === */ - fiji_update_coarse_grain_clock_gating(adev, enable); - fiji_update_medium_grain_clock_gating(adev, enable); + gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); + gfx_v8_0_update_medium_grain_clock_gating(adev, enable); } return 0; } @@ -4480,8 +5545,10 @@ static int gfx_v8_0_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_FIJI: - fiji_update_gfx_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + case CHIP_CARRIZO: + case CHIP_STONEY: + gfx_v8_0_update_gfx_clock_gating(adev, + state == AMD_CG_STATE_GATE ? 
true : false); break; default: break; @@ -4571,17 +5638,13 @@ static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) } static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) { - bool need_ctx_switch = ring->current_ctx != ib->ctx; u32 header, control = 0; u32 next_rptr = ring->wptr + 5; - /* drop the CE preamble IB for the same context */ - if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch) - return; - - if (need_ctx_switch) + if (ctx_switch) next_rptr += 2; next_rptr += 4; @@ -4592,7 +5655,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, amdgpu_ring_write(ring, next_rptr); /* insert SWITCH_BUFFER packet before first IB in the ring frame */ - if (need_ctx_switch) { + if (ctx_switch) { amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); amdgpu_ring_write(ring, 0); } @@ -4602,7 +5665,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, else header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); - control |= ib->length_dw | (ib->vm_id << 24); + control |= ib->length_dw | (vm_id << 24); amdgpu_ring_write(ring, header); amdgpu_ring_write(ring, @@ -4615,7 +5678,8 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) { u32 header, control = 0; u32 next_rptr = ring->wptr + 5; @@ -4631,7 +5695,7 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); - control |= ib->length_dw | (ib->vm_id << 24); + control |= ib->length_dw | (vm_id << 24); amdgpu_ring_write(ring, header); amdgpu_ring_write(ring, @@ -4653,6 +5717,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN | + EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5))); amdgpu_ring_write(ring, addr & 0xfffffffc); @@ -4991,6 +6056,7 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev, } const struct amd_ip_funcs gfx_v8_0_ip_funcs = { + .name = "gfx_v8_0", .early_init = gfx_v8_0_early_init, .late_init = gfx_v8_0_late_init, .sw_init = gfx_v8_0_sw_init, @@ -5002,7 +6068,6 @@ const struct amd_ip_funcs gfx_v8_0_ip_funcs = { .is_idle = gfx_v8_0_is_idle, .wait_for_idle = gfx_v8_0_wait_for_idle, .soft_reset = gfx_v8_0_soft_reset, - .print_status = gfx_v8_0_print_status, .set_clockgating_state = gfx_v8_0_set_clockgating_state, .set_powergating_state = gfx_v8_0_set_powergating_state, }; @@ -5081,6 +6146,22 @@ static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs; } +static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_TOPAZ: + case CHIP_STONEY: + adev->gfx.rlc.funcs = &iceland_rlc_funcs; + break; + case CHIP_CARRIZO: + adev->gfx.rlc.funcs = &cz_rlc_funcs; + break; + default: + adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs; + break; + } +} + static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) { /* init asci gds info */ @@ -5124,14 +6205,11 @@ static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev) return (~data) & mask; } -int gfx_v8_0_get_cu_info(struct amdgpu_device *adev, - struct amdgpu_cu_info *cu_info) +static void 
gfx_v8_0_get_cu_info(struct amdgpu_device *adev) { int i, j, k, counter, active_cu_number = 0; u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; - - if (!adev || !cu_info) - return -EINVAL; + struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; memset(cu_info, 0, sizeof(*cu_info)); @@ -5162,6 +6240,4 @@ int gfx_v8_0_get_cu_info(struct amdgpu_device *adev, cu_info->number = active_cu_number; cu_info->ao_cu_mask = ao_cu_mask; - - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h index 021e05193..16a49f53a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h @@ -28,6 +28,5 @@ extern const struct amd_ip_funcs gfx_v8_0_ip_funcs; uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev); void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num); -int gfx_v8_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 4ebaf9c97..233f38cc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -1115,114 +1115,6 @@ static int gmc_v7_0_wait_for_idle(void *handle) } -static void gmc_v7_0_print_status(void *handle) -{ - int i, j; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - dev_info(adev->dev, "GMC 8.x registers\n"); - dev_info(adev->dev, " SRBM_STATUS=0x%08X\n", - RREG32(mmSRBM_STATUS)); - dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n", - RREG32(mmSRBM_STATUS2)); - - dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", - RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR)); - dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", - RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS)); - dev_info(adev->dev, " MC_VM_MX_L1_TLB_CNTL=0x%08X\n", - RREG32(mmMC_VM_MX_L1_TLB_CNTL)); - dev_info(adev->dev, " VM_L2_CNTL=0x%08X\n", - RREG32(mmVM_L2_CNTL)); - dev_info(adev->dev, " VM_L2_CNTL2=0x%08X\n", - RREG32(mmVM_L2_CNTL2)); - dev_info(adev->dev, " VM_L2_CNTL3=0x%08X\n", - RREG32(mmVM_L2_CNTL3)); - dev_info(adev->dev, " VM_CONTEXT0_PAGE_TABLE_START_ADDR=0x%08X\n", - RREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR)); - dev_info(adev->dev, " VM_CONTEXT0_PAGE_TABLE_END_ADDR=0x%08X\n", - RREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR)); - dev_info(adev->dev, " VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR=0x%08X\n", - RREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR)); - dev_info(adev->dev, " VM_CONTEXT0_CNTL2=0x%08X\n", - RREG32(mmVM_CONTEXT0_CNTL2)); - dev_info(adev->dev, " VM_CONTEXT0_CNTL=0x%08X\n", - RREG32(mmVM_CONTEXT0_CNTL)); - dev_info(adev->dev, " 0x15D4=0x%08X\n", - RREG32(0x575)); - dev_info(adev->dev, " 0x15D8=0x%08X\n", - RREG32(0x576)); - dev_info(adev->dev, " 0x15DC=0x%08X\n", - RREG32(0x577)); - dev_info(adev->dev, " VM_CONTEXT1_PAGE_TABLE_START_ADDR=0x%08X\n", - RREG32(mmVM_CONTEXT1_PAGE_TABLE_START_ADDR)); - dev_info(adev->dev, " VM_CONTEXT1_PAGE_TABLE_END_ADDR=0x%08X\n", - RREG32(mmVM_CONTEXT1_PAGE_TABLE_END_ADDR)); - dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR=0x%08X\n", - RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR)); - dev_info(adev->dev, " VM_CONTEXT1_CNTL2=0x%08X\n", - RREG32(mmVM_CONTEXT1_CNTL2)); - dev_info(adev->dev, " VM_CONTEXT1_CNTL=0x%08X\n", - RREG32(mmVM_CONTEXT1_CNTL)); - for (i = 0; i < 16; i++) { - if (i < 8) - dev_info(adev->dev, " VM_CONTEXT%d_PAGE_TABLE_BASE_ADDR=0x%08X\n", - i, RREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i)); - else - 
dev_info(adev->dev, " VM_CONTEXT%d_PAGE_TABLE_BASE_ADDR=0x%08X\n", - i, RREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + i - 8)); - } - dev_info(adev->dev, " MC_VM_SYSTEM_APERTURE_LOW_ADDR=0x%08X\n", - RREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR)); - dev_info(adev->dev, " MC_VM_SYSTEM_APERTURE_HIGH_ADDR=0x%08X\n", - RREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR)); - dev_info(adev->dev, " MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR=0x%08X\n", - RREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR)); - dev_info(adev->dev, " MC_VM_FB_LOCATION=0x%08X\n", - RREG32(mmMC_VM_FB_LOCATION)); - dev_info(adev->dev, " MC_VM_AGP_BASE=0x%08X\n", - RREG32(mmMC_VM_AGP_BASE)); - dev_info(adev->dev, " MC_VM_AGP_TOP=0x%08X\n", - RREG32(mmMC_VM_AGP_TOP)); - dev_info(adev->dev, " MC_VM_AGP_BOT=0x%08X\n", - RREG32(mmMC_VM_AGP_BOT)); - - if (adev->asic_type == CHIP_KAVERI) { - dev_info(adev->dev, " CHUB_CONTROL=0x%08X\n", - RREG32(mmCHUB_CONTROL)); - } - - dev_info(adev->dev, " HDP_REG_COHERENCY_FLUSH_CNTL=0x%08X\n", - RREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL)); - dev_info(adev->dev, " HDP_NONSURFACE_BASE=0x%08X\n", - RREG32(mmHDP_NONSURFACE_BASE)); - dev_info(adev->dev, " HDP_NONSURFACE_INFO=0x%08X\n", - RREG32(mmHDP_NONSURFACE_INFO)); - dev_info(adev->dev, " HDP_NONSURFACE_SIZE=0x%08X\n", - RREG32(mmHDP_NONSURFACE_SIZE)); - dev_info(adev->dev, " HDP_MISC_CNTL=0x%08X\n", - RREG32(mmHDP_MISC_CNTL)); - dev_info(adev->dev, " HDP_HOST_PATH_CNTL=0x%08X\n", - RREG32(mmHDP_HOST_PATH_CNTL)); - - for (i = 0, j = 0; i < 32; i++, j += 0x6) { - dev_info(adev->dev, " %d:\n", i); - dev_info(adev->dev, " 0x%04X=0x%08X\n", - 0xb05 + j, RREG32(0xb05 + j)); - dev_info(adev->dev, " 0x%04X=0x%08X\n", - 0xb06 + j, RREG32(0xb06 + j)); - dev_info(adev->dev, " 0x%04X=0x%08X\n", - 0xb07 + j, RREG32(0xb07 + j)); - dev_info(adev->dev, " 0x%04X=0x%08X\n", - 0xb08 + j, RREG32(0xb08 + j)); - dev_info(adev->dev, " 0x%04X=0x%08X\n", - 0xb09 + j, RREG32(0xb09 + j)); - } - - dev_info(adev->dev, " BIF_FB_EN=0x%08X\n", - RREG32(mmBIF_FB_EN)); -} - static int gmc_v7_0_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1242,8 +1134,6 @@ static int gmc_v7_0_soft_reset(void *handle) } if (srbm_soft_reset) { - gmc_v7_0_print_status((void *)adev); - gmc_v7_0_mc_stop(adev, &save); if (gmc_v7_0_wait_for_idle(adev)) { dev_warn(adev->dev, "Wait for GMC idle timed out !\n"); @@ -1267,8 +1157,6 @@ static int gmc_v7_0_soft_reset(void *handle) gmc_v7_0_mc_resume(adev, &save); udelay(50); - - gmc_v7_0_print_status((void *)adev); } return 0; @@ -1371,6 +1259,7 @@ static int gmc_v7_0_set_powergating_state(void *handle, } const struct amd_ip_funcs gmc_v7_0_ip_funcs = { + .name = "gmc_v7_0", .early_init = gmc_v7_0_early_init, .late_init = gmc_v7_0_late_init, .sw_init = gmc_v7_0_sw_init, @@ -1382,7 +1271,6 @@ const struct amd_ip_funcs gmc_v7_0_ip_funcs = { .is_idle = gmc_v7_0_is_idle, .wait_for_idle = gmc_v7_0_wait_for_idle, .soft_reset = gmc_v7_0_soft_reset, - .print_status = gmc_v7_0_print_status, .set_clockgating_state = gmc_v7_0_set_clockgating_state, .set_powergating_state = gmc_v7_0_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 20ed14a84..a7b6de8a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -73,6 +73,23 @@ static const u32 fiji_mgcg_cgcg_init[] = mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104 }; +static const u32 golden_settings_polaris11_a11[] = +{ + mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff, + 
mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff, + mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff, + mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff +}; + +static const u32 golden_settings_polaris10_a11[] = +{ + mmMC_ARB_WTM_GRPWT_RD, 0x00000003, 0x00000000, + mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff, + mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff, + mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff, + mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff +}; + static const u32 cz_mgcg_cgcg_init[] = { mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104 @@ -103,6 +120,16 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_tonga_a11, (const u32)ARRAY_SIZE(golden_settings_tonga_a11)); break; + case CHIP_POLARIS11: + amdgpu_program_register_sequence(adev, + golden_settings_polaris11_a11, + (const u32)ARRAY_SIZE(golden_settings_polaris11_a11)); + break; + case CHIP_POLARIS10: + amdgpu_program_register_sequence(adev, + golden_settings_polaris10_a11, + (const u32)ARRAY_SIZE(golden_settings_polaris10_a11)); + break; case CHIP_CARRIZO: amdgpu_program_register_sequence(adev, cz_mgcg_cgcg_init, @@ -209,6 +236,12 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) case CHIP_TONGA: chip_name = "tonga"; break; + case CHIP_POLARIS11: + chip_name = "polaris11"; + break; + case CHIP_POLARIS10: + chip_name = "polaris10"; + break; case CHIP_FIJI: case CHIP_CARRIZO: case CHIP_STONEY: @@ -1085,111 +1118,6 @@ static int gmc_v8_0_wait_for_idle(void *handle) } -static void gmc_v8_0_print_status(void *handle) -{ - int i, j; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - dev_info(adev->dev, "GMC 8.x registers\n"); - dev_info(adev->dev, " SRBM_STATUS=0x%08X\n", - RREG32(mmSRBM_STATUS)); - dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n", - RREG32(mmSRBM_STATUS2)); - - dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", - RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR)); - dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", - RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS)); - dev_info(adev->dev, " MC_VM_MX_L1_TLB_CNTL=0x%08X\n", - RREG32(mmMC_VM_MX_L1_TLB_CNTL)); - dev_info(adev->dev, " VM_L2_CNTL=0x%08X\n", - RREG32(mmVM_L2_CNTL)); - dev_info(adev->dev, " VM_L2_CNTL2=0x%08X\n", - RREG32(mmVM_L2_CNTL2)); - dev_info(adev->dev, " VM_L2_CNTL3=0x%08X\n", - RREG32(mmVM_L2_CNTL3)); - dev_info(adev->dev, " VM_L2_CNTL4=0x%08X\n", - RREG32(mmVM_L2_CNTL4)); - dev_info(adev->dev, " VM_CONTEXT0_PAGE_TABLE_START_ADDR=0x%08X\n", - RREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR)); - dev_info(adev->dev, " VM_CONTEXT0_PAGE_TABLE_END_ADDR=0x%08X\n", - RREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR)); - dev_info(adev->dev, " VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR=0x%08X\n", - RREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR)); - dev_info(adev->dev, " VM_CONTEXT0_CNTL2=0x%08X\n", - RREG32(mmVM_CONTEXT0_CNTL2)); - dev_info(adev->dev, " VM_CONTEXT0_CNTL=0x%08X\n", - RREG32(mmVM_CONTEXT0_CNTL)); - dev_info(adev->dev, " VM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR=0x%08X\n", - RREG32(mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR)); - dev_info(adev->dev, " VM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR=0x%08X\n", - RREG32(mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR)); - dev_info(adev->dev, " mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET=0x%08X\n", - RREG32(mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET)); - dev_info(adev->dev, " VM_CONTEXT1_PAGE_TABLE_START_ADDR=0x%08X\n", - RREG32(mmVM_CONTEXT1_PAGE_TABLE_START_ADDR)); - dev_info(adev->dev, " 
VM_CONTEXT1_PAGE_TABLE_END_ADDR=0x%08X\n", - RREG32(mmVM_CONTEXT1_PAGE_TABLE_END_ADDR)); - dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR=0x%08X\n", - RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR)); - dev_info(adev->dev, " VM_CONTEXT1_CNTL2=0x%08X\n", - RREG32(mmVM_CONTEXT1_CNTL2)); - dev_info(adev->dev, " VM_CONTEXT1_CNTL=0x%08X\n", - RREG32(mmVM_CONTEXT1_CNTL)); - for (i = 0; i < 16; i++) { - if (i < 8) - dev_info(adev->dev, " VM_CONTEXT%d_PAGE_TABLE_BASE_ADDR=0x%08X\n", - i, RREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i)); - else - dev_info(adev->dev, " VM_CONTEXT%d_PAGE_TABLE_BASE_ADDR=0x%08X\n", - i, RREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + i - 8)); - } - dev_info(adev->dev, " MC_VM_SYSTEM_APERTURE_LOW_ADDR=0x%08X\n", - RREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR)); - dev_info(adev->dev, " MC_VM_SYSTEM_APERTURE_HIGH_ADDR=0x%08X\n", - RREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR)); - dev_info(adev->dev, " MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR=0x%08X\n", - RREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR)); - dev_info(adev->dev, " MC_VM_FB_LOCATION=0x%08X\n", - RREG32(mmMC_VM_FB_LOCATION)); - dev_info(adev->dev, " MC_VM_AGP_BASE=0x%08X\n", - RREG32(mmMC_VM_AGP_BASE)); - dev_info(adev->dev, " MC_VM_AGP_TOP=0x%08X\n", - RREG32(mmMC_VM_AGP_TOP)); - dev_info(adev->dev, " MC_VM_AGP_BOT=0x%08X\n", - RREG32(mmMC_VM_AGP_BOT)); - - dev_info(adev->dev, " HDP_REG_COHERENCY_FLUSH_CNTL=0x%08X\n", - RREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL)); - dev_info(adev->dev, " HDP_NONSURFACE_BASE=0x%08X\n", - RREG32(mmHDP_NONSURFACE_BASE)); - dev_info(adev->dev, " HDP_NONSURFACE_INFO=0x%08X\n", - RREG32(mmHDP_NONSURFACE_INFO)); - dev_info(adev->dev, " HDP_NONSURFACE_SIZE=0x%08X\n", - RREG32(mmHDP_NONSURFACE_SIZE)); - dev_info(adev->dev, " HDP_MISC_CNTL=0x%08X\n", - RREG32(mmHDP_MISC_CNTL)); - dev_info(adev->dev, " HDP_HOST_PATH_CNTL=0x%08X\n", - RREG32(mmHDP_HOST_PATH_CNTL)); - - for (i = 0, j = 0; i < 32; i++, j += 0x6) { - dev_info(adev->dev, " %d:\n", i); - dev_info(adev->dev, " 0x%04X=0x%08X\n", - 0xb05 + j, RREG32(0xb05 + j)); - dev_info(adev->dev, " 0x%04X=0x%08X\n", - 0xb06 + j, RREG32(0xb06 + j)); - dev_info(adev->dev, " 0x%04X=0x%08X\n", - 0xb07 + j, RREG32(0xb07 + j)); - dev_info(adev->dev, " 0x%04X=0x%08X\n", - 0xb08 + j, RREG32(0xb08 + j)); - dev_info(adev->dev, " 0x%04X=0x%08X\n", - 0xb09 + j, RREG32(0xb09 + j)); - } - - dev_info(adev->dev, " BIF_FB_EN=0x%08X\n", - RREG32(mmBIF_FB_EN)); -} - static int gmc_v8_0_soft_reset(void *handle) { struct amdgpu_mode_mc_save save; @@ -1209,8 +1137,6 @@ static int gmc_v8_0_soft_reset(void *handle) } if (srbm_soft_reset) { - gmc_v8_0_print_status((void *)adev); - gmc_v8_0_mc_stop(adev, &save); if (gmc_v8_0_wait_for_idle(adev)) { dev_warn(adev->dev, "Wait for GMC idle timed out !\n"); @@ -1234,8 +1160,6 @@ static int gmc_v8_0_soft_reset(void *handle) gmc_v8_0_mc_resume(adev, &save); udelay(50); - - gmc_v8_0_print_status((void *)adev); } return 0; @@ -1313,11 +1237,11 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, } static void fiji_update_mc_medium_grain_clock_gating(struct amdgpu_device *adev, - bool enable) + bool enable) { uint32_t data; - if (enable) { + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { data = RREG32(mmMC_HUB_MISC_HUB_CG); data |= MC_HUB_MISC_HUB_CG__ENABLE_MASK; WREG32(mmMC_HUB_MISC_HUB_CG, data); @@ -1393,11 +1317,11 @@ static void fiji_update_mc_medium_grain_clock_gating(struct amdgpu_device *adev, } static void fiji_update_mc_light_sleep(struct amdgpu_device *adev, - bool enable) + bool enable) { 
uint32_t data; - if (enable) { + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) { data = RREG32(mmMC_HUB_MISC_HUB_CG); data |= MC_HUB_MISC_HUB_CG__MEM_LS_ENABLE_MASK; WREG32(mmMC_HUB_MISC_HUB_CG, data); @@ -1497,6 +1421,7 @@ static int gmc_v8_0_set_powergating_state(void *handle, } const struct amd_ip_funcs gmc_v8_0_ip_funcs = { + .name = "gmc_v8_0", .early_init = gmc_v8_0_early_init, .late_init = gmc_v8_0_late_init, .sw_init = gmc_v8_0_sw_init, @@ -1508,7 +1433,6 @@ const struct amd_ip_funcs gmc_v8_0_ip_funcs = { .is_idle = gmc_v8_0_is_idle, .wait_for_idle = gmc_v8_0_wait_for_idle, .soft_reset = gmc_v8_0_soft_reset, - .print_status = gmc_v8_0_print_status, .set_clockgating_state = gmc_v8_0_set_clockgating_state, .set_powergating_state = gmc_v8_0_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c b/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c index 5731b3648..571e37566 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c @@ -72,6 +72,11 @@ static int iceland_dpm_sw_init(void *handle) static int iceland_dpm_sw_fini(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + release_firmware(adev->pm.fw); + adev->pm.fw = NULL; + return 0; } @@ -157,6 +162,7 @@ static int iceland_dpm_set_powergating_state(void *handle, } const struct amd_ip_funcs iceland_dpm_ip_funcs = { + .name = "iceland_dpm", .early_init = iceland_dpm_early_init, .late_init = NULL, .sw_init = iceland_dpm_sw_init, @@ -168,7 +174,6 @@ const struct amd_ip_funcs iceland_dpm_ip_funcs = { .is_idle = NULL, .wait_for_idle = NULL, .soft_reset = NULL, - .print_status = NULL, .set_clockgating_state = iceland_dpm_set_clockgating_state, .set_powergating_state = iceland_dpm_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index 679e7394a..3b8906ce3 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -103,7 +103,6 @@ static void iceland_ih_disable_interrupts(struct amdgpu_device *adev) */ static int iceland_ih_irq_init(struct amdgpu_device *adev) { - int ret = 0; int rb_bufsz; u32 interrupt_cntl, ih_cntl, ih_rb_cntl; u64 wptr_off; @@ -157,7 +156,7 @@ static int iceland_ih_irq_init(struct amdgpu_device *adev) /* enable interrupts */ iceland_ih_enable_interrupts(adev); - return ret; + return 0; } /** @@ -351,35 +350,6 @@ static int iceland_ih_wait_for_idle(void *handle) return -ETIMEDOUT; } -static void iceland_ih_print_status(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - dev_info(adev->dev, "ICELAND IH registers\n"); - dev_info(adev->dev, " SRBM_STATUS=0x%08X\n", - RREG32(mmSRBM_STATUS)); - dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n", - RREG32(mmSRBM_STATUS2)); - dev_info(adev->dev, " INTERRUPT_CNTL=0x%08X\n", - RREG32(mmINTERRUPT_CNTL)); - dev_info(adev->dev, " INTERRUPT_CNTL2=0x%08X\n", - RREG32(mmINTERRUPT_CNTL2)); - dev_info(adev->dev, " IH_CNTL=0x%08X\n", - RREG32(mmIH_CNTL)); - dev_info(adev->dev, " IH_RB_CNTL=0x%08X\n", - RREG32(mmIH_RB_CNTL)); - dev_info(adev->dev, " IH_RB_BASE=0x%08X\n", - RREG32(mmIH_RB_BASE)); - dev_info(adev->dev, " IH_RB_WPTR_ADDR_LO=0x%08X\n", - RREG32(mmIH_RB_WPTR_ADDR_LO)); - dev_info(adev->dev, " IH_RB_WPTR_ADDR_HI=0x%08X\n", - RREG32(mmIH_RB_WPTR_ADDR_HI)); - dev_info(adev->dev, " IH_RB_RPTR=0x%08X\n", - RREG32(mmIH_RB_RPTR)); - dev_info(adev->dev, " IH_RB_WPTR=0x%08X\n", - RREG32(mmIH_RB_WPTR)); -} - static int iceland_ih_soft_reset(void 
*handle) { u32 srbm_soft_reset = 0; @@ -391,8 +361,6 @@ static int iceland_ih_soft_reset(void *handle) SOFT_RESET_IH, 1); if (srbm_soft_reset) { - iceland_ih_print_status((void *)adev); - tmp = RREG32(mmSRBM_SOFT_RESET); tmp |= srbm_soft_reset; dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); @@ -407,8 +375,6 @@ static int iceland_ih_soft_reset(void *handle) /* Wait a little for things to settle down */ udelay(50); - - iceland_ih_print_status((void *)adev); } return 0; @@ -427,6 +393,7 @@ static int iceland_ih_set_powergating_state(void *handle, } const struct amd_ip_funcs iceland_ih_ip_funcs = { + .name = "iceland_ih", .early_init = iceland_ih_early_init, .late_init = NULL, .sw_init = iceland_ih_sw_init, @@ -438,7 +405,6 @@ const struct amd_ip_funcs iceland_ih_ip_funcs = { .is_idle = iceland_ih_is_idle, .wait_for_idle = iceland_ih_wait_for_idle, .soft_reset = iceland_ih_soft_reset, - .print_status = iceland_ih_print_status, .set_clockgating_state = iceland_ih_set_clockgating_state, .set_powergating_state = iceland_ih_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index 654d76723..a789a863d 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -135,11 +135,6 @@ static void sumo_take_smu_control(struct amdgpu_device *adev, bool enable) #endif } -static u32 sumo_get_sleep_divider_from_id(u32 id) -{ - return 1 << id; -} - static void sumo_construct_sclk_voltage_mapping_table(struct amdgpu_device *adev, struct sumo_sclk_voltage_mapping_table *sclk_voltage_mapping_table, ATOM_AVAILABLE_SCLK_LIST *table) @@ -2176,8 +2171,7 @@ static u8 kv_get_sleep_divider_id_from_clock(struct amdgpu_device *adev, struct kv_power_info *pi = kv_get_pi(adev); u32 i; u32 temp; - u32 min = (min_sclk_in_sr > KV_MINIMUM_ENGINE_CLOCK) ? 
- min_sclk_in_sr : KV_MINIMUM_ENGINE_CLOCK; + u32 min = max(min_sclk_in_sr, (u32)KV_MINIMUM_ENGINE_CLOCK); if (sclk < min) return 0; @@ -2186,7 +2180,7 @@ static u8 kv_get_sleep_divider_id_from_clock(struct amdgpu_device *adev, return 0; for (i = KV_MAX_DEEPSLEEP_DIVIDER_ID; i > 0; i--) { - temp = sclk / sumo_get_sleep_divider_from_id(i); + temp = sclk >> i; if (temp >= min) break; } @@ -2258,7 +2252,7 @@ static void kv_apply_state_adjust_rules(struct amdgpu_device *adev, if (pi->caps_stable_p_state) { stable_p_state_sclk = (max_limits->sclk * 75) / 100; - for (i = table->count - 1; i >= 0; i++) { + for (i = table->count - 1; i >= 0; i--) { if (stable_p_state_sclk >= table->entries[i].clk) { stable_p_state_sclk = table->entries[i].clk; break; @@ -3147,62 +3141,6 @@ static int kv_dpm_wait_for_idle(void *handle) return 0; } -static void kv_dpm_print_status(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - dev_info(adev->dev, "KV/KB DPM registers\n"); - dev_info(adev->dev, " DIDT_SQ_CTRL0=0x%08X\n", - RREG32_DIDT(ixDIDT_SQ_CTRL0)); - dev_info(adev->dev, " DIDT_DB_CTRL0=0x%08X\n", - RREG32_DIDT(ixDIDT_DB_CTRL0)); - dev_info(adev->dev, " DIDT_TD_CTRL0=0x%08X\n", - RREG32_DIDT(ixDIDT_TD_CTRL0)); - dev_info(adev->dev, " DIDT_TCP_CTRL0=0x%08X\n", - RREG32_DIDT(ixDIDT_TCP_CTRL0)); - dev_info(adev->dev, " LCAC_SX0_OVR_SEL=0x%08X\n", - RREG32_SMC(ixLCAC_SX0_OVR_SEL)); - dev_info(adev->dev, " LCAC_SX0_OVR_VAL=0x%08X\n", - RREG32_SMC(ixLCAC_SX0_OVR_VAL)); - dev_info(adev->dev, " LCAC_MC0_OVR_SEL=0x%08X\n", - RREG32_SMC(ixLCAC_MC0_OVR_SEL)); - dev_info(adev->dev, " LCAC_MC0_OVR_VAL=0x%08X\n", - RREG32_SMC(ixLCAC_MC0_OVR_VAL)); - dev_info(adev->dev, " LCAC_MC1_OVR_SEL=0x%08X\n", - RREG32_SMC(ixLCAC_MC1_OVR_SEL)); - dev_info(adev->dev, " LCAC_MC1_OVR_VAL=0x%08X\n", - RREG32_SMC(ixLCAC_MC1_OVR_VAL)); - dev_info(adev->dev, " LCAC_MC2_OVR_SEL=0x%08X\n", - RREG32_SMC(ixLCAC_MC2_OVR_SEL)); - dev_info(adev->dev, " LCAC_MC2_OVR_VAL=0x%08X\n", - RREG32_SMC(ixLCAC_MC2_OVR_VAL)); - dev_info(adev->dev, " LCAC_MC3_OVR_SEL=0x%08X\n", - RREG32_SMC(ixLCAC_MC3_OVR_SEL)); - dev_info(adev->dev, " LCAC_MC3_OVR_VAL=0x%08X\n", - RREG32_SMC(ixLCAC_MC3_OVR_VAL)); - dev_info(adev->dev, " LCAC_CPL_OVR_SEL=0x%08X\n", - RREG32_SMC(ixLCAC_CPL_OVR_SEL)); - dev_info(adev->dev, " LCAC_CPL_OVR_VAL=0x%08X\n", - RREG32_SMC(ixLCAC_CPL_OVR_VAL)); - dev_info(adev->dev, " CG_FREQ_TRAN_VOTING_0=0x%08X\n", - RREG32_SMC(ixCG_FREQ_TRAN_VOTING_0)); - dev_info(adev->dev, " GENERAL_PWRMGT=0x%08X\n", - RREG32_SMC(ixGENERAL_PWRMGT)); - dev_info(adev->dev, " SCLK_PWRMGT_CNTL=0x%08X\n", - RREG32_SMC(ixSCLK_PWRMGT_CNTL)); - dev_info(adev->dev, " SMC_MESSAGE_0=0x%08X\n", - RREG32(mmSMC_MESSAGE_0)); - dev_info(adev->dev, " SMC_RESP_0=0x%08X\n", - RREG32(mmSMC_RESP_0)); - dev_info(adev->dev, " SMC_MSG_ARG_0=0x%08X\n", - RREG32(mmSMC_MSG_ARG_0)); - dev_info(adev->dev, " SMC_IND_INDEX_0=0x%08X\n", - RREG32(mmSMC_IND_INDEX_0)); - dev_info(adev->dev, " SMC_IND_DATA_0=0x%08X\n", - RREG32(mmSMC_IND_DATA_0)); - dev_info(adev->dev, " SMC_IND_ACCESS_CNTL=0x%08X\n", - RREG32(mmSMC_IND_ACCESS_CNTL)); -} static int kv_dpm_soft_reset(void *handle) { @@ -3300,6 +3238,7 @@ static int kv_dpm_set_powergating_state(void *handle, } const struct amd_ip_funcs kv_dpm_ip_funcs = { + .name = "kv_dpm", .early_init = kv_dpm_early_init, .late_init = kv_dpm_late_init, .sw_init = kv_dpm_sw_init, @@ -3311,7 +3250,6 @@ const struct amd_ip_funcs kv_dpm_ip_funcs = { .is_idle = kv_dpm_is_idle, .wait_for_idle = kv_dpm_wait_for_idle, .soft_reset = 
kv_dpm_soft_reset, - .print_status = kv_dpm_print_status, .set_clockgating_state = kv_dpm_set_clockgating_state, .set_powergating_state = kv_dpm_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 47d143269..36d97195f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -104,6 +104,15 @@ static void sdma_v2_4_init_golden_registers(struct amdgpu_device *adev) } } +static void sdma_v2_4_free_microcode(struct amdgpu_device *adev) +{ + int i; + for (i = 0; i < adev->sdma.num_instances; i++) { + release_firmware(adev->sdma.instance[i].fw); + adev->sdma.instance[i].fw = NULL; + } +} + /** * sdma_v2_4_init_microcode - load ucode images from disk * @@ -241,9 +250,10 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * Schedule an IB in the DMA ring (VI). */ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) { - u32 vmid = ib->vm_id & 0xf; + u32 vmid = vm_id & 0xf; u32 next_rptr = ring->wptr + 5; while ((next_rptr & 7) != 2) @@ -459,6 +469,8 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev) /* Initialize the ring buffer's read and write pointers */ WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0); WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0); + WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0); + WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0); /* set the wb address whether it's enabled or not */ WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], @@ -487,7 +499,11 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); ring->ready = true; + } + sdma_v2_4_enable(adev, true); + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; r = amdgpu_ring_test_ring(ring); if (r) { ring->ready = false; @@ -578,8 +594,8 @@ static int sdma_v2_4_start(struct amdgpu_device *adev) return -EINVAL; } - /* unhalt the MEs */ - sdma_v2_4_enable(adev, true); + /* halt the engine before programming */ + sdma_v2_4_enable(adev, false); /* start the gfx rings and rlc compute queues */ r = sdma_v2_4_gfx_resume(adev); @@ -700,7 +716,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP); ib.length_dw = 8; - r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); if (r) goto err1; @@ -989,7 +1005,7 @@ static int sdma_v2_4_sw_init(void *handle) ring->ring_obj = NULL; ring->use_doorbell = false; sprintf(ring->name, "sdma%d", i); - r = amdgpu_ring_init(adev, ring, 256 * 1024, + r = amdgpu_ring_init(adev, ring, 1024, SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 0xf, &adev->sdma.trap_irq, (i == 0) ?
@@ -1010,6 +1026,7 @@ static int sdma_v2_4_sw_fini(void *handle) for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ring_fini(&adev->sdma.instance[i].ring); + sdma_v2_4_free_microcode(adev); return 0; } @@ -1079,55 +1096,6 @@ static int sdma_v2_4_wait_for_idle(void *handle) return -ETIMEDOUT; } -static void sdma_v2_4_print_status(void *handle) -{ - int i, j; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - dev_info(adev->dev, "VI SDMA registers\n"); - dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n", - RREG32(mmSRBM_STATUS2)); - for (i = 0; i < adev->sdma.num_instances; i++) { - dev_info(adev->dev, " SDMA%d_STATUS_REG=0x%08X\n", - i, RREG32(mmSDMA0_STATUS_REG + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_F32_CNTL=0x%08X\n", - i, RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_CNTL=0x%08X\n", - i, RREG32(mmSDMA0_CNTL + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_SEM_WAIT_FAIL_TIMER_CNTL=0x%08X\n", - i, RREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_IB_CNTL=0x%08X\n", - i, RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_RB_CNTL=0x%08X\n", - i, RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_RB_RPTR=0x%08X\n", - i, RREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_RB_WPTR=0x%08X\n", - i, RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_RB_RPTR_ADDR_HI=0x%08X\n", - i, RREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_RB_RPTR_ADDR_LO=0x%08X\n", - i, RREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_RB_BASE=0x%08X\n", - i, RREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_RB_BASE_HI=0x%08X\n", - i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_TILING_CONFIG=0x%08X\n", - i, RREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i])); - mutex_lock(&adev->srbm_mutex); - for (j = 0; j < 16; j++) { - vi_srbm_select(adev, 0, 0, 0, j); - dev_info(adev->dev, " VM %d:\n", j); - dev_info(adev->dev, " SDMA%d_GFX_VIRTUAL_ADDR=0x%08X\n", - i, RREG32(mmSDMA0_GFX_VIRTUAL_ADDR + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_APE1_CNTL=0x%08X\n", - i, RREG32(mmSDMA0_GFX_APE1_CNTL + sdma_offsets[i])); - } - vi_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - } -} - static int sdma_v2_4_soft_reset(void *handle) { u32 srbm_soft_reset = 0; @@ -1150,8 +1118,6 @@ static int sdma_v2_4_soft_reset(void *handle) } if (srbm_soft_reset) { - sdma_v2_4_print_status((void *)adev); - tmp = RREG32(mmSRBM_SOFT_RESET); tmp |= srbm_soft_reset; dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); @@ -1166,8 +1132,6 @@ static int sdma_v2_4_soft_reset(void *handle) /* Wait a little for things to settle down */ udelay(50); - - sdma_v2_4_print_status((void *)adev); } return 0; @@ -1282,6 +1246,7 @@ static int sdma_v2_4_set_powergating_state(void *handle, } const struct amd_ip_funcs sdma_v2_4_ip_funcs = { + .name = "sdma_v2_4", .early_init = sdma_v2_4_early_init, .late_init = NULL, .sw_init = sdma_v2_4_sw_init, @@ -1293,7 +1258,6 @@ const struct amd_ip_funcs sdma_v2_4_ip_funcs = { .is_idle = sdma_v2_4_is_idle, .wait_for_idle = sdma_v2_4_wait_for_idle, .soft_reset = sdma_v2_4_soft_reset, - .print_status = sdma_v2_4_print_status, .set_clockgating_state = sdma_v2_4_set_clockgating_state, .set_powergating_state = 
sdma_v2_4_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 44f059dbc..95c44942e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -51,6 +51,7 @@ static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev); /*(DEBLOBBED)*/ + static const u32 sdma_offsets[SDMA_MAX_INSTANCE] = { SDMA0_REGISTER_OFFSET, @@ -95,6 +96,34 @@ static const u32 fiji_mgcg_cgcg_init[] = mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100 }; +static const u32 golden_settings_polaris11_a11[] = +{ + mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007, + mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000, + mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100, + mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100, + mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100, + mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007, + mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000, + mmSDMA1_GFX_IB_CNTL, 0x800f0111, 0x00000100, + mmSDMA1_RLC0_IB_CNTL, 0x800f0111, 0x00000100, + mmSDMA1_RLC1_IB_CNTL, 0x800f0111, 0x00000100, +}; + +static const u32 golden_settings_polaris10_a11[] = +{ + mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007, + mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000, + mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100, + mmSDMA0_RLC0_IB_CNTL, 0x800f0111, 0x00000100, + mmSDMA0_RLC1_IB_CNTL, 0x800f0111, 0x00000100, + mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007, + mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000, + mmSDMA1_GFX_IB_CNTL, 0x800f0111, 0x00000100, + mmSDMA1_RLC0_IB_CNTL, 0x800f0111, 0x00000100, + mmSDMA1_RLC1_IB_CNTL, 0x800f0111, 0x00000100, +}; + static const u32 cz_golden_settings_a11[] = { mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007, @@ -166,6 +195,16 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_tonga_a11, (const u32)ARRAY_SIZE(golden_settings_tonga_a11)); break; + case CHIP_POLARIS11: + amdgpu_program_register_sequence(adev, + golden_settings_polaris11_a11, + (const u32)ARRAY_SIZE(golden_settings_polaris11_a11)); + break; + case CHIP_POLARIS10: + amdgpu_program_register_sequence(adev, + golden_settings_polaris10_a11, + (const u32)ARRAY_SIZE(golden_settings_polaris10_a11)); + break; case CHIP_CARRIZO: amdgpu_program_register_sequence(adev, cz_mgcg_cgcg_init, @@ -187,6 +226,15 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev) } } +static void sdma_v3_0_free_microcode(struct amdgpu_device *adev) +{ + int i; + for (i = 0; i < adev->sdma.num_instances; i++) { + release_firmware(adev->sdma.instance[i].fw); + adev->sdma.instance[i].fw = NULL; + } +} + /** * sdma_v3_0_init_microcode - load ucode images from disk * @@ -214,6 +262,12 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) case CHIP_FIJI: chip_name = "fiji"; break; + case CHIP_POLARIS11: + chip_name = "polaris11"; + break; + case CHIP_POLARIS10: + chip_name = "polaris10"; + break; case CHIP_CARRIZO: chip_name = "carrizo"; break; @@ -347,9 +401,10 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * Schedule an IB in the DMA ring (VI). 
*/ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) { - u32 vmid = ib->vm_id & 0xf; + u32 vmid = vm_id & 0xf; u32 next_rptr = ring->wptr + 5; while ((next_rptr & 7) != 2) @@ -446,6 +501,31 @@ static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } +unsigned init_cond_exec(struct amdgpu_ring *ring) +{ + unsigned ret; + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE)); + amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, 1); + ret = ring->wptr; /* this is the offset we need to patch later */ + amdgpu_ring_write(ring, 0x55aa55aa); /* insert dummy here and patch it later */ + return ret; +} + +void patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) +{ + unsigned cur; + BUG_ON(ring->ring[offset] != 0x55aa55aa); + + cur = ring->wptr - 1; + if (likely(cur > offset)) + ring->ring[offset] = cur - offset; + else + ring->ring[offset] = (ring->ring_size>>2) - offset + cur; +} + + /** * sdma_v3_0_gfx_stop - stop the gfx async dma engines * @@ -591,6 +671,8 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) /* Initialize the ring buffer's read and write pointers */ WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0); WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0); + WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0); + WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0); /* set the wb address whether it's enabled or not */ WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i], @@ -630,7 +712,15 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); ring->ready = true; + } + /* unhalt the MEs */ + sdma_v3_0_enable(adev, true); + /* enable sdma ring preemption */ + sdma_v3_0_ctx_switch_enable(adev, true); + + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; r = amdgpu_ring_test_ring(ring); if (r) { ring->ready = false; @@ -723,10 +813,9 @@ static int sdma_v3_0_start(struct amdgpu_device *adev) } } - /* unhalt the MEs */ - sdma_v3_0_enable(adev, true); - /* enable sdma ring preemption */ - sdma_v3_0_ctx_switch_enable(adev, true); + /* disable sdma engine before programming it */ + sdma_v3_0_ctx_switch_enable(adev, false); + sdma_v3_0_enable(adev, false); /* start the gfx rings and rlc compute queues */ r = sdma_v3_0_gfx_resume(adev); @@ -847,7 +936,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); ib.length_dw = 8; - r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); if (r) goto err1; @@ -1145,7 +1234,7 @@ static int sdma_v3_0_sw_init(void *handle) AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1; sprintf(ring->name, "sdma%d", i); - r = amdgpu_ring_init(adev, ring, 256 * 1024, + r = amdgpu_ring_init(adev, ring, 1024, SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 0xf, &adev->sdma.trap_irq, (i == 0) ?
@@ -1166,6 +1255,7 @@ static int sdma_v3_0_sw_fini(void *handle) for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ring_fini(&adev->sdma.instance[i].ring); + sdma_v3_0_free_microcode(adev); return 0; } @@ -1236,57 +1326,6 @@ static int sdma_v3_0_wait_for_idle(void *handle) return -ETIMEDOUT; } -static void sdma_v3_0_print_status(void *handle) -{ - int i, j; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - dev_info(adev->dev, "VI SDMA registers\n"); - dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n", - RREG32(mmSRBM_STATUS2)); - for (i = 0; i < adev->sdma.num_instances; i++) { - dev_info(adev->dev, " SDMA%d_STATUS_REG=0x%08X\n", - i, RREG32(mmSDMA0_STATUS_REG + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_F32_CNTL=0x%08X\n", - i, RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_CNTL=0x%08X\n", - i, RREG32(mmSDMA0_CNTL + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_SEM_WAIT_FAIL_TIMER_CNTL=0x%08X\n", - i, RREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_IB_CNTL=0x%08X\n", - i, RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_RB_CNTL=0x%08X\n", - i, RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_RB_RPTR=0x%08X\n", - i, RREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_RB_WPTR=0x%08X\n", - i, RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_RB_RPTR_ADDR_HI=0x%08X\n", - i, RREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_RB_RPTR_ADDR_LO=0x%08X\n", - i, RREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_RB_BASE=0x%08X\n", - i, RREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_RB_BASE_HI=0x%08X\n", - i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_DOORBELL=0x%08X\n", - i, RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_TILING_CONFIG=0x%08X\n", - i, RREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i])); - mutex_lock(&adev->srbm_mutex); - for (j = 0; j < 16; j++) { - vi_srbm_select(adev, 0, 0, 0, j); - dev_info(adev->dev, " VM %d:\n", j); - dev_info(adev->dev, " SDMA%d_GFX_VIRTUAL_ADDR=0x%08X\n", - i, RREG32(mmSDMA0_GFX_VIRTUAL_ADDR + sdma_offsets[i])); - dev_info(adev->dev, " SDMA%d_GFX_APE1_CNTL=0x%08X\n", - i, RREG32(mmSDMA0_GFX_APE1_CNTL + sdma_offsets[i])); - } - vi_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - } -} - static int sdma_v3_0_soft_reset(void *handle) { u32 srbm_soft_reset = 0; @@ -1309,8 +1348,6 @@ static int sdma_v3_0_soft_reset(void *handle) } if (srbm_soft_reset) { - sdma_v3_0_print_status((void *)adev); - tmp = RREG32(mmSRBM_SOFT_RESET); tmp |= srbm_soft_reset; dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); @@ -1325,8 +1362,6 @@ static int sdma_v3_0_soft_reset(void *handle) /* Wait a little for things to settle down */ udelay(50); - - sdma_v3_0_print_status((void *)adev); } return 0; @@ -1427,40 +1462,31 @@ static int sdma_v3_0_process_illegal_inst_irq(struct amdgpu_device *adev, return 0; } -static void fiji_update_sdma_medium_grain_clock_gating( +static void sdma_v3_0_update_sdma_medium_grain_clock_gating( struct amdgpu_device *adev, bool enable) { uint32_t temp, data; + int i; - if (enable) { - temp = data = RREG32(mmSDMA0_CLK_CTRL); - data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | - 
SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); - if (data != temp) - WREG32(mmSDMA0_CLK_CTRL, data); - - temp = data = RREG32(mmSDMA1_CLK_CTRL); - data &= ~(SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK); - - if (data != temp) - WREG32(mmSDMA1_CLK_CTRL, data); + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { + for (i = 0; i < adev->sdma.num_instances; i++) { + temp = data = RREG32(mmSDMA0_CLK_CTRL + sdma_offsets[i]); + data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); + if (data != temp) + WREG32(mmSDMA0_CLK_CTRL + sdma_offsets[i], data); + } } else { - temp = data = RREG32(mmSDMA0_CLK_CTRL); - data |= SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | + for (i = 0; i < adev->sdma.num_instances; i++) { + temp = data = RREG32(mmSDMA0_CLK_CTRL + sdma_offsets[i]); + data |= SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | @@ -1469,54 +1495,35 @@ static void fiji_update_sdma_medium_grain_clock_gating( SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK; - if (data != temp) - WREG32(mmSDMA0_CLK_CTRL, data); - - temp = data = RREG32(mmSDMA1_CLK_CTRL); - data |= SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK; - - if (data != temp) - WREG32(mmSDMA1_CLK_CTRL, data); + if (data != temp) + WREG32(mmSDMA0_CLK_CTRL + sdma_offsets[i], data); + } } } -static void fiji_update_sdma_medium_grain_light_sleep( +static void sdma_v3_0_update_sdma_medium_grain_light_sleep( struct amdgpu_device *adev, bool enable) { uint32_t temp, data; + int i; - if (enable) { - temp = data = RREG32(mmSDMA0_POWER_CNTL); - data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; - - if (temp != data) - WREG32(mmSDMA0_POWER_CNTL, data); - - temp = data = RREG32(mmSDMA1_POWER_CNTL); - data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) { + for (i = 0; i < adev->sdma.num_instances; i++) { + temp = data = RREG32(mmSDMA0_POWER_CNTL + sdma_offsets[i]); + data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; - if (temp != data) - WREG32(mmSDMA1_POWER_CNTL, data); + if (temp != data) + WREG32(mmSDMA0_POWER_CNTL + sdma_offsets[i], data); + } } else { - temp = data = RREG32(mmSDMA0_POWER_CNTL); - data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; - - if (temp != data) - WREG32(mmSDMA0_POWER_CNTL, data); - - temp = data = RREG32(mmSDMA1_POWER_CNTL); - data &= ~SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; + for (i = 0; i < adev->sdma.num_instances; i++) { + temp = data = 
RREG32(mmSDMA0_POWER_CNTL + sdma_offsets[i]); + data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; - if (temp != data) - WREG32(mmSDMA1_POWER_CNTL, data); + if (temp != data) + WREG32(mmSDMA0_POWER_CNTL + sdma_offsets[i], data); + } } } @@ -1527,9 +1534,11 @@ static int sdma_v3_0_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_FIJI: - fiji_update_sdma_medium_grain_clock_gating(adev, + case CHIP_CARRIZO: + case CHIP_STONEY: + sdma_v3_0_update_sdma_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); - fiji_update_sdma_medium_grain_light_sleep(adev, + sdma_v3_0_update_sdma_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); break; default: @@ -1545,6 +1554,7 @@ static int sdma_v3_0_set_powergating_state(void *handle, } const struct amd_ip_funcs sdma_v3_0_ip_funcs = { + .name = "sdma_v3_0", .early_init = sdma_v3_0_early_init, .late_init = NULL, .sw_init = sdma_v3_0_sw_init, @@ -1556,7 +1566,6 @@ const struct amd_ip_funcs sdma_v3_0_ip_funcs = { .is_idle = sdma_v3_0_is_idle, .wait_for_idle = sdma_v3_0_wait_for_idle, .soft_reset = sdma_v3_0_soft_reset, - .print_status = sdma_v3_0_print_status, .set_clockgating_state = sdma_v3_0_set_clockgating_state, .set_powergating_state = sdma_v3_0_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/smu_ucode_xfer_vi.h b/drivers/gpu/drm/amd/amdgpu/smu_ucode_xfer_vi.h index c24a81eeb..880152c0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_ucode_xfer_vi.h +++ b/drivers/gpu/drm/amd/amdgpu/smu_ucode_xfer_vi.h @@ -44,6 +44,7 @@ #define UCODE_ID_IH_REG_RESTORE 11 #define UCODE_ID_VBIOS 12 #define UCODE_ID_MISC_METADATA 13 +#define UCODE_ID_SMU_SK 14 #define UCODE_ID_RLC_SCRATCH 32 #define UCODE_ID_RLC_SRM_ARAM 33 #define UCODE_ID_RLC_SRM_DRAM 34 diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c index 4dc71926d..fc5c33a6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c @@ -71,6 +71,11 @@ static int tonga_dpm_sw_init(void *handle) static int tonga_dpm_sw_fini(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + release_firmware(adev->pm.fw); + adev->pm.fw = NULL; + return 0; } @@ -143,6 +148,7 @@ static int tonga_dpm_set_powergating_state(void *handle, } const struct amd_ip_funcs tonga_dpm_ip_funcs = { + .name = "tonga_dpm", .early_init = tonga_dpm_early_init, .late_init = NULL, .sw_init = tonga_dpm_sw_init, @@ -154,7 +160,6 @@ const struct amd_ip_funcs tonga_dpm_ip_funcs = { .is_idle = NULL, .wait_for_idle = NULL, .soft_reset = NULL, - .print_status = NULL, .set_clockgating_state = tonga_dpm_set_clockgating_state, .set_powergating_state = tonga_dpm_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 0f14199cf..c92055805 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -99,7 +99,6 @@ static void tonga_ih_disable_interrupts(struct amdgpu_device *adev) */ static int tonga_ih_irq_init(struct amdgpu_device *adev) { - int ret = 0; int rb_bufsz; u32 interrupt_cntl, ih_rb_cntl, ih_doorbell_rtpr; u64 wptr_off; @@ -165,7 +164,7 @@ static int tonga_ih_irq_init(struct amdgpu_device *adev) /* enable interrupts */ tonga_ih_enable_interrupts(adev); - return ret; + return 0; } /** @@ -374,35 +373,6 @@ static int tonga_ih_wait_for_idle(void *handle) return -ETIMEDOUT; } -static void tonga_ih_print_status(void *handle) -{ - struct amdgpu_device *adev = (struct 
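/*
 * Editor's sketch: the tonga_dpm_sw_fini() hunk above adopts the same
 * teardown pattern as the new sdma_v3_0_free_microcode() call: release the
 * firmware taken at init and NULL the pointer so a repeated teardown cannot
 * double-free. struct fw_blob / fw_release() below are stand-ins for the
 * kernel's struct firmware / release_firmware().
 */
#include <stdlib.h>

struct fw_blob { void *data; };              /* struct firmware stand-in */

static void fw_release(struct fw_blob *fw)   /* release_firmware() stand-in */
{
	if (fw) {                            /* like the real call, NULL is a no-op */
		free(fw->data);
		free(fw);
	}
}

struct dev_state { struct fw_blob *pm_fw; };

static int sw_fini_sketch(struct dev_state *dev)
{
	fw_release(dev->pm_fw);   /* safe even if the firmware never loaded */
	dev->pm_fw = NULL;        /* makes a second fini call harmless */
	return 0;
}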
amdgpu_device *)handle; - - dev_info(adev->dev, "TONGA IH registers\n"); - dev_info(adev->dev, " SRBM_STATUS=0x%08X\n", - RREG32(mmSRBM_STATUS)); - dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n", - RREG32(mmSRBM_STATUS2)); - dev_info(adev->dev, " INTERRUPT_CNTL=0x%08X\n", - RREG32(mmINTERRUPT_CNTL)); - dev_info(adev->dev, " INTERRUPT_CNTL2=0x%08X\n", - RREG32(mmINTERRUPT_CNTL2)); - dev_info(adev->dev, " IH_CNTL=0x%08X\n", - RREG32(mmIH_CNTL)); - dev_info(adev->dev, " IH_RB_CNTL=0x%08X\n", - RREG32(mmIH_RB_CNTL)); - dev_info(adev->dev, " IH_RB_BASE=0x%08X\n", - RREG32(mmIH_RB_BASE)); - dev_info(adev->dev, " IH_RB_WPTR_ADDR_LO=0x%08X\n", - RREG32(mmIH_RB_WPTR_ADDR_LO)); - dev_info(adev->dev, " IH_RB_WPTR_ADDR_HI=0x%08X\n", - RREG32(mmIH_RB_WPTR_ADDR_HI)); - dev_info(adev->dev, " IH_RB_RPTR=0x%08X\n", - RREG32(mmIH_RB_RPTR)); - dev_info(adev->dev, " IH_RB_WPTR=0x%08X\n", - RREG32(mmIH_RB_WPTR)); -} - static int tonga_ih_soft_reset(void *handle) { u32 srbm_soft_reset = 0; @@ -414,8 +384,6 @@ static int tonga_ih_soft_reset(void *handle) SOFT_RESET_IH, 1); if (srbm_soft_reset) { - tonga_ih_print_status(adev); - tmp = RREG32(mmSRBM_SOFT_RESET); tmp |= srbm_soft_reset; dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); @@ -430,8 +398,6 @@ static int tonga_ih_soft_reset(void *handle) /* Wait a little for things to settle down */ udelay(50); - - tonga_ih_print_status(adev); } return 0; @@ -450,6 +416,7 @@ static int tonga_ih_set_powergating_state(void *handle, } const struct amd_ip_funcs tonga_ih_ip_funcs = { + .name = "tonga_ih", .early_init = tonga_ih_early_init, .late_init = NULL, .sw_init = tonga_ih_sw_init, @@ -461,7 +428,6 @@ const struct amd_ip_funcs tonga_ih_ip_funcs = { .is_idle = tonga_ih_is_idle, .wait_for_idle = tonga_ih_wait_for_idle, .soft_reset = tonga_ih_soft_reset, - .print_status = tonga_ih_print_status, .set_clockgating_state = tonga_ih_set_clockgating_state, .set_powergating_state = tonga_ih_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index cb4637531..f07551476 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -114,7 +114,7 @@ static int uvd_v4_2_sw_init(void *handle) ring = &adev->uvd.ring; sprintf(ring->name, "uvd"); - r = amdgpu_ring_init(adev, ring, 4096, CP_PACKET2, 0xf, + r = amdgpu_ring_init(adev, ring, 512, CP_PACKET2, 0xf, &adev->uvd.irq, 0, AMDGPU_RING_TYPE_UVD); return r; @@ -489,7 +489,8 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) * Write ring commands to execute the indirect buffer */ static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) { amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0)); amdgpu_ring_write(ring, ib->gpu_addr); @@ -559,12 +560,13 @@ static void uvd_v4_2_mc_resume(struct amdgpu_device *adev) WREG32(mmUVD_VCPU_CACHE_SIZE0, size); addr += size; - size = AMDGPU_UVD_STACK_SIZE >> 3; + size = AMDGPU_UVD_HEAP_SIZE >> 3; WREG32(mmUVD_VCPU_CACHE_OFFSET1, addr); WREG32(mmUVD_VCPU_CACHE_SIZE1, size); addr += size; - size = AMDGPU_UVD_HEAP_SIZE >> 3; + size = (AMDGPU_UVD_STACK_SIZE + + (AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles)) >> 3; WREG32(mmUVD_VCPU_CACHE_OFFSET2, addr); WREG32(mmUVD_VCPU_CACHE_SIZE2, size); @@ -679,117 +681,6 @@ static int uvd_v4_2_soft_reset(void *handle) return uvd_v4_2_start(adev); } -static void uvd_v4_2_print_status(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - 
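/*
 * Editor's sketch: the uvd_v4_2_mc_resume() hunk above swaps cache windows
 * 1 and 2, so the VCPU now sees firmware, then heap, then a stack window
 * that grows with the number of decode handles. (UVD 4.2 programs offsets
 * and sizes in 8-byte units, hence the >> 3 in the hunk.) The constants
 * below are made-up stand-ins for the AMDGPU_UVD_* sizes:
 */
#include <stdio.h>

#define FW_SIZE      (256 * 1024)   /* hypothetical firmware image size */
#define HEAP_SIZE    (256 * 1024)   /* AMDGPU_UVD_HEAP_SIZE stand-in */
#define STACK_SIZE   ( 64 * 1024)   /* AMDGPU_UVD_STACK_SIZE stand-in */
#define SESSION_SIZE ( 50 * 1024)   /* AMDGPU_UVD_SESSION_SIZE stand-in */

int main(void)
{
	unsigned int max_handles = 10;  /* adev->uvd.max_handles */
	unsigned int offset = FW_SIZE;  /* window 0 holds the firmware image */

	printf("cache1 (heap):  offset=0x%x size=0x%x\n", offset, HEAP_SIZE);
	offset += HEAP_SIZE;

	/* window 2 scales with how many sessions the block must hold */
	unsigned int size2 = STACK_SIZE + SESSION_SIZE * max_handles;
	printf("cache2 (stack): offset=0x%x size=0x%x\n", offset, size2);
	return 0;
}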
dev_info(adev->dev, "UVD 4.2 registers\n"); - dev_info(adev->dev, " UVD_SEMA_ADDR_LOW=0x%08X\n", - RREG32(mmUVD_SEMA_ADDR_LOW)); - dev_info(adev->dev, " UVD_SEMA_ADDR_HIGH=0x%08X\n", - RREG32(mmUVD_SEMA_ADDR_HIGH)); - dev_info(adev->dev, " UVD_SEMA_CMD=0x%08X\n", - RREG32(mmUVD_SEMA_CMD)); - dev_info(adev->dev, " UVD_GPCOM_VCPU_CMD=0x%08X\n", - RREG32(mmUVD_GPCOM_VCPU_CMD)); - dev_info(adev->dev, " UVD_GPCOM_VCPU_DATA0=0x%08X\n", - RREG32(mmUVD_GPCOM_VCPU_DATA0)); - dev_info(adev->dev, " UVD_GPCOM_VCPU_DATA1=0x%08X\n", - RREG32(mmUVD_GPCOM_VCPU_DATA1)); - dev_info(adev->dev, " UVD_ENGINE_CNTL=0x%08X\n", - RREG32(mmUVD_ENGINE_CNTL)); - dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_ADDR_CONFIG)); - dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_DB_ADDR_CONFIG)); - dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG)); - dev_info(adev->dev, " UVD_SEMA_CNTL=0x%08X\n", - RREG32(mmUVD_SEMA_CNTL)); - dev_info(adev->dev, " UVD_LMI_EXT40_ADDR=0x%08X\n", - RREG32(mmUVD_LMI_EXT40_ADDR)); - dev_info(adev->dev, " UVD_CTX_INDEX=0x%08X\n", - RREG32(mmUVD_CTX_INDEX)); - dev_info(adev->dev, " UVD_CTX_DATA=0x%08X\n", - RREG32(mmUVD_CTX_DATA)); - dev_info(adev->dev, " UVD_CGC_GATE=0x%08X\n", - RREG32(mmUVD_CGC_GATE)); - dev_info(adev->dev, " UVD_CGC_CTRL=0x%08X\n", - RREG32(mmUVD_CGC_CTRL)); - dev_info(adev->dev, " UVD_LMI_CTRL2=0x%08X\n", - RREG32(mmUVD_LMI_CTRL2)); - dev_info(adev->dev, " UVD_MASTINT_EN=0x%08X\n", - RREG32(mmUVD_MASTINT_EN)); - dev_info(adev->dev, " UVD_LMI_ADDR_EXT=0x%08X\n", - RREG32(mmUVD_LMI_ADDR_EXT)); - dev_info(adev->dev, " UVD_LMI_CTRL=0x%08X\n", - RREG32(mmUVD_LMI_CTRL)); - dev_info(adev->dev, " UVD_LMI_SWAP_CNTL=0x%08X\n", - RREG32(mmUVD_LMI_SWAP_CNTL)); - dev_info(adev->dev, " UVD_MP_SWAP_CNTL=0x%08X\n", - RREG32(mmUVD_MP_SWAP_CNTL)); - dev_info(adev->dev, " UVD_MPC_SET_MUXA0=0x%08X\n", - RREG32(mmUVD_MPC_SET_MUXA0)); - dev_info(adev->dev, " UVD_MPC_SET_MUXA1=0x%08X\n", - RREG32(mmUVD_MPC_SET_MUXA1)); - dev_info(adev->dev, " UVD_MPC_SET_MUXB0=0x%08X\n", - RREG32(mmUVD_MPC_SET_MUXB0)); - dev_info(adev->dev, " UVD_MPC_SET_MUXB1=0x%08X\n", - RREG32(mmUVD_MPC_SET_MUXB1)); - dev_info(adev->dev, " UVD_MPC_SET_MUX=0x%08X\n", - RREG32(mmUVD_MPC_SET_MUX)); - dev_info(adev->dev, " UVD_MPC_SET_ALU=0x%08X\n", - RREG32(mmUVD_MPC_SET_ALU)); - dev_info(adev->dev, " UVD_VCPU_CACHE_OFFSET0=0x%08X\n", - RREG32(mmUVD_VCPU_CACHE_OFFSET0)); - dev_info(adev->dev, " UVD_VCPU_CACHE_SIZE0=0x%08X\n", - RREG32(mmUVD_VCPU_CACHE_SIZE0)); - dev_info(adev->dev, " UVD_VCPU_CACHE_OFFSET1=0x%08X\n", - RREG32(mmUVD_VCPU_CACHE_OFFSET1)); - dev_info(adev->dev, " UVD_VCPU_CACHE_SIZE1=0x%08X\n", - RREG32(mmUVD_VCPU_CACHE_SIZE1)); - dev_info(adev->dev, " UVD_VCPU_CACHE_OFFSET2=0x%08X\n", - RREG32(mmUVD_VCPU_CACHE_OFFSET2)); - dev_info(adev->dev, " UVD_VCPU_CACHE_SIZE2=0x%08X\n", - RREG32(mmUVD_VCPU_CACHE_SIZE2)); - dev_info(adev->dev, " UVD_VCPU_CNTL=0x%08X\n", - RREG32(mmUVD_VCPU_CNTL)); - dev_info(adev->dev, " UVD_SOFT_RESET=0x%08X\n", - RREG32(mmUVD_SOFT_RESET)); - dev_info(adev->dev, " UVD_RBC_IB_BASE=0x%08X\n", - RREG32(mmUVD_RBC_IB_BASE)); - dev_info(adev->dev, " UVD_RBC_IB_SIZE=0x%08X\n", - RREG32(mmUVD_RBC_IB_SIZE)); - dev_info(adev->dev, " UVD_RBC_RB_BASE=0x%08X\n", - RREG32(mmUVD_RBC_RB_BASE)); - dev_info(adev->dev, " UVD_RBC_RB_RPTR=0x%08X\n", - RREG32(mmUVD_RBC_RB_RPTR)); - dev_info(adev->dev, " UVD_RBC_RB_WPTR=0x%08X\n", - RREG32(mmUVD_RBC_RB_WPTR)); - dev_info(adev->dev, " UVD_RBC_RB_WPTR_CNTL=0x%08X\n", - 
RREG32(mmUVD_RBC_RB_WPTR_CNTL)); - dev_info(adev->dev, " UVD_RBC_RB_CNTL=0x%08X\n", - RREG32(mmUVD_RBC_RB_CNTL)); - dev_info(adev->dev, " UVD_STATUS=0x%08X\n", - RREG32(mmUVD_STATUS)); - dev_info(adev->dev, " UVD_SEMA_TIMEOUT_STATUS=0x%08X\n", - RREG32(mmUVD_SEMA_TIMEOUT_STATUS)); - dev_info(adev->dev, " UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL=0x%08X\n", - RREG32(mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL)); - dev_info(adev->dev, " UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL=0x%08X\n", - RREG32(mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL)); - dev_info(adev->dev, " UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL=0x%08X\n", - RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL)); - dev_info(adev->dev, " UVD_CONTEXT_ID=0x%08X\n", - RREG32(mmUVD_CONTEXT_ID)); - dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_ADDR_CONFIG)); - dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_DB_ADDR_CONFIG)); - dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG)); - -} - static int uvd_v4_2_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, @@ -849,6 +740,7 @@ static int uvd_v4_2_set_powergating_state(void *handle, } const struct amd_ip_funcs uvd_v4_2_ip_funcs = { + .name = "uvd_v4_2", .early_init = uvd_v4_2_early_init, .late_init = NULL, .sw_init = uvd_v4_2_sw_init, @@ -860,7 +752,6 @@ const struct amd_ip_funcs uvd_v4_2_ip_funcs = { .is_idle = uvd_v4_2_is_idle, .wait_for_idle = uvd_v4_2_wait_for_idle, .soft_reset = uvd_v4_2_soft_reset, - .print_status = uvd_v4_2_print_status, .set_clockgating_state = uvd_v4_2_set_clockgating_state, .set_powergating_state = uvd_v4_2_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 16476d80f..e0a76a883 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -31,6 +31,7 @@ #include "uvd/uvd_5_0_sh_mask.h" #include "oss/oss_2_0_d.h" #include "oss/oss_2_0_sh_mask.h" +#include "vi.h" static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev); static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev); @@ -110,7 +111,7 @@ static int uvd_v5_0_sw_init(void *handle) ring = &adev->uvd.ring; sprintf(ring->name, "uvd"); - r = amdgpu_ring_init(adev, ring, 4096, CP_PACKET2, 0xf, + r = amdgpu_ring_init(adev, ring, 512, CP_PACKET2, 0xf, &adev->uvd.irq, 0, AMDGPU_RING_TYPE_UVD); return r; @@ -271,12 +272,13 @@ static void uvd_v5_0_mc_resume(struct amdgpu_device *adev) WREG32(mmUVD_VCPU_CACHE_SIZE0, size); offset += size; - size = AMDGPU_UVD_STACK_SIZE; + size = AMDGPU_UVD_HEAP_SIZE; WREG32(mmUVD_VCPU_CACHE_OFFSET1, offset >> 3); WREG32(mmUVD_VCPU_CACHE_SIZE1, size); offset += size; - size = AMDGPU_UVD_HEAP_SIZE; + size = AMDGPU_UVD_STACK_SIZE + + (AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles); WREG32(mmUVD_VCPU_CACHE_OFFSET2, offset >> 3); WREG32(mmUVD_VCPU_CACHE_SIZE2, size); @@ -537,7 +539,8 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) * Write ring commands to execute the indirect buffer */ static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) { amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); @@ -622,120 +625,6 @@ static int uvd_v5_0_soft_reset(void *handle) return uvd_v5_0_start(adev); } -static void uvd_v5_0_print_status(void *handle) -{ - struct amdgpu_device *adev = (struct 
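/*
 * Editor's sketch: the ring_emit_ib hunks in this patch widen the callback
 * to carry the VMID and a context-switch flag per indirect buffer. A trimmed
 * stand-in for the plumbing (names are illustrative, not the amdgpu types):
 */
#include <stdint.h>
#include <stdbool.h>

struct ib { uint64_t gpu_addr; uint32_t length_dw; };
struct ring;

/* widened hook: engines with no per-VMID state simply ignore the new args */
typedef void (*emit_ib_fn)(struct ring *ring, struct ib *ib,
                           unsigned int vm_id, bool ctx_switch);

struct ring { emit_ib_fn emit_ib; };

static void uvd_emit_ib_sketch(struct ring *ring, struct ib *ib,
                               unsigned int vm_id, bool ctx_switch)
{
	/* UVD only writes the IB base/size registers; vm_id and ctx_switch
	 * matter to engines such as GFX/SDMA that emit VM switches. */
	(void)ring; (void)ib; (void)vm_id; (void)ctx_switch;
}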
amdgpu_device *)handle; - dev_info(adev->dev, "UVD 5.0 registers\n"); - dev_info(adev->dev, " UVD_SEMA_ADDR_LOW=0x%08X\n", - RREG32(mmUVD_SEMA_ADDR_LOW)); - dev_info(adev->dev, " UVD_SEMA_ADDR_HIGH=0x%08X\n", - RREG32(mmUVD_SEMA_ADDR_HIGH)); - dev_info(adev->dev, " UVD_SEMA_CMD=0x%08X\n", - RREG32(mmUVD_SEMA_CMD)); - dev_info(adev->dev, " UVD_GPCOM_VCPU_CMD=0x%08X\n", - RREG32(mmUVD_GPCOM_VCPU_CMD)); - dev_info(adev->dev, " UVD_GPCOM_VCPU_DATA0=0x%08X\n", - RREG32(mmUVD_GPCOM_VCPU_DATA0)); - dev_info(adev->dev, " UVD_GPCOM_VCPU_DATA1=0x%08X\n", - RREG32(mmUVD_GPCOM_VCPU_DATA1)); - dev_info(adev->dev, " UVD_ENGINE_CNTL=0x%08X\n", - RREG32(mmUVD_ENGINE_CNTL)); - dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_ADDR_CONFIG)); - dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_DB_ADDR_CONFIG)); - dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG)); - dev_info(adev->dev, " UVD_SEMA_CNTL=0x%08X\n", - RREG32(mmUVD_SEMA_CNTL)); - dev_info(adev->dev, " UVD_LMI_EXT40_ADDR=0x%08X\n", - RREG32(mmUVD_LMI_EXT40_ADDR)); - dev_info(adev->dev, " UVD_CTX_INDEX=0x%08X\n", - RREG32(mmUVD_CTX_INDEX)); - dev_info(adev->dev, " UVD_CTX_DATA=0x%08X\n", - RREG32(mmUVD_CTX_DATA)); - dev_info(adev->dev, " UVD_CGC_GATE=0x%08X\n", - RREG32(mmUVD_CGC_GATE)); - dev_info(adev->dev, " UVD_CGC_CTRL=0x%08X\n", - RREG32(mmUVD_CGC_CTRL)); - dev_info(adev->dev, " UVD_LMI_CTRL2=0x%08X\n", - RREG32(mmUVD_LMI_CTRL2)); - dev_info(adev->dev, " UVD_MASTINT_EN=0x%08X\n", - RREG32(mmUVD_MASTINT_EN)); - dev_info(adev->dev, " UVD_LMI_ADDR_EXT=0x%08X\n", - RREG32(mmUVD_LMI_ADDR_EXT)); - dev_info(adev->dev, " UVD_LMI_CTRL=0x%08X\n", - RREG32(mmUVD_LMI_CTRL)); - dev_info(adev->dev, " UVD_LMI_SWAP_CNTL=0x%08X\n", - RREG32(mmUVD_LMI_SWAP_CNTL)); - dev_info(adev->dev, " UVD_MP_SWAP_CNTL=0x%08X\n", - RREG32(mmUVD_MP_SWAP_CNTL)); - dev_info(adev->dev, " UVD_MPC_SET_MUXA0=0x%08X\n", - RREG32(mmUVD_MPC_SET_MUXA0)); - dev_info(adev->dev, " UVD_MPC_SET_MUXA1=0x%08X\n", - RREG32(mmUVD_MPC_SET_MUXA1)); - dev_info(adev->dev, " UVD_MPC_SET_MUXB0=0x%08X\n", - RREG32(mmUVD_MPC_SET_MUXB0)); - dev_info(adev->dev, " UVD_MPC_SET_MUXB1=0x%08X\n", - RREG32(mmUVD_MPC_SET_MUXB1)); - dev_info(adev->dev, " UVD_MPC_SET_MUX=0x%08X\n", - RREG32(mmUVD_MPC_SET_MUX)); - dev_info(adev->dev, " UVD_MPC_SET_ALU=0x%08X\n", - RREG32(mmUVD_MPC_SET_ALU)); - dev_info(adev->dev, " UVD_VCPU_CACHE_OFFSET0=0x%08X\n", - RREG32(mmUVD_VCPU_CACHE_OFFSET0)); - dev_info(adev->dev, " UVD_VCPU_CACHE_SIZE0=0x%08X\n", - RREG32(mmUVD_VCPU_CACHE_SIZE0)); - dev_info(adev->dev, " UVD_VCPU_CACHE_OFFSET1=0x%08X\n", - RREG32(mmUVD_VCPU_CACHE_OFFSET1)); - dev_info(adev->dev, " UVD_VCPU_CACHE_SIZE1=0x%08X\n", - RREG32(mmUVD_VCPU_CACHE_SIZE1)); - dev_info(adev->dev, " UVD_VCPU_CACHE_OFFSET2=0x%08X\n", - RREG32(mmUVD_VCPU_CACHE_OFFSET2)); - dev_info(adev->dev, " UVD_VCPU_CACHE_SIZE2=0x%08X\n", - RREG32(mmUVD_VCPU_CACHE_SIZE2)); - dev_info(adev->dev, " UVD_VCPU_CNTL=0x%08X\n", - RREG32(mmUVD_VCPU_CNTL)); - dev_info(adev->dev, " UVD_SOFT_RESET=0x%08X\n", - RREG32(mmUVD_SOFT_RESET)); - dev_info(adev->dev, " UVD_LMI_RBC_IB_64BIT_BAR_LOW=0x%08X\n", - RREG32(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW)); - dev_info(adev->dev, " UVD_LMI_RBC_IB_64BIT_BAR_HIGH=0x%08X\n", - RREG32(mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH)); - dev_info(adev->dev, " UVD_RBC_IB_SIZE=0x%08X\n", - RREG32(mmUVD_RBC_IB_SIZE)); - dev_info(adev->dev, " UVD_LMI_RBC_RB_64BIT_BAR_LOW=0x%08X\n", - RREG32(mmUVD_LMI_RBC_RB_64BIT_BAR_LOW)); - dev_info(adev->dev, " 
UVD_LMI_RBC_RB_64BIT_BAR_HIGH=0x%08X\n", - RREG32(mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH)); - dev_info(adev->dev, " UVD_RBC_RB_RPTR=0x%08X\n", - RREG32(mmUVD_RBC_RB_RPTR)); - dev_info(adev->dev, " UVD_RBC_RB_WPTR=0x%08X\n", - RREG32(mmUVD_RBC_RB_WPTR)); - dev_info(adev->dev, " UVD_RBC_RB_WPTR_CNTL=0x%08X\n", - RREG32(mmUVD_RBC_RB_WPTR_CNTL)); - dev_info(adev->dev, " UVD_RBC_RB_CNTL=0x%08X\n", - RREG32(mmUVD_RBC_RB_CNTL)); - dev_info(adev->dev, " UVD_STATUS=0x%08X\n", - RREG32(mmUVD_STATUS)); - dev_info(adev->dev, " UVD_SEMA_TIMEOUT_STATUS=0x%08X\n", - RREG32(mmUVD_SEMA_TIMEOUT_STATUS)); - dev_info(adev->dev, " UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL=0x%08X\n", - RREG32(mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL)); - dev_info(adev->dev, " UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL=0x%08X\n", - RREG32(mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL)); - dev_info(adev->dev, " UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL=0x%08X\n", - RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL)); - dev_info(adev->dev, " UVD_CONTEXT_ID=0x%08X\n", - RREG32(mmUVD_CONTEXT_ID)); - dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_ADDR_CONFIG)); - dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_DB_ADDR_CONFIG)); - dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG)); -} - static int uvd_v5_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, @@ -754,14 +643,128 @@ static int uvd_v5_0_process_interrupt(struct amdgpu_device *adev, return 0; } +static void uvd_v5_0_set_sw_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data, data1, data2, suvd_flags; + + data = RREG32(mmUVD_CGC_CTRL); + data1 = RREG32(mmUVD_SUVD_CGC_GATE); + data2 = RREG32(mmUVD_SUVD_CGC_CTRL); + + data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK | + UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK); + + suvd_flags = UVD_SUVD_CGC_GATE__SRE_MASK | + UVD_SUVD_CGC_GATE__SIT_MASK | + UVD_SUVD_CGC_GATE__SMP_MASK | + UVD_SUVD_CGC_GATE__SCM_MASK | + UVD_SUVD_CGC_GATE__SDB_MASK; + + data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK | + (1 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_GATE_DLY_TIMER)) | + (4 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_OFF_DELAY)); + + data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK | + UVD_CGC_CTRL__UDEC_CM_MODE_MASK | + UVD_CGC_CTRL__UDEC_IT_MODE_MASK | + UVD_CGC_CTRL__UDEC_DB_MODE_MASK | + UVD_CGC_CTRL__UDEC_MP_MODE_MASK | + UVD_CGC_CTRL__SYS_MODE_MASK | + UVD_CGC_CTRL__UDEC_MODE_MASK | + UVD_CGC_CTRL__MPEG2_MODE_MASK | + UVD_CGC_CTRL__REGS_MODE_MASK | + UVD_CGC_CTRL__RBC_MODE_MASK | + UVD_CGC_CTRL__LMI_MC_MODE_MASK | + UVD_CGC_CTRL__LMI_UMC_MODE_MASK | + UVD_CGC_CTRL__IDCT_MODE_MASK | + UVD_CGC_CTRL__MPRD_MODE_MASK | + UVD_CGC_CTRL__MPC_MODE_MASK | + UVD_CGC_CTRL__LBSI_MODE_MASK | + UVD_CGC_CTRL__LRBBM_MODE_MASK | + UVD_CGC_CTRL__WCB_MODE_MASK | + UVD_CGC_CTRL__VCPU_MODE_MASK | + UVD_CGC_CTRL__JPEG_MODE_MASK | + UVD_CGC_CTRL__SCPU_MODE_MASK); + data2 &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK | + UVD_SUVD_CGC_CTRL__SIT_MODE_MASK | + UVD_SUVD_CGC_CTRL__SMP_MODE_MASK | + UVD_SUVD_CGC_CTRL__SCM_MODE_MASK | + UVD_SUVD_CGC_CTRL__SDB_MODE_MASK); + data1 |= suvd_flags; + + WREG32(mmUVD_CGC_CTRL, data); + WREG32(mmUVD_CGC_GATE, 0); + WREG32(mmUVD_SUVD_CGC_GATE, data1); + WREG32(mmUVD_SUVD_CGC_CTRL, data2); +} + +#if 0 +static void uvd_v5_0_set_hw_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data, data1, cgc_flags, suvd_flags; + + data = RREG32(mmUVD_CGC_GATE); + data1 = RREG32(mmUVD_SUVD_CGC_GATE); + + cgc_flags = UVD_CGC_GATE__SYS_MASK | + UVD_CGC_GATE__UDEC_MASK 
| + UVD_CGC_GATE__MPEG2_MASK | + UVD_CGC_GATE__RBC_MASK | + UVD_CGC_GATE__LMI_MC_MASK | + UVD_CGC_GATE__IDCT_MASK | + UVD_CGC_GATE__MPRD_MASK | + UVD_CGC_GATE__MPC_MASK | + UVD_CGC_GATE__LBSI_MASK | + UVD_CGC_GATE__LRBBM_MASK | + UVD_CGC_GATE__UDEC_RE_MASK | + UVD_CGC_GATE__UDEC_CM_MASK | + UVD_CGC_GATE__UDEC_IT_MASK | + UVD_CGC_GATE__UDEC_DB_MASK | + UVD_CGC_GATE__UDEC_MP_MASK | + UVD_CGC_GATE__WCB_MASK | + UVD_CGC_GATE__VCPU_MASK | + UVD_CGC_GATE__SCPU_MASK; + + suvd_flags = UVD_SUVD_CGC_GATE__SRE_MASK | + UVD_SUVD_CGC_GATE__SIT_MASK | + UVD_SUVD_CGC_GATE__SMP_MASK | + UVD_SUVD_CGC_GATE__SCM_MASK | + UVD_SUVD_CGC_GATE__SDB_MASK; + + data |= cgc_flags; + data1 |= suvd_flags; + + WREG32(mmUVD_CGC_GATE, data); + WREG32(mmUVD_SUVD_CGC_GATE, data1); +} +#endif + static int uvd_v5_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + static int curstate = -1; if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG)) return 0; + if (curstate == state) + return 0; + + curstate = state; + if (enable) { + /* disable HW gating and enable Sw gating */ + uvd_v5_0_set_sw_clock_gating(adev); + } else { + /* wait for STATUS to clear */ + if (uvd_v5_0_wait_for_idle(handle)) + return -EBUSY; + + /* enable HW gates because UVD is idle */ +/* uvd_v5_0_set_hw_clock_gating(adev); */ + } + return 0; } @@ -789,6 +792,7 @@ static int uvd_v5_0_set_powergating_state(void *handle, } const struct amd_ip_funcs uvd_v5_0_ip_funcs = { + .name = "uvd_v5_0", .early_init = uvd_v5_0_early_init, .late_init = NULL, .sw_init = uvd_v5_0_sw_init, @@ -800,7 +804,6 @@ const struct amd_ip_funcs uvd_v5_0_ip_funcs = { .is_idle = uvd_v5_0_is_idle, .wait_for_idle = uvd_v5_0_wait_for_idle, .soft_reset = uvd_v5_0_soft_reset, - .print_status = uvd_v5_0_print_status, .set_clockgating_state = uvd_v5_0_set_clockgating_state, .set_powergating_state = uvd_v5_0_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index d49379145..c9929d665 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -31,11 +31,15 @@ #include "uvd/uvd_6_0_sh_mask.h" #include "oss/oss_2_0_d.h" #include "oss/oss_2_0_sh_mask.h" +#include "smu/smu_7_1_3_d.h" +#include "smu/smu_7_1_3_sh_mask.h" +#include "vi.h" static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev); static void uvd_v6_0_set_irq_funcs(struct amdgpu_device *adev); static int uvd_v6_0_start(struct amdgpu_device *adev); static void uvd_v6_0_stop(struct amdgpu_device *adev); +static void uvd_v6_0_set_sw_clock_gating(struct amdgpu_device *adev); /** * uvd_v6_0_ring_get_rptr - get read pointer @@ -110,7 +114,7 @@ static int uvd_v6_0_sw_init(void *handle) ring = &adev->uvd.ring; sprintf(ring->name, "uvd"); - r = amdgpu_ring_init(adev, ring, 4096, CP_PACKET2, 0xf, + r = amdgpu_ring_init(adev, ring, 512, CP_PACKET2, 0xf, &adev->uvd.irq, 0, AMDGPU_RING_TYPE_UVD); return r; @@ -270,20 +274,24 @@ static void uvd_v6_0_mc_resume(struct amdgpu_device *adev) WREG32(mmUVD_VCPU_CACHE_SIZE0, size); offset += size; - size = AMDGPU_UVD_STACK_SIZE; + size = AMDGPU_UVD_HEAP_SIZE; WREG32(mmUVD_VCPU_CACHE_OFFSET1, offset >> 3); WREG32(mmUVD_VCPU_CACHE_SIZE1, size); offset += size; - size = AMDGPU_UVD_HEAP_SIZE; + size = AMDGPU_UVD_STACK_SIZE + + (AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles); WREG32(mmUVD_VCPU_CACHE_OFFSET2, offset >> 3); WREG32(mmUVD_VCPU_CACHE_SIZE2, size); 
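/*
 * Editor's sketch of the flow uvd_v5_0_set_clockgating_state() gains above:
 * cache the last requested state to skip repeats, switch to SW-controlled
 * gating on gate requests, and only move toward HW gating once the block is
 * idle (the HW path itself stays behind #if 0 in the patch). Note the driver
 * caches state in a function-local static, so it is per-function rather than
 * per-device; the sketch mirrors that choice.
 */
#include <stdbool.h>

enum cg { CG_UNGATE = 0, CG_GATE = 1 };

static bool block_idle(void)          { return true; /* wait_for_idle() stand-in */ }
static void set_sw_clock_gating(void) { /* program UVD_CGC_CTRL as in the hunk */ }

static int set_clockgating_sketch(enum cg state)
{
	static int curstate = -1;          /* -1 forces the first transition through */

	if (curstate == (int)state)
		return 0;                  /* no change: skip the register traffic */
	curstate = (int)state;

	if (state == CG_GATE) {
		set_sw_clock_gating();     /* dynamic SW gating while active */
	} else {
		if (!block_idle())
			return -16;        /* -EBUSY in the driver */
		/* HW gates would be re-enabled here; left disabled in the patch */
	}
	return 0;
}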
WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config); WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config); + + WREG32(mmUVD_GP_SCRATCH4, adev->uvd.max_handles); } +#if 0 static void cz_set_uvd_clock_gating_branches(struct amdgpu_device *adev, bool enable) { @@ -360,157 +368,7 @@ static void cz_set_uvd_clock_gating_branches(struct amdgpu_device *adev, WREG32(mmUVD_CGC_GATE, data); WREG32(mmUVD_SUVD_CGC_GATE, data1); } - -static void tonga_set_uvd_clock_gating_branches(struct amdgpu_device *adev, - bool enable) -{ - u32 data, data1; - - data = RREG32(mmUVD_CGC_GATE); - data1 = RREG32(mmUVD_SUVD_CGC_GATE); - if (enable) { - data |= UVD_CGC_GATE__SYS_MASK | - UVD_CGC_GATE__UDEC_MASK | - UVD_CGC_GATE__MPEG2_MASK | - UVD_CGC_GATE__RBC_MASK | - UVD_CGC_GATE__LMI_MC_MASK | - UVD_CGC_GATE__IDCT_MASK | - UVD_CGC_GATE__MPRD_MASK | - UVD_CGC_GATE__MPC_MASK | - UVD_CGC_GATE__LBSI_MASK | - UVD_CGC_GATE__LRBBM_MASK | - UVD_CGC_GATE__UDEC_RE_MASK | - UVD_CGC_GATE__UDEC_CM_MASK | - UVD_CGC_GATE__UDEC_IT_MASK | - UVD_CGC_GATE__UDEC_DB_MASK | - UVD_CGC_GATE__UDEC_MP_MASK | - UVD_CGC_GATE__WCB_MASK | - UVD_CGC_GATE__VCPU_MASK | - UVD_CGC_GATE__SCPU_MASK; - data1 |= UVD_SUVD_CGC_GATE__SRE_MASK | - UVD_SUVD_CGC_GATE__SIT_MASK | - UVD_SUVD_CGC_GATE__SMP_MASK | - UVD_SUVD_CGC_GATE__SCM_MASK | - UVD_SUVD_CGC_GATE__SDB_MASK; - } else { - data &= ~(UVD_CGC_GATE__SYS_MASK | - UVD_CGC_GATE__UDEC_MASK | - UVD_CGC_GATE__MPEG2_MASK | - UVD_CGC_GATE__RBC_MASK | - UVD_CGC_GATE__LMI_MC_MASK | - UVD_CGC_GATE__LMI_UMC_MASK | - UVD_CGC_GATE__IDCT_MASK | - UVD_CGC_GATE__MPRD_MASK | - UVD_CGC_GATE__MPC_MASK | - UVD_CGC_GATE__LBSI_MASK | - UVD_CGC_GATE__LRBBM_MASK | - UVD_CGC_GATE__UDEC_RE_MASK | - UVD_CGC_GATE__UDEC_CM_MASK | - UVD_CGC_GATE__UDEC_IT_MASK | - UVD_CGC_GATE__UDEC_DB_MASK | - UVD_CGC_GATE__UDEC_MP_MASK | - UVD_CGC_GATE__WCB_MASK | - UVD_CGC_GATE__VCPU_MASK | - UVD_CGC_GATE__SCPU_MASK); - data1 &= ~(UVD_SUVD_CGC_GATE__SRE_MASK | - UVD_SUVD_CGC_GATE__SIT_MASK | - UVD_SUVD_CGC_GATE__SMP_MASK | - UVD_SUVD_CGC_GATE__SCM_MASK | - UVD_SUVD_CGC_GATE__SDB_MASK); - } - WREG32(mmUVD_CGC_GATE, data); - WREG32(mmUVD_SUVD_CGC_GATE, data1); -} - -static void uvd_v6_0_set_uvd_dynamic_clock_mode(struct amdgpu_device *adev, - bool swmode) -{ - u32 data, data1 = 0, data2; - - /* Always un-gate UVD REGS bit */ - data = RREG32(mmUVD_CGC_GATE); - data &= ~(UVD_CGC_GATE__REGS_MASK); - WREG32(mmUVD_CGC_GATE, data); - - data = RREG32(mmUVD_CGC_CTRL); - data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK | - UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK); - data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK | - 1 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_GATE_DLY_TIMER) | - 4 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_OFF_DELAY); - - data2 = RREG32(mmUVD_SUVD_CGC_CTRL); - if (swmode) { - data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK | - UVD_CGC_CTRL__UDEC_CM_MODE_MASK | - UVD_CGC_CTRL__UDEC_IT_MODE_MASK | - UVD_CGC_CTRL__UDEC_DB_MODE_MASK | - UVD_CGC_CTRL__UDEC_MP_MODE_MASK | - UVD_CGC_CTRL__SYS_MODE_MASK | - UVD_CGC_CTRL__UDEC_MODE_MASK | - UVD_CGC_CTRL__MPEG2_MODE_MASK | - UVD_CGC_CTRL__REGS_MODE_MASK | - UVD_CGC_CTRL__RBC_MODE_MASK | - UVD_CGC_CTRL__LMI_MC_MODE_MASK | - UVD_CGC_CTRL__LMI_UMC_MODE_MASK | - UVD_CGC_CTRL__IDCT_MODE_MASK | - UVD_CGC_CTRL__MPRD_MODE_MASK | - UVD_CGC_CTRL__MPC_MODE_MASK | - UVD_CGC_CTRL__LBSI_MODE_MASK | - UVD_CGC_CTRL__LRBBM_MODE_MASK | - UVD_CGC_CTRL__WCB_MODE_MASK | - UVD_CGC_CTRL__VCPU_MODE_MASK | - UVD_CGC_CTRL__JPEG_MODE_MASK | - 
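/*
 * Editor's sketch of the mask/shift idiom the CGC_CTRL programming above
 * relies on: clear a multi-bit field with its mask, then OR the new value in
 * at the field's shift (REG_FIELD_SHIFT in the driver). Field positions here
 * are invented for illustration; the real ones come from uvd_*_sh_mask.h.
 */
#include <stdint.h>

#define CLK_GATE_DLY_TIMER__SHIFT 0
#define CLK_GATE_DLY_TIMER_MASK   (0xfu  << CLK_GATE_DLY_TIMER__SHIFT)
#define CLK_OFF_DELAY__SHIFT      4
#define CLK_OFF_DELAY_MASK        (0xffu << CLK_OFF_DELAY__SHIFT)
#define DYN_CLOCK_MODE_MASK       (1u << 16)

static uint32_t program_cgc_ctrl_sketch(uint32_t reg)
{
	/* wipe both delay fields before inserting the new values */
	reg &= ~(CLK_OFF_DELAY_MASK | CLK_GATE_DLY_TIMER_MASK);

	reg |= DYN_CLOCK_MODE_MASK
	     | (1u << CLK_GATE_DLY_TIMER__SHIFT)   /* gate-delay timer = 1 */
	     | (4u << CLK_OFF_DELAY__SHIFT);       /* clock-off delay  = 4 */
	return reg;
}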
UVD_CGC_CTRL__SCPU_MODE_MASK); - data1 |= UVD_CGC_CTRL2__DYN_OCLK_RAMP_EN_MASK | - UVD_CGC_CTRL2__DYN_RCLK_RAMP_EN_MASK; - data1 &= ~UVD_CGC_CTRL2__GATER_DIV_ID_MASK; - data1 |= 7 << REG_FIELD_SHIFT(UVD_CGC_CTRL2, GATER_DIV_ID); - data2 &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK | - UVD_SUVD_CGC_CTRL__SIT_MODE_MASK | - UVD_SUVD_CGC_CTRL__SMP_MODE_MASK | - UVD_SUVD_CGC_CTRL__SCM_MODE_MASK | - UVD_SUVD_CGC_CTRL__SDB_MODE_MASK); - } else { - data |= UVD_CGC_CTRL__UDEC_RE_MODE_MASK | - UVD_CGC_CTRL__UDEC_CM_MODE_MASK | - UVD_CGC_CTRL__UDEC_IT_MODE_MASK | - UVD_CGC_CTRL__UDEC_DB_MODE_MASK | - UVD_CGC_CTRL__UDEC_MP_MODE_MASK | - UVD_CGC_CTRL__SYS_MODE_MASK | - UVD_CGC_CTRL__UDEC_MODE_MASK | - UVD_CGC_CTRL__MPEG2_MODE_MASK | - UVD_CGC_CTRL__REGS_MODE_MASK | - UVD_CGC_CTRL__RBC_MODE_MASK | - UVD_CGC_CTRL__LMI_MC_MODE_MASK | - UVD_CGC_CTRL__LMI_UMC_MODE_MASK | - UVD_CGC_CTRL__IDCT_MODE_MASK | - UVD_CGC_CTRL__MPRD_MODE_MASK | - UVD_CGC_CTRL__MPC_MODE_MASK | - UVD_CGC_CTRL__LBSI_MODE_MASK | - UVD_CGC_CTRL__LRBBM_MODE_MASK | - UVD_CGC_CTRL__WCB_MODE_MASK | - UVD_CGC_CTRL__VCPU_MODE_MASK | - UVD_CGC_CTRL__SCPU_MODE_MASK; - data2 |= UVD_SUVD_CGC_CTRL__SRE_MODE_MASK | - UVD_SUVD_CGC_CTRL__SIT_MODE_MASK | - UVD_SUVD_CGC_CTRL__SMP_MODE_MASK | - UVD_SUVD_CGC_CTRL__SCM_MODE_MASK | - UVD_SUVD_CGC_CTRL__SDB_MODE_MASK; - } - WREG32(mmUVD_CGC_CTRL, data); - WREG32(mmUVD_SUVD_CGC_CTRL, data2); - - data = RREG32_UVD_CTX(ixUVD_CGC_CTRL2); - data &= ~(REG_FIELD_MASK(UVD_CGC_CTRL2, DYN_OCLK_RAMP_EN) | - REG_FIELD_MASK(UVD_CGC_CTRL2, DYN_RCLK_RAMP_EN) | - REG_FIELD_MASK(UVD_CGC_CTRL2, GATER_DIV_ID)); - data1 &= (REG_FIELD_MASK(UVD_CGC_CTRL2, DYN_OCLK_RAMP_EN) | - REG_FIELD_MASK(UVD_CGC_CTRL2, DYN_RCLK_RAMP_EN) | - REG_FIELD_MASK(UVD_CGC_CTRL2, GATER_DIV_ID)); - data |= data1; - WREG32_UVD_CTX(ixUVD_CGC_CTRL2, data); -} +#endif /** * uvd_v6_0_start - start UVD block @@ -538,11 +396,7 @@ static int uvd_v6_0_start(struct amdgpu_device *adev) /* Set dynamic clock gating in S/W control mode */ if (adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG) { - if (adev->flags & AMD_IS_APU) - cz_set_uvd_clock_gating_branches(adev, false); - else - tonga_set_uvd_clock_gating_branches(adev, false); - uvd_v6_0_set_uvd_dynamic_clock_mode(adev, true); + uvd_v6_0_set_sw_clock_gating(adev); } else { /* disable clock gating */ uint32_t data = RREG32(mmUVD_CGC_CTRL); @@ -777,7 +631,8 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) * Write ring commands to execute the indirect buffer */ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib) + struct amdgpu_ib *ib, + unsigned vm_id, bool ctx_switch) { amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); @@ -854,112 +709,6 @@ static int uvd_v6_0_soft_reset(void *handle) return uvd_v6_0_start(adev); } -static void uvd_v6_0_print_status(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - dev_info(adev->dev, "UVD 6.0 registers\n"); - dev_info(adev->dev, " UVD_SEMA_ADDR_LOW=0x%08X\n", - RREG32(mmUVD_SEMA_ADDR_LOW)); - dev_info(adev->dev, " UVD_SEMA_ADDR_HIGH=0x%08X\n", - RREG32(mmUVD_SEMA_ADDR_HIGH)); - dev_info(adev->dev, " UVD_SEMA_CMD=0x%08X\n", - RREG32(mmUVD_SEMA_CMD)); - dev_info(adev->dev, " UVD_GPCOM_VCPU_CMD=0x%08X\n", - RREG32(mmUVD_GPCOM_VCPU_CMD)); - dev_info(adev->dev, " UVD_GPCOM_VCPU_DATA0=0x%08X\n", - RREG32(mmUVD_GPCOM_VCPU_DATA0)); - dev_info(adev->dev, " UVD_GPCOM_VCPU_DATA1=0x%08X\n", - RREG32(mmUVD_GPCOM_VCPU_DATA1)); - dev_info(adev->dev, " 
UVD_ENGINE_CNTL=0x%08X\n", - RREG32(mmUVD_ENGINE_CNTL)); - dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_ADDR_CONFIG)); - dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_DB_ADDR_CONFIG)); - dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG)); - dev_info(adev->dev, " UVD_SEMA_CNTL=0x%08X\n", - RREG32(mmUVD_SEMA_CNTL)); - dev_info(adev->dev, " UVD_LMI_EXT40_ADDR=0x%08X\n", - RREG32(mmUVD_LMI_EXT40_ADDR)); - dev_info(adev->dev, " UVD_CTX_INDEX=0x%08X\n", - RREG32(mmUVD_CTX_INDEX)); - dev_info(adev->dev, " UVD_CTX_DATA=0x%08X\n", - RREG32(mmUVD_CTX_DATA)); - dev_info(adev->dev, " UVD_CGC_GATE=0x%08X\n", - RREG32(mmUVD_CGC_GATE)); - dev_info(adev->dev, " UVD_CGC_CTRL=0x%08X\n", - RREG32(mmUVD_CGC_CTRL)); - dev_info(adev->dev, " UVD_LMI_CTRL2=0x%08X\n", - RREG32(mmUVD_LMI_CTRL2)); - dev_info(adev->dev, " UVD_MASTINT_EN=0x%08X\n", - RREG32(mmUVD_MASTINT_EN)); - dev_info(adev->dev, " UVD_LMI_ADDR_EXT=0x%08X\n", - RREG32(mmUVD_LMI_ADDR_EXT)); - dev_info(adev->dev, " UVD_LMI_CTRL=0x%08X\n", - RREG32(mmUVD_LMI_CTRL)); - dev_info(adev->dev, " UVD_LMI_SWAP_CNTL=0x%08X\n", - RREG32(mmUVD_LMI_SWAP_CNTL)); - dev_info(adev->dev, " UVD_MP_SWAP_CNTL=0x%08X\n", - RREG32(mmUVD_MP_SWAP_CNTL)); - dev_info(adev->dev, " UVD_MPC_SET_MUXA0=0x%08X\n", - RREG32(mmUVD_MPC_SET_MUXA0)); - dev_info(adev->dev, " UVD_MPC_SET_MUXA1=0x%08X\n", - RREG32(mmUVD_MPC_SET_MUXA1)); - dev_info(adev->dev, " UVD_MPC_SET_MUXB0=0x%08X\n", - RREG32(mmUVD_MPC_SET_MUXB0)); - dev_info(adev->dev, " UVD_MPC_SET_MUXB1=0x%08X\n", - RREG32(mmUVD_MPC_SET_MUXB1)); - dev_info(adev->dev, " UVD_MPC_SET_MUX=0x%08X\n", - RREG32(mmUVD_MPC_SET_MUX)); - dev_info(adev->dev, " UVD_MPC_SET_ALU=0x%08X\n", - RREG32(mmUVD_MPC_SET_ALU)); - dev_info(adev->dev, " UVD_VCPU_CACHE_OFFSET0=0x%08X\n", - RREG32(mmUVD_VCPU_CACHE_OFFSET0)); - dev_info(adev->dev, " UVD_VCPU_CACHE_SIZE0=0x%08X\n", - RREG32(mmUVD_VCPU_CACHE_SIZE0)); - dev_info(adev->dev, " UVD_VCPU_CACHE_OFFSET1=0x%08X\n", - RREG32(mmUVD_VCPU_CACHE_OFFSET1)); - dev_info(adev->dev, " UVD_VCPU_CACHE_SIZE1=0x%08X\n", - RREG32(mmUVD_VCPU_CACHE_SIZE1)); - dev_info(adev->dev, " UVD_VCPU_CACHE_OFFSET2=0x%08X\n", - RREG32(mmUVD_VCPU_CACHE_OFFSET2)); - dev_info(adev->dev, " UVD_VCPU_CACHE_SIZE2=0x%08X\n", - RREG32(mmUVD_VCPU_CACHE_SIZE2)); - dev_info(adev->dev, " UVD_VCPU_CNTL=0x%08X\n", - RREG32(mmUVD_VCPU_CNTL)); - dev_info(adev->dev, " UVD_SOFT_RESET=0x%08X\n", - RREG32(mmUVD_SOFT_RESET)); - dev_info(adev->dev, " UVD_RBC_IB_SIZE=0x%08X\n", - RREG32(mmUVD_RBC_IB_SIZE)); - dev_info(adev->dev, " UVD_RBC_RB_RPTR=0x%08X\n", - RREG32(mmUVD_RBC_RB_RPTR)); - dev_info(adev->dev, " UVD_RBC_RB_WPTR=0x%08X\n", - RREG32(mmUVD_RBC_RB_WPTR)); - dev_info(adev->dev, " UVD_RBC_RB_WPTR_CNTL=0x%08X\n", - RREG32(mmUVD_RBC_RB_WPTR_CNTL)); - dev_info(adev->dev, " UVD_RBC_RB_CNTL=0x%08X\n", - RREG32(mmUVD_RBC_RB_CNTL)); - dev_info(adev->dev, " UVD_STATUS=0x%08X\n", - RREG32(mmUVD_STATUS)); - dev_info(adev->dev, " UVD_SEMA_TIMEOUT_STATUS=0x%08X\n", - RREG32(mmUVD_SEMA_TIMEOUT_STATUS)); - dev_info(adev->dev, " UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL=0x%08X\n", - RREG32(mmUVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL)); - dev_info(adev->dev, " UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL=0x%08X\n", - RREG32(mmUVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL)); - dev_info(adev->dev, " UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL=0x%08X\n", - RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL)); - dev_info(adev->dev, " UVD_CONTEXT_ID=0x%08X\n", - RREG32(mmUVD_CONTEXT_ID)); - 
dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_ADDR_CONFIG)); - dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_DB_ADDR_CONFIG)); - dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG)); -} - static int uvd_v6_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, @@ -978,25 +727,146 @@ static int uvd_v6_0_process_interrupt(struct amdgpu_device *adev, return 0; } +static void uvd_v6_0_set_sw_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data, data1, data2, suvd_flags; + + data = RREG32(mmUVD_CGC_CTRL); + data1 = RREG32(mmUVD_SUVD_CGC_GATE); + data2 = RREG32(mmUVD_SUVD_CGC_CTRL); + + data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK | + UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK); + + suvd_flags = UVD_SUVD_CGC_GATE__SRE_MASK | + UVD_SUVD_CGC_GATE__SIT_MASK | + UVD_SUVD_CGC_GATE__SMP_MASK | + UVD_SUVD_CGC_GATE__SCM_MASK | + UVD_SUVD_CGC_GATE__SDB_MASK; + + data |= UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK | + (1 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_GATE_DLY_TIMER)) | + (4 << REG_FIELD_SHIFT(UVD_CGC_CTRL, CLK_OFF_DELAY)); + + data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK | + UVD_CGC_CTRL__UDEC_CM_MODE_MASK | + UVD_CGC_CTRL__UDEC_IT_MODE_MASK | + UVD_CGC_CTRL__UDEC_DB_MODE_MASK | + UVD_CGC_CTRL__UDEC_MP_MODE_MASK | + UVD_CGC_CTRL__SYS_MODE_MASK | + UVD_CGC_CTRL__UDEC_MODE_MASK | + UVD_CGC_CTRL__MPEG2_MODE_MASK | + UVD_CGC_CTRL__REGS_MODE_MASK | + UVD_CGC_CTRL__RBC_MODE_MASK | + UVD_CGC_CTRL__LMI_MC_MODE_MASK | + UVD_CGC_CTRL__LMI_UMC_MODE_MASK | + UVD_CGC_CTRL__IDCT_MODE_MASK | + UVD_CGC_CTRL__MPRD_MODE_MASK | + UVD_CGC_CTRL__MPC_MODE_MASK | + UVD_CGC_CTRL__LBSI_MODE_MASK | + UVD_CGC_CTRL__LRBBM_MODE_MASK | + UVD_CGC_CTRL__WCB_MODE_MASK | + UVD_CGC_CTRL__VCPU_MODE_MASK | + UVD_CGC_CTRL__JPEG_MODE_MASK | + UVD_CGC_CTRL__SCPU_MODE_MASK | + UVD_CGC_CTRL__JPEG2_MODE_MASK); + data2 &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK | + UVD_SUVD_CGC_CTRL__SIT_MODE_MASK | + UVD_SUVD_CGC_CTRL__SMP_MODE_MASK | + UVD_SUVD_CGC_CTRL__SCM_MODE_MASK | + UVD_SUVD_CGC_CTRL__SDB_MODE_MASK); + data1 |= suvd_flags; + + WREG32(mmUVD_CGC_CTRL, data); + WREG32(mmUVD_CGC_GATE, 0); + WREG32(mmUVD_SUVD_CGC_GATE, data1); + WREG32(mmUVD_SUVD_CGC_CTRL, data2); +} + +#if 0 +static void uvd_v6_0_set_hw_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data, data1, cgc_flags, suvd_flags; + + data = RREG32(mmUVD_CGC_GATE); + data1 = RREG32(mmUVD_SUVD_CGC_GATE); + + cgc_flags = UVD_CGC_GATE__SYS_MASK | + UVD_CGC_GATE__UDEC_MASK | + UVD_CGC_GATE__MPEG2_MASK | + UVD_CGC_GATE__RBC_MASK | + UVD_CGC_GATE__LMI_MC_MASK | + UVD_CGC_GATE__IDCT_MASK | + UVD_CGC_GATE__MPRD_MASK | + UVD_CGC_GATE__MPC_MASK | + UVD_CGC_GATE__LBSI_MASK | + UVD_CGC_GATE__LRBBM_MASK | + UVD_CGC_GATE__UDEC_RE_MASK | + UVD_CGC_GATE__UDEC_CM_MASK | + UVD_CGC_GATE__UDEC_IT_MASK | + UVD_CGC_GATE__UDEC_DB_MASK | + UVD_CGC_GATE__UDEC_MP_MASK | + UVD_CGC_GATE__WCB_MASK | + UVD_CGC_GATE__VCPU_MASK | + UVD_CGC_GATE__SCPU_MASK | + UVD_CGC_GATE__JPEG_MASK | + UVD_CGC_GATE__JPEG2_MASK; + + suvd_flags = UVD_SUVD_CGC_GATE__SRE_MASK | + UVD_SUVD_CGC_GATE__SIT_MASK | + UVD_SUVD_CGC_GATE__SMP_MASK | + UVD_SUVD_CGC_GATE__SCM_MASK | + UVD_SUVD_CGC_GATE__SDB_MASK; + + data |= cgc_flags; + data1 |= suvd_flags; + + WREG32(mmUVD_CGC_GATE, data); + WREG32(mmUVD_SUVD_CGC_GATE, data1); +} +#endif + +static void uvd_v6_set_bypass_mode(struct amdgpu_device *adev, bool enable) +{ + u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL); + + if (enable) + tmp |= 
(GCK_DFS_BYPASS_CNTL__BYPASSDCLK_MASK | + GCK_DFS_BYPASS_CNTL__BYPASSVCLK_MASK); + else + tmp &= ~(GCK_DFS_BYPASS_CNTL__BYPASSDCLK_MASK | + GCK_DFS_BYPASS_CNTL__BYPASSVCLK_MASK); + + WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp); +} + static int uvd_v6_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + static int curstate = -1; + + if (adev->asic_type == CHIP_FIJI) + uvd_v6_set_bypass_mode(adev, enable); if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG)) return 0; + if (curstate == state) + return 0; + + curstate = state; if (enable) { - if (adev->flags & AMD_IS_APU) - cz_set_uvd_clock_gating_branches(adev, enable); - else - tonga_set_uvd_clock_gating_branches(adev, enable); - uvd_v6_0_set_uvd_dynamic_clock_mode(adev, true); + /* disable HW gating and enable Sw gating */ + uvd_v6_0_set_sw_clock_gating(adev); } else { - uint32_t data = RREG32(mmUVD_CGC_CTRL); - data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; - WREG32(mmUVD_CGC_CTRL, data); + /* wait for STATUS to clear */ + if (uvd_v6_0_wait_for_idle(handle)) + return -EBUSY; + + /* enable HW gates because UVD is idle */ +/* uvd_v6_0_set_hw_clock_gating(adev); */ } return 0; @@ -1026,6 +896,7 @@ static int uvd_v6_0_set_powergating_state(void *handle, } const struct amd_ip_funcs uvd_v6_0_ip_funcs = { + .name = "uvd_v6_0", .early_init = uvd_v6_0_early_init, .late_init = NULL, .sw_init = uvd_v6_0_sw_init, @@ -1037,7 +908,6 @@ const struct amd_ip_funcs uvd_v6_0_ip_funcs = { .is_idle = uvd_v6_0_is_idle, .wait_for_idle = uvd_v6_0_wait_for_idle, .soft_reset = uvd_v6_0_soft_reset, - .print_status = uvd_v6_0_print_status, .set_clockgating_state = uvd_v6_0_set_clockgating_state, .set_powergating_state = uvd_v6_0_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index c7e885bcf..45d92aceb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -44,7 +44,7 @@ static void vce_v2_0_mc_resume(struct amdgpu_device *adev); static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev); static void vce_v2_0_set_irq_funcs(struct amdgpu_device *adev); - +static int vce_v2_0_wait_for_idle(void *handle); /** * vce_v2_0_ring_get_rptr - get read pointer * @@ -201,14 +201,14 @@ static int vce_v2_0_sw_init(void *handle) ring = &adev->vce.ring[0]; sprintf(ring->name, "vce0"); - r = amdgpu_ring_init(adev, ring, 4096, VCE_CMD_NO_OP, 0xf, + r = amdgpu_ring_init(adev, ring, 512, VCE_CMD_NO_OP, 0xf, &adev->vce.irq, 0, AMDGPU_RING_TYPE_VCE); if (r) return r; ring = &adev->vce.ring[1]; sprintf(ring->name, "vce1"); - r = amdgpu_ring_init(adev, ring, 4096, VCE_CMD_NO_OP, 0xf, + r = amdgpu_ring_init(adev, ring, 512, VCE_CMD_NO_OP, 0xf, &adev->vce.irq, 0, AMDGPU_RING_TYPE_VCE); if (r) return r; @@ -240,7 +240,8 @@ static int vce_v2_0_hw_init(void *handle) r = vce_v2_0_start(adev); if (r) - return r; +/* this error mean vcpu not in running state, so just skip ring test, not stop driver initialize */ + return 0; ring = &adev->vce.ring[0]; ring->ready = true; @@ -318,7 +319,7 @@ static void vce_v2_0_set_sw_cg(struct amdgpu_device *adev, bool gated) WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp); WREG32(mmVCE_CGTT_CLK_OVERRIDE, 0); - } else { + } else { tmp = RREG32(mmVCE_CLOCK_GATING_B); tmp |= 0xe7; tmp &= ~0xe70000; @@ -339,6 +340,21 @@ static void vce_v2_0_set_dyn_cg(struct amdgpu_device *adev, bool gated) { u32 orig, tmp; + if (gated) { + if 
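/*
 * Editor's sketch: the new uvd_v6_set_bypass_mode() above routes DCLK/VCLK
 * around the DFS on Fiji before clocks are gated, then restores them on
 * ungate. The driver goes through RREG32_SMC/WREG32_SMC on
 * ixGCK_DFS_BYPASS_CNTL; the bit positions and plain variable below are
 * illustrative stand-ins.
 */
#include <stdint.h>
#include <stdbool.h>

#define BYPASSDCLK_MASK (1u << 0)   /* illustrative bit positions */
#define BYPASSVCLK_MASK (1u << 1)

static uint32_t gck_dfs_bypass_cntl;   /* ixGCK_DFS_BYPASS_CNTL stand-in */

static void set_bypass_mode_sketch(bool enable)
{
	uint32_t tmp = gck_dfs_bypass_cntl;

	if (enable)
		tmp |=  (BYPASSDCLK_MASK | BYPASSVCLK_MASK);  /* bypass while gated */
	else
		tmp &= ~(BYPASSDCLK_MASK | BYPASSVCLK_MASK);  /* normal DFS path */

	gck_dfs_bypass_cntl = tmp;
}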
(vce_v2_0_wait_for_idle(adev)) { + DRM_INFO("VCE is busy, Can't set clock gateing"); + return; + } + WREG32_P(mmVCE_VCPU_CNTL, 0, ~VCE_VCPU_CNTL__CLK_EN_MASK); + WREG32_P(mmVCE_SOFT_RESET, VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); + mdelay(100); + WREG32(mmVCE_STATUS, 0); + } else { + WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK, ~VCE_VCPU_CNTL__CLK_EN_MASK); + WREG32_P(mmVCE_SOFT_RESET, VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); + mdelay(100); + } + tmp = RREG32(mmVCE_CLOCK_GATING_B); tmp &= ~0x00060006; if (gated) { @@ -362,6 +378,7 @@ static void vce_v2_0_set_dyn_cg(struct amdgpu_device *adev, bool gated) if (gated) WREG32(mmVCE_CGTT_CLK_OVERRIDE, 0); + WREG32_P(mmVCE_SOFT_RESET, 0, ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK); } static void vce_v2_0_disable_cg(struct amdgpu_device *adev) @@ -478,75 +495,6 @@ static int vce_v2_0_soft_reset(void *handle) return vce_v2_0_start(adev); } -static void vce_v2_0_print_status(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - dev_info(adev->dev, "VCE 2.0 registers\n"); - dev_info(adev->dev, " VCE_STATUS=0x%08X\n", - RREG32(mmVCE_STATUS)); - dev_info(adev->dev, " VCE_VCPU_CNTL=0x%08X\n", - RREG32(mmVCE_VCPU_CNTL)); - dev_info(adev->dev, " VCE_VCPU_CACHE_OFFSET0=0x%08X\n", - RREG32(mmVCE_VCPU_CACHE_OFFSET0)); - dev_info(adev->dev, " VCE_VCPU_CACHE_SIZE0=0x%08X\n", - RREG32(mmVCE_VCPU_CACHE_SIZE0)); - dev_info(adev->dev, " VCE_VCPU_CACHE_OFFSET1=0x%08X\n", - RREG32(mmVCE_VCPU_CACHE_OFFSET1)); - dev_info(adev->dev, " VCE_VCPU_CACHE_SIZE1=0x%08X\n", - RREG32(mmVCE_VCPU_CACHE_SIZE1)); - dev_info(adev->dev, " VCE_VCPU_CACHE_OFFSET2=0x%08X\n", - RREG32(mmVCE_VCPU_CACHE_OFFSET2)); - dev_info(adev->dev, " VCE_VCPU_CACHE_SIZE2=0x%08X\n", - RREG32(mmVCE_VCPU_CACHE_SIZE2)); - dev_info(adev->dev, " VCE_SOFT_RESET=0x%08X\n", - RREG32(mmVCE_SOFT_RESET)); - dev_info(adev->dev, " VCE_RB_BASE_LO2=0x%08X\n", - RREG32(mmVCE_RB_BASE_LO2)); - dev_info(adev->dev, " VCE_RB_BASE_HI2=0x%08X\n", - RREG32(mmVCE_RB_BASE_HI2)); - dev_info(adev->dev, " VCE_RB_SIZE2=0x%08X\n", - RREG32(mmVCE_RB_SIZE2)); - dev_info(adev->dev, " VCE_RB_RPTR2=0x%08X\n", - RREG32(mmVCE_RB_RPTR2)); - dev_info(adev->dev, " VCE_RB_WPTR2=0x%08X\n", - RREG32(mmVCE_RB_WPTR2)); - dev_info(adev->dev, " VCE_RB_BASE_LO=0x%08X\n", - RREG32(mmVCE_RB_BASE_LO)); - dev_info(adev->dev, " VCE_RB_BASE_HI=0x%08X\n", - RREG32(mmVCE_RB_BASE_HI)); - dev_info(adev->dev, " VCE_RB_SIZE=0x%08X\n", - RREG32(mmVCE_RB_SIZE)); - dev_info(adev->dev, " VCE_RB_RPTR=0x%08X\n", - RREG32(mmVCE_RB_RPTR)); - dev_info(adev->dev, " VCE_RB_WPTR=0x%08X\n", - RREG32(mmVCE_RB_WPTR)); - dev_info(adev->dev, " VCE_CLOCK_GATING_A=0x%08X\n", - RREG32(mmVCE_CLOCK_GATING_A)); - dev_info(adev->dev, " VCE_CLOCK_GATING_B=0x%08X\n", - RREG32(mmVCE_CLOCK_GATING_B)); - dev_info(adev->dev, " VCE_CGTT_CLK_OVERRIDE=0x%08X\n", - RREG32(mmVCE_CGTT_CLK_OVERRIDE)); - dev_info(adev->dev, " VCE_UENC_CLOCK_GATING=0x%08X\n", - RREG32(mmVCE_UENC_CLOCK_GATING)); - dev_info(adev->dev, " VCE_UENC_REG_CLOCK_GATING=0x%08X\n", - RREG32(mmVCE_UENC_REG_CLOCK_GATING)); - dev_info(adev->dev, " VCE_SYS_INT_EN=0x%08X\n", - RREG32(mmVCE_SYS_INT_EN)); - dev_info(adev->dev, " VCE_LMI_CTRL2=0x%08X\n", - RREG32(mmVCE_LMI_CTRL2)); - dev_info(adev->dev, " VCE_LMI_CTRL=0x%08X\n", - RREG32(mmVCE_LMI_CTRL)); - dev_info(adev->dev, " VCE_LMI_VM_CTRL=0x%08X\n", - RREG32(mmVCE_LMI_VM_CTRL)); - dev_info(adev->dev, " VCE_LMI_SWAP_CNTL=0x%08X\n", - RREG32(mmVCE_LMI_SWAP_CNTL)); - 
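/*
 * Editor's sketch of the ordering vce_v2_0_set_dyn_cg() enforces above: only
 * gate a quiescent engine, hold the ECPU in soft reset while the clock bits
 * are reprogrammed, and release the reset last. Registers, bits, and delays
 * are stand-ins; the driver uses WREG32_P masks and mdelay(100).
 */
#include <stdint.h>
#include <stdbool.h>

#define CLK_EN          (1u << 0)   /* VCE_VCPU_CNTL__CLK_EN stand-in */
#define ECPU_SOFT_RESET (1u << 0)   /* VCE_SOFT_RESET__ECPU_SOFT_RESET stand-in */

static uint32_t vcpu_cntl, soft_reset_reg, vce_status;

static bool vce_idle(void)       { return true; /* wait_for_idle() stand-in */ }
static void sleep_ms(unsigned m) { (void)m;     /* mdelay() stand-in */ }

static void set_dyn_cg_sketch(bool gated)
{
	if (gated) {
		if (!vce_idle())
			return;                     /* refuse to gate a busy engine */
		vcpu_cntl &= ~CLK_EN;               /* stop the VCPU clock */
		soft_reset_reg |= ECPU_SOFT_RESET;  /* park the ECPU */
		sleep_ms(100);
		vce_status = 0;
	} else {
		vcpu_cntl |= CLK_EN;
		soft_reset_reg |= ECPU_SOFT_RESET;
		sleep_ms(100);
	}

	/* ...VCE_CLOCK_GATING_B and friends are reprogrammed here... */

	soft_reset_reg &= ~ECPU_SOFT_RESET;         /* finally let the ECPU run */
}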
dev_info(adev->dev, " VCE_LMI_SWAP_CNTL1=0x%08X\n", - RREG32(mmVCE_LMI_SWAP_CNTL1)); - dev_info(adev->dev, " VCE_LMI_CACHE_CTRL=0x%08X\n", - RREG32(mmVCE_LMI_CACHE_CTRL)); -} - static int vce_v2_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, @@ -619,6 +567,7 @@ static int vce_v2_0_set_powergating_state(void *handle, } const struct amd_ip_funcs vce_v2_0_ip_funcs = { + .name = "vce_v2_0", .early_init = vce_v2_0_early_init, .late_init = NULL, .sw_init = vce_v2_0_sw_init, @@ -630,7 +579,6 @@ const struct amd_ip_funcs vce_v2_0_ip_funcs = { .is_idle = vce_v2_0_is_idle, .wait_for_idle = vce_v2_0_wait_for_idle, .soft_reset = vce_v2_0_soft_reset, - .print_status = vce_v2_0_print_status, .set_clockgating_state = vce_v2_0_set_clockgating_state, .set_powergating_state = vce_v2_0_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index ce468ee5d..30e8099e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -40,9 +40,9 @@ #define GRBM_GFX_INDEX__VCE_INSTANCE__SHIFT 0x04 #define GRBM_GFX_INDEX__VCE_INSTANCE_MASK 0x10 -#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0 0x8616 -#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1 0x8617 -#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2 0x8618 +#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0 0x8616 +#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1 0x8617 +#define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2 0x8618 #define VCE_V3_0_FW_SIZE (384 * 1024) #define VCE_V3_0_STACK_SIZE (64 * 1024) @@ -315,9 +315,11 @@ static unsigned vce_v3_0_get_harvest_config(struct amdgpu_device *adev) { u32 tmp; - /* Fiji, Stoney are single pipe */ + /* Fiji, Stoney, Polaris10, Polaris11 are single pipe */ if ((adev->asic_type == CHIP_FIJI) || - (adev->asic_type == CHIP_STONEY)) + (adev->asic_type == CHIP_STONEY) || + (adev->asic_type == CHIP_POLARIS10) || + (adev->asic_type == CHIP_POLARIS11)) return AMDGPU_VCE_HARVEST_VCE1; /* Tonga and CZ are dual or single pipe */ @@ -381,14 +383,14 @@ static int vce_v3_0_sw_init(void *handle) ring = &adev->vce.ring[0]; sprintf(ring->name, "vce0"); - r = amdgpu_ring_init(adev, ring, 4096, VCE_CMD_NO_OP, 0xf, + r = amdgpu_ring_init(adev, ring, 512, VCE_CMD_NO_OP, 0xf, &adev->vce.irq, 0, AMDGPU_RING_TYPE_VCE); if (r) return r; ring = &adev->vce.ring[1]; sprintf(ring->name, "vce1"); - r = amdgpu_ring_init(adev, ring, 4096, VCE_CMD_NO_OP, 0xf, + r = amdgpu_ring_init(adev, ring, 512, VCE_CMD_NO_OP, 0xf, &adev->vce.irq, 0, AMDGPU_RING_TYPE_VCE); if (r) return r; @@ -564,73 +566,6 @@ static int vce_v3_0_soft_reset(void *handle) return vce_v3_0_start(adev); } -static void vce_v3_0_print_status(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - dev_info(adev->dev, "VCE 3.0 registers\n"); - dev_info(adev->dev, " VCE_STATUS=0x%08X\n", - RREG32(mmVCE_STATUS)); - dev_info(adev->dev, " VCE_VCPU_CNTL=0x%08X\n", - RREG32(mmVCE_VCPU_CNTL)); - dev_info(adev->dev, " VCE_VCPU_CACHE_OFFSET0=0x%08X\n", - RREG32(mmVCE_VCPU_CACHE_OFFSET0)); - dev_info(adev->dev, " VCE_VCPU_CACHE_SIZE0=0x%08X\n", - RREG32(mmVCE_VCPU_CACHE_SIZE0)); - dev_info(adev->dev, " VCE_VCPU_CACHE_OFFSET1=0x%08X\n", - RREG32(mmVCE_VCPU_CACHE_OFFSET1)); - dev_info(adev->dev, " VCE_VCPU_CACHE_SIZE1=0x%08X\n", - RREG32(mmVCE_VCPU_CACHE_SIZE1)); - dev_info(adev->dev, " VCE_VCPU_CACHE_OFFSET2=0x%08X\n", - RREG32(mmVCE_VCPU_CACHE_OFFSET2)); - dev_info(adev->dev, " VCE_VCPU_CACHE_SIZE2=0x%08X\n", - RREG32(mmVCE_VCPU_CACHE_SIZE2)); - dev_info(adev->dev, " 
VCE_SOFT_RESET=0x%08X\n", - RREG32(mmVCE_SOFT_RESET)); - dev_info(adev->dev, " VCE_RB_BASE_LO2=0x%08X\n", - RREG32(mmVCE_RB_BASE_LO2)); - dev_info(adev->dev, " VCE_RB_BASE_HI2=0x%08X\n", - RREG32(mmVCE_RB_BASE_HI2)); - dev_info(adev->dev, " VCE_RB_SIZE2=0x%08X\n", - RREG32(mmVCE_RB_SIZE2)); - dev_info(adev->dev, " VCE_RB_RPTR2=0x%08X\n", - RREG32(mmVCE_RB_RPTR2)); - dev_info(adev->dev, " VCE_RB_WPTR2=0x%08X\n", - RREG32(mmVCE_RB_WPTR2)); - dev_info(adev->dev, " VCE_RB_BASE_LO=0x%08X\n", - RREG32(mmVCE_RB_BASE_LO)); - dev_info(adev->dev, " VCE_RB_BASE_HI=0x%08X\n", - RREG32(mmVCE_RB_BASE_HI)); - dev_info(adev->dev, " VCE_RB_SIZE=0x%08X\n", - RREG32(mmVCE_RB_SIZE)); - dev_info(adev->dev, " VCE_RB_RPTR=0x%08X\n", - RREG32(mmVCE_RB_RPTR)); - dev_info(adev->dev, " VCE_RB_WPTR=0x%08X\n", - RREG32(mmVCE_RB_WPTR)); - dev_info(adev->dev, " VCE_CLOCK_GATING_A=0x%08X\n", - RREG32(mmVCE_CLOCK_GATING_A)); - dev_info(adev->dev, " VCE_CLOCK_GATING_B=0x%08X\n", - RREG32(mmVCE_CLOCK_GATING_B)); - dev_info(adev->dev, " VCE_UENC_CLOCK_GATING=0x%08X\n", - RREG32(mmVCE_UENC_CLOCK_GATING)); - dev_info(adev->dev, " VCE_UENC_REG_CLOCK_GATING=0x%08X\n", - RREG32(mmVCE_UENC_REG_CLOCK_GATING)); - dev_info(adev->dev, " VCE_SYS_INT_EN=0x%08X\n", - RREG32(mmVCE_SYS_INT_EN)); - dev_info(adev->dev, " VCE_LMI_CTRL2=0x%08X\n", - RREG32(mmVCE_LMI_CTRL2)); - dev_info(adev->dev, " VCE_LMI_CTRL=0x%08X\n", - RREG32(mmVCE_LMI_CTRL)); - dev_info(adev->dev, " VCE_LMI_VM_CTRL=0x%08X\n", - RREG32(mmVCE_LMI_VM_CTRL)); - dev_info(adev->dev, " VCE_LMI_SWAP_CNTL=0x%08X\n", - RREG32(mmVCE_LMI_SWAP_CNTL)); - dev_info(adev->dev, " VCE_LMI_SWAP_CNTL1=0x%08X\n", - RREG32(mmVCE_LMI_SWAP_CNTL1)); - dev_info(adev->dev, " VCE_LMI_CACHE_CTRL=0x%08X\n", - RREG32(mmVCE_LMI_CACHE_CTRL)); -} - static int vce_v3_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, @@ -739,6 +674,7 @@ static int vce_v3_0_set_powergating_state(void *handle, } const struct amd_ip_funcs vce_v3_0_ip_funcs = { + .name = "vce_v3_0", .early_init = vce_v3_0_early_init, .late_init = NULL, .sw_init = vce_v3_0_sw_init, @@ -750,7 +686,6 @@ const struct amd_ip_funcs vce_v3_0_ip_funcs = { .is_idle = vce_v3_0_is_idle, .wait_for_idle = vce_v3_0_wait_for_idle, .soft_reset = vce_v3_0_soft_reset, - .print_status = vce_v3_0_print_status, .set_clockgating_state = vce_v3_0_set_clockgating_state, .set_powergating_state = vce_v3_0_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 1c120efa2..d8fca2e11 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -78,6 +78,8 @@ #include "amdgpu_acp.h" #endif +/*(DEBLOBBED)*/ + /* * Indirect registers accessor */ @@ -276,6 +278,8 @@ static void vi_init_golden_registers(struct amdgpu_device *adev) stoney_mgcg_cgcg_init, (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init)); break; + case CHIP_POLARIS11: + case CHIP_POLARIS10: default: break; } @@ -414,11 +418,25 @@ static bool vi_read_bios_from_rom(struct amdgpu_device *adev, return true; } -static struct amdgpu_allowed_register_entry tonga_allowed_read_registers[] = { +static u32 vi_get_virtual_caps(struct amdgpu_device *adev) +{ + u32 caps = 0; + u32 reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER); + + if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, IOV_ENABLE)) + caps |= AMDGPU_VIRT_CAPS_SRIOV_EN; + + if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, FUNC_IDENTIFIER)) + caps |= AMDGPU_VIRT_CAPS_IS_VF; + + return caps; +} + +static const struct amdgpu_allowed_register_entry 
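/*
 * Editor's sketch: the new vi_get_virtual_caps() above reads one BIF
 * register that reports both whether SR-IOV is enabled and whether this
 * function is a VF, and folds that into two capability bits. Bit positions
 * are invented; the driver uses REG_GET_FIELD on mmBIF_IOV_FUNC_IDENTIFIER.
 */
#include <stdint.h>

#define IOV_ENABLE_BIT      (1u << 31)  /* hypothetical field positions */
#define FUNC_IDENTIFIER_BIT (1u << 0)

#define CAPS_SRIOV_EN (1u << 0)         /* AMDGPU_VIRT_CAPS_SRIOV_EN stand-in */
#define CAPS_IS_VF    (1u << 1)         /* AMDGPU_VIRT_CAPS_IS_VF stand-in */

static uint32_t get_virtual_caps_sketch(uint32_t bif_iov_func_identifier)
{
	uint32_t caps = 0;

	if (bif_iov_func_identifier & IOV_ENABLE_BIT)
		caps |= CAPS_SRIOV_EN;          /* virtualization is enabled */
	if (bif_iov_func_identifier & FUNC_IDENTIFIER_BIT)
		caps |= CAPS_IS_VF;             /* we are the virtual function */
	return caps;
}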
tonga_allowed_read_registers[] = { {mmGB_MACROTILE_MODE7, true}, }; -static struct amdgpu_allowed_register_entry cz_allowed_read_registers[] = { +static const struct amdgpu_allowed_register_entry cz_allowed_read_registers[] = { {mmGB_TILE_MODE7, true}, {mmGB_TILE_MODE12, true}, {mmGB_TILE_MODE17, true}, @@ -426,7 +444,7 @@ static struct amdgpu_allowed_register_entry cz_allowed_read_registers[] = { {mmGB_MACROTILE_MODE7, true}, }; -static struct amdgpu_allowed_register_entry vi_allowed_read_registers[] = { +static const struct amdgpu_allowed_register_entry vi_allowed_read_registers[] = { {mmGRBM_STATUS, false}, {mmGRBM_STATUS2, false}, {mmGRBM_STATUS_SE0, false}, @@ -525,8 +543,8 @@ static uint32_t vi_read_indexed_register(struct amdgpu_device *adev, u32 se_num, static int vi_read_register(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 reg_offset, u32 *value) { - struct amdgpu_allowed_register_entry *asic_register_table = NULL; - struct amdgpu_allowed_register_entry *asic_register_entry; + const struct amdgpu_allowed_register_entry *asic_register_table = NULL; + const struct amdgpu_allowed_register_entry *asic_register_entry; uint32_t size, i; *value = 0; @@ -537,6 +555,8 @@ static int vi_read_register(struct amdgpu_device *adev, u32 se_num, break; case CHIP_FIJI: case CHIP_TONGA: + case CHIP_POLARIS11: + case CHIP_POLARIS10: case CHIP_CARRIZO: case CHIP_STONEY: asic_register_table = cz_allowed_read_registers; @@ -907,6 +927,74 @@ static const struct amdgpu_ip_block_version fiji_ip_blocks[] = }, }; +static const struct amdgpu_ip_block_version polaris11_ip_blocks[] = +{ + /* ORDER MATTERS! */ + { + .type = AMD_IP_BLOCK_TYPE_COMMON, + .major = 2, + .minor = 0, + .rev = 0, + .funcs = &vi_common_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_GMC, + .major = 8, + .minor = 1, + .rev = 0, + .funcs = &gmc_v8_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_IH, + .major = 3, + .minor = 1, + .rev = 0, + .funcs = &tonga_ih_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_SMC, + .major = 7, + .minor = 2, + .rev = 0, + .funcs = &amdgpu_pp_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_DCE, + .major = 11, + .minor = 2, + .rev = 0, + .funcs = &dce_v11_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_GFX, + .major = 8, + .minor = 0, + .rev = 0, + .funcs = &gfx_v8_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_SDMA, + .major = 3, + .minor = 1, + .rev = 0, + .funcs = &sdma_v3_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_UVD, + .major = 6, + .minor = 3, + .rev = 0, + .funcs = &uvd_v6_0_ip_funcs, + }, + { + .type = AMD_IP_BLOCK_TYPE_VCE, + .major = 3, + .minor = 4, + .rev = 0, + .funcs = &vce_v3_0_ip_funcs, + }, +}; + static const struct amdgpu_ip_block_version cz_ip_blocks[] = { /* ORDER MATTERS! 
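/*
 * Editor's sketch of why the new polaris11_ip_blocks[] table above is marked
 * ORDER MATTERS: the common code walks the array front to back and invokes
 * each block's hooks through its funcs pointer, so COMMON/GMC/IH must come
 * before the engines that depend on them. Types are trimmed stand-ins for
 * struct amdgpu_ip_block_version.
 */
struct ip_funcs { int (*sw_init)(void *handle); };

struct ip_block_version {
	int type;                        /* AMD_IP_BLOCK_TYPE_* */
	int major, minor, rev;
	const struct ip_funcs *funcs;
};

static int init_ip_blocks_sketch(void *adev,
                                 const struct ip_block_version *blocks,
                                 int num_blocks)
{
	for (int i = 0; i < num_blocks; i++) {   /* strictly in array order */
		int r = blocks[i].funcs->sw_init(adev);
		if (r)
			return r;                /* abort on the first failure */
	}
	return 0;
}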
*/ @@ -999,6 +1087,11 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) adev->ip_blocks = tonga_ip_blocks; adev->num_ip_blocks = ARRAY_SIZE(tonga_ip_blocks); break; + case CHIP_POLARIS11: + case CHIP_POLARIS10: + adev->ip_blocks = polaris11_ip_blocks; + adev->num_ip_blocks = ARRAY_SIZE(polaris11_ip_blocks); + break; case CHIP_CARRIZO: case CHIP_STONEY: adev->ip_blocks = cz_ip_blocks; @@ -1036,7 +1129,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = .get_xclk = &vi_get_xclk, .set_uvd_clocks = &vi_set_uvd_clocks, .set_vce_clocks = &vi_set_vce_clocks, - .get_cu_info = &gfx_v8_0_get_cu_info, + .get_virtual_caps = &vi_get_virtual_caps, /* these should be moved to their own ip modules */ .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, .wait_for_mc_idle = &gmc_v8_0_mc_wait_for_idle, @@ -1076,18 +1169,68 @@ static int vi_common_early_init(void *handle) adev->external_rev_id = 0x1; break; case CHIP_FIJI: - adev->cg_flags = 0; + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_RLC_LS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_GFX_CGTS | + AMD_CG_SUPPORT_GFX_CGTS_LS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ROM_MGCG | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS; adev->pg_flags = 0; adev->external_rev_id = adev->rev_id + 0x3c; break; case CHIP_TONGA: - adev->cg_flags = 0; + adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG; adev->pg_flags = 0; adev->external_rev_id = adev->rev_id + 0x14; break; + case CHIP_POLARIS11: + adev->cg_flags = 0; + adev->pg_flags = 0; + adev->external_rev_id = adev->rev_id + 0x5A; + break; + case CHIP_POLARIS10: + adev->cg_flags = 0; + adev->pg_flags = 0; + adev->external_rev_id = adev->rev_id + 0x50; + break; case CHIP_CARRIZO: + adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG | + AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_RLC_LS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_GFX_CGTS | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_CGTS_LS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS; + adev->pg_flags = 0; + adev->external_rev_id = adev->rev_id + 0x1; + break; case CHIP_STONEY: - adev->cg_flags = 0; + adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG | + AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS; adev->pg_flags = 0; adev->external_rev_id = adev->rev_id + 0x1; break; @@ -1164,24 +1307,19 @@ static int vi_common_wait_for_idle(void *handle) return 0; } -static void vi_common_print_status(void *handle) -{ - return; -} - static int vi_common_soft_reset(void *handle) { return 0; } -static void fiji_update_bif_medium_grain_light_sleep(struct amdgpu_device *adev, - bool enable) +static void vi_update_bif_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) { uint32_t temp, data; temp = data = RREG32_PCIE(ixPCIE_CNTL2); - if (enable) + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK | PCIE_CNTL2__MST_MEM_LS_EN_MASK | PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK; @@ -1194,14 +1332,14 @@ static void fiji_update_bif_medium_grain_light_sleep(struct amdgpu_device *adev, WREG32_PCIE(ixPCIE_CNTL2, 
data); } -static void fiji_update_hdp_medium_grain_clock_gating(struct amdgpu_device *adev, - bool enable) +static void vi_update_hdp_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) { uint32_t temp, data; temp = data = RREG32(mmHDP_HOST_PATH_CNTL); - if (enable) + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG)) data &= ~HDP_HOST_PATH_CNTL__CLOCK_GATING_DIS_MASK; else data |= HDP_HOST_PATH_CNTL__CLOCK_GATING_DIS_MASK; @@ -1210,14 +1348,14 @@ static void fiji_update_hdp_medium_grain_clock_gating(struct amdgpu_device *adev WREG32(mmHDP_HOST_PATH_CNTL, data); } -static void fiji_update_hdp_light_sleep(struct amdgpu_device *adev, - bool enable) +static void vi_update_hdp_light_sleep(struct amdgpu_device *adev, + bool enable) { uint32_t temp, data; temp = data = RREG32(mmHDP_MEM_POWER_LS); - if (enable) + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) data |= HDP_MEM_POWER_LS__LS_ENABLE_MASK; else data &= ~HDP_MEM_POWER_LS__LS_ENABLE_MASK; @@ -1226,14 +1364,14 @@ static void fiji_update_hdp_light_sleep(struct amdgpu_device *adev, WREG32(mmHDP_MEM_POWER_LS, data); } -static void fiji_update_rom_medium_grain_clock_gating(struct amdgpu_device *adev, - bool enable) +static void vi_update_rom_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) { uint32_t temp, data; temp = data = RREG32_SMC(ixCGTT_ROM_CLK_CTRL0); - if (enable) + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG)) data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK); else @@ -1245,19 +1383,28 @@ static void fiji_update_rom_medium_grain_clock_gating(struct amdgpu_device *adev } static int vi_common_set_clockgating_state(void *handle, - enum amd_clockgating_state state) + enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; switch (adev->asic_type) { case CHIP_FIJI: - fiji_update_bif_medium_grain_light_sleep(adev, + vi_update_bif_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); - fiji_update_hdp_medium_grain_clock_gating(adev, + vi_update_hdp_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + vi_update_hdp_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + vi_update_rom_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; + case CHIP_CARRIZO: + case CHIP_STONEY: + vi_update_bif_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); - fiji_update_hdp_light_sleep(adev, + vi_update_hdp_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); - fiji_update_rom_medium_grain_clock_gating(adev, + vi_update_hdp_light_sleep(adev, state == AMD_CG_STATE_GATE ? 
true : false); break; default: @@ -1273,6 +1420,7 @@ static int vi_common_set_powergating_state(void *handle, } const struct amd_ip_funcs vi_common_ip_funcs = { + .name = "vi_common", .early_init = vi_common_early_init, .late_init = NULL, .sw_init = vi_common_sw_init, @@ -1284,7 +1432,6 @@ const struct amd_ip_funcs vi_common_ip_funcs = { .is_idle = vi_common_is_idle, .wait_for_idle = vi_common_wait_for_idle, .soft_reset = vi_common_soft_reset, - .print_status = vi_common_print_status, .set_clockgating_state = vi_common_set_clockgating_state, .set_powergating_state = vi_common_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h index ace49976f..062ee1676 100644 --- a/drivers/gpu/drm/amd/amdgpu/vid.h +++ b/drivers/gpu/drm/amd/amdgpu/vid.h @@ -54,7 +54,8 @@ #define AUD3_REGISTER_OFFSET (0x17b4 - 0x17a8) #define AUD4_REGISTER_OFFSET (0x17b8 - 0x17a8) #define AUD5_REGISTER_OFFSET (0x17bc - 0x17a8) -#define AUD6_REGISTER_OFFSET (0x17c4 - 0x17a8) +#define AUD6_REGISTER_OFFSET (0x17c0 - 0x17a8) +#define AUD7_REGISTER_OFFSET (0x17c4 - 0x17a8) /* hpd instance offsets */ #define HPD0_REGISTER_OFFSET (0x1898 - 0x1898) @@ -365,7 +366,7 @@ #define VCE_CMD_IB 0x00000002 #define VCE_CMD_FENCE 0x00000003 #define VCE_CMD_TRAP 0x00000004 -#define VCE_CMD_IB_AUTO 0x00000005 +#define VCE_CMD_IB_AUTO 0x00000005 #define VCE_CMD_SEMAPHORE 0x00000006 #endif -- cgit v1.2.3-54-g00ecf