diff options
Diffstat (limited to 'drivers/gpu/drm/radeon/cik.c')
-rw-r--r-- | drivers/gpu/drm/radeon/cik.c | 133 |
1 files changed, 81 insertions, 52 deletions
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index b625646bf3e2..377afa504d2b 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3483,7 +3483,7 @@ static void cik_gpu_init(struct radeon_device *rdev) u32 mc_shared_chmap, mc_arb_ramcfg; u32 hdp_host_path_cntl; u32 tmp; - int i, j, k; + int i, j; switch (rdev->family) { case CHIP_BONAIRE: @@ -3544,6 +3544,7 @@ static void cik_gpu_init(struct radeon_device *rdev) (rdev->pdev->device == 0x130B) || (rdev->pdev->device == 0x130E) || (rdev->pdev->device == 0x1315) || + (rdev->pdev->device == 0x1318) || (rdev->pdev->device == 0x131B)) { rdev->config.cik.max_cu_per_sh = 4; rdev->config.cik.max_backends_per_se = 1; @@ -3672,12 +3673,11 @@ static void cik_gpu_init(struct radeon_device *rdev) rdev->config.cik.max_sh_per_se, rdev->config.cik.max_backends_per_se); + rdev->config.cik.active_cus = 0; for (i = 0; i < rdev->config.cik.max_shader_engines; i++) { for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { - for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) { - rdev->config.cik.active_cus += - hweight32(cik_get_cu_active_bitmap(rdev, i, j)); - } + rdev->config.cik.active_cus += + hweight32(cik_get_cu_active_bitmap(rdev, i, j)); } } @@ -3801,7 +3801,7 @@ int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2)); radeon_ring_write(ring, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); for (i = 0; i < rdev->usec_timeout; i++) { tmp = RREG32(scratch); @@ -3920,6 +3920,17 @@ void cik_fence_compute_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, 0); } +/** + * cik_semaphore_ring_emit - emit a semaphore on the CP ring + * + * @rdev: radeon_device pointer + * @ring: radeon ring buffer object + * @semaphore: radeon semaphore object + * @emit_wait: Is this a sempahore wait? + * + * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP + * from running ahead of semaphore waits. + */ bool cik_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, @@ -3932,6 +3943,12 @@ bool cik_semaphore_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, lower_32_bits(addr)); radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel); + if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) { + /* Prevent the PFP from running ahead of the semaphore wait */ + radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); + radeon_ring_write(ring, 0x0); + } + return true; } @@ -3942,18 +3959,19 @@ bool cik_semaphore_ring_emit(struct radeon_device *rdev, * @src_offset: src GPU address * @dst_offset: dst GPU address * @num_gpu_pages: number of GPU pages to xfer - * @fence: radeon fence object + * @resv: reservation object to sync to * * Copy GPU paging using the CP DMA engine (CIK+). * Used by the radeon ttm implementation to move pages if * registered as the asic copy callback. */ -int cik_copy_cpdma(struct radeon_device *rdev, - uint64_t src_offset, uint64_t dst_offset, - unsigned num_gpu_pages, - struct radeon_fence **fence) +struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev, + uint64_t src_offset, uint64_t dst_offset, + unsigned num_gpu_pages, + struct reservation_object *resv) { struct radeon_semaphore *sem = NULL; + struct radeon_fence *fence; int ring_index = rdev->asic->copy.blit_ring_index; struct radeon_ring *ring = &rdev->ring[ring_index]; u32 size_in_bytes, cur_size_in_bytes, control; @@ -3963,7 +3981,7 @@ int cik_copy_cpdma(struct radeon_device *rdev, r = radeon_semaphore_create(rdev, &sem); if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); - return r; + return ERR_PTR(r); } size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT); @@ -3972,10 +3990,10 @@ int cik_copy_cpdma(struct radeon_device *rdev, if (r) { DRM_ERROR("radeon: moving bo (%d).\n", r); radeon_semaphore_free(rdev, &sem, NULL); - return r; + return ERR_PTR(r); } - radeon_semaphore_sync_to(sem, *fence); + radeon_semaphore_sync_resv(rdev, sem, resv, false); radeon_semaphore_sync_rings(rdev, sem, ring->idx); for (i = 0; i < num_loops; i++) { @@ -3997,17 +4015,17 @@ int cik_copy_cpdma(struct radeon_device *rdev, dst_offset += cur_size_in_bytes; } - r = radeon_fence_emit(rdev, fence, ring->idx); + r = radeon_fence_emit(rdev, &fence, ring->idx); if (r) { radeon_ring_unlock_undo(rdev, ring); radeon_semaphore_free(rdev, &sem, NULL); - return r; + return ERR_PTR(r); } - radeon_ring_unlock_commit(rdev, ring); - radeon_semaphore_free(rdev, &sem, *fence); + radeon_ring_unlock_commit(rdev, ring, false); + radeon_semaphore_free(rdev, &sem, fence); - return r; + return fence; } /* @@ -4103,7 +4121,7 @@ int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2); ib.ptr[2] = 0xDEADBEEF; ib.length_dw = 3; - r = radeon_ib_schedule(rdev, &ib, NULL); + r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { radeon_scratch_free(rdev, scratch); radeon_ib_free(rdev, &ib); @@ -4217,7 +4235,7 @@ static int cik_cp_gfx_load_microcode(struct radeon_device *rdev) WREG32(CP_PFP_UCODE_ADDR, 0); for (i = 0; i < fw_size; i++) WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); - WREG32(CP_PFP_UCODE_ADDR, 0); + WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version)); /* CE */ fw_data = (const __le32 *) @@ -4226,7 +4244,7 @@ static int cik_cp_gfx_load_microcode(struct radeon_device *rdev) WREG32(CP_CE_UCODE_ADDR, 0); for (i = 0; i < fw_size; i++) WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); - WREG32(CP_CE_UCODE_ADDR, 0); + WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version)); /* ME */ fw_data = (const __be32 *) @@ -4235,7 +4253,8 @@ static int cik_cp_gfx_load_microcode(struct radeon_device *rdev) WREG32(CP_ME_RAM_WADDR, 0); for (i = 0; i < fw_size; i++) WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++)); - WREG32(CP_ME_RAM_WADDR, 0); + WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version)); + WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version)); } else { const __be32 *fw_data; @@ -4261,10 +4280,6 @@ static int cik_cp_gfx_load_microcode(struct radeon_device *rdev) WREG32(CP_ME_RAM_WADDR, 0); } - WREG32(CP_PFP_UCODE_ADDR, 0); - WREG32(CP_CE_UCODE_ADDR, 0); - WREG32(CP_ME_RAM_WADDR, 0); - WREG32(CP_ME_RAM_RADDR, 0); return 0; } @@ -4324,7 +4339,7 @@ static int cik_cp_gfx_start(struct radeon_device *rdev) radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */ - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); return 0; } @@ -4546,7 +4561,7 @@ static int cik_cp_compute_load_microcode(struct radeon_device *rdev) WREG32(CP_MEC_ME1_UCODE_ADDR, 0); for (i = 0; i < fw_size; i++) WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++)); - WREG32(CP_MEC_ME1_UCODE_ADDR, 0); + WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version)); /* MEC2 */ if (rdev->family == CHIP_KAVERI) { @@ -4560,7 +4575,7 @@ static int cik_cp_compute_load_microcode(struct radeon_device *rdev) WREG32(CP_MEC_ME2_UCODE_ADDR, 0); for (i = 0; i < fw_size; i++) WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++)); - WREG32(CP_MEC_ME2_UCODE_ADDR, 0); + WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version)); } } else { const __be32 *fw_data; @@ -4672,7 +4687,7 @@ static int cik_mec_init(struct radeon_device *rdev) r = radeon_bo_create(rdev, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_GTT, 0, NULL, + RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL, &rdev->mec.hpd_eop_obj); if (r) { dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r); @@ -4786,7 +4801,7 @@ struct bonaire_mqd */ static int cik_cp_compute_resume(struct radeon_device *rdev) { - int r, i, idx; + int r, i, j, idx; u32 tmp; bool use_doorbell = true; u64 hqd_gpu_addr; @@ -4843,7 +4858,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev) sizeof(struct bonaire_mqd), PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT, 0, NULL, - &rdev->ring[idx].mqd_obj); + NULL, &rdev->ring[idx].mqd_obj); if (r) { dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r); return r; @@ -4905,7 +4920,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev) mqd->queue_state.cp_hqd_pq_wptr= 0; if (RREG32(CP_HQD_ACTIVE) & 1) { WREG32(CP_HQD_DEQUEUE_REQUEST, 1); - for (i = 0; i < rdev->usec_timeout; i++) { + for (j = 0; j < rdev->usec_timeout; j++) { if (!(RREG32(CP_HQD_ACTIVE) & 1)) break; udelay(1); @@ -5732,20 +5747,17 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev) WREG32(0x15D8, 0); WREG32(0x15DC, 0); - /* empty context1-15 */ - /* FIXME start with 4G, once using 2 level pt switch to full - * vm size space - */ + /* restore context1-15 */ /* set vm size, must be a multiple of 4 */ WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0); WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn); for (i = 1; i < 16; i++) { if (i < 8) WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2), - rdev->gart.table_addr >> 12); + rdev->vm_manager.saved_table_addr[i]); else WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2), - rdev->gart.table_addr >> 12); + rdev->vm_manager.saved_table_addr[i]); } /* enable context1-15 */ @@ -5810,6 +5822,17 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev) */ static void cik_pcie_gart_disable(struct radeon_device *rdev) { + unsigned i; + + for (i = 1; i < 16; ++i) { + uint32_t reg; + if (i < 8) + reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2); + else + reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2); + rdev->vm_manager.saved_table_addr[i] = RREG32(reg); + } + /* Disable all tables */ WREG32(VM_CONTEXT0_CNTL, 0); WREG32(VM_CONTEXT1_CNTL, 0); @@ -5958,14 +5981,14 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) /* update SH_MEM_* regs */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | WRITE_DATA_DST_SEL(0))); radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); radeon_ring_write(ring, 0); radeon_ring_write(ring, VMID(vm->id)); radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | WRITE_DATA_DST_SEL(0))); radeon_ring_write(ring, SH_MEM_BASES >> 2); radeon_ring_write(ring, 0); @@ -5976,7 +5999,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | WRITE_DATA_DST_SEL(0))); radeon_ring_write(ring, SRBM_GFX_CNTL >> 2); radeon_ring_write(ring, 0); @@ -5987,7 +6010,7 @@ void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm) /* bits 0-15 are the VM contexts0-15 */ radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | WRITE_DATA_DST_SEL(0))); radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); radeon_ring_write(ring, 0); @@ -6201,7 +6224,7 @@ static int cik_rlc_resume(struct radeon_device *rdev) WREG32(RLC_GPM_UCODE_ADDR, 0); for (i = 0; i < size; i++) WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); - WREG32(RLC_GPM_UCODE_ADDR, 0); + WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version)); } else { const __be32 *fw_data; @@ -7726,17 +7749,17 @@ static inline u32 cik_get_ih_wptr(struct radeon_device *rdev) wptr = RREG32(IH_RB_WPTR); if (wptr & RB_OVERFLOW) { + wptr &= ~RB_OVERFLOW; /* When a ring buffer overflow happen start parsing interrupt * from the last not overwritten vector (wptr + 16). Hopefully * this should allow us to catchup. */ - dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n", - wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask); + dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", + wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask); rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask; tmp = RREG32(IH_RB_CNTL); tmp |= IH_WPTR_OVERFLOW_CLEAR; WREG32(IH_RB_CNTL, tmp); - wptr &= ~RB_OVERFLOW; } return (wptr & rdev->ih.ptr_mask); } @@ -8226,15 +8249,17 @@ restart_ih: /* wptr/rptr are in bytes! */ rptr += 16; rptr &= rdev->ih.ptr_mask; + WREG32(IH_RB_RPTR, rptr); } if (queue_hotplug) schedule_work(&rdev->hotplug_work); - if (queue_reset) - schedule_work(&rdev->reset_work); + if (queue_reset) { + rdev->needs_reset = true; + wake_up_all(&rdev->fence_queue); + } if (queue_thermal) schedule_work(&rdev->pm.dpm.thermal.work); rdev->ih.rptr = rptr; - WREG32(IH_RB_RPTR, rdev->ih.rptr); atomic_set(&rdev->ih.lock, 0); /* make sure wptr hasn't changed while processing */ @@ -9538,6 +9563,9 @@ static void cik_pcie_gen3_enable(struct radeon_device *rdev) int ret, i; u16 tmp16; + if (pci_is_root_bus(rdev->pdev->bus)) + return; + if (radeon_pcie_gen2 == 0) return; @@ -9764,7 +9792,8 @@ static void cik_program_aspm(struct radeon_device *rdev) if (orig != data) WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data); - if (!disable_clkreq) { + if (!disable_clkreq && + !pci_is_root_bus(rdev->pdev->bus)) { struct pci_dev *root = rdev->pdev->bus->self; u32 lnkcap; |