summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/radeon/si.c
diff options
context:
space:
mode:
authorChristian König <christian.koenig@amd.com>2014-05-10 12:17:55 +0200
committerAlex Deucher <alexander.deucher@amd.com>2014-06-02 10:25:02 -0400
commitec3dbbcbd7a6ee165ca7eeafec8dbc733901ab2f (patch)
treef0a715cec425e8815870874892cce639b25eda1f /drivers/gpu/drm/radeon/si.c
parent831719d62f692e28699a7acd7b441c6f0c01b6f7 (diff)
drm/radeon: add large PTE support for NI, SI and CIK v5
This patch implements support for VRAM page table entry compression. PTE construction is enhanced to identify physically contiguous page ranges and mark them in the PTE fragment field. L1/L2 TLB support is enabled for 64KB (SI/CIK) and 256KB (NI) PTE fragments, significantly improving TLB utilization for VRAM allocations. Linear store bandwidth is improved from 60GB/s to 125GB/s on Pitcairn. Unigine Heaven 3.0 sees an average improvement from 24.7 to 27.7 FPS on default settings at 1920x1200 resolution with vsync disabled. See main comment in radeon_vm.c for a technical description. v2 (chk): rebased and simplified. v3 (chk): add missing hw setup v4 (chk): rebased on current drm-fixes-3.15 v5 (chk): fix comments and commit text Signed-off-by: Jay Cornwall <jay@jcornwall.me> Signed-off-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/radeon/si.c')
-rw-r--r--drivers/gpu/drm/radeon/si.c5
1 files changed, 4 insertions, 1 deletions
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index ac708e006180..22ecbc07e9a6 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -4044,18 +4044,21 @@ static int si_pcie_gart_enable(struct radeon_device *rdev)
WREG32(MC_VM_MX_L1_TLB_CNTL,
(0xA << 7) |
ENABLE_L1_TLB |
+ ENABLE_L1_FRAGMENT_PROCESSING |
SYSTEM_ACCESS_MODE_NOT_IN_SYS |
ENABLE_ADVANCED_DRIVER_MODEL |
SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
/* Setup L2 cache */
WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
+ ENABLE_L2_FRAGMENT_PROCESSING |
ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
EFFECTIVE_L2_QUEUE_SIZE(7) |
CONTEXT1_IDENTITY_ACCESS_MODE(1));
WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
- L2_CACHE_BIGK_FRAGMENT_SIZE(0));
+ BANK_SELECT(4) |
+ L2_CACHE_BIGK_FRAGMENT_SIZE(4));
/* setup context0 */
WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);