diff options
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 53 |
1 files changed, 37 insertions, 16 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index d8fa08ff4971..f5e7767e7f11 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -100,6 +100,7 @@ static inline u32 lo32(u64 f) } while (0) static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer); +void __gk20a_mm_tlb_invalidate(struct vm_gk20a *vm); static struct mapped_buffer_node *find_mapped_buffer_locked( struct rb_root *root, u64 addr); static struct mapped_buffer_node *find_mapped_buffer_reverse_locked( @@ -1674,6 +1675,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, u32 page_size = gmmu_page_sizes[pgsz_idx]; u64 addr = 0; u64 space_to_skip = buffer_offset; + bool set_tlb_dirty = false; pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr, &pde_lo, &pde_hi); @@ -1718,6 +1720,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i; + set_tlb_dirty = true; + if (pde_i == pde_lo) pte_lo = pte_index_from_vaddr(vm, first_vaddr, pgsz_idx); @@ -1811,6 +1815,9 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, /* rewrite pde */ update_gmmu_pde_locked(vm, pde_i); + __gk20a_mm_tlb_invalidate(vm); + set_tlb_dirty = false; + free_gmmu_pages(vm, pte_ref_ptr, pte->sgt, vm->mm->page_table_sizing[pgsz_idx].order, pte->size); @@ -1820,8 +1827,10 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, } smp_mb(); - vm->tlb_dirty = true; - gk20a_dbg_fn("set tlb dirty"); + if (set_tlb_dirty) { + vm->tlb_dirty = true; + gk20a_dbg_fn("set tlb dirty"); + } return 0; @@ -2946,7 +2955,7 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, return 0; } -void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) +void __gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) { struct gk20a *g = gk20a_from_vm(vm); u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->pdes.sgt->sgl) >> 12); @@ -2956,22 +2965,9 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) gk20a_dbg_fn(""); - /* pagetables are considered sw states which are preserved after - prepare_poweroff. When gk20a deinit releases those pagetables, - common code in vm unmap path calls tlb invalidate that touches - hw. Use the power_on flag to skip tlb invalidation when gpu - power is turned off */ - if (!g->power_on) return; - /* No need to invalidate if tlb is clean */ - mutex_lock(&vm->update_gmmu_lock); - if (!vm->tlb_dirty) { - mutex_unlock(&vm->update_gmmu_lock); - return; - } - mutex_lock(&tlb_lock); do { data = gk20a_readl(g, fb_mmu_ctrl_r()); @@ -3010,8 +3006,33 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) out: mutex_unlock(&tlb_lock); +} + +void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm) +{ + struct gk20a *g = gk20a_from_vm(vm); + + gk20a_dbg_fn(""); + + /* pagetables are considered sw states which are preserved after + prepare_poweroff. When gk20a deinit releases those pagetables, + common code in vm unmap path calls tlb invalidate that touches + hw. Use the power_on flag to skip tlb invalidation when gpu + power is turned off */ + + if (!g->power_on) + return; + + /* No need to invalidate if tlb is clean */ + mutex_lock(&vm->update_gmmu_lock); + if (!vm->tlb_dirty) { + mutex_unlock(&vm->update_gmmu_lock); + return; + } vm->tlb_dirty = false; mutex_unlock(&vm->update_gmmu_lock); + + __gk20a_mm_tlb_invalidate(vm); } int gk20a_mm_suspend(struct gk20a *g) |