author	Kirill Artamonov <kartamonov@nvidia.com>	2014-02-26 22:58:43 +0200
committer	Winnie Hsu <whsu@nvidia.com>	2015-01-06 18:32:20 -0800
commit	67e2d427dfef361fce990aa11d7f6618bf9e368a (patch)
tree	32d64cb848df9ac28cbd1e1978bb552c6a886439
parent	f69b7093accdacfa653b4bd45d78e04a2676dc2a (diff)
gpu: nvgpu: implement mapping for sparse allocation
Implement support for partial buffer mappings.
Whitelist gr_pri_bes_crop_hww_esr accessed by
fec during sparse texture initialization.
bug 1456562
bug 1369014
bug 1361532
Change-Id: Ib0d1ec6438257ac14b40c8466b37856b67e7e34d
Signed-off-by: Kirill Artamonov <kartamonov@nvidia.com>
Reviewed-on: http://git-master/r/375012
(cherry picked from commit a24470f69961508412402b9b06d5b71fbf6f7549)
Reviewed-on: http://git-master/r/601754
Tested-by: Rajkumar Kasirajan <rkasirajan@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Winnie Hsu <whsu@nvidia.com>
-rw-r--r--	drivers/gpu/nvgpu/gk20a/as_gk20a.c	20
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gr_gk20a.c	1
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c	72
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.h	8
-rw-r--r--	include/linux/nvhost_as_ioctl.h	5
5 files changed, 69 insertions, 37 deletions
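The user-visible change is in struct nvhost_as_map_buffer_ex_args: the old padding words become buffer_offset and mapping_size, letting userspace map a page-aligned window of a dma-buf instead of the whole buffer. Below is a minimal sketch of how a caller might use the extended args; the ioctl request name NVHOST_AS_IOCTL_MAP_BUFFER_EX, the helper name, and the zeroed kind/page_size defaults are assumptions for illustration, not part of this patch.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvhost_as_ioctl.h>

/* Map a 64 KB window starting 128 KB into a dma-buf at a caller-chosen
 * GPU virtual address. NVHOST_AS_IOCTL_MAP_BUFFER_EX is an assumed
 * request name; the fields set below are the ones this patch touches. */
static int map_partial(int as_fd, int dmabuf_fd, uint64_t gpu_va)
{
	struct nvhost_as_map_buffer_ex_args args;

	memset(&args, 0, sizeof(args));  /* kind/page_size left at defaults */
	args.dmabuf_fd     = dmabuf_fd;
	args.flags         = NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET;
	args.buffer_offset = 128 << 10;  /* in: page-aligned offset into buffer */
	args.mapping_size  = 64 << 10;   /* in: window size; 0 maps whole buffer */
	args.as_offset     = gpu_va;     /* in/out: requested, then actual VA */

	return ioctl(as_fd, NVHOST_AS_IOCTL_MAP_BUFFER_EX, &args);
}

Note that mapping_size == 0 falls back to the full buffer size (see the mapping_size ternary in gk20a_vm_map below), and buffer_offset must be page aligned (enforced by a BUG_ON in update_gmmu_ptes_locked).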
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 1d604b83eefe..c8e71f158488 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -131,19 +131,14 @@ static int gk20a_as_ioctl_map_buffer_ex(
 	struct gk20a_as_share *as_share,
 	struct nvhost_as_map_buffer_ex_args *args)
 {
-	int i;
-
 	gk20a_dbg_fn("");
 
-	/* ensure that padding is not set. this is required for ensuring that
-	 * we can safely use these fields later */
-	for (i = 0; i < ARRAY_SIZE(args->padding); i++)
-		if (args->padding[i])
-			return -EINVAL;
-
 	return gk20a_vm_map_buffer(as_share, args->dmabuf_fd,
-				   &args->offset, args->flags,
-				   args->kind);
+				   &args->as_offset, args->flags,
+				   args->kind,
+				   args->buffer_offset,
+				   args->mapping_size
+				   );
 }
 
 static int gk20a_as_ioctl_map_buffer(
@@ -152,8 +147,9 @@ static int gk20a_as_ioctl_map_buffer(
 {
 	gk20a_dbg_fn("");
 	return gk20a_vm_map_buffer(as_share, args->nvmap_handle,
-				   &args->o_a.align,
-				   args->flags, NV_KIND_DEFAULT);
+				   &args->o_a.offset,
+				   args->flags, NV_KIND_DEFAULT,
+				   0, 0);
 	/* args->o_a.offset will be set if !err */
 }
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index e195b433f58f..d6a846207db3 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -4429,6 +4429,7 @@ out:
 static u32 wl_addr_gk20a[] = {
 	/* this list must be sorted (low to high) */
 	0x404468, /* gr_pri_mme_max_instructions */
+	0x408944, /* gr_pri_bes_crop_hww_esr */
 	0x418800, /* gr_pri_gpcs_setup_debug */
 	0x419a04, /* gr_pri_gpcs_tpcs_tex_lod_dbg */
 	0x419a08, /* gr_pri_gpcs_tpcs_tex_samp_dbg */
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index b171915163e0..52c0f3c5978e 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -107,7 +107,7 @@ static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
 	u32 kind);
 static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 				   enum gmmu_pgsz_gk20a pgsz_idx,
-				   struct sg_table *sgt,
+				   struct sg_table *sgt, u64 buffer_offset,
 				   u64 first_vaddr, u64 last_vaddr,
 				   u8 kind_v, u32 ctag_offset, bool cacheable,
 				   int rw_flag);
@@ -1055,7 +1055,7 @@ static int setup_buffer_kind_and_compression(struct device *d,
 
 static int validate_fixed_buffer(struct vm_gk20a *vm,
 				 struct buffer_attrs *bfr,
-				 u64 map_offset)
+				 u64 map_offset, u64 map_size)
 {
 	struct device *dev = dev_from_vm(vm);
 	struct vm_reserved_va_node *va_node;
@@ -1082,7 +1082,7 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
 			      &va_node->va_buffers_list, va_buffers_list) {
 		s64 begin = max(buffer->addr, map_offset);
 		s64 end = min(buffer->addr +
-			      buffer->size, map_offset + bfr->size);
+			      buffer->size, map_offset + map_size);
 		if (end - begin > 0) {
 			gk20a_warn(dev, "overlapping buffer map requested");
 			return -EINVAL;
@@ -1095,6 +1095,7 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
 static u64 __locked_gmmu_map(struct vm_gk20a *vm,
 				u64 map_offset,
 				struct sg_table *sgt,
+				u64 buffer_offset,
 				u64 size,
 				int pgsz_idx,
 				u8 kind_v,
@@ -1137,6 +1138,7 @@ static u64 __locked_gmmu_map(struct vm_gk20a *vm,
 	err = update_gmmu_ptes_locked(vm, pgsz_idx,
 				      sgt,
+				      buffer_offset,
 				      map_offset, map_offset + size - 1,
 				      kind_v,
 				      ctag_offset,
@@ -1180,6 +1182,7 @@ static void __locked_gmmu_unmap(struct vm_gk20a *vm,
 	err = update_gmmu_ptes_locked(vm,
 				pgsz_idx,
 				0, /* n/a for unmap */
+				0,
 				vaddr,
 				vaddr + size - 1,
 				0, 0, false /* n/a for unmap */,
@@ -1272,7 +1275,9 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 			int kind,
 			struct sg_table **sgt,
 			bool user_mapped,
-			int rw_flag)
+			int rw_flag,
+			u64 buffer_offset,
+			u64 mapping_size)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	struct gk20a_allocator *ctag_allocator = &g->gr.comp_tags;
@@ -1322,6 +1327,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	buf_addr = (u64)sg_phys(bfr.sgt->sgl);
 	bfr.align = 1 << __ffs(buf_addr);
 	bfr.pgsz_idx = -1;
+	mapping_size = mapping_size ? mapping_size : bfr.size;
 
 	/* If FIX_OFFSET is set, pgsz is determined. Otherwise, select
 	 * page size according to memory alignment */
@@ -1350,8 +1356,10 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	gmmu_page_size = gmmu_page_sizes[bfr.pgsz_idx];
 
 	/* Check if we should use a fixed offset for mapping this buffer */
+
 	if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)  {
-		err = validate_fixed_buffer(vm, &bfr, offset_align);
+		err = validate_fixed_buffer(vm, &bfr,
+					    offset_align, mapping_size);
 		if (err)
 			goto clean_up;
@@ -1400,11 +1408,13 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	/* update gmmu ptes */
 	map_offset = __locked_gmmu_map(vm, map_offset,
 					bfr.sgt,
-					bfr.size,
+					buffer_offset, /* sg offset */
+					mapping_size,
 					bfr.pgsz_idx,
 					bfr.kind_v,
 					bfr.ctag_offset,
 					flags, rw_flag);
+
 	if (!map_offset)
 		goto clean_up;
@@ -1447,7 +1457,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	mapped_buffer->dmabuf      = dmabuf;
 	mapped_buffer->sgt         = bfr.sgt;
 	mapped_buffer->addr        = map_offset;
-	mapped_buffer->size        = bfr.size;
+	mapped_buffer->size        = mapping_size;
 	mapped_buffer->pgsz_idx    = bfr.pgsz_idx;
 	mapped_buffer->ctag_offset = bfr.ctag_offset;
 	mapped_buffer->ctag_lines  = bfr.ctag_lines;
@@ -1518,6 +1528,7 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 	mutex_lock(&vm->update_gmmu_lock);
 	vaddr = __locked_gmmu_map(vm, 0, /* already mapped? - No */
 				*sgt, /* sg table */
+				0, /* sg offset */
 				size,
 				0, /* page size index = 0 i.e. SZ_4K */
 				0, /* kind */
@@ -1647,6 +1658,7 @@ u64 gk20a_mm_iova_addr(struct scatterlist *sgl)
 static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 				   enum gmmu_pgsz_gk20a pgsz_idx,
 				   struct sg_table *sgt,
+				   u64 buffer_offset,
 				   u64 first_vaddr, u64 last_vaddr,
 				   u8 kind_v, u32 ctag_offset,
 				   bool cacheable,
@@ -1661,6 +1673,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 	u32 ctag_incr;
 	u32 page_size  = gmmu_page_sizes[pgsz_idx];
 	u64 addr = 0;
+	u64 space_to_skip = buffer_offset;
 
 	pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr,
 				   &pde_lo, &pde_hi);
@@ -1673,13 +1686,31 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 	 * comptags are active) is 128KB. We have checks elsewhere for that. */
 	ctag_incr = !!ctag_offset;
 
-	if (sgt)
+	cur_offset = 0;
+	if (sgt) {
 		cur_chunk = sgt->sgl;
+		/* space_to_skip must be page aligned */
+		BUG_ON(space_to_skip & (page_size - 1));
+
+		while (space_to_skip > 0 && cur_chunk) {
+			u64 new_addr = gk20a_mm_iova_addr(cur_chunk);
+			if (new_addr) {
+				addr = new_addr;
+				addr += cur_offset;
+			}
+			cur_offset += page_size;
+			addr += page_size;
+			while (cur_chunk &&
+				cur_offset >= cur_chunk->length) {
+				cur_offset -= cur_chunk->length;
+				cur_chunk = sg_next(cur_chunk);
+			}
+			space_to_skip -= page_size;
+		}
+	}
 	else
 		cur_chunk = NULL;
 
-	cur_offset = 0;
-
 	for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
 		u32 pte_lo, pte_hi;
 		u32 pte_cur;
@@ -1711,14 +1742,12 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 		gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d",
 			  pte_lo, pte_hi);
 		for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
-
 			if (likely(sgt)) {
 				u64 new_addr = gk20a_mm_iova_addr(cur_chunk);
 				if (new_addr) {
 					addr = new_addr;
 					addr += cur_offset;
 				}
-
 				pte_w[0] = gmmu_pte_valid_true_f() |
 					gmmu_pte_address_sys_f(addr
 						>> gmmu_pte_address_shift_v());
@@ -1735,20 +1764,16 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 					pte_w[1] |=
 						gmmu_pte_read_disable_true_f();
 				}
-
 				if (!cacheable)
 					pte_w[1] |= gmmu_pte_vol_true_f();
 
 				pte->ref_cnt++;
-
-				gk20a_dbg(gpu_dbg_pte,
-					"pte_cur=%d addr=0x%x,%08x kind=%d"
+				gk20a_dbg(gpu_dbg_pte, "pte_cur=%d addr=0x%x,%08x kind=%d"
 					" ctag=%d vol=%d refs=%d"
 					" [0x%08x,0x%08x]",
 					pte_cur, hi32(addr), lo32(addr),
 					kind_v, ctag, !cacheable,
 					pte->ref_cnt, pte_w[1], pte_w[0]);
-
 				ctag += ctag_incr;
 				cur_offset += page_size;
 				addr += page_size;
@@ -1924,7 +1949,7 @@ static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
 
 	for (i = 0; i < num_pages; i++) {
 		u64 page_vaddr = __locked_gmmu_map(vm, vaddr,
-			vm->zero_page_sgt, pgsz, pgsz_idx, 0, 0,
+			vm->zero_page_sgt, 0, pgsz, pgsz_idx, 0, 0,
 			NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET,
 			gk20a_mem_flag_none);
@@ -2010,6 +2035,7 @@ void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset)
 		gk20a_err(d, "invalid addr to unmap 0x%llx", offset);
 		return;
 	}
+
 	kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
 	mutex_unlock(&vm->update_gmmu_lock);
 }
@@ -2299,7 +2325,6 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 		va_node->sparse = true;
 	}
-
 	list_add_tail(&va_node->reserved_va_list, &vm->reserved_va_list);
 
 	mutex_unlock(&vm->update_gmmu_lock);
@@ -2438,7 +2463,9 @@ int gk20a_vm_map_buffer(struct gk20a_as_share *as_share,
 			int dmabuf_fd,
 			u64 *offset_align,
 			u32 flags, /*NVHOST_AS_MAP_BUFFER_FLAGS_*/
-			int kind)
+			int kind,
+			u64 buffer_offset,
+			u64 mapping_size)
 {
 	int err = 0;
 	struct vm_gk20a *vm = as_share->vm;
@@ -2463,7 +2490,10 @@ int gk20a_vm_map_buffer(struct gk20a_as_share *as_share,
 	ret_va = gk20a_vm_map(vm, dmabuf, *offset_align,
 			flags, kind, NULL, true,
-			gk20a_mem_flag_none);
+			gk20a_mem_flag_none,
+			buffer_offset,
+			mapping_size);
+
 	*offset_align = ret_va;
 	if (!ret_va) {
 		dma_buf_put(dmabuf);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 4dfc2b7d675d..8904eb46b34e 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -416,7 +416,9 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 		int kind,
 		struct sg_table **sgt,
 		bool user_mapped,
-		int rw_flag);
+		int rw_flag,
+		u64 buffer_offset,
+		u64 mapping_size);
 
 /* unmap handle from kernel */
 void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset);
@@ -457,7 +459,9 @@ int gk20a_vm_map_buffer(struct gk20a_as_share *as_share,
 			int dmabuf_fd,
 			u64 *offset_align,
 			u32 flags, /*NVHOST_AS_MAP_BUFFER_FLAGS_*/
-			int kind);
+			int kind,
+			u64 buffer_offset,
+			u64 mapping_size);
 int gk20a_vm_unmap_buffer(struct gk20a_as_share *, u64 offset);
 
 int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev);
diff --git a/include/linux/nvhost_as_ioctl.h b/include/linux/nvhost_as_ioctl.h
index cb6e8fd3813c..56488c52976f 100644
--- a/include/linux/nvhost_as_ioctl.h
+++ b/include/linux/nvhost_as_ioctl.h
@@ -146,9 +146,10 @@ struct nvhost_as_map_buffer_ex_args {
 	__u32 dmabuf_fd;	/* in */
 	__u32 page_size;	/* inout, 0:= best fit to buffer */
 
-	__u32 padding[4];		/* reserved for future usage */
+	__u64 buffer_offset;	/* in, offset of mapped buffer region */
+	__u64 mapping_size;	/* in, size of mapped buffer region */
 
-	__u64 offset;		/* in/out, we use this address if flag
+	__u64 as_offset;	/* in/out, we use this address if flag
 				 * FIXED_OFFSET is set. This will fail
 				 * if space is not properly allocated. The
 				 * actual virtual address to which we mapped
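The core of the kernel-side change is the scatterlist walk added at the top of update_gmmu_ptes_locked(): before any PTE is written, the code advances (cur_chunk, cur_offset) page by page until buffer_offset bytes of the backing buffer have been consumed, so PTE writing starts mid-buffer. The following is a simplified standalone model of that walk, with a plain array standing in for the kernel's struct scatterlist; all names here are illustrative, not from the patch.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* 'struct chunk' stands in for struct scatterlist: a run of
 * physically contiguous pages described by address and length. */
struct chunk {
	uint64_t iova;   /* bus address of this chunk */
	uint64_t length; /* chunk length in bytes */
};

/* Advance page by page until buffer_offset bytes have been skipped,
 * returning the chunk index and intra-chunk offset where PTE writing
 * should begin. Mirrors the while loop added to update_gmmu_ptes_locked(). */
static void skip_into_buffer(const struct chunk *chunks, size_t nchunks,
			     uint64_t buffer_offset, uint64_t page_size,
			     size_t *chunk_idx, uint64_t *cur_offset)
{
	uint64_t space_to_skip = buffer_offset;
	uint64_t off = 0;
	size_t i = 0;

	/* mirrors the BUG_ON in the patch: the skip must be page aligned */
	assert((space_to_skip & (page_size - 1)) == 0);

	while (space_to_skip > 0 && i < nchunks) {
		off += page_size;
		/* cross one or more chunk boundaries if needed */
		while (i < nchunks && off >= chunks[i].length) {
			off -= chunks[i].length;
			i++;
		}
		space_to_skip -= page_size;
	}
	*chunk_idx = i;
	*cur_offset = off;
}

The inner while handles chunks smaller than a page (or exact boundary crossings) exactly as the patch does, so the subsequent per-PTE loop can resume its usual "addr = chunk base + cur_offset" arithmetic from the middle of the scatter-gather list.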