author      Kirill Artamonov <kartamonov@nvidia.com>    2014-02-26 22:58:43 +0200
committer   Winnie Hsu <whsu@nvidia.com>                2015-01-06 18:32:20 -0800
commit      67e2d427dfef361fce990aa11d7f6618bf9e368a (patch)
tree        32d64cb848df9ac28cbd1e1978bb552c6a886439
parent      f69b7093accdacfa653b4bd45d78e04a2676dc2a (diff)
gpu: nvgpu: implement mapping for sparse allocation
Implement support for partial buffer mappings.

Whitelist gr_pri_bes_crop_hww_esr, which is accessed by FECS during sparse
texture initialization.

bug 1456562
bug 1369014
bug 1361532

Change-Id: Ib0d1ec6438257ac14b40c8466b37856b67e7e34d
Signed-off-by: Kirill Artamonov <kartamonov@nvidia.com>
Reviewed-on: http://git-master/r/375012
(cherry picked from commit a24470f69961508412402b9b06d5b71fbf6f7549)
Reviewed-on: http://git-master/r/601754
Tested-by: Rajkumar Kasirajan <rkasirajan@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Winnie Hsu <whsu@nvidia.com>
-rw-r--r--  drivers/gpu/nvgpu/gk20a/as_gk20a.c   20
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.c    1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c   72
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h    8
-rw-r--r--  include/linux/nvhost_as_ioctl.h       5
5 files changed, 69 insertions(+), 37 deletions(-)
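
For orientation, a minimal userspace sketch of how the extended MAP_BUFFER_EX
interface introduced below could be used to map only part of a dma-buf at a
fixed GPU virtual address. The field names match nvhost_as_map_buffer_ex_args
as changed by this patch; the NVHOST_AS_IOCTL_MAP_BUFFER_EX request macro, the
address-space fd, and the offsets/sizes are assumptions for illustration, not
part of the change itself.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvhost_as_ioctl.h>

/*
 * Map a 1 MiB window starting 2 MiB into an existing dma-buf at a fixed
 * GPU virtual address. as_fd is assumed to be an open address-space device
 * fd; the ioctl request macro is assumed to be NVHOST_AS_IOCTL_MAP_BUFFER_EX.
 */
static int map_partial(int as_fd, int dmabuf_fd, uint64_t gpu_va)
{
	struct nvhost_as_map_buffer_ex_args args;

	memset(&args, 0, sizeof(args));
	args.flags = NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET;
	args.dmabuf_fd = dmabuf_fd;
	args.page_size = 0;            /* 0 := best fit to buffer */
	args.buffer_offset = 2 << 20;  /* skip the first 2 MiB of the buffer */
	args.mapping_size = 1 << 20;   /* map only a 1 MiB window */
	args.as_offset = gpu_va;       /* in/out: fixed GPU VA for the mapping */
	/* args.kind left 0 here; set it to the desired kind for the surface */

	if (ioctl(as_fd, NVHOST_AS_IOCTL_MAP_BUFFER_EX, &args) < 0)
		return -1;
	return 0;
}
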
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 1d604b83eefe..c8e71f158488 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -131,19 +131,14 @@ static int gk20a_as_ioctl_map_buffer_ex(
struct gk20a_as_share *as_share,
struct nvhost_as_map_buffer_ex_args *args)
{
- int i;
-
gk20a_dbg_fn("");
- /* ensure that padding is not set. this is required for ensuring that
- * we can safely use these fields later */
- for (i = 0; i < ARRAY_SIZE(args->padding); i++)
- if (args->padding[i])
- return -EINVAL;
-
return gk20a_vm_map_buffer(as_share, args->dmabuf_fd,
- &args->offset, args->flags,
- args->kind);
+ &args->as_offset, args->flags,
+ args->kind,
+ args->buffer_offset,
+ args->mapping_size
+ );
}
static int gk20a_as_ioctl_map_buffer(
@@ -152,8 +147,9 @@ static int gk20a_as_ioctl_map_buffer(
{
gk20a_dbg_fn("");
return gk20a_vm_map_buffer(as_share, args->nvmap_handle,
- &args->o_a.align,
- args->flags, NV_KIND_DEFAULT);
+ &args->o_a.offset,
+ args->flags, NV_KIND_DEFAULT,
+ 0, 0);
/* args->o_a.offset will be set if !err */
}
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index e195b433f58f..d6a846207db3 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -4429,6 +4429,7 @@ out:
static u32 wl_addr_gk20a[] = {
/* this list must be sorted (low to high) */
0x404468, /* gr_pri_mme_max_instructions */
+ 0x408944, /* gr_pri_bes_crop_hww_esr */
0x418800, /* gr_pri_gpcs_setup_debug */
0x419a04, /* gr_pri_gpcs_tpcs_tex_lod_dbg */
0x419a08, /* gr_pri_gpcs_tpcs_tex_samp_dbg */
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index b171915163e0..52c0f3c5978e 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -107,7 +107,7 @@ static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
u32 kind);
static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
enum gmmu_pgsz_gk20a pgsz_idx,
- struct sg_table *sgt,
+ struct sg_table *sgt, u64 buffer_offset,
u64 first_vaddr, u64 last_vaddr,
u8 kind_v, u32 ctag_offset, bool cacheable,
int rw_flag);
@@ -1055,7 +1055,7 @@ static int setup_buffer_kind_and_compression(struct device *d,
static int validate_fixed_buffer(struct vm_gk20a *vm,
struct buffer_attrs *bfr,
- u64 map_offset)
+ u64 map_offset, u64 map_size)
{
struct device *dev = dev_from_vm(vm);
struct vm_reserved_va_node *va_node;
@@ -1082,7 +1082,7 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
&va_node->va_buffers_list, va_buffers_list) {
s64 begin = max(buffer->addr, map_offset);
s64 end = min(buffer->addr +
- buffer->size, map_offset + bfr->size);
+ buffer->size, map_offset + map_size);
if (end - begin > 0) {
gk20a_warn(dev, "overlapping buffer map requested");
return -EINVAL;
@@ -1095,6 +1095,7 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
static u64 __locked_gmmu_map(struct vm_gk20a *vm,
u64 map_offset,
struct sg_table *sgt,
+ u64 buffer_offset,
u64 size,
int pgsz_idx,
u8 kind_v,
@@ -1137,6 +1138,7 @@ static u64 __locked_gmmu_map(struct vm_gk20a *vm,
err = update_gmmu_ptes_locked(vm, pgsz_idx,
sgt,
+ buffer_offset,
map_offset, map_offset + size - 1,
kind_v,
ctag_offset,
@@ -1180,6 +1182,7 @@ static void __locked_gmmu_unmap(struct vm_gk20a *vm,
err = update_gmmu_ptes_locked(vm,
pgsz_idx,
0, /* n/a for unmap */
+ 0,
vaddr,
vaddr + size - 1,
0, 0, false /* n/a for unmap */,
@@ -1272,7 +1275,9 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
int kind,
struct sg_table **sgt,
bool user_mapped,
- int rw_flag)
+ int rw_flag,
+ u64 buffer_offset,
+ u64 mapping_size)
{
struct gk20a *g = gk20a_from_vm(vm);
struct gk20a_allocator *ctag_allocator = &g->gr.comp_tags;
@@ -1322,6 +1327,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
buf_addr = (u64)sg_phys(bfr.sgt->sgl);
bfr.align = 1 << __ffs(buf_addr);
bfr.pgsz_idx = -1;
+ mapping_size = mapping_size ? mapping_size : bfr.size;
/* If FIX_OFFSET is set, pgsz is determined. Otherwise, select
* page size according to memory alignment */
@@ -1350,8 +1356,10 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
gmmu_page_size = gmmu_page_sizes[bfr.pgsz_idx];
/* Check if we should use a fixed offset for mapping this buffer */
+
if (flags & NVHOST_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
- err = validate_fixed_buffer(vm, &bfr, offset_align);
+ err = validate_fixed_buffer(vm, &bfr,
+ offset_align, mapping_size);
if (err)
goto clean_up;
@@ -1400,11 +1408,13 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
/* update gmmu ptes */
map_offset = __locked_gmmu_map(vm, map_offset,
bfr.sgt,
- bfr.size,
+ buffer_offset, /* sg offset */
+ mapping_size,
bfr.pgsz_idx,
bfr.kind_v,
bfr.ctag_offset,
flags, rw_flag);
+
if (!map_offset)
goto clean_up;
@@ -1447,7 +1457,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
mapped_buffer->dmabuf = dmabuf;
mapped_buffer->sgt = bfr.sgt;
mapped_buffer->addr = map_offset;
- mapped_buffer->size = bfr.size;
+ mapped_buffer->size = mapping_size;
mapped_buffer->pgsz_idx = bfr.pgsz_idx;
mapped_buffer->ctag_offset = bfr.ctag_offset;
mapped_buffer->ctag_lines = bfr.ctag_lines;
@@ -1518,6 +1528,7 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
mutex_lock(&vm->update_gmmu_lock);
vaddr = __locked_gmmu_map(vm, 0, /* already mapped? - No */
*sgt, /* sg table */
+ 0, /* sg offset */
size,
0, /* page size index = 0 i.e. SZ_4K */
0, /* kind */
@@ -1647,6 +1658,7 @@ u64 gk20a_mm_iova_addr(struct scatterlist *sgl)
static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
enum gmmu_pgsz_gk20a pgsz_idx,
struct sg_table *sgt,
+ u64 buffer_offset,
u64 first_vaddr, u64 last_vaddr,
u8 kind_v, u32 ctag_offset,
bool cacheable,
@@ -1661,6 +1673,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
u32 ctag_incr;
u32 page_size = gmmu_page_sizes[pgsz_idx];
u64 addr = 0;
+ u64 space_to_skip = buffer_offset;
pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr,
&pde_lo, &pde_hi);
@@ -1673,13 +1686,31 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
* comptags are active) is 128KB. We have checks elsewhere for that. */
ctag_incr = !!ctag_offset;
- if (sgt)
+ cur_offset = 0;
+ if (sgt) {
cur_chunk = sgt->sgl;
+ /* space_to_skip must be page aligned */
+ BUG_ON(space_to_skip & (page_size - 1));
+
+ while (space_to_skip > 0 && cur_chunk) {
+ u64 new_addr = gk20a_mm_iova_addr(cur_chunk);
+ if (new_addr) {
+ addr = new_addr;
+ addr += cur_offset;
+ }
+ cur_offset += page_size;
+ addr += page_size;
+ while (cur_chunk &&
+ cur_offset >= cur_chunk->length) {
+ cur_offset -= cur_chunk->length;
+ cur_chunk = sg_next(cur_chunk);
+ }
+ space_to_skip -= page_size;
+ }
+ }
else
cur_chunk = NULL;
- cur_offset = 0;
-
for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
u32 pte_lo, pte_hi;
u32 pte_cur;
@@ -1711,14 +1742,12 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
-
if (likely(sgt)) {
u64 new_addr = gk20a_mm_iova_addr(cur_chunk);
if (new_addr) {
addr = new_addr;
addr += cur_offset;
}
-
pte_w[0] = gmmu_pte_valid_true_f() |
gmmu_pte_address_sys_f(addr
>> gmmu_pte_address_shift_v());
@@ -1735,20 +1764,16 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
pte_w[1] |=
gmmu_pte_read_disable_true_f();
}
-
if (!cacheable)
pte_w[1] |= gmmu_pte_vol_true_f();
pte->ref_cnt++;
-
- gk20a_dbg(gpu_dbg_pte,
- "pte_cur=%d addr=0x%x,%08x kind=%d"
+ gk20a_dbg(gpu_dbg_pte, "pte_cur=%d addr=0x%x,%08x kind=%d"
" ctag=%d vol=%d refs=%d"
" [0x%08x,0x%08x]",
pte_cur, hi32(addr), lo32(addr),
kind_v, ctag, !cacheable,
pte->ref_cnt, pte_w[1], pte_w[0]);
-
ctag += ctag_incr;
cur_offset += page_size;
addr += page_size;
@@ -1924,7 +1949,7 @@ static int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
for (i = 0; i < num_pages; i++) {
u64 page_vaddr = __locked_gmmu_map(vm, vaddr,
- vm->zero_page_sgt, pgsz, pgsz_idx, 0, 0,
+ vm->zero_page_sgt, 0, pgsz, pgsz_idx, 0, 0,
NVHOST_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET,
gk20a_mem_flag_none);
@@ -2010,6 +2035,7 @@ void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset)
gk20a_err(d, "invalid addr to unmap 0x%llx", offset);
return;
}
+
kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
mutex_unlock(&vm->update_gmmu_lock);
}
@@ -2299,7 +2325,6 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
va_node->sparse = true;
}
-
list_add_tail(&va_node->reserved_va_list, &vm->reserved_va_list);
mutex_unlock(&vm->update_gmmu_lock);
@@ -2438,7 +2463,9 @@ int gk20a_vm_map_buffer(struct gk20a_as_share *as_share,
int dmabuf_fd,
u64 *offset_align,
u32 flags, /*NVHOST_AS_MAP_BUFFER_FLAGS_*/
- int kind)
+ int kind,
+ u64 buffer_offset,
+ u64 mapping_size)
{
int err = 0;
struct vm_gk20a *vm = as_share->vm;
@@ -2463,7 +2490,10 @@ int gk20a_vm_map_buffer(struct gk20a_as_share *as_share,
ret_va = gk20a_vm_map(vm, dmabuf, *offset_align,
flags, kind, NULL, true,
- gk20a_mem_flag_none);
+ gk20a_mem_flag_none,
+ buffer_offset,
+ mapping_size);
+
*offset_align = ret_va;
if (!ret_va) {
dma_buf_put(dmabuf);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 4dfc2b7d675d..8904eb46b34e 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -416,7 +416,9 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
int kind,
struct sg_table **sgt,
bool user_mapped,
- int rw_flag);
+ int rw_flag,
+ u64 buffer_offset,
+ u64 mapping_size);
/* unmap handle from kernel */
void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset);
@@ -457,7 +459,9 @@ int gk20a_vm_map_buffer(struct gk20a_as_share *as_share,
int dmabuf_fd,
u64 *offset_align,
u32 flags, /*NVHOST_AS_MAP_BUFFER_FLAGS_*/
- int kind);
+ int kind,
+ u64 buffer_offset,
+ u64 mapping_size);
int gk20a_vm_unmap_buffer(struct gk20a_as_share *, u64 offset);
int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev);
diff --git a/include/linux/nvhost_as_ioctl.h b/include/linux/nvhost_as_ioctl.h
index cb6e8fd3813c..56488c52976f 100644
--- a/include/linux/nvhost_as_ioctl.h
+++ b/include/linux/nvhost_as_ioctl.h
@@ -146,9 +146,10 @@ struct nvhost_as_map_buffer_ex_args {
__u32 dmabuf_fd; /* in */
__u32 page_size; /* inout, 0:= best fit to buffer */
- __u32 padding[4]; /* reserved for future usage */
+ __u64 buffer_offset; /* in, offset of mapped buffer region */
+ __u64 mapping_size; /* in, size of mapped buffer region */
- __u64 offset; /* in/out, we use this address if flag
+ __u64 as_offset; /* in/out, we use this address if flag
* FIXED_OFFSET is set. This will fail
* if space is not properly allocated. The
* actual virtual address to which we mapped